diff --git a/EdgeImpulse.EI-SDK.pdsc b/EdgeImpulse.EI-SDK.pdsc new file mode 100644 index 0000000..e24b269 --- /dev/null +++ b/EdgeImpulse.EI-SDK.pdsc @@ -0,0 +1,563 @@
[563-line CMSIS-Pack description (XML) whose markup was stripped during extraction. Recoverable content: vendor EdgeImpulse, pack name EI-SDK, license LICENSE-apache-2.0.txt, description "Edge Impulse SDK", contact hello@edgeimpulse.com, a release entry for EI-SDK, the conditions "True if using one of the Cortex-M core", "True if Cortex-M core == TRUE and either GCC or ARMCC and device running in little-endian byte ordering", and "True if CMSIS Core and CMSIS DSP and CMSIS NN are in use", and an "Edge Impulse SDK" component. The element names, attributes, and file lists are not recoverable.]
diff --git a/LICENSE-apache-2.0.txt b/LICENSE-apache-2.0.txt new file mode 100644 index 0000000..0cdd12c --- /dev/null +++ b/LICENSE-apache-2.0.txt @@ -0,0 +1,165 @@ +Apache License +Version 2.0, January 2004 +http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + +"License" shall mean the terms and conditions for use, reproduction, and +distribution as defined by Sections 1 through 9 of this document. + +"Licensor" shall mean the copyright owner or entity authorized by the copyright +owner that is granting the License. + +"Legal Entity" shall mean the union of the acting entity and all other entities +that control, are controlled by, or are under common control with that entity. +For the purposes of this definition, "control" means (i) the power, direct or +indirect, to cause the direction or management of such entity, whether by +contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the +outstanding shares, or (iii) beneficial ownership of such entity. + +"You" (or "Your") shall mean an individual or Legal Entity exercising +permissions granted by this License. + +"Source" form shall mean the preferred form for making modifications, including +but not limited to software source code, documentation source, and configuration +files. + +"Object" form shall mean any form resulting from mechanical transformation or +translation of a Source form, including but not limited to compiled object code, +generated documentation, and conversions to other media types. + +"Work" shall mean the work of authorship, whether in Source or Object form, made +available under the License, as indicated by a copyright notice that is included +in or attached to the work (an example is provided in the Appendix below).
+ +"Derivative Works" shall mean any work, whether in Source or Object form, that +is based on (or derived from) the Work and for which the editorial revisions, +annotations, elaborations, or other modifications represent, as a whole, an +original work of authorship. For the purposes of this License, Derivative Works +shall not include works that remain separable from, or merely link (or bind by +name) to the interfaces of, the Work and Derivative Works thereof. + +"Contribution" shall mean any work of authorship, including the original version +of the Work and any modifications or additions to that Work or Derivative Works +thereof, that is intentionally submitted to Licensor for inclusion in the Work +by the copyright owner or by an individual or Legal Entity authorized to submit +on behalf of the copyright owner. For the purposes of this definition, +"submitted" means any form of electronic, verbal, or written communication sent +to the Licensor or its representatives, including but not limited to +communication on electronic mailing lists, source code control systems, and +issue tracking systems that are managed by, or on behalf of, the Licensor for +the purpose of discussing and improving the Work, but excluding communication +that is conspicuously marked or otherwise designated in writing by the copyright +owner as "Not a Contribution." + +"Contributor" shall mean Licensor and any individual or Legal Entity on behalf +of whom a Contribution has been received by Licensor and subsequently +incorporated within the Work. + +2. Grant of Copyright License. + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable copyright license to reproduce, prepare Derivative Works of, +publicly display, publicly perform, sublicense, and distribute the Work and such +Derivative Works in Source or Object form. + +3. Grant of Patent License. + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable (except as stated in this section) patent license to make, have +made, use, offer to sell, sell, import, and otherwise transfer the Work, where +such license applies only to those patent claims licensable by such Contributor +that are necessarily infringed by their Contribution(s) alone or by combination +of their Contribution(s) with the Work to which such Contribution(s) was +submitted. If You institute patent litigation against any entity (including a +cross-claim or counterclaim in a lawsuit) alleging that the Work or a +Contribution incorporated within the Work constitutes direct or contributory +patent infringement, then any patent licenses granted to You under this License +for that Work shall terminate as of the date such litigation is filed. + +4. Redistribution. 
+ +You may reproduce and distribute copies of the Work or Derivative Works thereof +in any medium, with or without modifications, and in Source or Object form, +provided that You meet the following conditions: + +You must give any other recipients of the Work or Derivative Works a copy of +this License; and +You must cause any modified files to carry prominent notices stating that You +changed the files; and +You must retain, in the Source form of any Derivative Works that You distribute, +all copyright, patent, trademark, and attribution notices from the Source form +of the Work, excluding those notices that do not pertain to any part of the +Derivative Works; and +If the Work includes a "NOTICE" text file as part of its distribution, then any +Derivative Works that You distribute must include a readable copy of the +attribution notices contained within such NOTICE file, excluding those notices +that do not pertain to any part of the Derivative Works, in at least one of the +following places: within a NOTICE text file distributed as part of the +Derivative Works; within the Source form or documentation, if provided along +with the Derivative Works; or, within a display generated by the Derivative +Works, if and wherever such third-party notices normally appear. The contents of +the NOTICE file are for informational purposes only and do not modify the +License. You may add Your own attribution notices within Derivative Works that +You distribute, alongside or as an addendum to the NOTICE text from the Work, +provided that such additional attribution notices cannot be construed as +modifying the License. +You may add Your own copyright statement to Your modifications and may provide +additional or different license terms and conditions for use, reproduction, or +distribution of Your modifications, or for any such Derivative Works as a whole, +provided Your use, reproduction, and distribution of the Work otherwise complies +with the conditions stated in this License. + +5. Submission of Contributions. + +Unless You explicitly state otherwise, any Contribution intentionally submitted +for inclusion in the Work by You to the Licensor shall be under the terms and +conditions of this License, without any additional terms or conditions. +Notwithstanding the above, nothing herein shall supersede or modify the terms of +any separate license agreement you may have executed with Licensor regarding +such Contributions. + +6. Trademarks. + +This License does not grant permission to use the trade names, trademarks, +service marks, or product names of the Licensor, except as required for +reasonable and customary use in describing the origin of the Work and +reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. + +Unless required by applicable law or agreed to in writing, Licensor provides the +Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, +including, without limitation, any warranties or conditions of TITLE, +NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are +solely responsible for determining the appropriateness of using or +redistributing the Work and assume any risks associated with Your exercise of +permissions under this License. + +8. Limitation of Liability. 
+ +In no event and under no legal theory, whether in tort (including negligence), +contract, or otherwise, unless required by applicable law (such as deliberate +and grossly negligent acts) or agreed to in writing, shall any Contributor be +liable to You for damages, including any direct, indirect, special, incidental, +or consequential damages of any character arising as a result of this License or +out of the use or inability to use the Work (including but not limited to +damages for loss of goodwill, work stoppage, computer failure or malfunction, or +any and all other commercial damages or losses), even if such Contributor has +been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. + +While redistributing the Work or Derivative Works thereof, You may choose to +offer, and charge a fee for, acceptance of support, warranty, indemnity, or +other liability obligations and/or rights consistent with this License. However, +in accepting such obligations, You may act only on Your own behalf and on Your +sole responsibility, not on behalf of any other Contributor, and only if You +agree to indemnify, defend, and hold each Contributor harmless for any liability +incurred by, or claims asserted against, such Contributor by reason of your +accepting any such warranty or additional liability. \ No newline at end of file diff --git a/README.md b/README.md index e827415..d635729 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,9 @@ -# edge-impulse-sdk-package -Edge Impulse SDK Open CMSIS package +# Edge Impulse DSP and Inferencing SDK + +Portable library for digital signal processing and machine learning inferencing. This repository contains the C++ device-side implementation of both the processing (DSP) and learning blocks in [Edge Impulse](https://www.edgeimpulse.com). + +[Documentation](https://docs.edgeimpulse.com/reference#inferencing-sdk) + +## Develop locally + +To develop locally, the easiest approach is to grab the [example-standalone-inferencing](https://github.com/edgeimpulse/example-standalone-inferencing) (desktop) or [example-standalone-inferencing-mbed](https://github.com/edgeimpulse/example-standalone-inferencing-mbed) (ST IoT Discovery Kit, e.g. to test the CMSIS-DSP / CMSIS-NN integration) example application, add your Edge Impulse project (use the C++ Library export option), and then symlink this repository in. diff --git a/edgeimpulse/edge-impulse-sdk/LICENSE b/edgeimpulse/edge-impulse-sdk/LICENSE new file mode 100644 index 0000000..0cdd12c --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/LICENSE @@ -0,0 +1,165 @@ +Apache License +Version 2.0, January 2004 +http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + +"License" shall mean the terms and conditions for use, reproduction, and +distribution as defined by Sections 1 through 9 of this document. + +"Licensor" shall mean the copyright owner or entity authorized by the copyright +owner that is granting the License. + +"Legal Entity" shall mean the union of the acting entity and all other entities +that control, are controlled by, or are under common control with that entity. +For the purposes of this definition, "control" means (i) the power, direct or +indirect, to cause the direction or management of such entity, whether by +contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the +outstanding shares, or (iii) beneficial ownership of such entity.
+ +"You" (or "Your") shall mean an individual or Legal Entity exercising +permissions granted by this License. + +"Source" form shall mean the preferred form for making modifications, including +but not limited to software source code, documentation source, and configuration +files. + +"Object" form shall mean any form resulting from mechanical transformation or +translation of a Source form, including but not limited to compiled object code, +generated documentation, and conversions to other media types. + +"Work" shall mean the work of authorship, whether in Source or Object form, made +available under the License, as indicated by a copyright notice that is included +in or attached to the work (an example is provided in the Appendix below). + +"Derivative Works" shall mean any work, whether in Source or Object form, that +is based on (or derived from) the Work and for which the editorial revisions, +annotations, elaborations, or other modifications represent, as a whole, an +original work of authorship. For the purposes of this License, Derivative Works +shall not include works that remain separable from, or merely link (or bind by +name) to the interfaces of, the Work and Derivative Works thereof. + +"Contribution" shall mean any work of authorship, including the original version +of the Work and any modifications or additions to that Work or Derivative Works +thereof, that is intentionally submitted to Licensor for inclusion in the Work +by the copyright owner or by an individual or Legal Entity authorized to submit +on behalf of the copyright owner. For the purposes of this definition, +"submitted" means any form of electronic, verbal, or written communication sent +to the Licensor or its representatives, including but not limited to +communication on electronic mailing lists, source code control systems, and +issue tracking systems that are managed by, or on behalf of, the Licensor for +the purpose of discussing and improving the Work, but excluding communication +that is conspicuously marked or otherwise designated in writing by the copyright +owner as "Not a Contribution." + +"Contributor" shall mean Licensor and any individual or Legal Entity on behalf +of whom a Contribution has been received by Licensor and subsequently +incorporated within the Work. + +2. Grant of Copyright License. + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable copyright license to reproduce, prepare Derivative Works of, +publicly display, publicly perform, sublicense, and distribute the Work and such +Derivative Works in Source or Object form. + +3. Grant of Patent License. + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable (except as stated in this section) patent license to make, have +made, use, offer to sell, sell, import, and otherwise transfer the Work, where +such license applies only to those patent claims licensable by such Contributor +that are necessarily infringed by their Contribution(s) alone or by combination +of their Contribution(s) with the Work to which such Contribution(s) was +submitted. 
If You institute patent litigation against any entity (including a +cross-claim or counterclaim in a lawsuit) alleging that the Work or a +Contribution incorporated within the Work constitutes direct or contributory +patent infringement, then any patent licenses granted to You under this License +for that Work shall terminate as of the date such litigation is filed. + +4. Redistribution. + +You may reproduce and distribute copies of the Work or Derivative Works thereof +in any medium, with or without modifications, and in Source or Object form, +provided that You meet the following conditions: + +You must give any other recipients of the Work or Derivative Works a copy of +this License; and +You must cause any modified files to carry prominent notices stating that You +changed the files; and +You must retain, in the Source form of any Derivative Works that You distribute, +all copyright, patent, trademark, and attribution notices from the Source form +of the Work, excluding those notices that do not pertain to any part of the +Derivative Works; and +If the Work includes a "NOTICE" text file as part of its distribution, then any +Derivative Works that You distribute must include a readable copy of the +attribution notices contained within such NOTICE file, excluding those notices +that do not pertain to any part of the Derivative Works, in at least one of the +following places: within a NOTICE text file distributed as part of the +Derivative Works; within the Source form or documentation, if provided along +with the Derivative Works; or, within a display generated by the Derivative +Works, if and wherever such third-party notices normally appear. The contents of +the NOTICE file are for informational purposes only and do not modify the +License. You may add Your own attribution notices within Derivative Works that +You distribute, alongside or as an addendum to the NOTICE text from the Work, +provided that such additional attribution notices cannot be construed as +modifying the License. +You may add Your own copyright statement to Your modifications and may provide +additional or different license terms and conditions for use, reproduction, or +distribution of Your modifications, or for any such Derivative Works as a whole, +provided Your use, reproduction, and distribution of the Work otherwise complies +with the conditions stated in this License. + +5. Submission of Contributions. + +Unless You explicitly state otherwise, any Contribution intentionally submitted +for inclusion in the Work by You to the Licensor shall be under the terms and +conditions of this License, without any additional terms or conditions. +Notwithstanding the above, nothing herein shall supersede or modify the terms of +any separate license agreement you may have executed with Licensor regarding +such Contributions. + +6. Trademarks. + +This License does not grant permission to use the trade names, trademarks, +service marks, or product names of the Licensor, except as required for +reasonable and customary use in describing the origin of the Work and +reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. + +Unless required by applicable law or agreed to in writing, Licensor provides the +Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, +including, without limitation, any warranties or conditions of TITLE, +NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. 
You are +solely responsible for determining the appropriateness of using or +redistributing the Work and assume any risks associated with Your exercise of +permissions under this License. + +8. Limitation of Liability. + +In no event and under no legal theory, whether in tort (including negligence), +contract, or otherwise, unless required by applicable law (such as deliberate +and grossly negligent acts) or agreed to in writing, shall any Contributor be +liable to You for damages, including any direct, indirect, special, incidental, +or consequential damages of any character arising as a result of this License or +out of the use or inability to use the Work (including but not limited to +damages for loss of goodwill, work stoppage, computer failure or malfunction, or +any and all other commercial damages or losses), even if such Contributor has +been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. + +While redistributing the Work or Derivative Works thereof, You may choose to +offer, and charge a fee for, acceptance of support, warranty, indemnity, or +other liability obligations and/or rights consistent with this License. However, +in accepting such obligations, You may act only on Your own behalf and on Your +sole responsibility, not on behalf of any other Contributor, and only if You +agree to indemnify, defend, and hold each Contributor harmless for any liability +incurred by, or claims asserted against, such Contributor by reason of your +accepting any such warranty or additional liability. \ No newline at end of file diff --git a/edgeimpulse/edge-impulse-sdk/LICENSE-apache-2.0.txt b/edgeimpulse/edge-impulse-sdk/LICENSE-apache-2.0.txt new file mode 100644 index 0000000..0cdd12c --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/LICENSE-apache-2.0.txt @@ -0,0 +1,165 @@ +Apache License +Version 2.0, January 2004 +http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + +"License" shall mean the terms and conditions for use, reproduction, and +distribution as defined by Sections 1 through 9 of this document. + +"Licensor" shall mean the copyright owner or entity authorized by the copyright +owner that is granting the License. + +"Legal Entity" shall mean the union of the acting entity and all other entities +that control, are controlled by, or are under common control with that entity. +For the purposes of this definition, "control" means (i) the power, direct or +indirect, to cause the direction or management of such entity, whether by +contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the +outstanding shares, or (iii) beneficial ownership of such entity. + +"You" (or "Your") shall mean an individual or Legal Entity exercising +permissions granted by this License. + +"Source" form shall mean the preferred form for making modifications, including +but not limited to software source code, documentation source, and configuration +files. + +"Object" form shall mean any form resulting from mechanical transformation or +translation of a Source form, including but not limited to compiled object code, +generated documentation, and conversions to other media types. + +"Work" shall mean the work of authorship, whether in Source or Object form, made +available under the License, as indicated by a copyright notice that is included +in or attached to the work (an example is provided in the Appendix below). 
+ +"Derivative Works" shall mean any work, whether in Source or Object form, that +is based on (or derived from) the Work and for which the editorial revisions, +annotations, elaborations, or other modifications represent, as a whole, an +original work of authorship. For the purposes of this License, Derivative Works +shall not include works that remain separable from, or merely link (or bind by +name) to the interfaces of, the Work and Derivative Works thereof. + +"Contribution" shall mean any work of authorship, including the original version +of the Work and any modifications or additions to that Work or Derivative Works +thereof, that is intentionally submitted to Licensor for inclusion in the Work +by the copyright owner or by an individual or Legal Entity authorized to submit +on behalf of the copyright owner. For the purposes of this definition, +"submitted" means any form of electronic, verbal, or written communication sent +to the Licensor or its representatives, including but not limited to +communication on electronic mailing lists, source code control systems, and +issue tracking systems that are managed by, or on behalf of, the Licensor for +the purpose of discussing and improving the Work, but excluding communication +that is conspicuously marked or otherwise designated in writing by the copyright +owner as "Not a Contribution." + +"Contributor" shall mean Licensor and any individual or Legal Entity on behalf +of whom a Contribution has been received by Licensor and subsequently +incorporated within the Work. + +2. Grant of Copyright License. + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable copyright license to reproduce, prepare Derivative Works of, +publicly display, publicly perform, sublicense, and distribute the Work and such +Derivative Works in Source or Object form. + +3. Grant of Patent License. + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable (except as stated in this section) patent license to make, have +made, use, offer to sell, sell, import, and otherwise transfer the Work, where +such license applies only to those patent claims licensable by such Contributor +that are necessarily infringed by their Contribution(s) alone or by combination +of their Contribution(s) with the Work to which such Contribution(s) was +submitted. If You institute patent litigation against any entity (including a +cross-claim or counterclaim in a lawsuit) alleging that the Work or a +Contribution incorporated within the Work constitutes direct or contributory +patent infringement, then any patent licenses granted to You under this License +for that Work shall terminate as of the date such litigation is filed. + +4. Redistribution. 
+ +You may reproduce and distribute copies of the Work or Derivative Works thereof +in any medium, with or without modifications, and in Source or Object form, +provided that You meet the following conditions: + +You must give any other recipients of the Work or Derivative Works a copy of +this License; and +You must cause any modified files to carry prominent notices stating that You +changed the files; and +You must retain, in the Source form of any Derivative Works that You distribute, +all copyright, patent, trademark, and attribution notices from the Source form +of the Work, excluding those notices that do not pertain to any part of the +Derivative Works; and +If the Work includes a "NOTICE" text file as part of its distribution, then any +Derivative Works that You distribute must include a readable copy of the +attribution notices contained within such NOTICE file, excluding those notices +that do not pertain to any part of the Derivative Works, in at least one of the +following places: within a NOTICE text file distributed as part of the +Derivative Works; within the Source form or documentation, if provided along +with the Derivative Works; or, within a display generated by the Derivative +Works, if and wherever such third-party notices normally appear. The contents of +the NOTICE file are for informational purposes only and do not modify the +License. You may add Your own attribution notices within Derivative Works that +You distribute, alongside or as an addendum to the NOTICE text from the Work, +provided that such additional attribution notices cannot be construed as +modifying the License. +You may add Your own copyright statement to Your modifications and may provide +additional or different license terms and conditions for use, reproduction, or +distribution of Your modifications, or for any such Derivative Works as a whole, +provided Your use, reproduction, and distribution of the Work otherwise complies +with the conditions stated in this License. + +5. Submission of Contributions. + +Unless You explicitly state otherwise, any Contribution intentionally submitted +for inclusion in the Work by You to the Licensor shall be under the terms and +conditions of this License, without any additional terms or conditions. +Notwithstanding the above, nothing herein shall supersede or modify the terms of +any separate license agreement you may have executed with Licensor regarding +such Contributions. + +6. Trademarks. + +This License does not grant permission to use the trade names, trademarks, +service marks, or product names of the Licensor, except as required for +reasonable and customary use in describing the origin of the Work and +reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. + +Unless required by applicable law or agreed to in writing, Licensor provides the +Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, +including, without limitation, any warranties or conditions of TITLE, +NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are +solely responsible for determining the appropriateness of using or +redistributing the Work and assume any risks associated with Your exercise of +permissions under this License. + +8. Limitation of Liability. 
+ +In no event and under no legal theory, whether in tort (including negligence), +contract, or otherwise, unless required by applicable law (such as deliberate +and grossly negligent acts) or agreed to in writing, shall any Contributor be +liable to You for damages, including any direct, indirect, special, incidental, +or consequential damages of any character arising as a result of this License or +out of the use or inability to use the Work (including but not limited to +damages for loss of goodwill, work stoppage, computer failure or malfunction, or +any and all other commercial damages or losses), even if such Contributor has +been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. + +While redistributing the Work or Derivative Works thereof, You may choose to +offer, and charge a fee for, acceptance of support, warranty, indemnity, or +other liability obligations and/or rights consistent with this License. However, +in accepting such obligations, You may act only on Your own behalf and on Your +sole responsibility, not on behalf of any other Contributor, and only if You +agree to indemnify, defend, and hold each Contributor harmless for any liability +incurred by, or claims asserted against, such Contributor by reason of your +accepting any such warranty or additional liability. \ No newline at end of file diff --git a/edgeimpulse/edge-impulse-sdk/classifier/ei_aligned_malloc.h b/edgeimpulse/edge-impulse-sdk/classifier/ei_aligned_malloc.h new file mode 100644 index 0000000..7ef1a26 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/classifier/ei_aligned_malloc.h @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef _EDGE_IMPULSE_ALIGNED_MALLOC_H_ +#define _EDGE_IMPULSE_ALIGNED_MALLOC_H_ + +#include +#include "../porting/ei_classifier_porting.h" + +#ifdef __cplusplus +namespace { +#endif // __cplusplus + +/** +* Based on https://github.com/embeddedartistry/embedded-resources/blob/master/examples/c/malloc_aligned.c +*/ + +/** +* Simple macro for making sure memory addresses are aligned +* to the nearest power of two +*/ +#ifndef align_up +#define align_up(num, align) \ + (((num) + ((align) - 1)) & ~((align) - 1)) +#endif + +//Number of bytes we're using for storing the aligned pointer offset +typedef uint16_t offset_t; +#define PTR_OFFSET_SZ sizeof(offset_t) + +/** +* aligned_malloc takes in the requested alignment and size +* We will call malloc with extra bytes for our header and the offset +* required to guarantee the desired alignment. 
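+*
+* A minimal usage sketch (illustrative only; the 64-byte alignment and the
+* buffer size below are example values, not requirements of this SDK):
+*
+*   float *buf = (float *)ei_aligned_calloc(64, 256 * sizeof(float));
+*   if (buf) {
+*       // ... use the aligned, zero-initialized scratch buffer ...
+*       ei_aligned_free(buf); // must be paired with ei_aligned_calloc, never plain free()
+*   }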
+*/ +__attribute__((unused)) void * ei_aligned_calloc(size_t align, size_t size) +{ + void * ptr = NULL; + + //We want it to be a power of two since align_up operates on powers of two + assert((align & (align - 1)) == 0); + + if(align && size) + { + /* + * We know we have to fit an offset value + * We also allocate extra bytes to ensure we can meet the alignment + */ + uint32_t hdr_size = PTR_OFFSET_SZ + (align - 1); + void * p = ei_calloc(size + hdr_size, 1); + + if(p) + { + /* + * Add the offset size to malloc's pointer (we will always store that) + * Then align the resulting value to the arget alignment + */ + ptr = (void *) align_up(((uintptr_t)p + PTR_OFFSET_SZ), align); + + //Calculate the offset and store it behind our aligned pointer + *((offset_t *)ptr - 1) = (offset_t)((uintptr_t)ptr - (uintptr_t)p); + + } // else NULL, could not malloc + } //else NULL, invalid arguments + + return ptr; +} + +/** +* aligned_free works like free(), but we work backwards from the returned +* pointer to find the correct offset and pointer location to return to free() +* Note that it is VERY BAD to call free() on an aligned_malloc() pointer. +*/ +__attribute__((unused)) void ei_aligned_free(void * ptr) +{ + assert(ptr); + + /* + * Walk backwards from the passed-in pointer to get the pointer offset + * We convert to an offset_t pointer and rely on pointer math to get the data + */ + offset_t offset = *((offset_t *)ptr - 1); + + /* + * Once we have the offset, we can get our original pointer and call free + */ + void * p = (void *)((uint8_t *)ptr - offset); + ei_free(p); +} + +#ifdef __cplusplus +} +#endif // __cplusplus + +#endif // _EDGE_IMPULSE_ALIGNED_MALLOC_H_ diff --git a/edgeimpulse/edge-impulse-sdk/classifier/ei_classifier_config.h b/edgeimpulse/edge-impulse-sdk/classifier/ei_classifier_config.h new file mode 100644 index 0000000..8865a85 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/classifier/ei_classifier_config.h @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef _EI_CLASSIFIER_CONFIG_H_ +#define _EI_CLASSIFIER_CONFIG_H_ + +// clang-format off + +// This is a file that's only used in benchmarking to override HW optimized kernels +#ifdef __has_include + #if __has_include("source/benchmark.h") + #include "source/benchmark.h" + #endif +#endif + +#if EI_CLASSIFIER_TFLITE_ENABLE_SILABS_MVP == 1 + #define EI_CLASSIFIER_TFLITE_ENABLE_CMSIS_NN 0 + #define EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES 1 +#endif + +#ifndef EI_CLASSIFIER_TFLITE_ENABLE_CMSIS_NN +#if defined(__MBED__) + #include "mbed_version.h" + #if (MBED_VERSION < MBED_ENCODE_VERSION((5), (7), (0))) + #define EI_CLASSIFIER_TFLITE_ENABLE_CMSIS_NN 0 + #else + #define EI_CLASSIFIER_TFLITE_ENABLE_CMSIS_NN 1 + #endif // Mbed OS 5.7 version check + +// __ARM_ARCH_PROFILE is a predefine of arm-gcc. 
__TARGET_* is armcc +#elif __ARM_ARCH_PROFILE == 'M' || defined(__TARGET_CPU_CORTEX_M0) || defined(__TARGET_CPU_CORTEX_M0PLUS) || defined(__TARGET_CPU_CORTEX_M3) || defined(__TARGET_CPU_CORTEX_M4) || defined(__TARGET_CPU_CORTEX_M7) || defined(ARDUINO_NRF52_ADAFRUIT) + #define EI_CLASSIFIER_TFLITE_ENABLE_CMSIS_NN 1 +#else + #define EI_CLASSIFIER_TFLITE_ENABLE_CMSIS_NN 0 +#endif +#endif // EI_CLASSIFIER_TFLITE_ENABLE_CMSIS_NN + +// CMSIS-NN falls back to reference kernels when __ARM_FEATURE_DSP and __ARM_FEATURE_MVE are not defined +// we should never use those... So disable CMSIS-NN in that case and throw a warning +#if EI_CLASSIFIER_TFLITE_ENABLE_CMSIS_NN == 1 + #if !defined(__ARM_FEATURE_DSP) && !defined(__ARM_FEATURE_MVE) + #pragma message( \ + "CMSIS-NN enabled, but neither __ARM_FEATURE_DSP nor __ARM_FEATURE_MVE defined. Falling back.") + #undef EI_CLASSIFIER_TFLITE_ENABLE_CMSIS_NN + #define EI_CLASSIFIER_TFLITE_ENABLE_CMSIS_NN 0 + #endif +#endif // EI_CLASSIFIER_TFLITE_ENABLE_CMSIS_NN == 1 + +#if EI_CLASSIFIER_TFLITE_ENABLE_CMSIS_NN == 1 +#define CMSIS_NN 1 +#define EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES 1 +#endif + +#ifndef EI_CLASSIFIER_TFLITE_ENABLE_ARC +#ifdef CPU_ARC +#define EI_CLASSIFIER_TFLITE_ENABLE_ARC 1 +#else +#define EI_CLASSIFIER_TFLITE_ENABLE_ARC 0 +#endif // CPU_ARC +#endif // EI_CLASSIFIER_TFLITE_ENABLE_ARC + +#ifndef EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN + #if defined(ESP32) + #include "sdkconfig.h" + #define EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN 1 + #define ESP_NN 1 + #endif // ESP32 check + #if defined(CONFIG_IDF_TARGET_ESP32S3) + #define EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3 1 + #endif // ESP32S3 check +#else + #define ESP_NN 1 +#endif + +// no include checks in the compiler? then just include metadata and then ops_define (optional if on EON model) +#ifndef __has_include + #include "model-parameters/model_metadata.h" + #if (EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_TFLITE) && (EI_CLASSIFIER_COMPILED == 1) + #include "tflite-model/trained_model_ops_define.h" + #endif +#else + #if __has_include("tflite-model/trained_model_ops_define.h") + #include "tflite-model/trained_model_ops_define.h" + #endif +#endif // __has_include + +// clang-format on +#endif // _EI_CLASSIFIER_CONFIG_H_ diff --git a/edgeimpulse/edge-impulse-sdk/classifier/ei_classifier_smooth.h b/edgeimpulse/edge-impulse-sdk/classifier/ei_classifier_smooth.h new file mode 100644 index 0000000..31be582 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/classifier/ei_classifier_smooth.h @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef _EI_CLASSIFIER_SMOOTH_H_ +#define _EI_CLASSIFIER_SMOOTH_H_ + +#if EI_CLASSIFIER_OBJECT_DETECTION != 1 + +#include + +typedef struct ei_classifier_smooth { + int *last_readings; + size_t last_readings_size; + uint8_t min_readings_same; + float classifier_confidence; + float anomaly_confidence; + uint8_t count[EI_CLASSIFIER_LABEL_COUNT + 2] = { 0 }; + size_t count_size = EI_CLASSIFIER_LABEL_COUNT + 2; +} ei_classifier_smooth_t; + +/** + * Initialize a smooth structure. This is useful if you don't want to trust + * single readings, but rather want consensus + * (e.g. 7 / 10 readings should be the same before I draw any ML conclusions). + * This allocates memory on the heap! + * @param smooth Pointer to an uninitialized ei_classifier_smooth_t struct + * @param n_readings Number of readings you want to store + * @param min_readings_same Minimum readings that need to be the same before concluding (needs to be lower than n_readings) + * @param classifier_confidence Minimum confidence in a class (default 0.8) + * @param anomaly_confidence Maximum error for anomalies (default 0.3) + */ +void ei_classifier_smooth_init(ei_classifier_smooth_t *smooth, size_t n_readings, + uint8_t min_readings_same, float classifier_confidence = 0.8, + float anomaly_confidence = 0.3) { + smooth->last_readings = (int*)ei_malloc(n_readings * sizeof(int)); + for (size_t ix = 0; ix < n_readings; ix++) { + smooth->last_readings[ix] = -1; // -1 == uncertain + } + smooth->last_readings_size = n_readings; + smooth->min_readings_same = min_readings_same; + smooth->classifier_confidence = classifier_confidence; + smooth->anomaly_confidence = anomaly_confidence; + smooth->count_size = EI_CLASSIFIER_LABEL_COUNT + 2; +} + +/** + * Call when a new reading comes in. + * @param smooth Pointer to an initialized ei_classifier_smooth_t struct + * @param result Pointer to a result structure (after calling ei_run_classifier) + * @returns Label, either 'uncertain', 'anomaly', or a label from the result struct + */ +const char* ei_classifier_smooth_update(ei_classifier_smooth_t *smooth, ei_impulse_result_t *result) { + // clear out the count array + memset(smooth->count, 0, EI_CLASSIFIER_LABEL_COUNT + 2); + + // roll through the last_readings buffer + numpy::roll(smooth->last_readings, smooth->last_readings_size, -1); + + int reading = -1; // uncertain + + // print the predictions + // printf("["); + for (size_t ix = 0; ix < EI_CLASSIFIER_LABEL_COUNT; ix++) { + if (result->classification[ix].value >= smooth->classifier_confidence) { + reading = (int)ix; + } + } +#if EI_CLASSIFIER_HAS_ANOMALY == 1 + if (result->anomaly >= smooth->anomaly_confidence) { + reading = -2; // anomaly + } +#endif + + smooth->last_readings[smooth->last_readings_size - 1] = reading; + + // now count last 10 readings and see what we actually see... 
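+    // count[] layout: indexes 0..EI_CLASSIFIER_LABEL_COUNT-1 hold per-class counts,
+    // index EI_CLASSIFIER_LABEL_COUNT counts 'uncertain' (-1) readings, and
+    // index EI_CLASSIFIER_LABEL_COUNT+1 counts 'anomaly' (-2) readings.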
+ for (size_t ix = 0; ix < smooth->last_readings_size; ix++) { + if (smooth->last_readings[ix] >= 0) { + smooth->count[smooth->last_readings[ix]]++; + } + else if (smooth->last_readings[ix] == -1) { // uncertain + smooth->count[EI_CLASSIFIER_LABEL_COUNT]++; + } + else if (smooth->last_readings[ix] == -2) { // anomaly + smooth->count[EI_CLASSIFIER_LABEL_COUNT + 1]++; + } + } + + // then loop over the count and see which is highest + uint8_t top_result = 0; + uint8_t top_count = 0; + bool met_confidence_threshold = false; + uint8_t confidence_threshold = smooth->min_readings_same; // XX% of windows should be the same + for (size_t ix = 0; ix < EI_CLASSIFIER_LABEL_COUNT + 2; ix++) { + if (smooth->count[ix] > top_count) { + top_result = ix; + top_count = smooth->count[ix]; + } + if (smooth->count[ix] >= confidence_threshold) { + met_confidence_threshold = true; + } + } + + if (met_confidence_threshold) { + if (top_result == EI_CLASSIFIER_LABEL_COUNT) { + return "uncertain"; + } + else if (top_result == EI_CLASSIFIER_LABEL_COUNT + 1) { + return "anomaly"; + } + else { + return result->classification[top_result].label; + } + } + return "uncertain"; +} + +/** + * Clear up a smooth structure + */ +void ei_classifier_smooth_free(ei_classifier_smooth_t *smooth) { + ei_free(smooth->last_readings); +} + +#endif // #if EI_CLASSIFIER_OBJECT_DETECTION != 1 + +#endif // _EI_CLASSIFIER_SMOOTH_H_ diff --git a/edgeimpulse/edge-impulse-sdk/classifier/ei_classifier_types.h b/edgeimpulse/edge-impulse-sdk/classifier/ei_classifier_types.h new file mode 100644 index 0000000..45fc645 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/classifier/ei_classifier_types.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef _EDGE_IMPULSE_RUN_CLASSIFIER_TYPES_H_ +#define _EDGE_IMPULSE_RUN_CLASSIFIER_TYPES_H_ + +#include +// needed for standalone C example +#include "model-parameters/model_metadata.h" + +#ifndef EI_CLASSIFIER_MAX_OBJECT_DETECTION_COUNT +#define EI_CLASSIFIER_MAX_OBJECT_DETECTION_COUNT 10 +#endif + +#ifndef EI_CLASSIFIER_MAX_LABELS_COUNT +#define EI_CLASSIFIER_MAX_LABELS_COUNT 25 +#endif + +typedef struct { + const char *label; + float value; +} ei_impulse_result_classification_t; + +typedef struct { + float mean_value; + float max_value; +} ei_impulse_visual_ad_result_t; + +typedef struct { + const char *label; + uint32_t x; + uint32_t y; + uint32_t width; + uint32_t height; + float value; +} ei_impulse_result_bounding_box_t; + +typedef struct { + int sampling; + int dsp; + int classification; + int anomaly; + int64_t dsp_us; + int64_t classification_us; + int64_t anomaly_us; +} ei_impulse_result_timing_t; + +typedef struct { + ei_impulse_result_bounding_box_t *bounding_boxes; + uint32_t bounding_boxes_count; + ei_impulse_result_classification_t classification[EI_CLASSIFIER_MAX_LABELS_COUNT]; + float anomaly; + ei_impulse_result_timing_t timing; + bool copy_output; +#ifdef EI_CLASSIFIER_HAS_VISUAL_ANOMALY + ei_impulse_result_bounding_box_t *visual_ad_grid_cells; + uint32_t visual_ad_count; + ei_impulse_visual_ad_result_t visual_ad_result; +#endif +} ei_impulse_result_t; + +#endif // _EDGE_IMPULSE_RUN_CLASSIFIER_TYPES_H_ diff --git a/edgeimpulse/edge-impulse-sdk/classifier/ei_fill_result_struct.h b/edgeimpulse/edge-impulse-sdk/classifier/ei_fill_result_struct.h new file mode 100644 index 0000000..12429ce --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/classifier/ei_fill_result_struct.h @@ -0,0 +1,968 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef _EI_CLASSIFIER_FILL_RESULT_STRUCT_H_ +#define _EI_CLASSIFIER_FILL_RESULT_STRUCT_H_ + +using namespace ei; + +#include "model-parameters/model_metadata.h" +#if EI_CLASSIFIER_HAS_MODEL_VARIABLES == 1 +#include "model-parameters/model_variables.h" +#endif +#include "edge-impulse-sdk/classifier/ei_model_types.h" +#include "edge-impulse-sdk/classifier/ei_classifier_types.h" +#include "edge-impulse-sdk/classifier/ei_nms.h" +#include "edge-impulse-sdk/dsp/ei_vector.h" + +#ifndef EI_HAS_OBJECT_DETECTION + #if (EI_CLASSIFIER_OBJECT_DETECTION_LAST_LAYER == EI_CLASSIFIER_LAST_LAYER_SSD) + #define EI_HAS_SSD 1 + #endif + #if (EI_CLASSIFIER_OBJECT_DETECTION_LAST_LAYER == EI_CLASSIFIER_LAST_LAYER_FOMO) + #define EI_HAS_FOMO 1 + #endif + #if (EI_CLASSIFIER_OBJECT_DETECTION_LAST_LAYER == EI_CLASSIFIER_LAST_LAYER_YOLOV5) || (EI_CLASSIFIER_OBJECT_DETECTION_LAST_LAYER == EI_CLASSIFIER_LAST_LAYER_YOLOV5_V5_DRPAI) + #define EI_HAS_YOLOV5 1 + #endif + #if (EI_CLASSIFIER_OBJECT_DETECTION_LAST_LAYER == EI_CLASSIFIER_LAST_LAYER_YOLOX) + #define EI_HAS_YOLOX 1 + #endif + #if (EI_CLASSIFIER_OBJECT_DETECTION_LAST_LAYER == EI_CLASSIFIER_LAST_LAYER_YOLOV7) + #define EI_HAS_YOLOV7 1 + #endif +#endif + +#ifdef EI_HAS_FOMO +typedef struct cube { + size_t x; + size_t y; + size_t width; + size_t height; + float confidence; + const char *label; +} ei_classifier_cube_t; + +/** + * Checks whether a new section overlaps with a cube, + * and if so, will **update the cube** + */ +__attribute__((unused)) static bool ei_cube_check_overlap(ei_classifier_cube_t *c, int x, int y, int width, int height, float confidence) { + bool is_overlapping = !(c->x + c->width < x || c->y + c->height < y || c->x > x + width || c->y > y + height); + if (!is_overlapping) return false; + + // if we overlap, but the x of the new box is lower than the x of the current box + if (x < c->x) { + // update x to match new box and make width larger (by the diff between the boxes) + c->x = x; + c->width += c->x - x; + } + // if we overlap, but the y of the new box is lower than the y of the current box + if (y < c->y) { + // update y to match new box and make height larger (by the diff between the boxes) + c->y = y; + c->height += c->y - y; + } + // if we overlap, and x+width of the new box is higher than the x+width of the current box + if (x + width > c->x + c->width) { + // just make the box wider + c->width += (x + width) - (c->x + c->width); + } + // if we overlap, and y+height of the new box is higher than the y+height of the current box + if (y + height > c->y + c->height) { + // just make the box higher + c->height += (y + height) - (c->y + c->height); + } + // if the new box has higher confidence, then override confidence of the whole box + if (confidence > c->confidence) { + c->confidence = confidence; + } + return true; +} + +__attribute__((unused)) static void ei_handle_cube(std::vector *cubes, int x, int y, float vf, const char *label, float detection_threshold) { + if (vf < detection_threshold) return; + + bool has_overlapping = false; + int width = 1; + int height = 1; + + for (auto c : *cubes) { + // not cube for same class? 
continue + if (strcmp(c->label, label) != 0) continue; + + if (ei_cube_check_overlap(c, x, y, width, height, vf)) { + has_overlapping = true; + break; + } + } + + if (!has_overlapping) { + ei_classifier_cube_t *cube = new ei_classifier_cube_t(); + cube->x = x; + cube->y = y; + cube->width = 1; + cube->height = 1; + cube->confidence = vf; + cube->label = label; + cubes->push_back(cube); + } +} + +__attribute__((unused)) static void fill_result_struct_from_cubes(ei_impulse_result_t *result, std::vector *cubes, int out_width_factor, uint32_t object_detection_count) { + std::vector bbs; + static std::vector results; + int added_boxes_count = 0; + results.clear(); + for (auto sc : *cubes) { + bool has_overlapping = false; + + int x = sc->x; + int y = sc->y; + int width = sc->width; + int height = sc->height; + const char *label = sc->label; + float vf = sc->confidence; + + for (auto c : bbs) { + // not cube for same class? continue + if (strcmp(c->label, label) != 0) continue; + + if (ei_cube_check_overlap(c, x, y, width, height, vf)) { + has_overlapping = true; + break; + } + } + + if (has_overlapping) { + continue; + } + + bbs.push_back(sc); + + ei_impulse_result_bounding_box_t tmp = { + .label = sc->label, + .x = (uint32_t)(sc->x * out_width_factor), + .y = (uint32_t)(sc->y * out_width_factor), + .width = (uint32_t)(sc->width * out_width_factor), + .height = (uint32_t)(sc->height * out_width_factor), + .value = sc->confidence + }; + + results.push_back(tmp); + added_boxes_count++; + } + + // if we didn't detect min required objects, fill the rest with fixed value + if (added_boxes_count < object_detection_count) { + results.resize(object_detection_count); + for (size_t ix = added_boxes_count; ix < object_detection_count; ix++) { + results[ix].value = 0.0f; + } + } + + for (auto c : *cubes) { + delete c; + } + + result->bounding_boxes = results.data(); + result->bounding_boxes_count = results.size(); +} +#endif + +__attribute__((unused)) static EI_IMPULSE_ERROR fill_result_struct_f32_fomo(const ei_impulse_t *impulse, + ei_impulse_result_t *result, + float *data, + int out_width, + int out_height) { +#ifdef EI_HAS_FOMO + std::vector cubes; + + int out_width_factor = impulse->input_width / out_width; + + for (size_t y = 0; y < out_width; y++) { + // ei_printf(" [ "); + for (size_t x = 0; x < out_height; x++) { + size_t loc = ((y * out_height) + x) * (impulse->label_count + 1); + + for (size_t ix = 1; ix < impulse->label_count + 1; ix++) { + float vf = data[loc+ix]; + + ei_handle_cube(&cubes, x, y, vf, impulse->categories[ix - 1], impulse->object_detection_threshold); + } + } + } + + fill_result_struct_from_cubes(result, &cubes, out_width_factor, impulse->object_detection_count); + + return EI_IMPULSE_OK; +#else + return EI_IMPULSE_LAST_LAYER_NOT_AVAILABLE; +#endif +} + +__attribute__((unused)) static EI_IMPULSE_ERROR fill_result_struct_i8_fomo(const ei_impulse_t *impulse, + ei_impulse_result_t *result, + int8_t *data, + float zero_point, + float scale, + int out_width, + int out_height) { +#ifdef EI_HAS_FOMO + std::vector cubes; + + int out_width_factor = impulse->input_width / out_width; + + for (size_t y = 0; y < out_width; y++) { + // ei_printf(" [ "); + for (size_t x = 0; x < out_height; x++) { + size_t loc = ((y * out_height) + x) * (impulse->label_count + 1); + + for (size_t ix = 1; ix < impulse->label_count + 1; ix++) { + int8_t v = data[loc+ix]; + float vf = static_cast(v - zero_point) * scale; + + ei_handle_cube(&cubes, x, y, vf, impulse->categories[ix - 1], 
impulse->object_detection_threshold); + } + } + } + + fill_result_struct_from_cubes(result, &cubes, out_width_factor, impulse->object_detection_count); + + return EI_IMPULSE_OK; +#else + return EI_IMPULSE_LAST_LAYER_NOT_AVAILABLE; +#endif +} + +/** + * Fill the result structure from an unquantized output tensor + * (we don't support quantized here a.t.m.) + */ +__attribute__((unused)) static EI_IMPULSE_ERROR fill_result_struct_f32_object_detection(const ei_impulse_t *impulse, + ei_impulse_result_t *result, + float *data, + float *scores, + float *labels, + bool debug) { +#ifdef EI_HAS_SSD + static std::vector results; + results.clear(); + results.resize(impulse->object_detection_count); + for (size_t ix = 0; ix < impulse->object_detection_count; ix++) { + + float score = scores[ix]; + float label = labels[ix]; + + if (score >= impulse->object_detection_threshold) { + float ystart = data[(ix * 4) + 0]; + float xstart = data[(ix * 4) + 1]; + float yend = data[(ix * 4) + 2]; + float xend = data[(ix * 4) + 3]; + + if (xstart < 0) xstart = 0; + if (xstart > 1) xstart = 1; + if (ystart < 0) ystart = 0; + if (ystart > 1) ystart = 1; + if (yend < 0) yend = 0; + if (yend > 1) yend = 1; + if (xend < 0) xend = 0; + if (xend > 1) xend = 1; + if (xend < xstart) xend = xstart; + if (yend < ystart) yend = ystart; + + if (debug) { + ei_printf("%s (", impulse->categories[(uint32_t)label]); + ei_printf_float(label); + ei_printf("): "); + ei_printf_float(score); + ei_printf(" [ "); + ei_printf_float(xstart); + ei_printf(", "); + ei_printf_float(ystart); + ei_printf(", "); + ei_printf_float(xend); + ei_printf(", "); + ei_printf_float(yend); + ei_printf(" ]\n"); + } + + results[ix].label = impulse->categories[(uint32_t)label]; + results[ix].x = static_cast(xstart * static_cast(impulse->input_width)); + results[ix].y = static_cast(ystart * static_cast(impulse->input_height)); + results[ix].width = static_cast((xend - xstart) * static_cast(impulse->input_width)); + results[ix].height = static_cast((yend - ystart) * static_cast(impulse->input_height)); + results[ix].value = score; + } + else { + results[ix].value = 0.0f; + } + } + result->bounding_boxes = results.data(); + result->bounding_boxes_count = results.size(); + + return EI_IMPULSE_OK; +#else + return EI_IMPULSE_LAST_LAYER_NOT_AVAILABLE; +#endif +} + +/** + * Fill the result structure from a quantized output tensor + */ +__attribute__((unused)) static EI_IMPULSE_ERROR fill_result_struct_i8(const ei_impulse_t *impulse, + ei_impulse_result_t *result, + int8_t *data, + float zero_point, + float scale, + bool debug) { + for (uint32_t ix = 0; ix < impulse->label_count; ix++) { + float value = static_cast(data[ix] - zero_point) * scale; + + if (debug) { + ei_printf("%s:\t", impulse->categories[ix]); + ei_printf_float(value); + ei_printf("\n"); + } + result->classification[ix].label = impulse->categories[ix]; + result->classification[ix].value = value; + } + + return EI_IMPULSE_OK; +} + +/** + * Fill the result structure from an unquantized output tensor + */ +__attribute__((unused)) static EI_IMPULSE_ERROR fill_result_struct_f32(const ei_impulse_t *impulse, + ei_impulse_result_t *result, + float *data, + bool debug) { + for (uint32_t ix = 0; ix < impulse->label_count; ix++) { + float value = data[ix]; + + if (debug) { + ei_printf("%s:\t", impulse->categories[ix]); + ei_printf_float(value); + ei_printf("\n"); + } + result->classification[ix].label = impulse->categories[ix]; + result->classification[ix].value = value; + } + + return EI_IMPULSE_OK; +} + +/** + * 
Fill the visual anomaly result structures from an unquantized output tensor + */ +__attribute__((unused)) static EI_IMPULSE_ERROR fill_result_visual_ad_struct_f32(const ei_impulse_t *impulse, + ei_impulse_result_t *result, + float *data, + bool debug) { +#ifdef EI_CLASSIFIER_HAS_VISUAL_ANOMALY + float max_val = 0; + float sum_val = 0; + // the feature extractor output will be 1/8 of input + // due to the cut-off layer chosen in MobileNetV2 + uint32_t grid_size_x = (impulse->input_width / 8) / 2 - 1; + uint32_t grid_size_y = (impulse->input_height / 8) / 2 - 1; + + for (uint32_t ix = 0; ix < grid_size_x * grid_size_y; ix++) { + float value = data[ix]; + sum_val += value; + if (value > max_val) { + max_val = value; + } + } + + result->visual_ad_result.mean_value = sum_val / (grid_size_x * grid_size_y); + result->visual_ad_result.max_value = max_val; + + static ei_vector results; + + int added_boxes_count = 0; + results.clear(); + + for (uint32_t x = 0; x <= grid_size_x - 1; x++) { + for (uint32_t y = 0; y <= grid_size_y - 1; y++) { + if (data[x * grid_size_x + y] >= impulse->object_detection_threshold) { + ei_impulse_result_bounding_box_t tmp = { + .label = "anomaly", + .x = static_cast(y * (static_cast(impulse->input_height) / grid_size_y)), + .y = static_cast(x * (static_cast(impulse->input_width) / grid_size_x)), + .width = (impulse->input_width / grid_size_x), + .height = (impulse->input_height / grid_size_y), + .value = data[x * grid_size_x + y] + }; + + results.push_back(tmp); + added_boxes_count++; + } + } + } + + // if we didn't detect min required objects, fill the rest with fixed value + if (added_boxes_count < impulse->object_detection_count) { + results.resize(impulse->object_detection_count); + for (size_t ix = added_boxes_count; ix < impulse->object_detection_count; ix++) { + results[ix].value = 0.0f; + } + } + + result->visual_ad_grid_cells = results.data(); + result->visual_ad_count = results.size(); +#endif + return EI_IMPULSE_OK; +} + +/** + * Fill the result structure from an unquantized output tensor + */ +__attribute__((unused)) static EI_IMPULSE_ERROR fill_result_struct_f32_yolov5(const ei_impulse_t *impulse, + ei_impulse_result_t *result, + int version, + float *data, + size_t output_features_count) { +#ifdef EI_HAS_YOLOV5 + static std::vector results; + results.clear(); + + size_t col_size = 5 + impulse->label_count; + size_t row_count = output_features_count / col_size; + + for (size_t ix = 0; ix < row_count; ix++) { + size_t base_ix = ix * col_size; + float xc = data[base_ix + 0]; + float yc = data[base_ix + 1]; + float w = data[base_ix + 2]; + float h = data[base_ix + 3]; + float x = xc - (w / 2.0f); + float y = yc - (h / 2.0f); + if (x < 0) { + x = 0; + } + if (y < 0) { + y = 0; + } + if (x + w > impulse->input_width) { + w = impulse->input_width - x; + } + if (y + h > impulse->input_height) { + h = impulse->input_height - y; + } + + if (w < 0 || h < 0) { + continue; + } + + float score = data[base_ix + 4]; + + uint32_t label = 0; + for (size_t lx = 0; lx < impulse->label_count; lx++) { + float l = data[base_ix + 5 + lx]; + if (l > 0.5f) { + label = lx; + break; + } + } + + if (score >= impulse->object_detection_threshold && score <= 1.0f) { + ei_impulse_result_bounding_box_t r; + r.label = impulse->categories[label]; + + if (version != 5) { + x *= static_cast(impulse->input_width); + y *= static_cast(impulse->input_height); + w *= static_cast(impulse->input_width); + h *= static_cast(impulse->input_height); + } + + r.x = static_cast(x); + r.y = static_cast(y); + 
r.width = static_cast(w); + r.height = static_cast(h); + r.value = score; + results.push_back(r); + } + } + + EI_IMPULSE_ERROR nms_res = ei_run_nms(&results); + if (nms_res != EI_IMPULSE_OK) { + return nms_res; + } + + // if we didn't detect min required objects, fill the rest with fixed value + size_t added_boxes_count = results.size(); + size_t min_object_detection_count = impulse->object_detection_count; + if (added_boxes_count < min_object_detection_count) { + results.resize(min_object_detection_count); + for (size_t ix = added_boxes_count; ix < min_object_detection_count; ix++) { + results[ix].value = 0.0f; + } + } + + result->bounding_boxes = results.data(); + result->bounding_boxes_count = results.size(); + + return EI_IMPULSE_OK; +#else + return EI_IMPULSE_LAST_LAYER_NOT_AVAILABLE; +#endif +} + +/** + * Fill the result structure from a quantized output tensor +*/ +template +__attribute__((unused)) static EI_IMPULSE_ERROR fill_result_struct_quantized_yolov5(const ei_impulse_t *impulse, + ei_impulse_result_t *result, + int version, + T *data, + float zero_point, + float scale, + size_t output_features_count) { +#ifdef EI_HAS_YOLOV5 + static std::vector results; + results.clear(); + + size_t col_size = 5 + impulse->label_count; + size_t row_count = output_features_count / col_size; + + for (size_t ix = 0; ix < row_count; ix++) { + size_t base_ix = ix * col_size; + float xc = (data[base_ix + 0] - zero_point) * scale; + float yc = (data[base_ix + 1] - zero_point) * scale; + float w = (data[base_ix + 2] - zero_point) * scale; + float h = (data[base_ix + 3] - zero_point) * scale; + float x = xc - (w / 2.0f); + float y = yc - (h / 2.0f); + if (x < 0) { + x = 0; + } + if (y < 0) { + y = 0; + } + if (x + w > impulse->input_width) { + w = impulse->input_width - x; + } + if (y + h > impulse->input_height) { + h = impulse->input_height - y; + } + + if (w < 0 || h < 0) { + continue; + } + + float score = (data[base_ix + 4] - zero_point) * scale; + + uint32_t label = 0; + for (size_t lx = 0; lx < impulse->label_count; lx++) { + float l = (data[base_ix + 5 + lx] - zero_point) * scale; + if (l > 0.5f) { + label = lx; + break; + } + } + + if (score >= impulse->object_detection_threshold && score <= 1.0f) { + ei_impulse_result_bounding_box_t r; + r.label = ei_classifier_inferencing_categories[label]; + + if (version != 5) { + x *= static_cast(impulse->input_width); + y *= static_cast(impulse->input_height); + w *= static_cast(impulse->input_width); + h *= static_cast(impulse->input_height); + } + + r.x = static_cast(x); + r.y = static_cast(y); + r.width = static_cast(w); + r.height = static_cast(h); + r.value = score; + results.push_back(r); + } + } + + EI_IMPULSE_ERROR nms_res = ei_run_nms(&results); + if (nms_res != EI_IMPULSE_OK) { + return nms_res; + } + + // if we didn't detect min required objects, fill the rest with fixed value + size_t added_boxes_count = results.size(); + size_t min_object_detection_count = impulse->object_detection_count; + if (added_boxes_count < min_object_detection_count) { + results.resize(min_object_detection_count); + for (size_t ix = added_boxes_count; ix < min_object_detection_count; ix++) { + results[ix].value = 0.0f; + } + } + + result->bounding_boxes = results.data(); + result->bounding_boxes_count = results.size(); + + return EI_IMPULSE_OK; +#else + return EI_IMPULSE_LAST_LAYER_NOT_AVAILABLE; +#endif +} + +/** + * Fill the result structure from an unquantized output tensor + * (we don't support quantized here a.t.m.) 
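+ *
+ * The decode below mirrors the reference yolox_postprocess(): grids and
+ * expanded strides are rebuilt for strides {8, 16, 32}, box centers are
+ * recovered as (value + grid) * stride and box sizes as exp(value) * stride,
+ * and only rows whose per-class score (objectness * class confidence) clears
+ * object_detection_threshold are kept before NMS runs.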
+ */ +__attribute__((unused)) static EI_IMPULSE_ERROR fill_result_struct_f32_yolox(const ei_impulse_t *impulse, ei_impulse_result_t *result, + float *data, + size_t output_features_count) { +#ifdef EI_HAS_YOLOX + static std::vector results; + results.clear(); + + // START: def yolox_postprocess() + + // if not p6: + // strides = [8, 16, 32] + // else: + // strides = [8, 16, 32, 64] + const std::vector strides { 8, 16, 32 }; + + // hsizes = [img_size[0] // stride for stride in strides] + // wsizes = [img_size[1] // stride for stride in strides] + std::vector hsizes(strides.size()); + std::vector wsizes(strides.size()); + for (int ix = 0; ix < (int)strides.size(); ix++) { + hsizes[ix] = (int)floor((float)impulse->input_width / (float)strides[ix]); + wsizes[ix] = (int)floor((float)impulse->input_height / (float)strides[ix]); + } + + // for hsize, wsize, stride in zip(hsizes, wsizes, strides): + // grid = np.stack((xv, yv), 2).reshape(1, -1, 2) + // grids.append(grid) + // shape = grid.shape[:2] + // expanded_strides.append(np.full((*shape, 1), stride)) + std::vector grids; + std::vector expanded_strides; + + for (int ix = 0; ix < (int)strides.size(); ix++) { + int hsize = hsizes.at(ix); + int wsize = wsizes.at(ix); + int stride = strides.at(ix); + + // xv, yv = np.meshgrid(np.arange(wsize), np.arange(hsize)) + // grid = np.stack((xv, yv), 2).reshape(1, -1, 2) + matrix_i32_t *grid = new matrix_i32_t(hsize * wsize, 2); + int grid_ix = 0; + for (int h = 0; h < hsize; h++) { + for (int w = 0; w < wsize; w++) { + grid->buffer[grid_ix + 0] = w; + grid->buffer[grid_ix + 1] = h; + grid_ix += 2; + } + } + grids.push_back(grid); + + // shape = grid.shape[:2] + // expanded_strides.append(np.full((*shape, 1), stride)) + matrix_i32_t *expanded_stride = new matrix_i32_t(hsize * wsize, 1); + for (int ix = 0; ix < hsize * wsize; ix++) { + expanded_stride->buffer[ix] = stride; + } + expanded_strides.push_back(expanded_stride); + } + + // grids = np.concatenate(grids, 1) + int total_grid_rows = 0; + for (auto g : grids) { + total_grid_rows += g->rows; + } + matrix_i32_t c_grid(total_grid_rows, 2); + int c_grid_ix = 0; + for (auto g : grids) { + for (int row = 0; row < (int)g->rows; row++) { + c_grid.buffer[c_grid_ix + 0] = g->buffer[(row * 2) + 0]; + c_grid.buffer[c_grid_ix + 1] = g->buffer[(row * 2) + 1]; + c_grid_ix += 2; + } + delete g; + } + + // expanded_strides = np.concatenate(expanded_strides, 1) + int total_stride_rows = 0; + for (auto g : expanded_strides) { + total_stride_rows += g->rows; + } + matrix_i32_t c_expanded_strides(total_stride_rows, 1); + int c_expanded_strides_ix = 0; + for (auto g : expanded_strides) { + for (int row = 0; row < (int)g->rows; row++) { + c_expanded_strides.buffer[c_expanded_strides_ix + 0] = g->buffer[(row * 1) + 0]; + c_expanded_strides_ix += 1; + } + delete g; + } + + const int output_rows = output_features_count / (5 + impulse->label_count); + matrix_t outputs(output_rows, 5 + impulse->label_count, data); + for (int row = 0; row < (int)outputs.rows; row++) { + float v0 = outputs.buffer[(row * outputs.cols) + 0]; + float v1 = outputs.buffer[(row * outputs.cols) + 1]; + float v2 = outputs.buffer[(row * outputs.cols) + 2]; + float v3 = outputs.buffer[(row * outputs.cols) + 3]; + + float cgrid0 = (float)c_grid.buffer[(row * c_grid.cols) + 0]; + float cgrid1 = (float)c_grid.buffer[(row * c_grid.cols) + 1]; + + float stride = (float)c_expanded_strides.buffer[row]; + + // outputs[..., :2] = (outputs[..., :2] + grids) * expanded_strides + outputs.buffer[(row * outputs.cols) + 
0] = (v0 + cgrid0) * stride; + outputs.buffer[(row * outputs.cols) + 1] = (v1 + cgrid1) * stride; + + // outputs[..., 2:4] = np.exp(outputs[..., 2:4]) * expanded_strides + outputs.buffer[(row * outputs.cols) + 2] = exp(v2) * stride; + outputs.buffer[(row * outputs.cols) + 3] = exp(v3) * stride; + } + + // END: def yolox_postprocess() + + // boxes = predictions[:, :4] + matrix_t boxes(outputs.rows, 4); + for (int row = 0; row < (int)outputs.rows; row++) { + boxes.buffer[(row * boxes.cols) + 0] = outputs.buffer[(row * outputs.cols) + 0]; + boxes.buffer[(row * boxes.cols) + 1] = outputs.buffer[(row * outputs.cols) + 1]; + boxes.buffer[(row * boxes.cols) + 2] = outputs.buffer[(row * outputs.cols) + 2]; + boxes.buffer[(row * boxes.cols) + 3] = outputs.buffer[(row * outputs.cols) + 3]; + } + + // scores = predictions[:, 4:5] * predictions[:, 5:] + matrix_t scores(outputs.rows, impulse->label_count); + for (int row = 0; row < (int)outputs.rows; row++) { + float confidence = outputs.buffer[(row * outputs.cols) + 4]; + for (int cc = 0; cc < impulse->label_count; cc++) { + scores.buffer[(row * scores.cols) + cc] = confidence * outputs.buffer[(row * outputs.cols) + (5 + cc)]; + } + } + + // iterate through scores to see if we have anything with confidence + for (int row = 0; row < (int)scores.rows; row++) { + for (int col = 0; col < (int)scores.cols; col++) { + float confidence = scores.buffer[(row * scores.cols) + col]; + + if (confidence >= impulse->object_detection_threshold && confidence <= 1.0f) { + ei_impulse_result_bounding_box_t r; + r.label = impulse->categories[col]; + r.value = confidence; + + // now find the box... + float xcenter = boxes.buffer[(row * boxes.cols) + 0]; + float ycenter = boxes.buffer[(row * boxes.cols) + 1]; + float width = boxes.buffer[(row * boxes.cols) + 2]; + float height = boxes.buffer[(row * boxes.cols) + 3]; + + int x = (int)(xcenter - (width / 2.0f)); + int y = (int)(ycenter - (height / 2.0f)); + + if (x < 0) { + x = 0; + } + if (x > (int)impulse->input_width) { + x = impulse->input_width; + } + if (y < 0) { + y = 0; + } + if (y > (int)impulse->input_height) { + y = impulse->input_height; + } + + r.x = x; + r.y = y; + r.width = (int)round(width); + r.height = (int)round(height); + + results.push_back(r); + } + } + } + + EI_IMPULSE_ERROR nms_res = ei_run_nms(&results); + if (nms_res != EI_IMPULSE_OK) { + return nms_res; + } + + // if we didn't detect min required objects, fill the rest with fixed value + size_t added_boxes_count = results.size(); + size_t min_object_detection_count = impulse->object_detection_count; + if (added_boxes_count < min_object_detection_count) { + results.resize(min_object_detection_count); + for (size_t ix = added_boxes_count; ix < min_object_detection_count; ix++) { + results[ix].value = 0.0f; + } + } + + result->bounding_boxes = results.data(); + result->bounding_boxes_count = results.size(); + + return EI_IMPULSE_OK; +#else + return EI_IMPULSE_LAST_LAYER_NOT_AVAILABLE; +#endif // EI_HAS_YOLOX +} + +/** + * Fill the result structure from an unquantized output tensor + * (we don't support quantized here a.t.m.) 
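+ *
+ * Expects one detection per row, laid out as [xmin, ymin, xmax, ymax, score,
+ * class_index], with coordinates already expressed in input-image pixels.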
+ */
+__attribute__((unused)) static EI_IMPULSE_ERROR fill_result_struct_f32_yolox_detect(const ei_impulse_t *impulse, ei_impulse_result_t *result,
+                                                                                    float *data,
+                                                                                    size_t output_features_count) {
+#ifdef EI_HAS_YOLOX
+    static std::vector<ei_impulse_result_bounding_box_t> results;
+    results.clear();
+
+    // expected format [xmin ymin xmax ymax score label]
+    const int output_rows = output_features_count / 6;
+    matrix_t outputs(output_rows, 6, data);
+
+    // iterate through scores to see if we have anything with confidence
+    for (int row = 0; row < (int)outputs.rows; row++) {
+        float confidence = outputs.buffer[(row * outputs.cols) + 4];
+        int class_idx = (int)outputs.buffer[(row * outputs.cols) + 5];
+
+        if (confidence >= impulse->object_detection_threshold && confidence <= 1.0f) {
+            ei_impulse_result_bounding_box_t r;
+            r.label = ei_classifier_inferencing_categories[class_idx];
+            r.value = confidence;
+
+            // now find the box...
+            float xmin = outputs.buffer[(row * outputs.cols) + 0];
+            float ymin = outputs.buffer[(row * outputs.cols) + 1];
+            float xmax = outputs.buffer[(row * outputs.cols) + 2];
+            float ymax = outputs.buffer[(row * outputs.cols) + 3];
+
+            float width = xmax - xmin;
+            float height = ymax - ymin;
+
+            int x = (int)xmin;
+            int y = (int)ymin;
+
+            if (x < 0) {
+                x = 0;
+            }
+            if (x > (int)impulse->input_width) {
+                x = impulse->input_width;
+            }
+            if (y < 0) {
+                y = 0;
+            }
+            if (y > (int)impulse->input_height) {
+                y = impulse->input_height;
+            }
+
+            r.x = x;
+            r.y = y;
+            r.width = (int)round(width);
+            r.height = (int)round(height);
+
+            results.push_back(r);
+        }
+    }
+
+    result->bounding_boxes = results.data();
+    result->bounding_boxes_count = results.size();
+
+    return EI_IMPULSE_OK;
+#else
+    return EI_IMPULSE_LAST_LAYER_NOT_AVAILABLE;
+#endif // EI_HAS_YOLOX
+}
+
+/**
+ * Fill the result structure from an unquantized output tensor
+ * (we don't support quantized here a.t.m.)
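+ *
+ * Expects one detection per row, laid out as [batch_id, xmin, ymin, xmax,
+ * ymax, class_id, score], with box coordinates in input-image pixels; no
+ * additional NMS pass is run here.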
+ */ +__attribute__((unused)) static EI_IMPULSE_ERROR fill_result_struct_f32_yolov7(const ei_impulse_t *impulse, ei_impulse_result_t *result, + float *data, + size_t output_features_count) { +#ifdef EI_HAS_YOLOV7 + static std::vector results; + results.clear(); + + size_t col_size = 7; + size_t row_count = output_features_count / col_size; + + // output is: + // batch_id, xmin, ymin, xmax, ymax, cls_id, score + for (size_t ix = 0; ix < row_count; ix++) { + size_t base_ix = ix * col_size; + float xmin = data[base_ix + 1]; + float ymin = data[base_ix + 2]; + float xmax = data[base_ix + 3]; + float ymax = data[base_ix + 4]; + uint32_t label = (uint32_t)data[base_ix + 5]; + float score = data[base_ix + 6]; + + if (score >= impulse->object_detection_threshold && score <= 1.0f) { + ei_impulse_result_bounding_box_t r; + r.label = ei_classifier_inferencing_categories[label]; + + r.x = static_cast(xmin); + r.y = static_cast(ymin); + r.width = static_cast(xmax - xmin); + r.height = static_cast(ymax - ymin); + r.value = score; + results.push_back(r); + } + } + + // if we didn't detect min required objects, fill the rest with fixed value + size_t added_boxes_count = results.size(); + size_t min_object_detection_count = impulse->object_detection_count; + if (added_boxes_count < min_object_detection_count) { + results.resize(min_object_detection_count); + for (size_t ix = added_boxes_count; ix < min_object_detection_count; ix++) { + results[ix].value = 0.0f; + } + } + + result->bounding_boxes = results.data(); + result->bounding_boxes_count = results.size(); + + return EI_IMPULSE_OK; +#else + return EI_IMPULSE_LAST_LAYER_NOT_AVAILABLE; +#endif // #ifdef EI_HAS_YOLOV7 +} + +#if EI_CLASSIFIER_SINGLE_FEATURE_INPUT == 0 +bool find_mtx_by_idx(ei_feature_t* mtx, ei::matrix_t** matrix, uint32_t mtx_id, size_t mtx_size) { + for (size_t i = 0; i < mtx_size; i++) { + if (&mtx[i] == NULL) { + continue; + } + if (mtx[i].blockId == mtx_id || mtx[i].blockId == 0) { + *matrix = mtx[i].matrix; + return true; + } + } + return false; +} +#endif + +#endif // _EI_CLASSIFIER_FILL_RESULT_STRUCT_H_ diff --git a/edgeimpulse/edge-impulse-sdk/classifier/ei_model_types.h b/edgeimpulse/edge-impulse-sdk/classifier/ei_model_types.h new file mode 100644 index 0000000..dd9156c --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/classifier/ei_model_types.h @@ -0,0 +1,242 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef _EDGE_IMPULSE_MODEL_TYPES_H_ +#define _EDGE_IMPULSE_MODEL_TYPES_H_ + +#include + +#include "edge-impulse-sdk/classifier/ei_classifier_types.h" +#include "edge-impulse-sdk/dsp/numpy.hpp" +#if EI_CLASSIFIER_USE_FULL_TFLITE || (EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_AKIDA) || (EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_MEMRYX) +#include "tensorflow-lite/tensorflow/lite/c/common.h" +#else +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#endif // EI_CLASSIFIER_USE_FULL_TFLITE + +#define EI_CLASSIFIER_NONE 255 +#define EI_CLASSIFIER_UTENSOR 1 +#define EI_CLASSIFIER_TFLITE 2 +#define EI_CLASSIFIER_CUBEAI 3 +#define EI_CLASSIFIER_TFLITE_FULL 4 +#define EI_CLASSIFIER_TENSAIFLOW 5 +#define EI_CLASSIFIER_TENSORRT 6 +#define EI_CLASSIFIER_DRPAI 7 +#define EI_CLASSIFIER_TFLITE_TIDL 8 +#define EI_CLASSIFIER_AKIDA 9 +#define EI_CLASSIFIER_SYNTIANT 10 +#define EI_CLASSIFIER_ONNX_TIDL 11 +#define EI_CLASSIFIER_MEMRYX 12 + +#define EI_CLASSIFIER_SENSOR_UNKNOWN -1 +#define EI_CLASSIFIER_SENSOR_MICROPHONE 1 +#define EI_CLASSIFIER_SENSOR_ACCELEROMETER 2 +#define EI_CLASSIFIER_SENSOR_CAMERA 3 +#define EI_CLASSIFIER_SENSOR_9DOF 4 +#define EI_CLASSIFIER_SENSOR_ENVIRONMENTAL 5 +#define EI_CLASSIFIER_SENSOR_FUSION 6 + +// These must match the enum values in TensorFlow Lite's "TfLiteType" +#define EI_CLASSIFIER_DATATYPE_FLOAT32 1 +#define EI_CLASSIFIER_DATATYPE_INT8 9 + +#define EI_CLASSIFIER_LAST_LAYER_UNKNOWN -1 +#define EI_CLASSIFIER_LAST_LAYER_SSD 1 +#define EI_CLASSIFIER_LAST_LAYER_FOMO 2 +#define EI_CLASSIFIER_LAST_LAYER_YOLOV5 3 +#define EI_CLASSIFIER_LAST_LAYER_YOLOX 4 +#define EI_CLASSIFIER_LAST_LAYER_YOLOV5_V5_DRPAI 5 +#define EI_CLASSIFIER_LAST_LAYER_YOLOV7 6 + +#define EI_CLASSIFIER_IMAGE_SCALING_NONE 0 +#define EI_CLASSIFIER_IMAGE_SCALING_0_255 1 +#define EI_CLASSIFIER_IMAGE_SCALING_TORCH 2 +#define EI_CLASSIFIER_IMAGE_SCALING_MIN1_1 3 + +struct ei_impulse; + +typedef struct { + ei::matrix_t* matrix; + uint32_t blockId; +} ei_feature_t; + +typedef struct { + uint16_t implementation_version; + bool is_configured; + uint32_t average_window_duration_ms; + float detection_threshold; + uint32_t suppression_ms; + uint32_t suppression_flags; +} ei_model_performance_calibration_t; + +typedef struct { + uint32_t blockId; + size_t n_output_features; + int (*extract_fn)(ei::signal_t *signal, ei::matrix_t *output_matrix, void *config, const float frequency); + void *config; + uint8_t *axes; + size_t axes_size; +} ei_model_dsp_t; + +typedef struct { + float *centroid; + float max_error; +} ei_classifier_anom_cluster_t; + +typedef struct { + uint32_t blockId; + bool keep_output; + EI_IMPULSE_ERROR (*infer_fn)(const ei_impulse *impulse, ei_feature_t *fmatrix, uint32_t* input_block_ids, uint32_t input_block_ids_size, ei_impulse_result_t *result, void *config, bool debug); + void *config; + int image_scaling; + const uint32_t* input_block_ids; + const uint32_t input_block_ids_size; + uint32_t output_features_count; +} ei_learning_block_t; + +typedef struct { + uint16_t implementation_version; + uint8_t input_datatype; + bool input_quantized; + float input_scale; + float input_zeropoint; + uint8_t output_datatype; + bool output_quantized; + float output_scale; + float output_zeropoint; +} ei_config_tensaiflow_graph_t; + +typedef struct { + uint16_t implementation_version; + const unsigned char *model; + size_t model_size; + size_t arena_size; +} ei_config_tflite_graph_t; + +typedef struct { + uint16_t implementation_version; + 
TfLiteStatus (*model_init)(void*(*alloc_fnc)(size_t, size_t)); + TfLiteStatus (*model_invoke)(); + TfLiteStatus (*model_reset)(void (*free)(void* ptr)); + TfLiteStatus (*model_input)(int, TfLiteTensor*); + TfLiteStatus (*model_output)(int, TfLiteTensor*); +} ei_config_tflite_eon_graph_t; + +typedef struct { + uint16_t implementation_version; + uint32_t block_id; + /* object detection */ + bool object_detection; + int8_t object_detection_last_layer; + uint8_t output_data_tensor; + uint8_t output_labels_tensor; + uint8_t output_score_tensor; + /* tflite graph params */ + bool quantized; + bool compiled; + /* tflite graph config pointer */ + void *graph_config; +} ei_learning_block_config_tflite_graph_t; + +typedef struct { + uint16_t implementation_version; + const uint16_t *anom_axis; + uint16_t anom_axes_size; + const ei_classifier_anom_cluster_t *anom_clusters; + uint16_t anom_cluster_count; + const float *anom_scale; + const float *anom_mean; +} ei_learning_block_config_anomaly_kmeans_t; + +typedef struct { + uint16_t implementation_version; + const uint16_t *anom_axis; + uint16_t anom_axes_size; + float anomaly_threshold; + bool visual; + void* graph_config; +} ei_learning_block_config_anomaly_gmm_t; + +typedef struct ei_impulse { + /* project details */ + uint32_t project_id; + const char *project_owner; + const char *project_name; + uint32_t deploy_version; + + /* DSP details */ + uint32_t nn_input_frame_size; + uint32_t raw_sample_count; + uint32_t raw_samples_per_frame; + uint32_t dsp_input_frame_size; + uint32_t input_width; + uint32_t input_height; + uint32_t input_frames; + float interval_ms; + float frequency; + size_t dsp_blocks_size; + ei_model_dsp_t *dsp_blocks; + + /* object detection */ + bool object_detection; + uint16_t object_detection_count; + float object_detection_threshold; + int8_t object_detection_last_layer; + uint32_t fomo_output_size; + uint32_t tflite_output_features_count; + + /* learning blocks */ + const size_t learning_blocks_size; + const ei_learning_block_t *learning_blocks; + + /* inference parameters */ + uint32_t inferencing_engine; + + /* sensors and on-device inference */ + uint32_t sensor; + const char *fusion_string; + uint32_t slice_size; + uint32_t slices_per_model_window; + + /* output details */ + uint16_t has_anomaly; + uint16_t label_count; + const ei_model_performance_calibration_t calibration; + const char **categories; +} ei_impulse_t; + +typedef struct { + uint32_t block_id; + uint16_t implementation_version; + int axes; + const unsigned char *model; + size_t model_size; + size_t arena_size; +} ei_dsp_config_tflite_t; + +typedef struct { + uint32_t block_id; + uint16_t implementation_version; + int axes; + TfLiteStatus (*init_fn)(void*(*alloc_fnc)(size_t, size_t)); + TfLiteStatus (*invoke_fn)(); + TfLiteStatus (*reset_fn)(void (*free)(void* ptr)); + TfLiteStatus (*input_fn)(int, TfLiteTensor*); + TfLiteStatus (*output_fn)(int, TfLiteTensor*); +} ei_dsp_config_tflite_eon_t; + +#endif // _EDGE_IMPULSE_MODEL_TYPES_H_ diff --git a/edgeimpulse/edge-impulse-sdk/classifier/ei_nms.h b/edgeimpulse/edge-impulse-sdk/classifier/ei_nms.h new file mode 100644 index 0000000..5f6a4aa --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/classifier/ei_nms.h @@ -0,0 +1,300 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an "AS
+ * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language
+ * governing permissions and limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#ifndef _EDGE_IMPULSE_NMS_H_
+#define _EDGE_IMPULSE_NMS_H_
+
+#include "model-parameters/model_metadata.h"
+#if EI_CLASSIFIER_HAS_MODEL_VARIABLES == 1
+#include "model-parameters/model_variables.h"
+#endif
+#include "edge-impulse-sdk/classifier/ei_model_types.h"
+#include "edge-impulse-sdk/classifier/ei_classifier_types.h"
+#include "edge-impulse-sdk/porting/ei_classifier_porting.h"
+
+#if (EI_CLASSIFIER_OBJECT_DETECTION_LAST_LAYER == EI_CLASSIFIER_LAST_LAYER_YOLOV5) || (EI_CLASSIFIER_OBJECT_DETECTION_LAST_LAYER == EI_CLASSIFIER_LAST_LAYER_YOLOV5_V5_DRPAI) || (EI_CLASSIFIER_OBJECT_DETECTION_LAST_LAYER == EI_CLASSIFIER_LAST_LAYER_YOLOX)
+
+// The code below comes from tensorflow/lite/kernels/internal/reference/non_max_suppression.h
+// Copyright 2019 The TensorFlow Authors. All rights reserved.
+// Licensed under the Apache License, Version 2.0
+#include <algorithm>
+#include <cmath>
+#include <deque>
+#include <queue>
+
+// A pair of diagonal corners of the box.
+struct BoxCornerEncoding {
+    float y1;
+    float x1;
+    float y2;
+    float x2;
+};
+
+static inline float ComputeIntersectionOverUnion(const float* boxes, const int i,
+                                                 const int j) {
+    auto& box_i = reinterpret_cast<const BoxCornerEncoding*>(boxes)[i];
+    auto& box_j = reinterpret_cast<const BoxCornerEncoding*>(boxes)[j];
+    const float box_i_y_min = std::min(box_i.y1, box_i.y2);
+    const float box_i_y_max = std::max(box_i.y1, box_i.y2);
+    const float box_i_x_min = std::min(box_i.x1, box_i.x2);
+    const float box_i_x_max = std::max(box_i.x1, box_i.x2);
+    const float box_j_y_min = std::min(box_j.y1, box_j.y2);
+    const float box_j_y_max = std::max(box_j.y1, box_j.y2);
+    const float box_j_x_min = std::min(box_j.x1, box_j.x2);
+    const float box_j_x_max = std::max(box_j.x1, box_j.x2);
+
+    const float area_i =
+        (box_i_y_max - box_i_y_min) * (box_i_x_max - box_i_x_min);
+    const float area_j =
+        (box_j_y_max - box_j_y_min) * (box_j_x_max - box_j_x_min);
+    if (area_i <= 0 || area_j <= 0) return 0.0;
+    const float intersection_ymax = std::min(box_i_y_max, box_j_y_max);
+    const float intersection_xmax = std::min(box_i_x_max, box_j_x_max);
+    const float intersection_ymin = std::max(box_i_y_min, box_j_y_min);
+    const float intersection_xmin = std::max(box_i_x_min, box_j_x_min);
+    const float intersection_area =
+        std::max<float>(intersection_ymax - intersection_ymin, 0.0) *
+        std::max<float>(intersection_xmax - intersection_xmin, 0.0);
+    return intersection_area / (area_i + area_j - intersection_area);
+}
+
+// Implements (Single-Class) Soft NMS (with Gaussian weighting).
+// Supports functionality of TensorFlow ops NonMaxSuppressionV4 & V5.
+// Reference: "Soft-NMS - Improving Object Detection With One Line of Code"
+//            [Bodla et al, https://arxiv.org/abs/1704.04503]
+// Implementation adapted from the TensorFlow NMS code at
+// tensorflow/core/kernels/non_max_suppression_op.cc.
+//
+// Arguments:
+//  boxes: box encodings in format [y1, x1, y2, x2], shape: [num_boxes, 4]
+//  num_boxes: number of candidates
+//  scores: scores for candidate boxes, in the same order. shape: [num_boxes]
+//  max_output_size: the maximum number of selections.
+// iou_threshold: Intersection-over-Union (IoU) threshold for NMS +// score_threshold: All candidate scores below this value are rejected +// soft_nms_sigma: Soft NMS parameter, used for decaying scores +// +// Outputs: +// selected_indices: all the selected indices. Underlying array must have +// length >= max_output_size. Cannot be null. +// selected_scores: scores of selected indices. Defer from original value for +// Soft NMS. If not null, array must have length >= max_output_size. +// num_selected_indices: Number of selections. Only these many elements are +// set in selected_indices, selected_scores. Cannot be null. +// +// Assumes inputs are valid (for eg, iou_threshold must be >= 0). +static inline void NonMaxSuppression(const float* boxes, const int num_boxes, + const float* scores, const int max_output_size, + const float iou_threshold, + const float score_threshold, + const float soft_nms_sigma, int* selected_indices, + float* selected_scores, + int* num_selected_indices) { + struct Candidate { + int index; + float score; + int suppress_begin_index; + }; + + // Priority queue to hold candidates. + auto cmp = [](const Candidate bs_i, const Candidate bs_j) { + return bs_i.score < bs_j.score; + }; + std::priority_queue, decltype(cmp)> + candidate_priority_queue(cmp); + // Populate queue with candidates above the score threshold. + for (int i = 0; i < num_boxes; ++i) { + if (scores[i] > score_threshold) { + candidate_priority_queue.emplace(Candidate({i, scores[i], 0})); + } + } + + *num_selected_indices = 0; + int num_outputs = std::min(static_cast(candidate_priority_queue.size()), + max_output_size); + if (num_outputs == 0) return; + + // NMS loop. + float scale = 0; + if (soft_nms_sigma > 0.0) { + scale = -0.5 / soft_nms_sigma; + } + while (*num_selected_indices < num_outputs && + !candidate_priority_queue.empty()) { + Candidate next_candidate = candidate_priority_queue.top(); + const float original_score = next_candidate.score; + candidate_priority_queue.pop(); + + // Overlapping boxes are likely to have similar scores, therefore we + // iterate through the previously selected boxes backwards in order to + // see if `next_candidate` should be suppressed. We also enforce a property + // that a candidate can be suppressed by another candidate no more than + // once via `suppress_begin_index` which tracks which previously selected + // boxes have already been compared against next_candidate prior to a given + // iteration. These previous selected boxes are then skipped over in the + // following loop. + bool should_hard_suppress = false; + for (int j = *num_selected_indices - 1; + j >= next_candidate.suppress_begin_index; --j) { + const float iou = ComputeIntersectionOverUnion( + boxes, next_candidate.index, selected_indices[j]); + + // First decide whether to perform hard suppression. + if (iou >= iou_threshold) { + should_hard_suppress = true; + break; + } + + // Suppress score if NMS sigma > 0. + if (soft_nms_sigma > 0.0) { + next_candidate.score = + next_candidate.score * std::exp(scale * iou * iou); + } + + // If score has fallen below score_threshold, it won't be pushed back into + // the queue. + if (next_candidate.score <= score_threshold) break; + } + // If `next_candidate.score` has not dropped below `score_threshold` + // by this point, then we know that we went through all of the previous + // selections and can safely update `suppress_begin_index` to + // `selected.size()`. 
If on the other hand `next_candidate.score` + // *has* dropped below the score threshold, then since `suppress_weight` + // always returns values in [0, 1], further suppression by items that were + // not covered in the above for loop would not have caused the algorithm + // to select this item. We thus do the same update to + // `suppress_begin_index`, but really, this element will not be added back + // into the priority queue. + next_candidate.suppress_begin_index = *num_selected_indices; + + if (!should_hard_suppress) { + if (next_candidate.score == original_score) { + // Suppression has not occurred, so select next_candidate. + selected_indices[*num_selected_indices] = next_candidate.index; + if (selected_scores) { + selected_scores[*num_selected_indices] = next_candidate.score; + } + ++*num_selected_indices; + } + if (next_candidate.score > score_threshold) { + // Soft suppression might have occurred and current score is still + // greater than score_threshold; add next_candidate back onto priority + // queue. + candidate_priority_queue.push(next_candidate); + } + } + } +} + +/** + * Run non-max suppression over the results array (for bounding boxes) + */ +EI_IMPULSE_ERROR ei_run_nms(std::vector *results) { + + size_t bb_count = 0; + for (size_t ix = 0; ix < results->size(); ix++) { + auto bb = results->at(ix); + if (bb.value == 0) { + continue; + } + bb_count++; + } + + float *boxes = (float*)malloc(4 * bb_count * sizeof(float)); + float *scores = (float*)malloc(1 * bb_count * sizeof(float)); + int *selected_indices = (int*)malloc(1 * bb_count * sizeof(int)); + float *selected_scores = (float*)malloc(1 * bb_count * sizeof(float)); + + if (!scores || !boxes || !selected_indices || !selected_scores) { + free(boxes); + free(scores); + free(selected_indices); + free(selected_scores); + return EI_IMPULSE_OUT_OF_MEMORY; + } + + size_t box_ix = 0; + for (size_t ix = 0; ix < results->size(); ix++) { + auto bb = results->at(ix); + if (bb.value == 0) { + continue; + } + boxes[(box_ix * 4) + 0] = bb.y; + boxes[(box_ix * 4) + 1] = bb.x; + boxes[(box_ix * 4) + 2] = bb.y + bb.height; + boxes[(box_ix * 4) + 3] = bb.x + bb.width; + scores[box_ix] = bb.value; + + box_ix++; + } + + // boxes: box encodings in format [y1, x1, y2, x2], shape: [num_boxes, 4] + // num_boxes: number of candidates + // scores: scores for candidate boxes, in the same order. shape: [num_boxes] + // max_output_size: the maximum number of selections. 
+ // iou_threshold: Intersection-over-Union (IoU) threshold for NMS + // score_threshold: All candidate scores below this value are rejected + // soft_nms_sigma: Soft NMS parameter, used for decaying scores + + int num_selected_indices; + + NonMaxSuppression( + (const float*)boxes, // boxes + bb_count, // num_boxes + (const float*)scores, // scores + bb_count, // max_output_size + 0.2f, // iou_threshold + 0.0f, // score_threshold + 0.0f, // soft_nms_sigma + selected_indices, + selected_scores, + &num_selected_indices); + + std::vector new_results; + + for (size_t ix = 0; ix < (size_t)num_selected_indices; ix++) { + auto bb = results->at(selected_indices[ix]); + + printf("Found bb with label %s\n", bb.label); + + ei_impulse_result_bounding_box_t r; + r.label = bb.label; + r.x = bb.x; + r.y = bb.y; + r.width = bb.width; + r.height = bb.height; + r.value = selected_scores[ix]; + new_results.push_back(r); + } + + results->clear(); + + for (size_t ix = 0; ix < new_results.size(); ix++) { + results->push_back(new_results[ix]); + } + + free(boxes); + free(scores); + free(selected_indices); + free(selected_scores); + + return EI_IMPULSE_OK; +} + +#endif // #if (EI_CLASSIFIER_OBJECT_DETECTION_LAST_LAYER == EI_CLASSIFIER_LAST_LAYER_YOLOV5) || (EI_CLASSIFIER_OBJECT_DETECTION_LAST_LAYER == EI_CLASSIFIER_LAST_LAYER_YOLOV5_V5_DRPAI) || (EI_CLASSIFIER_OBJECT_DETECTION_LAST_LAYER == EI_CLASSIFIER_LAST_LAYER_YOLOX) + +#endif // _EDGE_IMPULSE_NMS_H_ diff --git a/edgeimpulse/edge-impulse-sdk/classifier/ei_performance_calibration.h b/edgeimpulse/edge-impulse-sdk/classifier/ei_performance_calibration.h new file mode 100644 index 0000000..a14c1e5 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/classifier/ei_performance_calibration.h @@ -0,0 +1,199 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef EI_PERFORMANCE_CALIBRATION_H +#define EI_PERFORMANCE_CALIBRATION_H + +/* Includes ---------------------------------------------------------------- */ +#include "edge-impulse-sdk/dsp/numpy_types.h" +#include "edge-impulse-sdk/dsp/returntypes.hpp" +#include "ei_model_types.h" + +/* Private const types ----------------------------------------------------- */ +#define MEM_ERROR "ERR: Failed to allocate memory for performance calibration\r\n" + +#define EI_PC_RET_NO_EVENT_DETECTED -1 +#define EI_PC_RET_MEMORY_ERROR -2 + +class RecognizeEvents { + +public: + RecognizeEvents( + const ei_model_performance_calibration_t *config, + uint32_t n_labels, + uint32_t sample_length, + float sample_interval_ms) + { + this->_score_array = nullptr; + this->_running_sum = nullptr; + this->_detection_threshold = config->detection_threshold; + this->_suppression_flags = config->suppression_flags; + this->_should_boost = config->is_configured; + this->_n_labels = n_labels; + + /* Determine sample length in ms */ + float sample_length_ms = (static_cast(sample_length) * sample_interval_ms); + + /* Calculate number of inference runs needed for the duration window */ + this->_average_window_duration_samples = + (config->average_window_duration_ms < static_cast(sample_length_ms)) + ? 1 + : static_cast(static_cast(config->average_window_duration_ms) / sample_length_ms); + + /* Calculate number of inference runs for suppression */ + this->_suppression_samples = (config->suppression_ms < static_cast(sample_length_ms)) + ? 0 + : static_cast(static_cast(config->suppression_ms) / sample_length_ms); + + /* Detection threshold should be high enough to only classifiy 1 possibly output */ + if (this->_detection_threshold <= (1.f / this->_n_labels)) { + ei_printf("ERR: Classifier detection threshold too low\r\n"); + return; + } + + /* Array to store scores for all labels */ + this->_score_array = (float *)ei_malloc( + this->_average_window_duration_samples * this->_n_labels * sizeof(float)); + + if (this->_score_array == NULL) { + ei_printf(MEM_ERROR); + return; + } + + for (uint32_t i = 0; i < this->_average_window_duration_samples * this->_n_labels; i++) { + this->_score_array[i] = 0.f; + } + this->_score_idx = 0; + + /* Running sum for all labels */ + this->_running_sum = (float *)ei_malloc(this->_n_labels * sizeof(float)); + + if (this->_running_sum != NULL) { + for (uint32_t i = 0; i < this->_n_labels; i++) { + this->_running_sum[i] = 0.f; + } + } + else { + ei_printf(MEM_ERROR); + return; + } + + this->_suppression_count = this->_suppression_samples; + this->_n_scores_in_array = 0; + } + + ~RecognizeEvents() + { + if (this->_score_array) { + ei_free((void *)this->_score_array); + } + if (this->_running_sum) { + ei_free((void *)this->_running_sum); + } + } + + bool should_boost() + { + return this->_should_boost; + } + + int32_t trigger(ei_impulse_result_classification_t *scores) + { + int32_t recognized_event = EI_PC_RET_NO_EVENT_DETECTED; + float current_top_score = 0.f; + uint32_t current_top_index = 0; + + /* Check pointers */ + if (this->_score_array == NULL || this->_running_sum == NULL) { + return EI_PC_RET_MEMORY_ERROR; + } + + /* Update the score array and running sum */ + for (uint32_t i = 0; i < this->_n_labels; i++) { + this->_running_sum[i] -= this->_score_array[(this->_score_idx * this->_n_labels) + i]; + this->_running_sum[i] += scores[i].value; + this->_score_array[(this->_score_idx * this->_n_labels) + i] = scores[i].value; + } + + if 
(++this->_score_idx >= this->_average_window_duration_samples) { + this->_score_idx = 0; + } + + /* Number of samples to average, increases until the buffer is full */ + if (this->_n_scores_in_array < this->_average_window_duration_samples) { + this->_n_scores_in_array++; + } + + /* Average data and place in scores & determine top score */ + for (uint32_t i = 0; i < this->_n_labels; i++) { + scores[i].value = this->_running_sum[i] / this->_n_scores_in_array; + + if (scores[i].value > current_top_score) { + if(this->_suppression_flags == 0) { + current_top_score = scores[i].value; + current_top_index = i; + } + else if(this->_suppression_flags & (1 << i)) { + current_top_score = scores[i].value; + current_top_index = i; + } + } + } + + /* Check threshold, suppression */ + if (this->_suppression_samples && this->_suppression_count < this->_suppression_samples) { + this->_suppression_count++; + } + else { + if (current_top_score >= this->_detection_threshold) { + recognized_event = current_top_index; + + if (this->_suppression_flags & (1 << current_top_index)) { + this->_suppression_count = 0; + } + } + } + + return recognized_event; + }; + + void *operator new(size_t size) + { + void *p = ei_malloc(size); + return p; + } + + void operator delete(void *p) + { + ei_free(p); + } + +private: + uint32_t _average_window_duration_samples; + float _detection_threshold; + bool _should_boost; + uint32_t _suppression_samples; + uint32_t _suppression_count; + uint32_t _suppression_flags; + uint32_t _n_labels; + float *_score_array; + uint32_t _score_idx; + float *_running_sum; + uint32_t _n_scores_in_array; +}; + +#endif //EI_PERFORMANCE_CALIBRATION diff --git a/edgeimpulse/edge-impulse-sdk/classifier/ei_quantize.h b/edgeimpulse/edge-impulse-sdk/classifier/ei_quantize.h new file mode 100644 index 0000000..727d920 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/classifier/ei_quantize.h @@ -0,0 +1,37 @@ +/* Edge Impulse inferencing library + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __EI_QUANTIZE__H__ +#define __EI_QUANTIZE__H__ + +#include +#include + +static int32_t pre_cast_quantize(float value, float scale, int32_t zero_point, bool is_signed) { + + int32_t max_value = is_signed ? 127 : 255; + int32_t min_value = is_signed ? 
-128 : 0; + // Saturate/clip any overflows post scaling + return std::min( std::max( static_cast(round(value / scale)) + zero_point, min_value), max_value); +} + +#endif //!__EI_QUANTIZE__H__ \ No newline at end of file diff --git a/edgeimpulse/edge-impulse-sdk/classifier/ei_run_classifier.h b/edgeimpulse/edge-impulse-sdk/classifier/ei_run_classifier.h new file mode 100644 index 0000000..7c8ad69 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/classifier/ei_run_classifier.h @@ -0,0 +1,816 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef _EDGE_IMPULSE_RUN_CLASSIFIER_H_ +#define _EDGE_IMPULSE_RUN_CLASSIFIER_H_ + +#include "model-parameters/model_metadata.h" + +#include "ei_run_dsp.h" +#include "ei_classifier_types.h" +#include "ei_signal_with_axes.h" +#include "ei_performance_calibration.h" + +#include "edge-impulse-sdk/porting/ei_classifier_porting.h" + +#if EI_CLASSIFIER_HAS_ANOMALY +#include "inferencing_engines/anomaly.h" +#endif + +#if defined(EI_CLASSIFIER_HAS_SAMPLER) && EI_CLASSIFIER_HAS_SAMPLER == 1 +#include "ei_sampler.h" +#endif + +#if (EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_TFLITE) && (EI_CLASSIFIER_COMPILED != 1) +#include "edge-impulse-sdk/classifier/inferencing_engines/tflite_micro.h" +#elif EI_CLASSIFIER_COMPILED == 1 +#include "edge-impulse-sdk/classifier/inferencing_engines/tflite_eon.h" +#elif EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_TFLITE_FULL +#include "edge-impulse-sdk/classifier/inferencing_engines/tflite_full.h" +#elif EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_TFLITE_TIDL +#include "edge-impulse-sdk/classifier/inferencing_engines/tflite_tidl.h" +#elif (EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_TENSORRT) +#include "edge-impulse-sdk/classifier/inferencing_engines/tensorrt.h" +#elif EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_TENSAIFLOW +#include "edge-impulse-sdk/classifier/inferencing_engines/tensaiflow.h" +#elif EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_DRPAI +#include "edge-impulse-sdk/classifier/inferencing_engines/drpai.h" +#elif EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_AKIDA +#include "edge-impulse-sdk/classifier/inferencing_engines/akida.h" +#elif EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_ONNX_TIDL +#include "edge-impulse-sdk/classifier/inferencing_engines/onnx_tidl.h" +#elif EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_MEMRYX +#include "edge-impulse-sdk/classifier/inferencing_engines/memryx.h" +#elif EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_NONE +// noop +#else +#error "Unknown inferencing engine" +#endif + +#include "model-parameters/model_variables.h" + +#ifdef __cplusplus +namespace { +#endif // __cplusplus + +/* Function prototypes ----------------------------------------------------- */ +extern "C" EI_IMPULSE_ERROR run_inference(const ei_impulse_t *impulse, ei_feature_t *fmatrix, ei_impulse_result_t *result, bool debug); +extern "C" EI_IMPULSE_ERROR 
run_classifier_image_quantized(const ei_impulse_t *impulse, signal_t *signal, ei_impulse_result_t *result, bool debug); +static EI_IMPULSE_ERROR can_run_classifier_image_quantized(const ei_impulse_t *impulse, ei_learning_block_t block_ptr); + +/* Private variables ------------------------------------------------------- */ + +static uint64_t classifier_continuous_features_written = 0; +static RecognizeEvents *avg_scores = NULL; + +/* Private functions ------------------------------------------------------- */ + +/* These functions (up to Public functions section) are not exposed to end-user, +therefore changes are allowed. */ + +#if EI_CLASSIFIER_LOAD_IMAGE_SCALING +static const float torch_mean[] = { 0.485, 0.456, 0.406 }; +static const float torch_std[] = { 0.229, 0.224, 0.225 }; + +static EI_IMPULSE_ERROR scale_fmatrix(ei_learning_block_t *block, ei::matrix_t *fmatrix) { + if (block->image_scaling == EI_CLASSIFIER_IMAGE_SCALING_TORCH) { + // @todo; could we write some faster vector math here? + for (size_t ix = 0; ix < fmatrix->rows * fmatrix->cols; ix += 3) { + fmatrix->buffer[ix + 0] = (fmatrix->buffer[ix + 0] - torch_mean[0]) / torch_std[0]; + fmatrix->buffer[ix + 1] = (fmatrix->buffer[ix + 1] - torch_mean[1]) / torch_std[1]; + fmatrix->buffer[ix + 2] = (fmatrix->buffer[ix + 2] - torch_mean[2]) / torch_std[2]; + } + } + else if (block->image_scaling == EI_CLASSIFIER_IMAGE_SCALING_0_255) { + int scale_res = numpy::scale(fmatrix, 255.0f); + if (scale_res != EIDSP_OK) { + ei_printf("ERR: Failed to scale matrix (%d)\n", scale_res); + return EI_IMPULSE_DSP_ERROR; + } + } + else if (block->image_scaling == EI_CLASSIFIER_IMAGE_SCALING_MIN1_1) { + int scale_res = numpy::scale(fmatrix, 2.0f); + if (scale_res != EIDSP_OK) { + ei_printf("ERR: Failed to scale matrix (%d)\n", scale_res); + return EI_IMPULSE_DSP_ERROR; + } + scale_res = numpy::subtract(fmatrix, 1.0f); + if (scale_res != EIDSP_OK) { + ei_printf("ERR: Failed to scale matrix (%d)\n", scale_res); + return EI_IMPULSE_DSP_ERROR; + } + } + + return EI_IMPULSE_OK; +} + +static EI_IMPULSE_ERROR unscale_fmatrix(ei_learning_block_t *block, ei::matrix_t *fmatrix) { + if (block->image_scaling == EI_CLASSIFIER_IMAGE_SCALING_TORCH) { + // @todo; could we write some faster vector math here? 
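+        // Undo the per-channel torch normalization applied in scale_fmatrix():
+        // pixel = (value * std) + mean for each of the three channels.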
+ for (size_t ix = 0; ix < fmatrix->rows * fmatrix->cols; ix += 3) { + fmatrix->buffer[ix + 0] = (fmatrix->buffer[ix + 0] * torch_std[0]) + torch_mean[0]; + fmatrix->buffer[ix + 1] = (fmatrix->buffer[ix + 1] * torch_std[1]) + torch_mean[1]; + fmatrix->buffer[ix + 2] = (fmatrix->buffer[ix + 2] * torch_std[2]) + torch_mean[2]; + } + } + else if (block->image_scaling == EI_CLASSIFIER_IMAGE_SCALING_0_255) { + int scale_res = numpy::scale(fmatrix, 1 / 255.0f); + if (scale_res != EIDSP_OK) { + ei_printf("ERR: Failed to scale matrix (%d)\n", scale_res); + return EI_IMPULSE_DSP_ERROR; + } + } + return EI_IMPULSE_OK; +} +#endif + + +/** + * @brief Display the results of the inference + * + * @param result The result + */ +__attribute__((unused)) void display_results(ei_impulse_result_t* result) +{ + // print the predictions + ei_printf("Predictions (DSP: %d ms., Classification: %d ms., Anomaly: %d ms.): \n", + result->timing.dsp, result->timing.classification, result->timing.anomaly); +#if EI_CLASSIFIER_OBJECT_DETECTION == 1 + bool bb_found = result->bounding_boxes[0].value > 0; + for (size_t ix = 0; ix < result->bounding_boxes_count; ix++) { + auto bb = result->bounding_boxes[ix]; + if (bb.value == 0) { + continue; + } + ei_printf(" %s (", bb.label); + ei_printf_float(bb.value); + ei_printf(") [ x: %u, y: %u, width: %u, height: %u ]\n", bb.x, bb.y, bb.width, bb.height); + } + + if (!bb_found) { + ei_printf(" No objects found\n"); + } +#else + for (size_t ix = 0; ix < EI_CLASSIFIER_LABEL_COUNT; ix++) { + ei_printf(" %s: ", result->classification[ix].label); + ei_printf_float(result->classification[ix].value); + ei_printf("\n"); + } +#if EI_CLASSIFIER_HAS_ANOMALY == 1 + ei_printf(" anomaly score: "); + ei_printf_float(result->anomaly); + ei_printf("\n"); +#endif +#endif +} + +/** + * @brief Do inferencing over the processed feature matrix + * + * @param impulse struct with information about model and DSP + * @param fmatrix Processed matrix + * @param result Output classifier results + * @param[in] debug Debug output enable + * + * @return The ei impulse error. + */ +extern "C" EI_IMPULSE_ERROR run_inference( + const ei_impulse_t *impulse, + ei_feature_t *fmatrix, + ei_impulse_result_t *result, + bool debug = false) +{ + for (size_t ix = 0; ix < impulse->learning_blocks_size; ix++) { + + ei_learning_block_t block = impulse->learning_blocks[ix]; + +#if EI_CLASSIFIER_LOAD_IMAGE_SCALING + // we do not plan to have multiple dsp blocks with image + // so just apply scaling to the first one + EI_IMPULSE_ERROR scale_res = scale_fmatrix(&block, fmatrix[0].matrix); + if (scale_res != EI_IMPULSE_OK) { + return scale_res; + } +#endif + + result->copy_output = block.keep_output; + + EI_IMPULSE_ERROR res = block.infer_fn(impulse, fmatrix, (uint32_t*)block.input_block_ids, block.input_block_ids_size, result, block.config, debug); + if (res != EI_IMPULSE_OK) { + return res; + } + +#if EI_CLASSIFIER_LOAD_IMAGE_SCALING + // undo scaling + scale_res = unscale_fmatrix(&block, fmatrix[0].matrix); + if (scale_res != EI_IMPULSE_OK) { + return scale_res; + } +#endif + } + + if (ei_run_impulse_check_canceled() == EI_IMPULSE_CANCELED) { + return EI_IMPULSE_CANCELED; + } + + return EI_IMPULSE_OK; +} + +/** + * @brief Process a complete impulse + * + * @param impulse struct with information about model and DSP + * @param signal Sample data + * @param result Output classifier results + * @param[in] debug Debug output enable + * + * @return The ei impulse error. 
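+ *
+ * Minimal calling sketch (illustrative only; `my_get_signal_data` stands in
+ * for an application-provided callback with signature
+ * `int(size_t offset, size_t length, float *out_ptr)`):
+ *
+ *     signal_t signal;
+ *     signal.total_length = impulse->dsp_input_frame_size;
+ *     signal.get_data = &my_get_signal_data;
+ *     ei_impulse_result_t result = { 0 };
+ *     EI_IMPULSE_ERROR err = process_impulse(impulse, &signal, &result, false);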
+ */ +extern "C" EI_IMPULSE_ERROR process_impulse(const ei_impulse_t *impulse, + signal_t *signal, + ei_impulse_result_t *result, + bool debug = false) +{ + +#if (EI_CLASSIFIER_QUANTIZATION_ENABLED == 1 && (EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_TFLITE || EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_TENSAIFLOW || EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_ONNX_TIDL)) || EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_DRPAI + // Shortcut for quantized image models + ei_learning_block_t block = impulse->learning_blocks[0]; + if (can_run_classifier_image_quantized(impulse, block) == EI_IMPULSE_OK) { + return run_classifier_image_quantized(impulse, signal, result, debug); + } +#endif + + memset(result, 0, sizeof(ei_impulse_result_t)); + uint32_t block_num = impulse->dsp_blocks_size + impulse->learning_blocks_size; + + // smart pointer to features array + std::unique_ptr features_ptr(new ei_feature_t[block_num]); + ei_feature_t* features = features_ptr.get(); + + // have it outside of the loop to avoid going out of scope + std::unique_ptr *matrix_ptrs = new std::unique_ptr[block_num]; + + uint64_t dsp_start_us = ei_read_timer_us(); + + size_t out_features_index = 0; + + for (size_t ix = 0; ix < impulse->dsp_blocks_size; ix++) { + ei_model_dsp_t block = impulse->dsp_blocks[ix]; + matrix_ptrs[ix] = std::unique_ptr(new ei::matrix_t(1, block.n_output_features)); + features[ix].matrix = matrix_ptrs[ix].get(); + features[ix].blockId = block.blockId; + + if (out_features_index + block.n_output_features > impulse->nn_input_frame_size) { + ei_printf("ERR: Would write outside feature buffer\n"); + delete[] matrix_ptrs; + return EI_IMPULSE_DSP_ERROR; + } + +#if EIDSP_SIGNAL_C_FN_POINTER + if (block.axes_size != impulse->raw_samples_per_frame) { + ei_printf("ERR: EIDSP_SIGNAL_C_FN_POINTER can only be used when all axes are selected for DSP blocks\n"); + delete[] matrix_ptrs; + return EI_IMPULSE_DSP_ERROR; + } + int ret = block.extract_fn(signal, features[ix].matrix, block.config, impulse->frequency); +#else + SignalWithAxes swa(signal, block.axes, block.axes_size, impulse); + int ret = block.extract_fn(swa.get_signal(), features[ix].matrix, block.config, impulse->frequency); +#endif + + if (ret != EIDSP_OK) { + ei_printf("ERR: Failed to run DSP process (%d)\n", ret); + delete[] matrix_ptrs; + return EI_IMPULSE_DSP_ERROR; + } + + if (ei_run_impulse_check_canceled() == EI_IMPULSE_CANCELED) { + delete[] matrix_ptrs; + return EI_IMPULSE_CANCELED; + } + + out_features_index += block.n_output_features; + } + +#if EI_CLASSIFIER_SINGLE_FEATURE_INPUT == 0 + for (size_t ix = 0; ix < impulse->learning_blocks_size; ix++) { + ei_learning_block_t block = impulse->learning_blocks[ix]; + + if (block.keep_output) { + matrix_ptrs[impulse->dsp_blocks_size + ix] = std::unique_ptr(new ei::matrix_t(1, block.output_features_count)); + features[impulse->dsp_blocks_size + ix].matrix = matrix_ptrs[impulse->dsp_blocks_size + ix].get(); + features[impulse->dsp_blocks_size+ ix].blockId = block.blockId; + } + } +#endif // EI_CLASSIFIER_SINGLE_FEATURE_INPUT + + result->timing.dsp_us = ei_read_timer_us() - dsp_start_us; + result->timing.dsp = (int)(result->timing.dsp_us / 1000); + + if (debug) { + ei_printf("Features (%d ms.): ", result->timing.dsp); + for (size_t ix = 0; ix < block_num; ix++) { + if (features[ix].matrix == nullptr) { + continue; + } + for (size_t jx = 0; jx < features[ix].matrix->cols; jx++) { + ei_printf_float(features[ix].matrix->buffer[jx]); + ei_printf(" "); + } + ei_printf("\n"); + } + } + 
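+    // Feature extraction is complete at this point; run_inference() below
+    // feeds the assembled feature matrices to each learning block.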
+ if (debug) { + ei_printf("Running impulse...\n"); + } + + EI_IMPULSE_ERROR res = run_inference(impulse, features, result, debug); + + delete[] matrix_ptrs; + + return res; +} + +/** + * @brief Process a complete impulse for continuous inference + * + * @param impulse struct with information about model and DSP + * @param signal Sample data + * @param result Output classifier results + * @param[in] debug Debug output enable + * + * @return The ei impulse error. + */ +extern "C" EI_IMPULSE_ERROR process_impulse_continuous(const ei_impulse_t *impulse, + signal_t *signal, + ei_impulse_result_t *result, + bool debug, + bool enable_maf) +{ + + static ei::matrix_t static_features_matrix(1, impulse->nn_input_frame_size); + if (!static_features_matrix.buffer) { + return EI_IMPULSE_ALLOC_FAILED; + } + + memset(result, 0, sizeof(ei_impulse_result_t)); + + EI_IMPULSE_ERROR ei_impulse_error = EI_IMPULSE_OK; + + uint64_t dsp_start_us = ei_read_timer_us(); + + size_t out_features_index = 0; + bool is_mfcc = false; + bool is_mfe = false; + bool is_spectrogram = false; + + for (size_t ix = 0; ix < impulse->dsp_blocks_size; ix++) { + ei_model_dsp_t block = impulse->dsp_blocks[ix]; + + if (out_features_index + block.n_output_features > impulse->nn_input_frame_size) { + ei_printf("ERR: Would write outside feature buffer\n"); + return EI_IMPULSE_DSP_ERROR; + } + + ei::matrix_t fm(1, block.n_output_features, + static_features_matrix.buffer + out_features_index); + + int (*extract_fn_slice)(ei::signal_t *signal, ei::matrix_t *output_matrix, void *config, const float frequency, matrix_size_t *out_matrix_size); + + /* Switch to the slice version of the mfcc feature extract function */ + if (block.extract_fn == extract_mfcc_features) { + extract_fn_slice = &extract_mfcc_per_slice_features; + is_mfcc = true; + } + else if (block.extract_fn == extract_spectrogram_features) { + extract_fn_slice = &extract_spectrogram_per_slice_features; + is_spectrogram = true; + } + else if (block.extract_fn == extract_mfe_features) { + extract_fn_slice = &extract_mfe_per_slice_features; + is_mfe = true; + } + else { + ei_printf("ERR: Unknown extract function, only MFCC, MFE and spectrogram supported\n"); + return EI_IMPULSE_DSP_ERROR; + } + + matrix_size_t features_written; + +#if EIDSP_SIGNAL_C_FN_POINTER + if (block.axes_size != impulse->raw_samples_per_frame) { + ei_printf("ERR: EIDSP_SIGNAL_C_FN_POINTER can only be used when all axes are selected for DSP blocks\n"); + return EI_IMPULSE_DSP_ERROR; + } + int ret = extract_fn_slice(signal, &fm, block.config, impulse->frequency, &features_written); +#else + SignalWithAxes swa(signal, block.axes, block.axes_size, impulse); + int ret = extract_fn_slice(swa.get_signal(), &fm, block.config, impulse->frequency, &features_written); +#endif + + if (ret != EIDSP_OK) { + ei_printf("ERR: Failed to run DSP process (%d)\n", ret); + return EI_IMPULSE_DSP_ERROR; + } + + if (ei_run_impulse_check_canceled() == EI_IMPULSE_CANCELED) { + return EI_IMPULSE_CANCELED; + } + + classifier_continuous_features_written += (features_written.rows * features_written.cols); + + out_features_index += block.n_output_features; + } + + result->timing.dsp_us = ei_read_timer_us() - dsp_start_us; + result->timing.dsp = (int)(result->timing.dsp_us / 1000); + + if (debug) { + ei_printf("\r\nFeatures (%d ms.): ", result->timing.dsp); + for (size_t ix = 0; ix < static_features_matrix.cols; ix++) { + ei_printf_float(static_features_matrix.buffer[ix]); + ei_printf(" "); + } + ei_printf("\n"); + } + + if 
(classifier_continuous_features_written >= impulse->nn_input_frame_size) { + dsp_start_us = ei_read_timer_us(); + + ei_feature_t feature; + std::unique_ptr matrix_ptr(new ei::matrix_t(1, impulse->nn_input_frame_size)); + feature.matrix = matrix_ptr.get(); + feature.blockId = 0; + + /* Create a copy of the matrix for normalization */ + for (size_t m_ix = 0; m_ix < impulse->nn_input_frame_size; m_ix++) { + feature.matrix->buffer[m_ix] = static_features_matrix.buffer[m_ix]; + } + + if (is_mfcc) { + calc_cepstral_mean_and_var_normalization_mfcc(feature.matrix, impulse->dsp_blocks[0].config); + } + else if (is_spectrogram) { + calc_cepstral_mean_and_var_normalization_spectrogram(feature.matrix, impulse->dsp_blocks[0].config); + } + else if (is_mfe) { + calc_cepstral_mean_and_var_normalization_mfe(feature.matrix, impulse->dsp_blocks[0].config); + } + result->timing.dsp_us += ei_read_timer_us() - dsp_start_us; + result->timing.dsp = (int)(result->timing.dsp_us / 1000); + + if (debug) { + ei_printf("Running impulse...\n"); + } + + ei_impulse_error = run_inference(impulse, &feature, result, debug); + +#if EI_CLASSIFIER_CALIBRATION_ENABLED + if (impulse->sensor == EI_CLASSIFIER_SENSOR_MICROPHONE) { + if((void *)avg_scores != NULL && enable_maf == true) { + if (enable_maf && !impulse->calibration.is_configured) { + // perfcal is not configured, print msg first time + static bool has_printed_msg = false; + + if (!has_printed_msg) { + ei_printf("WARN: run_classifier_continuous, enable_maf is true, but performance calibration is not configured.\n"); + ei_printf(" Previously we'd run a moving-average filter over your outputs in this case, but this is now disabled.\n"); + ei_printf(" Go to 'Performance calibration' in your Edge Impulse project to configure post-processing parameters.\n"); + ei_printf(" (You can enable this from 'Dashboard' if it's not visible in your project)\n"); + ei_printf("\n"); + + has_printed_msg = true; + } + } + else { + // perfcal is configured + static bool has_printed_msg = false; + + if (!has_printed_msg) { + ei_printf("\nPerformance calibration is configured for your project. 
If no event is detected, all values are 0.\r\n\n"); + has_printed_msg = true; + } + + int label_detected = avg_scores->trigger(result->classification); + + if (avg_scores->should_boost()) { + for (int i = 0; i < impulse->label_count; i++) { + if (i == label_detected) { + result->classification[i].value = 1.0f; + } + else { + result->classification[i].value = 0.0f; + } + } + } + } + } + } +#endif + } + else { + for (int i = 0; i < impulse->label_count; i++) { + // set label correctly in the result struct if we have no results (otherwise is nullptr) + result->classification[i].label = impulse->categories[(uint32_t)i]; + } + } + + return ei_impulse_error; +} + +/** + * Check if the current impulse could be used by 'run_classifier_image_quantized' + */ +__attribute__((unused)) static EI_IMPULSE_ERROR can_run_classifier_image_quantized(const ei_impulse_t *impulse, ei_learning_block_t block_ptr) { + + if (impulse->inferencing_engine != EI_CLASSIFIER_TFLITE + && impulse->inferencing_engine != EI_CLASSIFIER_TENSAIFLOW + && impulse->inferencing_engine != EI_CLASSIFIER_DRPAI + && impulse->inferencing_engine != EI_CLASSIFIER_ONNX_TIDL) // check later + { + return EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE; + } + + // visual anomaly also needs to go through the normal path + if (impulse->has_anomaly){ + return EI_IMPULSE_ONLY_SUPPORTED_FOR_IMAGES; + } + + // Check if we have tflite graph + if (block_ptr.infer_fn != run_nn_inference) { + return EI_IMPULSE_ONLY_SUPPORTED_FOR_IMAGES; + } + + // Check if we have a quantized NN Input layer (input is always quantized for DRP-AI) + ei_learning_block_config_tflite_graph_t *block_config = (ei_learning_block_config_tflite_graph_t*)block_ptr.config; + if (block_config->quantized != 1) { + return EI_IMPULSE_ONLY_SUPPORTED_FOR_IMAGES; + } + + // And if we have one DSP block which operates on images... + if (impulse->dsp_blocks_size != 1 || impulse->dsp_blocks[0].extract_fn != extract_image_features) { + return EI_IMPULSE_ONLY_SUPPORTED_FOR_IMAGES; + } + + return EI_IMPULSE_OK; +} + +#if EI_CLASSIFIER_QUANTIZATION_ENABLED == 1 && (EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_TFLITE || EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_TENSAIFLOW || EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_DRPAI || EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_ONNX_TIDL) + +/** + * Special function to run the classifier on images, only works on TFLite models (either interpreter, EON, tensaiflow, drpai, tidl, memryx) + * that allocates a lot less memory by quantizing in place. This only works if 'can_run_classifier_image_quantized' + * returns EI_IMPULSE_OK. + */ +extern "C" EI_IMPULSE_ERROR run_classifier_image_quantized( + const ei_impulse_t *impulse, + signal_t *signal, + ei_impulse_result_t *result, + bool debug = false) +{ + memset(result, 0, sizeof(ei_impulse_result_t)); + + return run_nn_inference_image_quantized(impulse, signal, result, impulse->learning_blocks[0].config, debug); +} + +#endif // #if EI_CLASSIFIER_QUANTIZATION_ENABLED == 1 && (EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_TFLITE || EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_TENSAIFLOW || EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_DRPAI) + +/* Public functions ------------------------------------------------------- */ + +/* Thread carefully: public functions are not to be changed +to preserve backwards compatibility. 
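+
+As an illustration only (not part of the API surface), a minimal one-shot call of
+run_classifier() typically looks like the sketch below. It assumes the generated
+model_metadata.h provides EI_CLASSIFIER_DSP_INPUT_FRAME_SIZE and that `raw` has
+already been filled with sample data by the application:
+
+    float raw[EI_CLASSIFIER_DSP_INPUT_FRAME_SIZE]; // filled by your sampling code
+    signal_t signal;
+    numpy::signal_from_buffer(raw, EI_CLASSIFIER_DSP_INPUT_FRAME_SIZE, &signal);
+
+    ei_impulse_result_t result = { 0 };
+    EI_IMPULSE_ERROR err = run_classifier(&signal, &result, false);
+    // on EI_IMPULSE_OK, result.classification[] holds one value per label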
*/ + +/** + * @brief Init static vars + */ +extern "C" void run_classifier_init() +{ + + classifier_continuous_features_written = 0; + ei_dsp_clear_continuous_audio_state(); + +#if EI_CLASSIFIER_CALIBRATION_ENABLED + + const ei_impulse_t impulse = ei_default_impulse; + const ei_model_performance_calibration_t *calibration = &impulse.calibration; + + if(calibration != NULL) { + avg_scores = new RecognizeEvents(calibration, + impulse.label_count, impulse.slice_size, impulse.interval_ms); + } +#endif +} + +/** + * @brief Init static vars, for multi-model support + */ +__attribute__((unused)) void run_classifier_init(const ei_impulse_t *impulse) +{ + classifier_continuous_features_written = 0; + ei_dsp_clear_continuous_audio_state(); + +#if EI_CLASSIFIER_CALIBRATION_ENABLED + const ei_model_performance_calibration_t *calibration = &impulse->calibration; + + if(calibration != NULL) { + avg_scores = new RecognizeEvents(calibration, + impulse->label_count, impulse->slice_size, impulse->interval_ms); + } +#endif +} + +extern "C" void run_classifier_deinit(void) +{ + if((void *)avg_scores != NULL) { + delete avg_scores; + } +} + +/** + * @brief Fill the complete matrix with sample slices. From there, run inference + * on the matrix. + * + * @param signal Sample data + * @param result Classification output + * @param[in] debug Debug output enable boot + * + * @return The ei impulse error. + */ +extern "C" EI_IMPULSE_ERROR run_classifier_continuous( + signal_t *signal, + ei_impulse_result_t *result, + bool debug = false, + bool enable_maf = true) +{ + const ei_impulse_t impulse = ei_default_impulse; + return process_impulse_continuous(&impulse, signal, result, debug, enable_maf); +} + +/** + * @brief Fill the complete matrix with sample slices. From there, run impulse + * on the matrix. + * + * @param impulse struct with information about model and DSP + * @param signal Sample data + * @param result Classification output + * @param[in] debug Debug output enable boot + * + * @return The ei impulse error. 
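+ *
+ * @note enable_maf only has an effect when performance calibration is configured
+ *       for the project; otherwise a one-time warning is printed and no filtering
+ *       is applied (see process_impulse_continuous above).
+ *
+ * @note A typical continuous loop is sketched below (illustrative only;
+ *       `read_audio_slice` is a hypothetical function that fills one slice of
+ *       impulse->slice_size samples behind the returned signal):
+ *
+ *       run_classifier_init(impulse);
+ *       while (keep_sampling) {
+ *           signal_t signal = read_audio_slice();
+ *           ei_impulse_result_t result = { 0 };
+ *           run_classifier_continuous(impulse, &signal, &result, false, true);
+ *       }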
+ */ +__attribute__((unused)) EI_IMPULSE_ERROR run_classifier_continuous( + const ei_impulse_t *impulse, + signal_t *signal, + ei_impulse_result_t *result, + bool debug = false, + bool enable_maf = true) +{ + return process_impulse_continuous(impulse, signal, result, debug, enable_maf); +} + +/** + * Run the classifier over a raw features array + * @param raw_features Raw features array + * @param raw_features_size Size of the features array + * @param result Object to store the results in + * @param debug Whether to show debug messages (default: false) + */ +extern "C" EI_IMPULSE_ERROR run_classifier( + signal_t *signal, + ei_impulse_result_t *result, + bool debug = false) +{ + const ei_impulse_t impulse = ei_default_impulse; + return process_impulse(&impulse, signal, result, debug); +} + +/** + * Run the impulse over a raw features array + * @param impulse struct with information about model and DSP + * @param raw_features Raw features array + * @param raw_features_size Size of the features array + * @param result Object to store the results in + * @param debug Whether to show debug messages (default: false) + */ +__attribute__((unused)) EI_IMPULSE_ERROR run_classifier( + const ei_impulse_t *impulse, + signal_t *signal, + ei_impulse_result_t *result, + bool debug = false) +{ + return process_impulse(impulse, signal, result, debug); +} + +/* Deprecated functions ------------------------------------------------------- */ + +/* These functions are being deprecated and possibly will be removed or moved in future. +Do not use these - if possible, change your code to reflect the upcoming changes. */ + +#if EIDSP_SIGNAL_C_FN_POINTER == 0 + +/** + * Run the impulse, if you provide an instance of sampler it will also persist the data for you + * @param sampler Instance to an **initialized** sampler + * @param result Object to store the results in + * @param data_fn Function to retrieve data from sensors + * @param debug Whether to log debug messages (default false) + */ +__attribute__((unused)) EI_IMPULSE_ERROR run_impulse( +#if defined(EI_CLASSIFIER_HAS_SAMPLER) && EI_CLASSIFIER_HAS_SAMPLER == 1 + EdgeSampler *sampler, +#endif + ei_impulse_result_t *result, +#ifdef __MBED__ + mbed::Callback data_fn, +#else + std::function data_fn, +#endif + bool debug = false) { + + const ei_impulse_t impulse = ei_default_impulse; + + float *x = (float*)calloc(impulse.dsp_input_frame_size, sizeof(float)); + if (!x) { + return EI_IMPULSE_OUT_OF_MEMORY; + } + + uint64_t next_tick = 0; + + uint64_t sampling_us_start = ei_read_timer_us(); + + // grab some data + for (int i = 0; i < (int)impulse.dsp_input_frame_size; i += impulse.raw_samples_per_frame) { + uint64_t curr_us = ei_read_timer_us() - sampling_us_start; + + next_tick = curr_us + (impulse.interval_ms * 1000); + + data_fn(x + i, impulse.raw_samples_per_frame); +#if defined(EI_CLASSIFIER_HAS_SAMPLER) && EI_CLASSIFIER_HAS_SAMPLER == 1 + if (sampler != NULL) { + sampler->write_sensor_data(x + i, impulse.raw_samples_per_frame); + } +#endif + + if (ei_run_impulse_check_canceled() == EI_IMPULSE_CANCELED) { + free(x); + return EI_IMPULSE_CANCELED; + } + + while (next_tick > ei_read_timer_us() - sampling_us_start); + } + + result->timing.sampling = (ei_read_timer_us() - sampling_us_start) / 1000; + + signal_t signal; + int err = numpy::signal_from_buffer(x, impulse.dsp_input_frame_size, &signal); + if (err != 0) { + free(x); + ei_printf("ERR: signal_from_buffer failed (%d)\n", err); + return EI_IMPULSE_DSP_ERROR; + } + + EI_IMPULSE_ERROR r = run_classifier(&signal, 
result, debug); + free(x); + return r; +} + +#if defined(EI_CLASSIFIER_HAS_SAMPLER) && EI_CLASSIFIER_HAS_SAMPLER == 1 +/** + * Run the impulse, does not persist data + * @param result Object to store the results in + * @param data_fn Function to retrieve data from sensors + * @param debug Whether to log debug messages (default false) + */ +__attribute__((unused)) EI_IMPULSE_ERROR run_impulse( + ei_impulse_result_t *result, +#ifdef __MBED__ + mbed::Callback data_fn, +#else + std::function data_fn, +#endif + bool debug = false) { + return run_impulse(NULL, result, data_fn, debug); +} +#endif + +#endif // #if EIDSP_SIGNAL_C_FN_POINTER == 0 + +#ifdef __cplusplus +} +#endif // __cplusplus + +#endif // _EDGE_IMPULSE_RUN_CLASSIFIER_H_ diff --git a/edgeimpulse/edge-impulse-sdk/classifier/ei_run_classifier_c.cpp b/edgeimpulse/edge-impulse-sdk/classifier/ei_run_classifier_c.cpp new file mode 100644 index 0000000..4419384 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/classifier/ei_run_classifier_c.cpp @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#if defined(__cplusplus) && EI_C_LINKAGE == 1 + +#include "ei_run_classifier_c.h" + +/** + * This function definition is just there to make sure + * that the symbol is not removed from the library. + */ +EI_IMPULSE_ERROR ei_run_classifier( + signal_t *signal, + ei_impulse_result_t *result, + bool debug) { + + return run_classifier(signal, result, debug); +} + +#endif // #if defined(__cplusplus) && EI_C_LINKAGE == 1 diff --git a/edgeimpulse/edge-impulse-sdk/classifier/ei_run_classifier_c.h b/edgeimpulse/edge-impulse-sdk/classifier/ei_run_classifier_c.h new file mode 100644 index 0000000..426958b --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/classifier/ei_run_classifier_c.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef _EDGE_IMPULSE_RUN_CLASSIFIER_C_H_ +#define _EDGE_IMPULSE_RUN_CLASSIFIER_C_H_ + +#if defined(__cplusplus) && EI_C_LINKAGE == 1 + +#include "ei_run_classifier.h" + +/** + * Run the classifier over a raw features array + * @param raw_features Raw features array + * @param raw_features_size Size of the features array + * @param result Object to store the results in + * @param debug Whether to show debug messages (default: false) + */ +extern "C" EI_IMPULSE_ERROR ei_run_classifier( + signal_t *signal, + ei_impulse_result_t *result, + bool debug = false); + +#endif // #if defined(__cplusplus) && EI_C_LINKAGE == 1 + +#endif // _EDGE_IMPULSE_RUN_CLASSIFIER_H_ diff --git a/edgeimpulse/edge-impulse-sdk/classifier/ei_run_classifier_image.h b/edgeimpulse/edge-impulse-sdk/classifier/ei_run_classifier_image.h new file mode 100644 index 0000000..37ff775 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/classifier/ei_run_classifier_image.h @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2022 Edge Impulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef _EDGE_IMPULSE_RUN_CLASSIFIER_IMAGE_H_ +#define _EDGE_IMPULSE_RUN_CLASSIFIER_IMAGE_H_ + +#include "ei_run_classifier.h" + + + +#endif // _EDGE_IMPULSE_RUN_CLASSIFIER_IMAGE_H_ diff --git a/edgeimpulse/edge-impulse-sdk/classifier/ei_run_dsp.h b/edgeimpulse/edge-impulse-sdk/classifier/ei_run_dsp.h new file mode 100644 index 0000000..d04c144 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/classifier/ei_run_dsp.h @@ -0,0 +1,1523 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef _EDGE_IMPULSE_RUN_DSP_H_ +#define _EDGE_IMPULSE_RUN_DSP_H_ + +#include "edge-impulse-sdk/classifier/ei_model_types.h" +#include "edge-impulse-sdk/dsp/spectral/spectral.hpp" +#include "edge-impulse-sdk/dsp/speechpy/speechpy.hpp" +#include "edge-impulse-sdk/classifier/ei_signal_with_range.h" +#include "model-parameters/model_metadata.h" + +#if defined(__cplusplus) && EI_C_LINKAGE == 1 +extern "C" { + extern void ei_printf(const char *format, ...); +} +#else +extern void ei_printf(const char *format, ...); +#endif + +#ifdef __cplusplus +namespace { +#endif // __cplusplus + +using namespace ei; + +#if defined(EI_DSP_IMAGE_BUFFER_STATIC_SIZE) +float ei_dsp_image_buffer[EI_DSP_IMAGE_BUFFER_STATIC_SIZE]; +#endif + +// this is the frame we work on... 
allocate it statically so we share between invocations +static float *ei_dsp_cont_current_frame = nullptr; +static size_t ei_dsp_cont_current_frame_size = 0; +static int ei_dsp_cont_current_frame_ix = 0; + +__attribute__((unused)) int extract_spectral_analysis_features( + signal_t *signal, + matrix_t *output_matrix, + void *config_ptr, + const float frequency) +{ + ei_dsp_config_spectral_analysis_t *config = (ei_dsp_config_spectral_analysis_t *)config_ptr; + + // input matrix from the raw signal + matrix_t input_matrix(signal->total_length / config->axes, config->axes); + if (!input_matrix.buffer) { + EIDSP_ERR(EIDSP_OUT_OF_MEM); + } + + signal->get_data(0, signal->total_length, input_matrix.buffer); + +#if EI_DSP_PARAMS_SPECTRAL_ANALYSIS_ANALYSIS_TYPE_WAVELET || EI_DSP_PARAMS_ALL + if (strcmp(config->analysis_type, "Wavelet") == 0) { + return spectral::wavelet::extract_wavelet_features(&input_matrix, output_matrix, config, frequency); + } +#endif + +#if EI_DSP_PARAMS_SPECTRAL_ANALYSIS_ANALYSIS_TYPE_FFT || EI_DSP_PARAMS_ALL + if (strcmp(config->analysis_type, "FFT") == 0) { + if (config->implementation_version == 1) { + return spectral::feature::extract_spectral_analysis_features_v1( + &input_matrix, + output_matrix, + config, + frequency); + } else if (config->implementation_version == 4) { + return spectral::feature::extract_spectral_analysis_features_v4( + &input_matrix, + output_matrix, + config, + frequency); + } else { + return spectral::feature::extract_spectral_analysis_features_v2( + &input_matrix, + output_matrix, + config, + frequency); + } + } +#endif + +#if !EI_DSP_PARAMS_GENERATED || EI_DSP_PARAMS_ALL || !(EI_DSP_PARAMS_SPECTRAL_ANALYSIS_ANALYSIS_TYPE_FFT || EI_DSP_PARAMS_SPECTRAL_ANALYSIS_ANALYSIS_TYPE_WAVELET) + if (config->implementation_version == 1) { + return spectral::feature::extract_spectral_analysis_features_v1( + &input_matrix, + output_matrix, + config, + frequency); + } + if (config->implementation_version == 2) { + return spectral::feature::extract_spectral_analysis_features_v2( + &input_matrix, + output_matrix, + config, + frequency); + } +#endif + return EIDSP_NOT_SUPPORTED; +} + +__attribute__((unused)) int extract_raw_features(signal_t *signal, matrix_t *output_matrix, void *config_ptr, const float frequency) { + ei_dsp_config_raw_t config = *((ei_dsp_config_raw_t*)config_ptr); + + // Because of rounding errors during re-sampling the output size of the block might be + // smaller than the input of the block. 
Make sure we don't write outside of the bounds + // of the array: + // https://forum.edgeimpulse.com/t/using-custom-sensors-on-raspberry-pi-4/3506/7 + size_t els_to_copy = signal->total_length; + if (els_to_copy > output_matrix->rows * output_matrix->cols) { + els_to_copy = output_matrix->rows * output_matrix->cols; + } + + signal->get_data(0, els_to_copy, output_matrix->buffer); + + // scale the signal + int ret = numpy::scale(output_matrix, config.scale_axes); + if (ret != EIDSP_OK) { + EIDSP_ERR(ret); + } + + return EIDSP_OK; +} + +__attribute__((unused)) int extract_flatten_features(signal_t *signal, matrix_t *output_matrix, void *config_ptr, const float frequency) { + ei_dsp_config_flatten_t config = *((ei_dsp_config_flatten_t*)config_ptr); + + uint32_t expected_matrix_size = 0; + if (config.average) expected_matrix_size += config.axes; + if (config.minimum) expected_matrix_size += config.axes; + if (config.maximum) expected_matrix_size += config.axes; + if (config.rms) expected_matrix_size += config.axes; + if (config.stdev) expected_matrix_size += config.axes; + if (config.skewness) expected_matrix_size += config.axes; + if (config.kurtosis) expected_matrix_size += config.axes; + + if (output_matrix->rows * output_matrix->cols != expected_matrix_size) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + int ret; + + // input matrix from the raw signal + matrix_t input_matrix(signal->total_length / config.axes, config.axes); + if (!input_matrix.buffer) { + EIDSP_ERR(EIDSP_OUT_OF_MEM); + } + signal->get_data(0, signal->total_length, input_matrix.buffer); + + // scale the signal + ret = numpy::scale(&input_matrix, config.scale_axes); + if (ret != EIDSP_OK) { + ei_printf("ERR: Failed to scale signal (%d)\n", ret); + EIDSP_ERR(ret); + } + + // transpose the matrix so we have one row per axis (nifty!) 
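+    // After the transpose each row of input_matrix holds one axis; the loop below
+    // then appends the enabled statistics per axis in the fixed order average,
+    // minimum, maximum, rms, stdev, skewness, kurtosis. For example, with 3 axes
+    // and only average + rms enabled the output becomes
+    // [avg_ax0, rms_ax0, avg_ax1, rms_ax1, avg_ax2, rms_ax2].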
+ ret = numpy::transpose(&input_matrix); + if (ret != EIDSP_OK) { + ei_printf("ERR: Failed to transpose matrix (%d)\n", ret); + EIDSP_ERR(ret); + } + + size_t out_matrix_ix = 0; + + for (size_t row = 0; row < input_matrix.rows; row++) { + matrix_t row_matrix(1, input_matrix.cols, input_matrix.buffer + (row * input_matrix.cols)); + + if (config.average) { + float fbuffer; + matrix_t out_matrix(1, 1, &fbuffer); + numpy::mean(&row_matrix, &out_matrix); + output_matrix->buffer[out_matrix_ix++] = out_matrix.buffer[0]; + } + + if (config.minimum) { + float fbuffer; + matrix_t out_matrix(1, 1, &fbuffer); + numpy::min(&row_matrix, &out_matrix); + output_matrix->buffer[out_matrix_ix++] = out_matrix.buffer[0]; + } + + if (config.maximum) { + float fbuffer; + matrix_t out_matrix(1, 1, &fbuffer); + numpy::max(&row_matrix, &out_matrix); + output_matrix->buffer[out_matrix_ix++] = out_matrix.buffer[0]; + } + + if (config.rms) { + float fbuffer; + matrix_t out_matrix(1, 1, &fbuffer); + numpy::rms(&row_matrix, &out_matrix); + output_matrix->buffer[out_matrix_ix++] = out_matrix.buffer[0]; + } + + if (config.stdev) { + float fbuffer; + matrix_t out_matrix(1, 1, &fbuffer); + numpy::stdev(&row_matrix, &out_matrix); + output_matrix->buffer[out_matrix_ix++] = out_matrix.buffer[0]; + } + + if (config.skewness) { + float fbuffer; + matrix_t out_matrix(1, 1, &fbuffer); + numpy::skew(&row_matrix, &out_matrix); + output_matrix->buffer[out_matrix_ix++] = out_matrix.buffer[0]; + } + + if (config.kurtosis) { + float fbuffer; + matrix_t out_matrix(1, 1, &fbuffer); + numpy::kurtosis(&row_matrix, &out_matrix); + output_matrix->buffer[out_matrix_ix++] = out_matrix.buffer[0]; + } + } + + // flatten again + output_matrix->cols = output_matrix->rows * output_matrix->cols; + output_matrix->rows = 1; + + return EIDSP_OK; +} + +static class speechpy::processing::preemphasis *preemphasis; +static int preemphasized_audio_signal_get_data(size_t offset, size_t length, float *out_ptr) { + return preemphasis->get_data(offset, length, out_ptr); +} + +__attribute__((unused)) int extract_mfcc_features(signal_t *signal, matrix_t *output_matrix, void *config_ptr, const float sampling_frequency) { + ei_dsp_config_mfcc_t config = *((ei_dsp_config_mfcc_t*)config_ptr); + + if (config.axes != 1) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + if((config.implementation_version == 0) || (config.implementation_version > 4)) { + EIDSP_ERR(EIDSP_BLOCK_VERSION_INCORRECT); + } + + if (signal->total_length == 0) { + EIDSP_ERR(EIDSP_PARAMETER_INVALID); + } + + const uint32_t frequency = static_cast(sampling_frequency); + + // preemphasis class to preprocess the audio... 
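+    // (pre-emphasis is the usual first-order high-pass filter, roughly
+    //  y[n] = x[n] - pre_cof * x[n - pre_shift], applied lazily through the
+    //  preemphasized_audio_signal_get_data wrapper set up below)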
+ class speechpy::processing::preemphasis pre(signal, config.pre_shift, config.pre_cof, false); + preemphasis = ⪯ + + signal_t preemphasized_audio_signal; + preemphasized_audio_signal.total_length = signal->total_length; + preemphasized_audio_signal.get_data = &preemphasized_audio_signal_get_data; + + // calculate the size of the MFCC matrix + matrix_size_t out_matrix_size = + speechpy::feature::calculate_mfcc_buffer_size( + signal->total_length, frequency, config.frame_length, config.frame_stride, config.num_cepstral, config.implementation_version); + /* Only throw size mismatch error calculated buffer doesn't fit for continuous inferencing */ + if (out_matrix_size.rows * out_matrix_size.cols > output_matrix->rows * output_matrix->cols) { + ei_printf("out_matrix = %dx%d\n", (int)output_matrix->rows, (int)output_matrix->cols); + ei_printf("calculated size = %dx%d\n", (int)out_matrix_size.rows, (int)out_matrix_size.cols); + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + output_matrix->rows = out_matrix_size.rows; + output_matrix->cols = out_matrix_size.cols; + + // and run the MFCC extraction + int ret = speechpy::feature::mfcc(output_matrix, &preemphasized_audio_signal, + frequency, config.frame_length, config.frame_stride, config.num_cepstral, config.num_filters, config.fft_length, + config.low_frequency, config.high_frequency, true, config.implementation_version); + if (ret != EIDSP_OK) { + ei_printf("ERR: MFCC failed (%d)\n", ret); + EIDSP_ERR(ret); + } + + // cepstral mean and variance normalization + ret = speechpy::processing::cmvnw(output_matrix, config.win_size, true, false); + if (ret != EIDSP_OK) { + ei_printf("ERR: cmvnw failed (%d)\n", ret); + EIDSP_ERR(ret); + } + + output_matrix->cols = out_matrix_size.rows * out_matrix_size.cols; + output_matrix->rows = 1; + + return EIDSP_OK; +} + + +static int extract_mfcc_run_slice(signal_t *signal, matrix_t *output_matrix, ei_dsp_config_mfcc_t *config, const float sampling_frequency, matrix_size_t *matrix_size_out, int implementation_version) { + uint32_t frequency = (uint32_t)sampling_frequency; + + int x; + + // calculate the size of the spectrogram matrix + matrix_size_t out_matrix_size = + speechpy::feature::calculate_mfcc_buffer_size( + signal->total_length, frequency, config->frame_length, config->frame_stride, config->num_cepstral, + implementation_version); + + // we roll the output matrix back so we have room at the end... 
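+    // (the full classification matrix behaves like a sliding window: existing
+    //  features shift towards the front and the freshly computed slice is
+    //  written into the freed-up space at the end)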
+ x = numpy::roll(output_matrix->buffer, output_matrix->rows * output_matrix->cols, + -(out_matrix_size.rows * out_matrix_size.cols)); + if (x != EIDSP_OK) { + EIDSP_ERR(x); + } + + // slice in the output matrix to write to + // the offset in the classification matrix here is always at the end + size_t output_matrix_offset = (output_matrix->rows * output_matrix->cols) - + (out_matrix_size.rows * out_matrix_size.cols); + + matrix_t output_matrix_slice(out_matrix_size.rows, out_matrix_size.cols, output_matrix->buffer + output_matrix_offset); + + // and run the MFCC extraction + x = speechpy::feature::mfcc(&output_matrix_slice, signal, + frequency, config->frame_length, config->frame_stride, config->num_cepstral, config->num_filters, config->fft_length, + config->low_frequency, config->high_frequency, true, implementation_version); + if (x != EIDSP_OK) { + ei_printf("ERR: MFCC failed (%d)\n", x); + EIDSP_ERR(x); + } + + matrix_size_out->rows += out_matrix_size.rows; + if (out_matrix_size.cols > 0) { + matrix_size_out->cols = out_matrix_size.cols; + } + + return EIDSP_OK; +} + +__attribute__((unused)) int extract_mfcc_per_slice_features(signal_t *signal, matrix_t *output_matrix, void *config_ptr, const float sampling_frequency, matrix_size_t *matrix_size_out) { +#if defined(__cplusplus) && EI_C_LINKAGE == 1 + ei_printf("ERR: Continuous audio is not supported when EI_C_LINKAGE is defined\n"); + EIDSP_ERR(EIDSP_NOT_SUPPORTED); +#else + + ei_dsp_config_mfcc_t config = *((ei_dsp_config_mfcc_t*)config_ptr); + + if (config.axes != 1) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + if((config.implementation_version == 0) || (config.implementation_version > 4)) { + EIDSP_ERR(EIDSP_BLOCK_VERSION_INCORRECT); + } + + if (signal->total_length == 0) { + EIDSP_ERR(EIDSP_PARAMETER_INVALID); + } + + const uint32_t frequency = static_cast(sampling_frequency); + + // preemphasis class to preprocess the audio... + class speechpy::processing::preemphasis pre(signal, config.pre_shift, config.pre_cof, false); + preemphasis = ⪯ + + signal_t preemphasized_audio_signal; + preemphasized_audio_signal.total_length = signal->total_length; + preemphasized_audio_signal.get_data = &preemphasized_audio_signal_get_data; + + // Go from the time (e.g. 0.25 seconds to number of frames based on freq) + const size_t frame_length_values = frequency * config.frame_length; + const size_t frame_stride_values = frequency * config.frame_stride; + const int frame_overlap_values = static_cast(frame_length_values) - static_cast(frame_stride_values); + + if (frame_overlap_values < 0) { + ei_printf("ERR: frame_length ("); + ei_printf_float(config.frame_length); + ei_printf(") cannot be lower than frame_stride ("); + ei_printf_float(config.frame_stride); + ei_printf(") for continuous classification\n"); + EIDSP_ERR(EIDSP_PARAMETER_INVALID); + } + + int x; + + // have current frame, but wrong size? 
then free + if (ei_dsp_cont_current_frame && ei_dsp_cont_current_frame_size != frame_length_values) { + ei_free(ei_dsp_cont_current_frame); + ei_dsp_cont_current_frame = nullptr; + } + + int implementation_version = config.implementation_version; + + // this is the offset in the signal from which we'll work + size_t offset_in_signal = 0; + + if (!ei_dsp_cont_current_frame) { + ei_dsp_cont_current_frame = (float*)ei_calloc(frame_length_values * sizeof(float), 1); + if (!ei_dsp_cont_current_frame) { + EIDSP_ERR(EIDSP_OUT_OF_MEM); + } + ei_dsp_cont_current_frame_size = frame_length_values; + ei_dsp_cont_current_frame_ix = 0; + } + + + if ((frame_length_values) > preemphasized_audio_signal.total_length + ei_dsp_cont_current_frame_ix) { + ei_printf("ERR: frame_length (%d) cannot be larger than signal's total length (%d) for continuous classification\n", + (int)frame_length_values, (int)preemphasized_audio_signal.total_length + ei_dsp_cont_current_frame_ix); + EIDSP_ERR(EIDSP_PARAMETER_INVALID); + } + + matrix_size_out->rows = 0; + matrix_size_out->cols = 0; + + // for continuous use v2 stack frame calculations + if (implementation_version == 1) { + implementation_version = 2; + } + + if (ei_dsp_cont_current_frame_ix > (int)ei_dsp_cont_current_frame_size) { + ei_printf("ERR: ei_dsp_cont_current_frame_ix is larger than frame size (ix=%d size=%d)\n", + ei_dsp_cont_current_frame_ix, (int)ei_dsp_cont_current_frame_size); + EIDSP_ERR(EIDSP_PARAMETER_INVALID); + } + + // if we still have some code from previous run + while (ei_dsp_cont_current_frame_ix > 0) { + // then from the current frame we need to read `frame_length_values - ei_dsp_cont_current_frame_ix` + // starting at offset 0 + x = preemphasized_audio_signal.get_data(0, frame_length_values - ei_dsp_cont_current_frame_ix, ei_dsp_cont_current_frame + ei_dsp_cont_current_frame_ix); + if (x != EIDSP_OK) { + EIDSP_ERR(x); + } + + // now ei_dsp_cont_current_frame is complete + signal_t frame_signal; + x = numpy::signal_from_buffer(ei_dsp_cont_current_frame, frame_length_values, &frame_signal); + if (x != EIDSP_OK) { + EIDSP_ERR(x); + } + + x = extract_mfcc_run_slice(&frame_signal, output_matrix, &config, sampling_frequency, matrix_size_out, implementation_version); + if (x != EIDSP_OK) { + EIDSP_ERR(x); + } + + // if there's overlap between frames we roll through + if (frame_stride_values > 0) { + numpy::roll(ei_dsp_cont_current_frame, frame_length_values, -frame_stride_values); + } + + ei_dsp_cont_current_frame_ix -= frame_stride_values; + } + + if (ei_dsp_cont_current_frame_ix < 0) { + offset_in_signal = -ei_dsp_cont_current_frame_ix; + ei_dsp_cont_current_frame_ix = 0; + } + + if (offset_in_signal >= signal->total_length) { + offset_in_signal -= signal->total_length; + return EIDSP_OK; + } + + // now... we need to discard part of the signal... 
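+    // (SignalWithRange exposes only the [offset_in_signal, total_length) part of
+    //  the preemphasized signal, so samples already consumed via the carried-over
+    //  frame above are skipped)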
+ SignalWithRange signal_with_range(&preemphasized_audio_signal, offset_in_signal, signal->total_length); + + signal_t *range_signal = signal_with_range.get_signal(); + size_t range_signal_orig_length = range_signal->total_length; + + // then we'll just go through normal processing of the signal: + x = extract_mfcc_run_slice(range_signal, output_matrix, &config, sampling_frequency, matrix_size_out, implementation_version); + if (x != EIDSP_OK) { + EIDSP_ERR(x); + } + + // Make sure v1 model are reset to the original length; + range_signal->total_length = range_signal_orig_length; + + // update offset + int length_of_signal_used = speechpy::processing::calculate_signal_used(range_signal->total_length, sampling_frequency, + config.frame_length, config.frame_stride, false, implementation_version); + offset_in_signal += length_of_signal_used; + + // see what's left? + int bytes_left_end_of_frame = signal->total_length - offset_in_signal; + bytes_left_end_of_frame += frame_overlap_values; + + if (bytes_left_end_of_frame > 0) { + // then read that into the ei_dsp_cont_current_frame buffer + x = preemphasized_audio_signal.get_data( + (preemphasized_audio_signal.total_length - bytes_left_end_of_frame), + bytes_left_end_of_frame, + ei_dsp_cont_current_frame); + if (x != EIDSP_OK) { + EIDSP_ERR(x); + } + } + + ei_dsp_cont_current_frame_ix = bytes_left_end_of_frame; + + preemphasis = nullptr; + + return EIDSP_OK; +#endif +} + +__attribute__((unused)) int extract_spectrogram_features(signal_t *signal, matrix_t *output_matrix, void *config_ptr, const float sampling_frequency) { + ei_dsp_config_spectrogram_t config = *((ei_dsp_config_spectrogram_t*)config_ptr); + + if (config.axes != 1) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + if (signal->total_length == 0) { + EIDSP_ERR(EIDSP_PARAMETER_INVALID); + } + + const uint32_t frequency = static_cast(sampling_frequency); + + // calculate the size of the MFE matrix + matrix_size_t out_matrix_size = + speechpy::feature::calculate_mfe_buffer_size( + signal->total_length, frequency, config.frame_length, config.frame_stride, config.fft_length / 2 + 1, + config.implementation_version); + /* Only throw size mismatch error calculated buffer doesn't fit for continuous inferencing */ + if (out_matrix_size.rows * out_matrix_size.cols > output_matrix->rows * output_matrix->cols) { + ei_printf("out_matrix = %dx%d\n", (int)output_matrix->rows, (int)output_matrix->cols); + ei_printf("calculated size = %dx%d\n", (int)out_matrix_size.rows, (int)out_matrix_size.cols); + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + output_matrix->rows = out_matrix_size.rows; + output_matrix->cols = out_matrix_size.cols; + + int ret = speechpy::feature::spectrogram(output_matrix, signal, + sampling_frequency, config.frame_length, config.frame_stride, config.fft_length, config.implementation_version); + if (ret != EIDSP_OK) { + ei_printf("ERR: Spectrogram failed (%d)\n", ret); + EIDSP_ERR(ret); + } + + if (config.implementation_version < 3) { + ret = numpy::normalize(output_matrix); + if (ret != EIDSP_OK) { + EIDSP_ERR(ret); + } + } + else { + // normalization + ret = speechpy::processing::spectrogram_normalization(output_matrix, config.noise_floor_db, config.implementation_version == 3); + if (ret != EIDSP_OK) { + ei_printf("ERR: normalization failed (%d)\n", ret); + EIDSP_ERR(ret); + } + } + + output_matrix->cols = out_matrix_size.rows * out_matrix_size.cols; + output_matrix->rows = 1; + + return EIDSP_OK; +} + + +static int extract_spectrogram_run_slice(signal_t *signal, matrix_t 
*output_matrix, ei_dsp_config_spectrogram_t *config, const float sampling_frequency, matrix_size_t *matrix_size_out) { + uint32_t frequency = (uint32_t)sampling_frequency; + + int x; + + // calculate the size of the spectrogram matrix + matrix_size_t out_matrix_size = + speechpy::feature::calculate_mfe_buffer_size( + signal->total_length, frequency, config->frame_length, config->frame_stride, config->fft_length / 2 + 1, + config->implementation_version); + + // we roll the output matrix back so we have room at the end... + x = numpy::roll(output_matrix->buffer, output_matrix->rows * output_matrix->cols, + -(out_matrix_size.rows * out_matrix_size.cols)); + if (x != EIDSP_OK) { + if (preemphasis) { + delete preemphasis; + } + EIDSP_ERR(x); + } + + // slice in the output matrix to write to + // the offset in the classification matrix here is always at the end + size_t output_matrix_offset = (output_matrix->rows * output_matrix->cols) - + (out_matrix_size.rows * out_matrix_size.cols); + + matrix_t output_matrix_slice(out_matrix_size.rows, out_matrix_size.cols, output_matrix->buffer + output_matrix_offset); + + // and run the spectrogram extraction + int ret = speechpy::feature::spectrogram(&output_matrix_slice, signal, + frequency, config->frame_length, config->frame_stride, config->fft_length, config->implementation_version); + + if (ret != EIDSP_OK) { + ei_printf("ERR: Spectrogram failed (%d)\n", ret); + EIDSP_ERR(ret); + } + + matrix_size_out->rows += out_matrix_size.rows; + if (out_matrix_size.cols > 0) { + matrix_size_out->cols = out_matrix_size.cols; + } + + return EIDSP_OK; +} + +__attribute__((unused)) int extract_spectrogram_per_slice_features(signal_t *signal, matrix_t *output_matrix, void *config_ptr, const float sampling_frequency, matrix_size_t *matrix_size_out) { +#if defined(__cplusplus) && EI_C_LINKAGE == 1 + ei_printf("ERR: Continuous audio is not supported when EI_C_LINKAGE is defined\n"); + EIDSP_ERR(EIDSP_NOT_SUPPORTED); +#else + + ei_dsp_config_spectrogram_t config = *((ei_dsp_config_spectrogram_t*)config_ptr); + + static bool first_run = false; + + if (config.axes != 1) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + if (signal->total_length == 0) { + EIDSP_ERR(EIDSP_PARAMETER_INVALID); + } + + const uint32_t frequency = static_cast(sampling_frequency); + + /* Fake an extra frame_length for stack frames calculations. There, 1 frame_length is always + subtracted and there for never used. But skip the first slice to fit the feature_matrix + buffer */ + if(config.implementation_version < 2) { + + if (first_run == true) { + signal->total_length += (size_t)(config.frame_length * (float)frequency); + } + + first_run = true; + } + + // Go from the time (e.g. 
0.25 seconds to number of frames based on freq) + const size_t frame_length_values = frequency * config.frame_length; + const size_t frame_stride_values = frequency * config.frame_stride; + const int frame_overlap_values = static_cast(frame_length_values) - static_cast(frame_stride_values); + + if (frame_overlap_values < 0) { + ei_printf("ERR: frame_length ("); + ei_printf_float(config.frame_length); + ei_printf(") cannot be lower than frame_stride ("); + ei_printf_float(config.frame_stride); + ei_printf(") for continuous classification\n"); + EIDSP_ERR(EIDSP_PARAMETER_INVALID); + } + + if (frame_length_values > signal->total_length) { + ei_printf("ERR: frame_length (%d) cannot be larger than signal's total length (%d) for continuous classification\n", + (int)frame_length_values, (int)signal->total_length); + EIDSP_ERR(EIDSP_PARAMETER_INVALID); + } + + int x; + + // have current frame, but wrong size? then free + if (ei_dsp_cont_current_frame && ei_dsp_cont_current_frame_size != frame_length_values) { + ei_free(ei_dsp_cont_current_frame); + ei_dsp_cont_current_frame = nullptr; + } + + if (!ei_dsp_cont_current_frame) { + ei_dsp_cont_current_frame = (float*)ei_calloc(frame_length_values * sizeof(float), 1); + if (!ei_dsp_cont_current_frame) { + EIDSP_ERR(EIDSP_OUT_OF_MEM); + } + ei_dsp_cont_current_frame_size = frame_length_values; + ei_dsp_cont_current_frame_ix = 0; + } + + matrix_size_out->rows = 0; + matrix_size_out->cols = 0; + + // this is the offset in the signal from which we'll work + size_t offset_in_signal = 0; + + if (ei_dsp_cont_current_frame_ix > (int)ei_dsp_cont_current_frame_size) { + ei_printf("ERR: ei_dsp_cont_current_frame_ix is larger than frame size\n"); + EIDSP_ERR(EIDSP_PARAMETER_INVALID); + } + + // if we still have some code from previous run + while (ei_dsp_cont_current_frame_ix > 0) { + // then from the current frame we need to read `frame_length_values - ei_dsp_cont_current_frame_ix` + // starting at offset 0 + x = signal->get_data(0, frame_length_values - ei_dsp_cont_current_frame_ix, ei_dsp_cont_current_frame + ei_dsp_cont_current_frame_ix); + if (x != EIDSP_OK) { + EIDSP_ERR(x); + } + + // now ei_dsp_cont_current_frame is complete + signal_t frame_signal; + x = numpy::signal_from_buffer(ei_dsp_cont_current_frame, frame_length_values, &frame_signal); + if (x != EIDSP_OK) { + EIDSP_ERR(x); + } + + x = extract_spectrogram_run_slice(&frame_signal, output_matrix, &config, sampling_frequency, matrix_size_out); + if (x != EIDSP_OK) { + EIDSP_ERR(x); + } + + // if there's overlap between frames we roll through + if (frame_stride_values > 0) { + numpy::roll(ei_dsp_cont_current_frame, frame_length_values, -frame_stride_values); + } + + ei_dsp_cont_current_frame_ix -= frame_stride_values; + } + + if (ei_dsp_cont_current_frame_ix < 0) { + offset_in_signal = -ei_dsp_cont_current_frame_ix; + ei_dsp_cont_current_frame_ix = 0; + } + + if (offset_in_signal >= signal->total_length) { + offset_in_signal -= signal->total_length; + return EIDSP_OK; + } + + // now... we need to discard part of the signal... 
+ SignalWithRange signal_with_range(signal, offset_in_signal, signal->total_length); + + signal_t *range_signal = signal_with_range.get_signal(); + size_t range_signal_orig_length = range_signal->total_length; + + // then we'll just go through normal processing of the signal: + x = extract_spectrogram_run_slice(range_signal, output_matrix, &config, sampling_frequency, matrix_size_out); + if (x != EIDSP_OK) { + EIDSP_ERR(x); + } + + // update offset + int length_of_signal_used = speechpy::processing::calculate_signal_used(range_signal->total_length, sampling_frequency, + config.frame_length, config.frame_stride, false, config.implementation_version); + offset_in_signal += length_of_signal_used; + + // not sure why this is being manipulated... + range_signal->total_length = range_signal_orig_length; + + // see what's left? + int bytes_left_end_of_frame = signal->total_length - offset_in_signal; + bytes_left_end_of_frame += frame_overlap_values; + + if (bytes_left_end_of_frame > 0) { + // then read that into the ei_dsp_cont_current_frame buffer + x = signal->get_data( + (signal->total_length - bytes_left_end_of_frame), + bytes_left_end_of_frame, + ei_dsp_cont_current_frame); + if (x != EIDSP_OK) { + EIDSP_ERR(x); + } + } + + ei_dsp_cont_current_frame_ix = bytes_left_end_of_frame; + + if (config.implementation_version < 2) { + if (first_run == true) { + signal->total_length -= (size_t)(config.frame_length * (float)frequency); + } + } + + return EIDSP_OK; +#endif +} + + +__attribute__((unused)) int extract_mfe_features(signal_t *signal, matrix_t *output_matrix, void *config_ptr, const float sampling_frequency) { + ei_dsp_config_mfe_t config = *((ei_dsp_config_mfe_t*)config_ptr); + + if (config.axes != 1) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + if (signal->total_length == 0) { + EIDSP_ERR(EIDSP_PARAMETER_INVALID); + } + + if ((config.implementation_version == 0) || (config.implementation_version > 4)) { + EIDSP_ERR(EIDSP_BLOCK_VERSION_INCORRECT); + } + + const uint32_t frequency = static_cast(sampling_frequency); + + signal_t preemphasized_audio_signal; + + // before version 3 we did not have preemphasis + if (config.implementation_version < 3) { + preemphasis = nullptr; + + preemphasized_audio_signal.total_length = signal->total_length; + preemphasized_audio_signal.get_data = signal->get_data; + } + else { + // preemphasis class to preprocess the audio... 
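+    // (implementation_version >= 3 applies a fixed pre-emphasis, roughly
+    //  y[n] = x[n] - 0.98f * x[n - 1], before computing the filterbank energies)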
+ class speechpy::processing::preemphasis *pre = new class speechpy::processing::preemphasis(signal, 1, 0.98f, true); + preemphasis = pre; + + preemphasized_audio_signal.total_length = signal->total_length; + preemphasized_audio_signal.get_data = &preemphasized_audio_signal_get_data; + } + + // calculate the size of the MFE matrix + matrix_size_t out_matrix_size = + speechpy::feature::calculate_mfe_buffer_size( + preemphasized_audio_signal.total_length, frequency, config.frame_length, config.frame_stride, config.num_filters, + config.implementation_version); + /* Only throw size mismatch error calculated buffer doesn't fit for continuous inferencing */ + if (out_matrix_size.rows * out_matrix_size.cols > output_matrix->rows * output_matrix->cols) { + ei_printf("out_matrix = %dx%d\n", (int)output_matrix->rows, (int)output_matrix->cols); + ei_printf("calculated size = %dx%d\n", (int)out_matrix_size.rows, (int)out_matrix_size.cols); + if (preemphasis) { + delete preemphasis; + } + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + output_matrix->rows = out_matrix_size.rows; + output_matrix->cols = out_matrix_size.cols; + + int ret; + // This probably seems incorrect, but the mfe func can actually handle all versions + // There's a subtle issue with cmvn and v2, not worth tracking down + // So for v2 and v1, we'll just use the old code + // (the new mfe does away with the intermediate filterbank matrix) + if (config.implementation_version > 2) { + ret = speechpy::feature::mfe(output_matrix, nullptr, &preemphasized_audio_signal, + frequency, config.frame_length, config.frame_stride, config.num_filters, config.fft_length, + config.low_frequency, config.high_frequency, config.implementation_version); + } else { + ret = speechpy::feature::mfe_v3(output_matrix, nullptr, &preemphasized_audio_signal, + frequency, config.frame_length, config.frame_stride, config.num_filters, config.fft_length, + config.low_frequency, config.high_frequency, config.implementation_version); + } + + if (preemphasis) { + delete preemphasis; + } + if (ret != EIDSP_OK) { + ei_printf("ERR: MFE failed (%d)\n", ret); + EIDSP_ERR(ret); + } + + if (config.implementation_version < 3) { + // cepstral mean and variance normalization + ret = speechpy::processing::cmvnw(output_matrix, config.win_size, false, true); + if (ret != EIDSP_OK) { + ei_printf("ERR: cmvnw failed (%d)\n", ret); + EIDSP_ERR(ret); + } + } + else { + // normalization + ret = speechpy::processing::mfe_normalization(output_matrix, config.noise_floor_db); + if (ret != EIDSP_OK) { + ei_printf("ERR: normalization failed (%d)\n", ret); + EIDSP_ERR(ret); + } + } + + output_matrix->cols = out_matrix_size.rows * out_matrix_size.cols; + output_matrix->rows = 1; + + return EIDSP_OK; +} + +static int extract_mfe_run_slice(signal_t *signal, matrix_t *output_matrix, ei_dsp_config_mfe_t *config, const float sampling_frequency, matrix_size_t *matrix_size_out) { + uint32_t frequency = (uint32_t)sampling_frequency; + + int x; + + // calculate the size of the spectrogram matrix + matrix_size_t out_matrix_size = + speechpy::feature::calculate_mfe_buffer_size( + signal->total_length, frequency, config->frame_length, config->frame_stride, config->num_filters, + config->implementation_version); + + // we roll the output matrix back so we have room at the end... 
+ x = numpy::roll(output_matrix->buffer, output_matrix->rows * output_matrix->cols, + -(out_matrix_size.rows * out_matrix_size.cols)); + if (x != EIDSP_OK) { + EIDSP_ERR(x); + } + + // slice in the output matrix to write to + // the offset in the classification matrix here is always at the end + size_t output_matrix_offset = (output_matrix->rows * output_matrix->cols) - + (out_matrix_size.rows * out_matrix_size.cols); + + matrix_t output_matrix_slice(out_matrix_size.rows, out_matrix_size.cols, output_matrix->buffer + output_matrix_offset); + + // and run the MFE extraction + // This probably seems incorrect, but the mfe func can actually handle all versions + // There's a subtle issue with cmvn and v2, not worth tracking down + // So for v2 and v1, we'll just use the old code + // (the new mfe does away with the intermediate filterbank matrix) + if (config->implementation_version > 2) { + x = speechpy::feature::mfe(&output_matrix_slice, nullptr, signal, + frequency, config->frame_length, config->frame_stride, config->num_filters, config->fft_length, + config->low_frequency, config->high_frequency, config->implementation_version); + } else { + x = speechpy::feature::mfe_v3(&output_matrix_slice, nullptr, signal, + frequency, config->frame_length, config->frame_stride, config->num_filters, config->fft_length, + config->low_frequency, config->high_frequency, config->implementation_version); + } + if (x != EIDSP_OK) { + ei_printf("ERR: MFE failed (%d)\n", x); + EIDSP_ERR(x); + } + + matrix_size_out->rows += out_matrix_size.rows; + if (out_matrix_size.cols > 0) { + matrix_size_out->cols = out_matrix_size.cols; + } + + return EIDSP_OK; +} + +__attribute__((unused)) int extract_mfe_per_slice_features(signal_t *signal, matrix_t *output_matrix, void *config_ptr, const float sampling_frequency, matrix_size_t *matrix_size_out) { +#if defined(__cplusplus) && EI_C_LINKAGE == 1 + ei_printf("ERR: Continuous audio is not supported when EI_C_LINKAGE is defined\n"); + EIDSP_ERR(EIDSP_NOT_SUPPORTED); +#else + + ei_dsp_config_mfe_t config = *((ei_dsp_config_mfe_t*)config_ptr); + + // signal is already the right size, + // output matrix is not the right size, but we can start writing at offset 0 and then it's OK too + + static bool first_run = false; + + if (config.axes != 1) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + if ((config.implementation_version == 0) || (config.implementation_version > 4)) { + EIDSP_ERR(EIDSP_BLOCK_VERSION_INCORRECT); + } + + if (signal->total_length == 0) { + EIDSP_ERR(EIDSP_PARAMETER_INVALID); + } + + const uint32_t frequency = static_cast(sampling_frequency); + + // Fake an extra frame_length for stack frames calculations. There, 1 frame_length is always + // subtracted and there for never used. But skip the first slice to fit the feature_matrix + // buffer + if (config.implementation_version == 1) { + if (first_run == true) { + signal->total_length += (size_t)(config.frame_length * (float)frequency); + } + + first_run = true; + } + + // ok all setup, let's construct the signal (with preemphasis for impl version >3) + signal_t preemphasized_audio_signal; + + // before version 3 we did not have preemphasis + if (config.implementation_version < 3) { + preemphasis = nullptr; + preemphasized_audio_signal.total_length = signal->total_length; + preemphasized_audio_signal.get_data = signal->get_data; + } + else { + // preemphasis class to preprocess the audio... 
+ class speechpy::processing::preemphasis *pre = new class speechpy::processing::preemphasis(signal, 1, 0.98f, true); + preemphasis = pre; + preemphasized_audio_signal.total_length = signal->total_length; + preemphasized_audio_signal.get_data = &preemphasized_audio_signal_get_data; + } + + // Go from the time (e.g. 0.25 seconds to number of frames based on freq) + const size_t frame_length_values = frequency * config.frame_length; + const size_t frame_stride_values = frequency * config.frame_stride; + const int frame_overlap_values = static_cast(frame_length_values) - static_cast(frame_stride_values); + + if (frame_overlap_values < 0) { + ei_printf("ERR: frame_length ("); + ei_printf_float(config.frame_length); + ei_printf(") cannot be lower than frame_stride ("); + ei_printf_float(config.frame_stride); + ei_printf(") for continuous classification\n"); + + if (preemphasis) { + delete preemphasis; + } + EIDSP_ERR(EIDSP_PARAMETER_INVALID); + } + + if (frame_length_values > preemphasized_audio_signal.total_length) { + ei_printf("ERR: frame_length (%d) cannot be larger than signal's total length (%d) for continuous classification\n", + (int)frame_length_values, (int)preemphasized_audio_signal.total_length); + if (preemphasis) { + delete preemphasis; + } + EIDSP_ERR(EIDSP_PARAMETER_INVALID); + } + + int x; + + // have current frame, but wrong size? then free + if (ei_dsp_cont_current_frame && ei_dsp_cont_current_frame_size != frame_length_values) { + ei_free(ei_dsp_cont_current_frame); + ei_dsp_cont_current_frame = nullptr; + } + + if (!ei_dsp_cont_current_frame) { + ei_dsp_cont_current_frame = (float*)ei_calloc(frame_length_values * sizeof(float), 1); + if (!ei_dsp_cont_current_frame) { + if (preemphasis) { + delete preemphasis; + } + EIDSP_ERR(EIDSP_OUT_OF_MEM); + } + ei_dsp_cont_current_frame_size = frame_length_values; + ei_dsp_cont_current_frame_ix = 0; + } + + matrix_size_out->rows = 0; + matrix_size_out->cols = 0; + + // this is the offset in the signal from which we'll work + size_t offset_in_signal = 0; + + if (ei_dsp_cont_current_frame_ix > (int)ei_dsp_cont_current_frame_size) { + ei_printf("ERR: ei_dsp_cont_current_frame_ix is larger than frame size\n"); + if (preemphasis) { + delete preemphasis; + } + EIDSP_ERR(EIDSP_PARAMETER_INVALID); + } + + // if we still have some code from previous run + while (ei_dsp_cont_current_frame_ix > 0) { + // then from the current frame we need to read `frame_length_values - ei_dsp_cont_current_frame_ix` + // starting at offset 0 + x = preemphasized_audio_signal.get_data(0, frame_length_values - ei_dsp_cont_current_frame_ix, ei_dsp_cont_current_frame + ei_dsp_cont_current_frame_ix); + if (x != EIDSP_OK) { + if (preemphasis) { + delete preemphasis; + } + EIDSP_ERR(x); + } + + // now ei_dsp_cont_current_frame is complete + signal_t frame_signal; + x = numpy::signal_from_buffer(ei_dsp_cont_current_frame, frame_length_values, &frame_signal); + if (x != EIDSP_OK) { + if (preemphasis) { + delete preemphasis; + } + EIDSP_ERR(x); + } + + x = extract_mfe_run_slice(&frame_signal, output_matrix, &config, sampling_frequency, matrix_size_out); + if (x != EIDSP_OK) { + if (preemphasis) { + delete preemphasis; + } + EIDSP_ERR(x); + } + + // if there's overlap between frames we roll through + if (frame_stride_values > 0) { + numpy::roll(ei_dsp_cont_current_frame, frame_length_values, -frame_stride_values); + } + + ei_dsp_cont_current_frame_ix -= frame_stride_values; + } + + if (ei_dsp_cont_current_frame_ix < 0) { + offset_in_signal = -ei_dsp_cont_current_frame_ix; + 
ei_dsp_cont_current_frame_ix = 0; + } + + if (offset_in_signal >= signal->total_length) { + if (preemphasis) { + delete preemphasis; + } + offset_in_signal -= signal->total_length; + return EIDSP_OK; + } + + // now... we need to discard part of the signal... + SignalWithRange signal_with_range(&preemphasized_audio_signal, offset_in_signal, signal->total_length); + + signal_t *range_signal = signal_with_range.get_signal(); + size_t range_signal_orig_length = range_signal->total_length; + + // then we'll just go through normal processing of the signal: + x = extract_mfe_run_slice(range_signal, output_matrix, &config, sampling_frequency, matrix_size_out); + if (x != EIDSP_OK) { + if (preemphasis) { + delete preemphasis; + } + EIDSP_ERR(x); + } + + // update offset + int length_of_signal_used = speechpy::processing::calculate_signal_used(range_signal->total_length, sampling_frequency, + config.frame_length, config.frame_stride, false, config.implementation_version); + offset_in_signal += length_of_signal_used; + + // not sure why this is being manipulated... + range_signal->total_length = range_signal_orig_length; + + // see what's left? + int bytes_left_end_of_frame = signal->total_length - offset_in_signal; + bytes_left_end_of_frame += frame_overlap_values; + + if (bytes_left_end_of_frame > 0) { + // then read that into the ei_dsp_cont_current_frame buffer + x = preemphasized_audio_signal.get_data( + (preemphasized_audio_signal.total_length - bytes_left_end_of_frame), + bytes_left_end_of_frame, + ei_dsp_cont_current_frame); + if (x != EIDSP_OK) { + if (preemphasis) { + delete preemphasis; + } + EIDSP_ERR(x); + } + } + + ei_dsp_cont_current_frame_ix = bytes_left_end_of_frame; + + + if (config.implementation_version == 1) { + if (first_run == true) { + signal->total_length -= (size_t)(config.frame_length * (float)frequency); + } + } + + if (preemphasis) { + delete preemphasis; + } + + return EIDSP_OK; +#endif +} + +__attribute__((unused)) int extract_image_features(signal_t *signal, matrix_t *output_matrix, void *config_ptr, const float frequency) { + ei_dsp_config_image_t config = *((ei_dsp_config_image_t*)config_ptr); + + int16_t channel_count = strcmp(config.channels, "Grayscale") == 0 ? 1 : 3; + + size_t output_ix = 0; + +#if defined(EI_DSP_IMAGE_BUFFER_STATIC_SIZE) + const size_t page_size = EI_DSP_IMAGE_BUFFER_STATIC_SIZE; +#else + const size_t page_size = 1024; +#endif + + // buffered read from the signal + size_t bytes_left = signal->total_length; + for (size_t ix = 0; ix < signal->total_length; ix += page_size) { + size_t elements_to_read = bytes_left > page_size ? 
page_size : bytes_left; + +#if defined(EI_DSP_IMAGE_BUFFER_STATIC_SIZE) + matrix_t input_matrix(elements_to_read, config.axes, ei_dsp_image_buffer); +#else + matrix_t input_matrix(elements_to_read, config.axes); +#endif + if (!input_matrix.buffer) { + EIDSP_ERR(EIDSP_OUT_OF_MEM); + } + signal->get_data(ix, elements_to_read, input_matrix.buffer); + + for (size_t jx = 0; jx < elements_to_read; jx++) { + uint32_t pixel = static_cast(input_matrix.buffer[jx]); + + // rgb to 0..1 + float r = static_cast(pixel >> 16 & 0xff) / 255.0f; + float g = static_cast(pixel >> 8 & 0xff) / 255.0f; + float b = static_cast(pixel & 0xff) / 255.0f; + + if (channel_count == 3) { + output_matrix->buffer[output_ix++] = r; + output_matrix->buffer[output_ix++] = g; + output_matrix->buffer[output_ix++] = b; + } + else { + // ITU-R 601-2 luma transform + // see: https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.Image.convert + float v = (0.299f * r) + (0.587f * g) + (0.114f * b); + output_matrix->buffer[output_ix++] = v; + } + } + + bytes_left -= elements_to_read; + } + + return EIDSP_OK; +} + +#if (EI_CLASSIFIER_QUANTIZATION_ENABLED == 1) && (EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_DRPAI) + +__attribute__((unused)) int extract_drpai_features_quantized(signal_t *signal, matrix_u8_t *output_matrix, void *config_ptr, const float frequency) { + ei_dsp_config_image_t config = *((ei_dsp_config_image_t*)config_ptr); + + int16_t channel_count = strcmp(config.channels, "Grayscale") == 0 ? 1 : 3; + + size_t output_ix = 0; + +#if defined(EI_DSP_IMAGE_BUFFER_STATIC_SIZE) + const size_t page_size = EI_DSP_IMAGE_BUFFER_STATIC_SIZE; +#else + const size_t page_size = 1024; +#endif + + // buffered read from the signal + size_t bytes_left = signal->total_length; + for (size_t ix = 0; ix < signal->total_length; ix += page_size) { + size_t elements_to_read = bytes_left > page_size ? page_size : bytes_left; + +#if defined(EI_DSP_IMAGE_BUFFER_STATIC_SIZE) + matrix_t input_matrix(elements_to_read, config.axes, ei_dsp_image_buffer); +#else + matrix_t input_matrix(elements_to_read, config.axes); +#endif + if (!input_matrix.buffer) { + EIDSP_ERR(EIDSP_OUT_OF_MEM); + } + signal->get_data(ix, elements_to_read, input_matrix.buffer); + + for (size_t jx = 0; jx < elements_to_read; jx++) { + uint32_t pixel = static_cast(input_matrix.buffer[jx]); + + if (channel_count == 3) { + uint8_t r = static_cast(pixel >> 16 & 0xff); + uint8_t g = static_cast(pixel >> 8 & 0xff); + uint8_t b = static_cast(pixel & 0xff); + + output_matrix->buffer[output_ix++] = r; + output_matrix->buffer[output_ix++] = g; + output_matrix->buffer[output_ix++] = b; + } + else { + //NOTE: not implementing greyscale yet + } + } + bytes_left -= elements_to_read; + } + + return EIDSP_OK; +} + +#endif //(EI_CLASSIFIER_QUANTIZATION_ENABLED == 1) && (EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_DRPAI) + +#if (EI_CLASSIFIER_QUANTIZATION_ENABLED == 1) && (EI_CLASSIFIER_INFERENCING_ENGINE != EI_CLASSIFIER_DRPAI) + +__attribute__((unused)) int extract_image_features_quantized(signal_t *signal, matrix_i8_t *output_matrix, void *config_ptr, float scale, float zero_point, const float frequency, + int image_scaling) { + ei_dsp_config_image_t config = *((ei_dsp_config_image_t*)config_ptr); + + int16_t channel_count = strcmp(config.channels, "Grayscale") == 0 ? 
1 : 3; + + size_t output_ix = 0; + + const int32_t iRedToGray = (int32_t)(0.299f * 65536.0f); + const int32_t iGreenToGray = (int32_t)(0.587f * 65536.0f); + const int32_t iBlueToGray = (int32_t)(0.114f * 65536.0f); + + static const float torch_mean[] = { 0.485, 0.456, 0.406 }; + static const float torch_std[] = { 0.229, 0.224, 0.225 }; + +#if defined(EI_DSP_IMAGE_BUFFER_STATIC_SIZE) + const size_t page_size = EI_DSP_IMAGE_BUFFER_STATIC_SIZE; +#else + const size_t page_size = 1024; +#endif + + // buffered read from the signal + size_t bytes_left = signal->total_length; + for (size_t ix = 0; ix < signal->total_length; ix += page_size) { + size_t elements_to_read = bytes_left > page_size ? page_size : bytes_left; + +#if defined(EI_DSP_IMAGE_BUFFER_STATIC_SIZE) + matrix_t input_matrix(elements_to_read, config.axes, ei_dsp_image_buffer); +#else + matrix_t input_matrix(elements_to_read, config.axes); +#endif + if (!input_matrix.buffer) { + EIDSP_ERR(EIDSP_OUT_OF_MEM); + } + signal->get_data(ix, elements_to_read, input_matrix.buffer); + + for (size_t jx = 0; jx < elements_to_read; jx++) { + uint32_t pixel = static_cast(input_matrix.buffer[jx]); + + if (channel_count == 3) { + // fast code path + if (scale == 0.003921568859368563f && zero_point == -128 && image_scaling == EI_CLASSIFIER_IMAGE_SCALING_NONE) { + int32_t r = static_cast(pixel >> 16 & 0xff); + int32_t g = static_cast(pixel >> 8 & 0xff); + int32_t b = static_cast(pixel & 0xff); + + output_matrix->buffer[output_ix++] = static_cast(r + zero_point); + output_matrix->buffer[output_ix++] = static_cast(g + zero_point); + output_matrix->buffer[output_ix++] = static_cast(b + zero_point); + } + // slow code path + else { + float r = static_cast(pixel >> 16 & 0xff); + float g = static_cast(pixel >> 8 & 0xff); + float b = static_cast(pixel & 0xff); + + if (image_scaling == EI_CLASSIFIER_IMAGE_SCALING_NONE) { + r /= 255.0f; + g /= 255.0f; + b /= 255.0f; + } + else if (image_scaling == EI_CLASSIFIER_IMAGE_SCALING_TORCH) { + r /= 255.0f; + g /= 255.0f; + b /= 255.0f; + + r = (r - torch_mean[0]) / torch_std[0]; + g = (g - torch_mean[1]) / torch_std[1]; + b = (b - torch_mean[2]) / torch_std[2]; + } + + output_matrix->buffer[output_ix++] = static_cast(round(r / scale) + zero_point); + output_matrix->buffer[output_ix++] = static_cast(round(g / scale) + zero_point); + output_matrix->buffer[output_ix++] = static_cast(round(b / scale) + zero_point); + } + } + else { + // fast code path + if (scale == 0.003921568859368563f && zero_point == -128 && image_scaling == EI_CLASSIFIER_IMAGE_SCALING_NONE) { + int32_t r = static_cast(pixel >> 16 & 0xff); + int32_t g = static_cast(pixel >> 8 & 0xff); + int32_t b = static_cast(pixel & 0xff); + + // ITU-R 601-2 luma transform + // see: https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.Image.convert + int32_t gray = (iRedToGray * r) + (iGreenToGray * g) + (iBlueToGray * b); + gray >>= 16; // scale down to int8_t + gray += zero_point; + if (gray < - 128) gray = -128; + else if (gray > 127) gray = 127; + output_matrix->buffer[output_ix++] = static_cast(gray); + } + // slow code path + else { + float r = static_cast(pixel >> 16 & 0xff); + float g = static_cast(pixel >> 8 & 0xff); + float b = static_cast(pixel & 0xff); + + if (image_scaling == EI_CLASSIFIER_IMAGE_SCALING_NONE) { + r /= 255.0f; + g /= 255.0f; + b /= 255.0f; + } + else if (image_scaling == EI_CLASSIFIER_IMAGE_SCALING_TORCH) { + r /= 255.0f; + g /= 255.0f; + b /= 255.0f; + + r = (r - torch_mean[0]) / torch_std[0]; + g = (g - torch_mean[1]) 
/ torch_std[1]; + b = (b - torch_mean[2]) / torch_std[2]; + } + + // ITU-R 601-2 luma transform + // see: https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.Image.convert + float v = (0.299f * r) + (0.587f * g) + (0.114f * b); + output_matrix->buffer[output_ix++] = static_cast(round(v / scale) + zero_point); + } + } + } + + bytes_left -= elements_to_read; + + } + return EIDSP_OK; +} +#endif // (EI_CLASSIFIER_QUANTIZATION_ENABLED == 1) && (EI_CLASSIFIER_INFERENCING_ENGINE != EI_CLASSIFIER_DRPAI) + +/** + * Clear all state regarding continuous audio. Invoke this function after continuous audio loop ends. + */ +__attribute__((unused)) int ei_dsp_clear_continuous_audio_state() { + if (ei_dsp_cont_current_frame) { + ei_free(ei_dsp_cont_current_frame); + } + + ei_dsp_cont_current_frame = nullptr; + ei_dsp_cont_current_frame_size = 0; + ei_dsp_cont_current_frame_ix = 0; + + return EIDSP_OK; +} + +/** + * @brief Calculates the cepstral mean and variable normalization. + * + * @param matrix Source and destination matrix + * @param config_ptr ei_dsp_config_mfcc_t struct pointer + */ +__attribute__((unused)) void calc_cepstral_mean_and_var_normalization_mfcc(ei_matrix *matrix, void *config_ptr) +{ + ei_dsp_config_mfcc_t *config = (ei_dsp_config_mfcc_t *)config_ptr; + + uint32_t original_matrix_size = matrix->rows * matrix->cols; + + /* Modify rows and colums ration for matrix normalization */ + matrix->rows = original_matrix_size / config->num_cepstral; + matrix->cols = config->num_cepstral; + + // cepstral mean and variance normalization + int ret = speechpy::processing::cmvnw(matrix, config->win_size, true, false); + if (ret != EIDSP_OK) { + ei_printf("ERR: cmvnw failed (%d)\n", ret); + return; + } + + /* Reset rows and columns ratio */ + matrix->rows = 1; + matrix->cols = original_matrix_size; +} + +/** + * @brief Calculates the cepstral mean and variable normalization. + * + * @param matrix Source and destination matrix + * @param config_ptr ei_dsp_config_mfe_t struct pointer + */ +__attribute__((unused)) void calc_cepstral_mean_and_var_normalization_mfe(ei_matrix *matrix, void *config_ptr) +{ + ei_dsp_config_mfe_t *config = (ei_dsp_config_mfe_t *)config_ptr; + + uint32_t original_matrix_size = matrix->rows * matrix->cols; + + /* Modify rows and colums ration for matrix normalization */ + matrix->rows = (original_matrix_size) / config->num_filters; + matrix->cols = config->num_filters; + + if (config->implementation_version < 3) { + // cepstral mean and variance normalization + int ret = speechpy::processing::cmvnw(matrix, config->win_size, false, true); + if (ret != EIDSP_OK) { + ei_printf("ERR: cmvnw failed (%d)\n", ret); + return; + } + } + else { + // normalization + int ret = speechpy::processing::mfe_normalization(matrix, config->noise_floor_db); + if (ret != EIDSP_OK) { + ei_printf("ERR: normalization failed (%d)\n", ret); + return; + } + } + + /* Reset rows and columns ratio */ + matrix->rows = 1; + matrix->cols = (original_matrix_size); +} + +/** + * @brief Calculates the cepstral mean and variable normalization. 
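+ *        (Spectrogram variant: reshapes the flattened matrix back to frames x (fft_length / 2 + 1) bins, then normalizes according to implementation_version.)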
+ * + * @param matrix Source and destination matrix + * @param config_ptr ei_dsp_config_spectrogram_t struct pointer + */ +__attribute__((unused)) void calc_cepstral_mean_and_var_normalization_spectrogram(ei_matrix *matrix, void *config_ptr) +{ + ei_dsp_config_spectrogram_t *config = (ei_dsp_config_spectrogram_t *)config_ptr; + + uint32_t original_matrix_size = matrix->rows * matrix->cols; + + /* Modify rows and colums ration for matrix normalization */ + matrix->cols = config->fft_length / 2 + 1; + matrix->rows = (original_matrix_size) / matrix->cols; + + if (config->implementation_version < 3) { + int ret = numpy::normalize(matrix); + if (ret != EIDSP_OK) { + ei_printf("ERR: normalization failed (%d)\n", ret); + return; + } + } + else { + // normalization + int ret = speechpy::processing::spectrogram_normalization(matrix, config->noise_floor_db, config->implementation_version == 3); + if (ret != EIDSP_OK) { + ei_printf("ERR: normalization failed (%d)\n", ret); + return; + } + } + + /* Reset rows and columns ratio */ + matrix->rows = 1; + matrix->cols = (original_matrix_size); +} + +#ifdef __cplusplus +} +#endif // __cplusplus + +#endif // _EDGE_IMPULSE_RUN_DSP_H_ diff --git a/edgeimpulse/edge-impulse-sdk/classifier/ei_signal_with_axes.h b/edgeimpulse/edge-impulse-sdk/classifier/ei_signal_with_axes.h new file mode 100644 index 0000000..ccf4291 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/classifier/ei_signal_with_axes.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef _EI_CLASSIFIER_SIGNAL_WITH_AXES_H_ +#define _EI_CLASSIFIER_SIGNAL_WITH_AXES_H_ + +#include "edge-impulse-sdk/dsp/numpy_types.h" +#include "edge-impulse-sdk/dsp/returntypes.hpp" +#include "edge-impulse-sdk/classifier/ei_model_types.h" + +#if !EIDSP_SIGNAL_C_FN_POINTER + +using namespace ei; + +class SignalWithAxes { +public: + SignalWithAxes(signal_t *original_signal, uint8_t *axes, size_t axes_count, const ei_impulse_t *impulse): + _original_signal(original_signal), _axes(axes), _axes_count(axes_count), _impulse(impulse) + { + + } + + signal_t * get_signal() { + if (this->_axes_count == _impulse->raw_samples_per_frame) { + return this->_original_signal; + } + + wrapped_signal.total_length = _original_signal->total_length / _impulse->raw_samples_per_frame * _axes_count; +#ifdef __MBED__ + wrapped_signal.get_data = mbed::callback(this, &SignalWithAxes::get_data); +#else + wrapped_signal.get_data = [this](size_t offset, size_t length, float *out_ptr) { + return this->get_data(offset, length, out_ptr); + }; +#endif + return &wrapped_signal; + } + + int get_data(size_t offset, size_t length, float *out_ptr) { + size_t offset_on_original_signal = offset / _axes_count * _impulse->raw_samples_per_frame; + size_t length_on_original_signal = length / _axes_count * _impulse->raw_samples_per_frame; + + size_t out_ptr_ix = 0; + + for (size_t ix = offset_on_original_signal; ix < offset_on_original_signal + length_on_original_signal; ix += _impulse->raw_samples_per_frame) { + for (size_t axis_ix = 0; axis_ix < this->_axes_count; axis_ix++) { + int r = _original_signal->get_data(ix + _axes[axis_ix], 1, &out_ptr[out_ptr_ix++]); + if (r != 0) { + return r; + } + } + } + + return 0; + } + +private: + signal_t *_original_signal; + uint8_t *_axes; + size_t _axes_count; + const ei_impulse_t *_impulse; + signal_t wrapped_signal; +}; + +#endif // #if !EIDSP_SIGNAL_C_FN_POINTER + +#endif // _EI_CLASSIFIER_SIGNAL_WITH_AXES_H_ diff --git a/edgeimpulse/edge-impulse-sdk/classifier/ei_signal_with_range.h b/edgeimpulse/edge-impulse-sdk/classifier/ei_signal_with_range.h new file mode 100644 index 0000000..7571c7e --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/classifier/ei_signal_with_range.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef _EI_CLASSIFIER_SIGNAL_WITH_RANGE_H_ +#define _EI_CLASSIFIER_SIGNAL_WITH_RANGE_H_ + +#include "edge-impulse-sdk/dsp/numpy_types.h" +#include "edge-impulse-sdk/dsp/returntypes.hpp" + +#if !EIDSP_SIGNAL_C_FN_POINTER + +using namespace ei; + +class SignalWithRange { +public: + SignalWithRange(signal_t *original_signal, uint32_t range_start, uint32_t range_end): + _original_signal(original_signal), _range_start(range_start), _range_end(range_end) + { + + } + + signal_t * get_signal() { + if (this->_range_start == 0 && this->_range_end == this->_original_signal->total_length) { + return this->_original_signal; + } + + wrapped_signal.total_length = _range_end - _range_start; +#ifdef __MBED__ + wrapped_signal.get_data = mbed::callback(this, &SignalWithRange::get_data); +#else + wrapped_signal.get_data = [this](size_t offset, size_t length, float *out_ptr) { + return this->get_data(offset, length, out_ptr); + }; +#endif + return &wrapped_signal; + } + + int get_data(size_t offset, size_t length, float *out_ptr) { + return _original_signal->get_data(offset + _range_start, length, out_ptr); + } + +private: + signal_t *_original_signal; + uint32_t _range_start; + uint32_t _range_end; + signal_t wrapped_signal; +}; + +#endif // #if !EIDSP_SIGNAL_C_FN_POINTER + +#endif // _EI_CLASSIFIER_SIGNAL_WITH_RANGE_H_ diff --git a/edgeimpulse/edge-impulse-sdk/classifier/inferencing_engines/akida.h b/edgeimpulse/edge-impulse-sdk/classifier/inferencing_engines/akida.h new file mode 100644 index 0000000..205b542 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/classifier/inferencing_engines/akida.h @@ -0,0 +1,561 @@ +/* Edge Impulse inferencing library + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef EI_CLASSIFIER_INFERENCING_ENGINE_AKIDA_H +#define EI_CLASSIFIER_INFERENCING_ENGINE_AKIDA_H + +#if (EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_AKIDA) + +/** + * @brief if we are not forcing SOFTWARE inference (simulation) + * then make sure we will try to use hardware + * + */ +#ifndef EI_CLASSIFIER_USE_AKIDA_SOFTWARE +#define EI_CLASSIFIER_USE_AKIDA_HARDWARE 1 +#endif + +/** + * @brief If more than one device is present in system + * setting this to device index can select a proper device. 
+ * e.g.: set to 1 to selct /dev/akida1 + * + */ +#ifndef EI_CLASSIFIER_USE_AKIDA_HARDWARE_NO +#define EI_CLASSIFIER_USE_AKIDA_HARDWARE_NO 0 +#endif + +#include "model-parameters/model_metadata.h" +#include +#include "tensorflow-lite/tensorflow/lite/c/common.h" +#include "tensorflow-lite/tensorflow/lite/interpreter.h" +#include "tensorflow-lite/tensorflow/lite/kernels/register.h" +#include "tensorflow-lite/tensorflow/lite/model.h" +#include "tensorflow-lite/tensorflow/lite/optional_debug_tools.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/tree_ensemble_classifier.h" +#include "edge-impulse-sdk/classifier/ei_model_types.h" +#include "edge-impulse-sdk/porting/ei_classifier_porting.h" +#include "edge-impulse-sdk/classifier/ei_fill_result_struct.h" +#include "tensorflow-lite/tensorflow/lite/kernels/internal/reference/softmax.h" +#undef EI_CLASSIFIER_INFERENCING_ENGINE +#define EI_CLASSIFIER_INFERENCING_ENGINE EI_CLASSIFIER_TFLITE_FULL +#include "tflite_helper.h" +#undef EI_CLASSIFIER_INFERENCING_ENGINE +#define EI_CLASSIFIER_INFERENCING_ENGINE EI_CLASSIFIER_AKIDA +#include +#include +#include +#include +#include +#include +#include +#include "pybind11/embed.h" +#include "pybind11/numpy.h" +#include "pybind11/stl.h" + +namespace py = pybind11; + +std::stringstream engine_info; + +static py::module_ akida; +static py::object model; +static py::object model_predict; +static py::object model_forward; +static py::object device; +static bool akida_initialized = false; +static std::vector input_shape; +static tflite::RuntimeShape softmax_shape; +static tflite::SoftmaxParams dummy_params; +static int model_input_bits = 0; +static float scale; +static int down_scale; +typedef struct { + std::unique_ptr model; + std::unique_ptr interpreter; +} ei_tflite_state_t; + +std::map ei_tflite_instances; + +bool init_akida(const uint8_t *model_arr, size_t model_arr_size, bool debug) +{ + py::module_ sys; + py::list path; + constexpr char model_file_path[] = "/tmp/akida_model.fbz"; + + if(debug) { + try { + sys = py::module_::import("sys"); + path = sys.attr("path"); + ei_printf("DEBUG: sys.path:"); + for (py::handle p: path) { + ei_printf("\t%s\n", p.cast().c_str()); + } + } + catch (py::error_already_set &e) { + ei_printf("ERR: Importing 'sys' library failed:\n%s\n", e.what()); + // as it is only for debug purposes, continue + } + } + + try { + // import Python's akida module + akida = py::module_::import("akida"); + } + catch (py::error_already_set &e) { + ei_printf("ERR: Importing 'akida' library failed:\n%s\n", e.what()); + return false; + } + + if(debug) { + std::string ver = akida.attr("__version__").cast(); + ei_printf("DEBUG: Akida version: %s\n", ver.c_str()); + } + + py::object Model = akida.attr("Model"); + + // deploy akida model file into temporary file + std::ofstream model_file(model_file_path, std::ios::out | std::ios::binary); + model_file.write(reinterpret_cast(model_arr), model_arr_size); + if(model_file.bad()) { + ei_printf("ERR: failed to unpack model ile into %s\n", model_file_path); + model_file.close(); + return false; + } + model_file.close(); + + // load model + try { + model = Model(model_file_path); + } + catch (py::error_already_set &e) { + ei_printf("ERR: Can't load model file from %s\n", model_file_path); + ei_printf("ERR: %s\n", e.what()); + return false; + } + + // get input shape from model + input_shape = model.attr("input_shape").cast>(); + //TODO: temporarily only 3D input data is supported (see note in run_nn_inference) + if(input_shape.size() != 3) { + 
ei_printf("ERR: Unsupported input data shape. Expected 3 dimensions got %d\n", (int)input_shape.size()); + return false; + } + // extend input by (N, ...) - hardcoded to (1, ...) + input_shape.insert(input_shape.begin(), (size_t)1); + + // get model input_bits + std::vector layers = model.attr("layers").cast>(); + auto input_layer = layers[0]; + model_input_bits = input_layer.attr("input_bits").cast(); + if((model_input_bits != 8) && (model_input_bits != 4)) { + ei_printf("ERR: Unsupported input_bits. Expected 4 or 8 got %d\n", model_input_bits); + return false; + } + + // initialize scale coefficients + if(model_input_bits == 8) { + scale = 255; + down_scale = 1; + } + else if(model_input_bits == 4) { + // these values are recommended by BrainChip + scale = 15; + down_scale = 16; + } + + if(debug) { + ei_printf("INFO: Model input_bits: %d\n", model_input_bits); + ei_printf("INFO: Scale: %f\n", scale); + ei_printf("INFO: Down scale: %d\n", down_scale); + } + +#if (defined(EI_CLASSIFIER_USE_AKIDA_HARDWARE) && (EI_CLASSIFIER_USE_AKIDA_HARDWARE == 1)) + // get list of available devices + py::list devices = akida.attr("devices")(); + if(devices.empty() == true) { + ei_printf("ERR: AKD1000 device not found!\n"); + return false; + } + + if(devices.size() > 1) { + ei_printf("More than one device found! Using /dev/akida%d\n", EI_CLASSIFIER_USE_AKIDA_HARDWARE_NO); + device = devices[EI_CLASSIFIER_USE_AKIDA_HARDWARE_NO]; + } + else { + device = devices[0]; + } + //TODO: check if selected device is correct (compare versions) + // enable power measurement + device.attr("soc").attr("power_measurement_enabled") = true; + + // map model to the device + try { + model.attr("map")(device); + } + catch (py::error_already_set &e) { + ei_printf("ERR: Can't load the ML model onto the AKD1000 SoC\n"); + ei_printf("ERR: %s\n", e.what()); + return false; + } +#elif (defined(EI_CLASSIFIER_USE_AKIDA_SOFTWARE) && (EI_CLASSIFIER_USE_AKIDA_SOFTWARE == 1)) +#warning "Akida model will be run in SIMULATION mode (not on real hardware)!" +#else +#error "Neither EI_CLASSIFIER_USE_AKIDA_HARDWARE or EI_CLASSIFIER_USE_AKIDA_SOFTWARE are defined or set to 1" +#endif + + // init softmax shape + std::vector tmp = model.attr("output_shape").cast>(); + softmax_shape.BuildFrom(tmp); + // dumy beta parameter for softmax purposes + dummy_params.beta = 1; + + // get reference to predict function + model_predict = model.attr("predict"); + model_forward = model.attr("forward"); + + // clear info stream + engine_info.str(""); + + return true; +} + +template +void debug_print(const std::vector vec, const int val_per_row = 3) +{ + int n = 0; + for(auto it = vec.begin(); it != vec.end(); it++) { + ei_printf("%f ", *it); + if(++n > val_per_row - 1) { + ei_printf("\n"); + n = 0; + } + } +} + +/** + * @brief Do neural network inferencing over the processed feature matrix + * + * @param impulse Struct describing impulse architecture + * @param fmatrix Processed matrix + * @param result Output classifier results + * @param[in] debug Debug output enable + * + * @return The ei impulse error. 
+ */ +EI_IMPULSE_ERROR run_nn_inference( + const ei_impulse_t *impulse, + ei_feature_t *fmatrix, + uint32_t* input_block_ids, + uint32_t input_block_ids_size, + ei_impulse_result_t *result, + void *config_ptr, + bool debug) +{ + ei_learning_block_config_tflite_graph_t *block_config = ((ei_learning_block_config_tflite_graph_t*)config_ptr); + ei_config_tflite_graph_t *graph_config = ((ei_config_tflite_graph_t*)block_config->graph_config); + + EI_IMPULSE_ERROR fill_res = EI_IMPULSE_OK; + + // init Python embedded interpreter (should be called once!) + static py::scoped_interpreter guard{}; + + // check if we've initialized the interpreter and device? + if (akida_initialized == false) { + if(init_akida(graph_config->model, graph_config->model_size, debug) == false) { + return EI_IMPULSE_AKIDA_ERROR; + } + akida_initialized = true; + } + + // according to: + // https://doc.brainchipinc.com/api_reference/akida_apis.html#akida.Model.predict + // input type is always uint8 + py::array_t input_data(input_shape); + + /* + * convert data to uint8 and copy features into input tensor + * For images RGB shape is (width, height, colors) + * For images BW shape is (width, height, 1) + * For Audio shape is (width, height, 1) - spectrogram + * TODO: test with other ML models/data types + * For details see: + * https://pybind11.readthedocs.io/en/stable/advanced/pycpp/numpy.html#direct-access + */ + auto r = input_data.mutable_unchecked<4>(); + float temp; + + size_t mtx_size = impulse->dsp_blocks_size + impulse->learning_blocks_size; + for (size_t i = 0; i < input_block_ids_size; i++) { + uint16_t cur_mtx = input_block_ids[i]; +#if EI_CLASSIFIER_SINGLE_FEATURE_INPUT == 0 + ei::matrix_t* matrix = NULL; + + if (!find_mtx_by_idx(fmatrix, &matrix, cur_mtx, mtx_size)) { + ei_printf("ERR: Cannot find matrix with id %zu\n", cur_mtx); + return EI_IMPULSE_INVALID_SIZE; + } +#else + ei::matrix_t* matrix = fmatrix[0].matrix; +#endif + for (py::ssize_t x = 0; x < r.shape(1); x++) { + for (py::ssize_t y = 0; y < r.shape(2); y++) { + for(py::ssize_t z = 0; z < r.shape(3); z++) { + temp = (matrix->buffer[x * r.shape(2) * r.shape(3) + y * r.shape(3) + z] * scale); + temp = std::max(0.0f, std::min(temp, 255.0f)); + r(0, x, y, z) = (uint8_t)(temp / down_scale); + } + } + } + } + + // Run inference on AKD1000 + uint64_t ctx_start_us = ei_read_timer_us(); + py::array_t potentials; + try { + potentials = model_predict(input_data); + } + catch (py::error_already_set &e) { + ei_printf("ERR: Inference error:\n%s\n", e.what()); + return EI_IMPULSE_AKIDA_ERROR; + } + // TODO: 'forward' is returning int8 or int32, but EI SDK supports int8 or float32 only + // py::array_t potentials = model_forward(input_data); + uint64_t ctx_end_us = ei_read_timer_us(); + + potentials = potentials.squeeze(); + + if(debug) { + std::string ret_str = py::str(potentials).cast(); + ei_printf("AKD1000 raw output:\n%s\n", ret_str.c_str()); + } + + // convert to vector of floats to make further processing much easier + std::vector potentials_v;// = potentials.cast>(); + + // TODO: output conversion depending on output shape? + if (impulse->object_detection == false) { + potentials_v = potentials.squeeze().cast>(); + } + else { + // TODO: output from AkidaNet/MobileNet is always N x M x P (3 dimensions)? 
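+        // Flatten the (N x M x P) grid row-major into potentials_v; the FOMO result
+        // filling below indexes it as a fomo_output_size x fomo_output_size grid.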
+ auto q = potentials.unchecked<>(); + for (py::ssize_t x = 0; x < q.shape(0); x++) { + for (py::ssize_t y = 0; y < q.shape(1); y++) { + for(py::ssize_t z = 0; z < q.shape(2); z++) { + potentials_v.push_back(q(x, y, z)); + } + } + } + } + + // apply softmax, becuase Akida is not supporting this operation + tflite::reference_ops::Softmax(dummy_params, softmax_shape, potentials_v.data(), softmax_shape, potentials_v.data()); + + if(debug == true) { + ei_printf("After softmax:\n"); + debug_print(potentials_v); + } + + float active_power = 0; +#if (defined(EI_CLASSIFIER_USE_AKIDA_HARDWARE)) + // power measurement post-processing + float floor_power = device.attr("soc").attr("power_meter").attr("floor").cast(); + py::array pwr_events = device.attr("soc").attr("power_meter").attr("events")(); + auto events = pwr_events.mutable_unchecked(); + for (py::ssize_t i = 0; i < events.shape(0); i++) { + active_power += events(i).attr("power").cast(); + } + active_power = (active_power/pwr_events.size()) - floor_power; +#endif + + result->timing.classification_us = ctx_end_us - ctx_start_us; + result->timing.classification = (int)(result->timing.classification_us / 1000); + + // clear info + engine_info.str(""); + engine_info << "Power consumption: " << std::fixed << std::setprecision(2) << active_power << " mW\n"; + engine_info << "Inferences per second: " << (1000000 / result->timing.classification_us); + + if (impulse->object_detection) { + switch (impulse->object_detection_last_layer) { + case EI_CLASSIFIER_LAST_LAYER_FOMO: { + fill_res = fill_result_struct_f32_fomo( + impulse, + result, + potentials_v.data(), + impulse->fomo_output_size, + impulse->fomo_output_size); + break; + } + case EI_CLASSIFIER_LAST_LAYER_SSD: { + ei_printf("ERR: MobileNet SSD models are not implemented for Akida (%d)\n", + impulse->object_detection_last_layer); + return EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE; + } + case EI_CLASSIFIER_LAST_LAYER_YOLOV5: { + ei_printf("ERR: YOLO v5 models are not implemented for Akida (%d)\n", + impulse->object_detection_last_layer); + return EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE; + } + default: { + ei_printf("ERR: Unsupported object detection last layer (%d)\n", + impulse->object_detection_last_layer); + return EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE; + } + } + } + else { + fill_res = fill_result_struct_f32(impulse, result, potentials_v.data(), debug); + } + + return fill_res; +} + +/** + * Construct a tflite interpreter (creates it if needed) + */ +static EI_IMPULSE_ERROR get_interpreter(ei_learning_block_config_tflite_graph_t *block_config, tflite::Interpreter **interpreter) { + // not in the map yet... 
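+    // Build the interpreter once and cache it in ei_tflite_instances keyed by block_id,
+    // so repeated DSP inferences for the same learn block reuse the same instance.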
+ if (!ei_tflite_instances.count(block_config->block_id)) { + ei_config_tflite_graph_t *graph_config = (ei_config_tflite_graph_t*)block_config->graph_config; + ei_tflite_state_t *new_state = new ei_tflite_state_t(); + + auto new_model = tflite::FlatBufferModel::BuildFromBuffer((const char*)graph_config->model, graph_config->model_size); + new_state->model = std::move(new_model); + if (!new_state->model) { + ei_printf("Failed to build TFLite model from buffer\n"); + return EI_IMPULSE_TFLITE_ERROR; + } + + tflite::ops::builtin::BuiltinOpResolver resolver; +#if EI_CLASSIFIER_HAS_TREE_ENSEMBLE_CLASSIFIER + resolver.AddCustom("TreeEnsembleClassifier", + tflite::ops::custom::Register_TREE_ENSEMBLE_CLASSIFIER()); +#endif + tflite::InterpreterBuilder builder(*new_state->model, resolver); + builder(&new_state->interpreter); + + if (!new_state->interpreter) { + ei_printf("Failed to construct interpreter\n"); + return EI_IMPULSE_TFLITE_ERROR; + } + + if (new_state->interpreter->AllocateTensors() != kTfLiteOk) { + ei_printf("AllocateTensors failed\n"); + return EI_IMPULSE_TFLITE_ERROR; + } + + int hw_thread_count = (int)std::thread::hardware_concurrency(); + hw_thread_count -= 1; // leave one thread free for the other application + if (hw_thread_count < 1) { + hw_thread_count = 1; + } + + if (new_state->interpreter->SetNumThreads(hw_thread_count) != kTfLiteOk) { + ei_printf("SetNumThreads failed\n"); + return EI_IMPULSE_TFLITE_ERROR; + } + + ei_tflite_instances.insert(std::make_pair(block_config->block_id, new_state)); + } + + auto tflite_state = ei_tflite_instances[block_config->block_id]; + *interpreter = tflite_state->interpreter.get(); + return EI_IMPULSE_OK; +} + + +extern "C" EI_IMPULSE_ERROR run_nn_inference_from_dsp( + ei_learning_block_config_tflite_graph_t *block_config, + signal_t *signal, + matrix_t *output_matrix) +{ + tflite::Interpreter *interpreter; + auto interpreter_ret = get_interpreter(block_config, &interpreter); + if (interpreter_ret != EI_IMPULSE_OK) { + return interpreter_ret; + } + + TfLiteTensor *input = interpreter->input_tensor(0); + TfLiteTensor *output = interpreter->output_tensor(0); + + if (!input) { + return EI_IMPULSE_INPUT_TENSOR_WAS_NULL; + } + if (!output) { + return EI_IMPULSE_OUTPUT_TENSOR_WAS_NULL; + } + + auto input_res = fill_input_tensor_from_signal(signal, input); + if (input_res != EI_IMPULSE_OK) { + return input_res; + } + + TfLiteStatus status = interpreter->Invoke(); + if (status != kTfLiteOk) { + ei_printf("ERR: interpreter->Invoke() failed with %d\n", status); + return EI_IMPULSE_TFLITE_ERROR; + } + + auto output_res = fill_output_matrix_from_tensor(output, output_matrix); + if (output_res != EI_IMPULSE_OK) { + return output_res; + } + + // on Linux we're not worried about free'ing (for now) + + return EI_IMPULSE_OK; +} + +__attribute__((unused)) int extract_tflite_features(signal_t *signal, matrix_t *output_matrix, void *config_ptr, const float frequency) { + + ei_dsp_config_tflite_t *dsp_config = (ei_dsp_config_tflite_t*)config_ptr; + + ei_config_tflite_graph_t ei_config_tflite_graph_0 = { + .implementation_version = 1, + .model = dsp_config->model, + .model_size = dsp_config->model_size, + .arena_size = dsp_config->arena_size + }; + + ei_learning_block_config_tflite_graph_t ei_learning_block_config = { + .implementation_version = 1, + .block_id = dsp_config->block_id, + .object_detection = false, + .object_detection_last_layer = EI_CLASSIFIER_LAST_LAYER_UNKNOWN, + .output_data_tensor = 0, + .output_labels_tensor = 255, + .output_score_tensor = 255, 
+ .graph_config = &ei_config_tflite_graph_0 + }; + + auto x = run_nn_inference_from_dsp(&ei_learning_block_config, signal, output_matrix); + if (x != 0) { + return x; + } + + return EIDSP_OK; +} + +#endif // EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_AKIDA + +#endif /* EI_CLASSIFIER_INFERENCING_ENGINE_AKIDA_H */ diff --git a/edgeimpulse/edge-impulse-sdk/classifier/inferencing_engines/anomaly.h b/edgeimpulse/edge-impulse-sdk/classifier/inferencing_engines/anomaly.h new file mode 100644 index 0000000..ea7d729 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/classifier/inferencing_engines/anomaly.h @@ -0,0 +1,264 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef _EDGE_IMPULSE_INFERENCING_ANOMALY_H_ +#define _EDGE_IMPULSE_INFERENCING_ANOMALY_H_ + +#if (EI_CLASSIFIER_HAS_ANOMALY) + +#include +#include +#include +#include +#include + +#include "edge-impulse-sdk/classifier/ei_classifier_types.h" +#include "edge-impulse-sdk/classifier/ei_aligned_malloc.h" +#include "edge-impulse-sdk/porting/ei_classifier_porting.h" +#include "edge-impulse-sdk/classifier/inferencing_engines/engines.h" +#include "edge-impulse-sdk/classifier/ei_fill_result_struct.h" + +#ifdef __cplusplus +namespace { +#endif // __cplusplus + +/** + * Standard scaler, scales all values in the input vector + * Note that this *modifies* the array in place! + * @param input Array of input values + * @param scale Array of scale values (obtain from StandardScaler in Python) + * @param mean Array of mean values (obtain from StandardScaler in Python) + * @param input_size Size of input, scale and mean arrays + */ +void standard_scaler(float *input, const float *scale, const float *mean, size_t input_size) { + for (size_t ix = 0; ix < input_size; ix++) { + input[ix] = (input[ix] - mean[ix]) / scale[ix]; + } +} + +/** + * Calculate the distance between input vector and the cluster + * @param input Array of input values (already scaled by standard_scaler) + * @param input_size Size of the input array + * @param cluster A cluster (number of centroids should match input_size) + */ +float calculate_cluster_distance(float *input, size_t input_size, const ei_classifier_anom_cluster_t *cluster) { + // todo: check input_size and centroid size? 
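+  // Returns the Euclidean distance to the cluster centroid minus the cluster's max_error:
+  //   sqrt(sum_i (input[i] - centroid[i])^2) - max_error
+  // so a negative value means the (already scaled) input lies inside the learned cluster.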
+ + float dist = 0.0f; + for (size_t ix = 0; ix < input_size; ix++) { + dist += pow(input[ix] - cluster->centroid[ix], 2); + } + return sqrt(dist) - cluster->max_error; +} + +/** + * Get minimum distance to a cluster + * @param input Array of input values (already scaled by standard_scaler) + * @param input_size Size of the input array + * @param clusters Array of clusters + * @param cluster_size Size of cluster array + */ +float get_min_distance_to_cluster(float *input, size_t input_size, const ei_classifier_anom_cluster_t *clusters, size_t cluster_size) { + float min = 1000.0f; + for (size_t ix = 0; ix < cluster_size; ix++) { + float dist = calculate_cluster_distance(input, input_size, &clusters[ix]); + if (dist < min) { + min = dist; + } + } + return min; +} + +#ifdef __cplusplus +} +#endif // __cplusplus + + +/** + * Extracts the input values from the feature matrix based on the anomaly axes. + * @param fmatrix Feature matrix + * @param input_block_ids Array of block IDs to extract from the feature matrix + * @param input_block_ids_size Size of input_block_ids array + * @param block_config Anomaly block configuration + * @param input Array to store the extracted input values + * @return EI_IMPULSE_OK if successful, otherwise an error code + */ +EI_IMPULSE_ERROR extract_anomaly_input_values( + ei_feature_t *fmatrix, + uint32_t* input_block_ids, + uint32_t input_block_ids_size, + uint32_t anom_axes_size, + const uint16_t *anom_axis, + float *input) +{ + if (input_block_ids_size == 1) { + for (size_t ix = 0; ix < anom_axes_size; ix++) { + input[ix] = fmatrix[0].matrix->buffer[anom_axis[ix]]; + } + } + else { +#if EI_CLASSIFIER_SINGLE_FEATURE_INPUT == 0 + ei::matrix_t* matrix = NULL; +#endif + // tracks where we are now in the combined feature matrix + uint32_t global_buf_pos = 0; + // we add the size of passed matrix to it + uint32_t buf_offset = 0; + // current index of input feature + uint32_t input_pos = 0; + + for (size_t i = 0; i < input_block_ids_size; i++) { +#if EI_CLASSIFIER_SINGLE_FEATURE_INPUT == 0 + size_t cur_mtx = input_block_ids[i]; + if (!find_mtx_by_idx(fmatrix, &matrix, cur_mtx, anom_axes_size)) { + ei_printf("ERR: Cannot find matrix with id %zu\n", cur_mtx); + return EI_IMPULSE_INVALID_SIZE; + } +#else + ei::matrix_t* matrix = fmatrix[0].matrix; +#endif + for (size_t ix = 0; ix < anom_axes_size; ix++) { + global_buf_pos = anom_axis[input_pos]; + if (global_buf_pos <= buf_offset + (matrix->rows * matrix->cols)) { + input[input_pos] = matrix->buffer[anom_axis[input_pos] - buf_offset]; + input_pos++; + if (input_pos >= anom_axes_size) { goto end; } + } + else { + break; + } + } + buf_offset += matrix->rows * matrix->cols; + } + end:; + } + return EI_IMPULSE_OK; +} + + +EI_IMPULSE_ERROR run_kmeans_anomaly( + const ei_impulse_t *impulse, + ei_feature_t *fmatrix, + uint32_t* input_block_ids, + uint32_t input_block_ids_size, + ei_impulse_result_t *result, + void *config_ptr, + bool debug = false) +{ + ei_learning_block_config_anomaly_kmeans_t *block_config = (ei_learning_block_config_anomaly_kmeans_t*)config_ptr; + + uint64_t anomaly_start_ms = ei_read_timer_ms(); + + float *input = (float*)ei_malloc(block_config->anom_axes_size * sizeof(float)); + if (!input) { + ei_printf("Failed to allocate memory for anomaly input buffer"); + return EI_IMPULSE_OUT_OF_MEMORY; + } + + extract_anomaly_input_values(fmatrix, input_block_ids, input_block_ids_size, block_config->anom_axes_size, block_config->anom_axis, input); + + standard_scaler(input, block_config->anom_scale, 
block_config->anom_mean, block_config->anom_axes_size); + float anomaly = get_min_distance_to_cluster( + input, block_config->anom_axes_size, block_config->anom_clusters, block_config->anom_cluster_count); + + uint64_t anomaly_end_ms = ei_read_timer_ms(); + + if (debug) { + ei_printf("Anomaly score (time: %d ms.): ", static_cast(anomaly_end_ms - anomaly_start_ms)); + ei_printf_float(anomaly); + ei_printf("\n"); + } + + result->timing.anomaly = anomaly_end_ms - anomaly_start_ms; + result->anomaly = anomaly; + ei_free(input); + + return EI_IMPULSE_OK; +} + +#if (EI_CLASSIFIER_INFERENCING_ENGINE != EI_CLASSIFIER_NONE) +EI_IMPULSE_ERROR run_gmm_anomaly( + const ei_impulse_t *impulse, + ei_feature_t *fmatrix, + uint32_t* input_block_ids, + uint32_t input_block_ids_size, + ei_impulse_result_t *result, + void *config_ptr, + bool debug = false) +{ + ei_learning_block_config_anomaly_gmm_t *block_config = (ei_learning_block_config_anomaly_gmm_t*)config_ptr; + + ei_learning_block_config_tflite_graph_t ei_learning_block_config_gmm = { + .implementation_version = 1, + .block_id = 0, + .object_detection = 0, + .object_detection_last_layer = EI_CLASSIFIER_LAST_LAYER_UNKNOWN, + .output_data_tensor = 0, + .output_labels_tensor = 0, + .output_score_tensor = 0, + .quantized = 0, + .compiled = 0, + .graph_config = block_config->graph_config + }; + + ei_impulse_result_t anomaly_result = { 0 }; + + std::unique_ptr input_ptr(new ei_feature_t[1]); + ei_feature_t* input = input_ptr.get(); + + memset(&anomaly_result, 0, sizeof(ei_impulse_result_t)); + +#if EI_CLASSIFIER_HAS_VISUAL_ANOMALY + input = fmatrix; +#else + std::unique_ptr matrix_ptr(new ei::matrix_t(1, block_config->anom_axes_size)); + input[0].matrix = matrix_ptr.get(); + input[0].blockId = 0; + + extract_anomaly_input_values(fmatrix, input_block_ids, input_block_ids_size, block_config->anom_axes_size, block_config->anom_axis, input[0].matrix->buffer); + input_block_ids_size = 1; +#endif + + EI_IMPULSE_ERROR res = run_nn_inference(impulse, input, input_block_ids, input_block_ids_size, &anomaly_result, (void*)&ei_learning_block_config_gmm, debug); + if (res != EI_IMPULSE_OK) { + return res; + } + + if (debug) { + ei_printf("Anomaly score (time: %d ms.): ", anomaly_result.timing.classification); + ei_printf_float(anomaly_result.classification[0].value); + ei_printf("\n"); + } + + result->timing.anomaly = anomaly_result.timing.classification; + +#if EI_CLASSIFIER_HAS_VISUAL_ANOMALY + result->visual_ad_grid_cells = anomaly_result.visual_ad_grid_cells; + result->visual_ad_count = anomaly_result.visual_ad_count; + result->visual_ad_result.mean_value = anomaly_result.visual_ad_result.mean_value; + result->visual_ad_result.max_value = anomaly_result.visual_ad_result.max_value; +#else + result->anomaly = anomaly_result.classification[0].value; +#endif + + return EI_IMPULSE_OK; +} +#endif // (EI_CLASSIFIER_INFERENCING_ENGINE != EI_CLASSIFIER_NONE) + +#endif //#if (EI_CLASSIFIER_HAS_ANOMALY == 1) +#endif // _EDGE_IMPULSE_INFERENCING_ANOMALY_H_ diff --git a/edgeimpulse/edge-impulse-sdk/classifier/inferencing_engines/drpai.h b/edgeimpulse/edge-impulse-sdk/classifier/inferencing_engines/drpai.h new file mode 100644 index 0000000..ec1cc65 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/classifier/inferencing_engines/drpai.h @@ -0,0 +1,753 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef _EI_CLASSIFIER_INFERENCING_ENGINE_DRPAI_H_ +#define _EI_CLASSIFIER_INFERENCING_ENGINE_DRPAI_H_ + +#if (EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_DRPAI) + +/***************************************** + * includes + ******************************************/ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#if ((EI_CLASSIFIER_OBJECT_DETECTION == 1) && (EI_CLASSIFIER_OBJECT_DETECTION_LAST_LAYER == EI_CLASSIFIER_LAST_LAYER_YOLOV5_V5_DRPAI)) +// For a YOLOV5_V5_DRPAI model we ran the unsupported layers with TF +#include +#include "tensorflow-lite/tensorflow/lite/c/common.h" +#include "tensorflow-lite/tensorflow/lite/interpreter.h" +#include "tensorflow-lite/tensorflow/lite/kernels/register.h" +#include "tensorflow-lite/tensorflow/lite/model.h" +#include "tensorflow-lite/tensorflow/lite/optional_debug_tools.h" +#endif +#include "edge-impulse-sdk/tensorflow/lite/kernels/tree_ensemble_classifier.h" +#include "edge-impulse-sdk/classifier/ei_fill_result_struct.h" +#include "edge-impulse-sdk/classifier/ei_model_types.h" +#include "edge-impulse-sdk/classifier/ei_run_dsp.h" +#include "edge-impulse-sdk/porting/ei_logging.h" + +#include +#include + + + +/***************************************** + * Macro + ******************************************/ +/*Maximum DRP-AI Timeout threshold*/ +#define DRPAI_TIMEOUT (5) + +/*Buffer size for writing data to memory via DRP-AI Driver.*/ +#define BUF_SIZE (1024) + +/*Index to access drpai_file_path[]*/ +#define INDEX_D (0) +#define INDEX_C (1) +#define INDEX_P (2) +#define INDEX_A (3) +#define INDEX_W (4) + +/***************************************** + * Public global vars + ******************************************/ +// input and output buffer pointers for memory mapped regions used by DRP-AI +uint8_t *drpai_input_buf = (uint8_t *)NULL; +float *drpai_output_buf = (float *)NULL; + +/***************************************** + * Typedef + ******************************************/ +/* For DRP-AI Address List */ +typedef struct { + unsigned long desc_aimac_addr; + unsigned long desc_aimac_size; + unsigned long desc_drp_addr; + unsigned long desc_drp_size; + unsigned long drp_param_addr; + unsigned long drp_param_size; + unsigned long data_in_addr; + unsigned long data_in_size; + unsigned long data_addr; + unsigned long data_size; + unsigned long work_addr; + unsigned long work_size; + unsigned long data_out_addr; + unsigned long data_out_size; + unsigned long drp_config_addr; + unsigned long drp_config_size; + unsigned long weight_addr; + unsigned long weight_size; +} st_addr_t; + +/***************************************** + * static vars + ******************************************/ +static st_addr_t drpai_address; +static uint64_t udmabuf_address = 0; + +static int drpai_fd = -1; + +drpai_data_t proc[DRPAI_INDEX_NUM]; + +void get_udmabuf_memory_start_addr() +{ /* Obtain udmabuf memory area starting address */ + + int8_t fd = 0; + char addr[1024]; + int32_t read_ret = 
0; + errno = 0; + + fd = open("/sys/class/u-dma-buf/udmabuf0/phys_addr", O_RDONLY); + if (0 > fd) + { + fprintf(stderr, "[ERROR] Failed to open udmabuf0/phys_addr : errno=%d\n", errno); + } + + read_ret = read(fd, addr, 1024); + if (0 > read_ret) + { + fprintf(stderr, "[ERROR] Failed to read udmabuf0/phys_addr : errno=%d\n", errno); + close(fd); + } + + sscanf(addr, "%lx", &udmabuf_address); + close(fd); + + /* Filter the bit higher than 32 bit */ + udmabuf_address &=0xFFFFFFFF; +} + +uint8_t drpai_init_mem(uint32_t input_frame_size) { + int32_t i = 0; + + int udmabuf_fd0 = open("/dev/udmabuf0", O_RDWR); + if (udmabuf_fd0 < 0) { + return -1; + } + + // input_frame_size === data_in_size + uint8_t *addr = + (uint8_t *)mmap(NULL, input_frame_size, + PROT_READ | PROT_WRITE, MAP_SHARED, udmabuf_fd0, 0); + + drpai_input_buf = addr; + + /* Write once to allocate physical memory to u-dma-buf virtual space. + * Note: Do not use memset() for this. + * Because it does not work as expected. */ + for (i = 0; i < input_frame_size; i++) { + drpai_input_buf[i] = 0; + } + + + get_udmabuf_memory_start_addr(); + if (0 == udmabuf_address) { + return EI_IMPULSE_DRPAI_INIT_FAILED; + } + + return 0; +} + +/***************************************** + * Function Name : read_addrmap_txt + * Description : Loads address and size of DRP-AI Object files into struct + *addr. Arguments : addr_file = filename of addressmap file (from + *DRP-AI Object files) Return value : 0 if succeeded not 0 otherwise + ******************************************/ +static int8_t read_addrmap_txt() { + // create a stream from the DRP-AI model data without copying + std::istringstream ifs; + ifs.rdbuf()->pubsetbuf((char *)ei_ei_addrmap_intm_txt, ei_ei_addrmap_intm_txt_len); + + std::string str; + unsigned long l_addr; + unsigned long l_size; + std::string element, a, s; + + if (ifs.fail()) { + return -1; + } + + while (getline(ifs, str)) { + std::istringstream iss(str); + iss >> element >> a >> s; + l_addr = strtol(a.c_str(), NULL, 16); + l_size = strtol(s.c_str(), NULL, 16); + + if (element == "drp_config") { + drpai_address.drp_config_addr = l_addr; + drpai_address.drp_config_size = l_size; + } else if (element == "desc_aimac") { + drpai_address.desc_aimac_addr = l_addr; + drpai_address.desc_aimac_size = l_size; + } else if (element == "desc_drp") { + drpai_address.desc_drp_addr = l_addr; + drpai_address.desc_drp_size = l_size; + } else if (element == "drp_param") { + drpai_address.drp_param_addr = l_addr; + drpai_address.drp_param_size = l_size; + } else if (element == "weight") { + drpai_address.weight_addr = l_addr; + drpai_address.weight_size = l_size; + } else if (element == "data_in") { + drpai_address.data_in_addr = l_addr; + drpai_address.data_in_size = l_size; + } else if (element == "data") { + drpai_address.data_addr = l_addr; + drpai_address.data_size = l_size; + } else if (element == "data_out") { + drpai_address.data_out_addr = l_addr; + drpai_address.data_out_size = l_size; + } else if (element == "work") { + drpai_address.work_addr = l_addr; + drpai_address.work_size = l_size; + } + } + + return 0; +} + +/***************************************** + * Function Name : load_data_to_mem + * Description : Loads a binary blob DRP-AI Driver Memory + * Arguments : data_ptr = pointer to the bytes to write + * drpai_fd = file descriptor of DRP-AI Driver + * from = memory start address where the data is + *written size = data size to be written Return value : 0 if succeeded not 0 + *otherwise + 
******************************************/ +static int8_t load_data_to_mem(unsigned char *data_ptr, int drpai_fd, + unsigned long from, unsigned long size) { + drpai_data_t drpai_data; + + drpai_data.address = from; + drpai_data.size = size; + + errno = 0; + if (-1 == ioctl(drpai_fd, DRPAI_ASSIGN, &drpai_data)) { + return -1; + } + + errno = 0; + if (-1 == write(drpai_fd, data_ptr, size)) { + return -1; + } + + return 0; +} + +/***************************************** + * Function Name : load_drpai_data + * Description : Loads DRP-AI Object files to memory via DRP-AI Driver. + * Arguments : drpai_fd = file descriptor of DRP-AI Driver + * Return value : 0 if succeeded + * : not 0 otherwise + ******************************************/ +static int load_drpai_data(int drpai_fd) { + unsigned long addr, size; + unsigned char *data_ptr; + for (int i = 0; i < 5; i++) { + switch (i) { + case (INDEX_W): + addr = drpai_address.weight_addr; + size = drpai_address.weight_size; + data_ptr = ei_ei_weight_dat; + break; + case (INDEX_C): + addr = drpai_address.drp_config_addr; + size = drpai_address.drp_config_size; + data_ptr = ei_ei_drpcfg_mem; + break; + case (INDEX_P): + addr = drpai_address.drp_param_addr; + size = drpai_address.drp_param_size; + data_ptr = ei_drp_param_bin; + break; + case (INDEX_A): + addr = drpai_address.desc_aimac_addr; + size = drpai_address.desc_aimac_size; + data_ptr = ei_aimac_desc_bin; + break; + case (INDEX_D): + addr = drpai_address.desc_drp_addr; + size = drpai_address.desc_drp_size; + data_ptr = ei_drp_desc_bin; + break; + default: + return -1; + break; + } + if (0 != load_data_to_mem(data_ptr, drpai_fd, addr, size)) { + return -1; + } + } + return 0; +} + +EI_IMPULSE_ERROR drpai_init_classifier() { + // retval for drpai status + int ret_drpai; + + // Read DRP-AI Object files address and size + if (0 != read_addrmap_txt()) { + ei_printf("ERR: read_addrmap_txt failed : %d\n", errno); + return EI_IMPULSE_DRPAI_INIT_FAILED; + } + + // DRP-AI Driver Open + drpai_fd = open("/dev/drpai0", O_RDWR); + if (drpai_fd < 0) { + ei_printf("ERR: Failed to Open DRP-AI Driver: errno=%d\n", errno); + return EI_IMPULSE_DRPAI_INIT_FAILED; + } + + // Load DRP-AI Data from Filesystem to Memory via DRP-AI Driver + ret_drpai = load_drpai_data(drpai_fd); + if (ret_drpai != 0) { + ei_printf("ERR: Failed to load DRPAI Data\n"); + if (0 != close(drpai_fd)) { + ei_printf("ERR: Failed to Close DRPAI Driver: errno=%d\n", errno); + } + return EI_IMPULSE_DRPAI_INIT_FAILED; + } + + // statically store DRP object file addresses and sizes + proc[DRPAI_INDEX_INPUT].address = (uint32_t)udmabuf_address; + proc[DRPAI_INDEX_INPUT].size = drpai_address.data_in_size; + proc[DRPAI_INDEX_DRP_CFG].address = drpai_address.drp_config_addr; + proc[DRPAI_INDEX_DRP_CFG].size = drpai_address.drp_config_size; + proc[DRPAI_INDEX_DRP_PARAM].address = drpai_address.drp_param_addr; + proc[DRPAI_INDEX_DRP_PARAM].size = drpai_address.drp_param_size; + proc[DRPAI_INDEX_AIMAC_DESC].address = drpai_address.desc_aimac_addr; + proc[DRPAI_INDEX_AIMAC_DESC].size = drpai_address.desc_aimac_size; + proc[DRPAI_INDEX_DRP_DESC].address = drpai_address.desc_drp_addr; + proc[DRPAI_INDEX_DRP_DESC].size = drpai_address.desc_drp_size; + proc[DRPAI_INDEX_WEIGHT].address = drpai_address.weight_addr; + proc[DRPAI_INDEX_WEIGHT].size = drpai_address.weight_size; + proc[DRPAI_INDEX_OUTPUT].address = drpai_address.data_out_addr; + proc[DRPAI_INDEX_OUTPUT].size = drpai_address.data_out_size; + + EI_LOGD("proc[DRPAI_INDEX_INPUT] addr: %p, size: 
%p\r\n", proc[DRPAI_INDEX_INPUT].address, proc[DRPAI_INDEX_INPUT].size); + EI_LOGD("proc[DRPAI_INDEX_DRP_CFG] addr: %p, size: %p\r\n", proc[DRPAI_INDEX_DRP_CFG].address, proc[DRPAI_INDEX_DRP_CFG].size); + EI_LOGD("proc[DRPAI_INDEX_DRP_PARAM] addr: %p, size: %p\r\n", proc[DRPAI_INDEX_DRP_PARAM].address, proc[DRPAI_INDEX_DRP_PARAM].size); + EI_LOGD("proc[DRPAI_INDEX_AIMAC_DESC] addr: %p, size: %p\r\n", proc[DRPAI_INDEX_AIMAC_DESC].address, proc[DRPAI_INDEX_AIMAC_DESC].size); + EI_LOGD("proc[DRPAI_INDEX_DRP_DESC] addr: %p, size: %p\r\n", proc[DRPAI_INDEX_DRP_DESC].address, proc[DRPAI_INDEX_DRP_DESC].size); + EI_LOGD("proc[DRPAI_INDEX_WEIGHT] addr: %p, size: %p\r\n", proc[DRPAI_INDEX_WEIGHT].address, proc[DRPAI_INDEX_WEIGHT].size); + EI_LOGD("proc[DRPAI_INDEX_OUTPUT] addr: %p, size: %p\r\n", proc[DRPAI_INDEX_OUTPUT].address, proc[DRPAI_INDEX_OUTPUT].size); + + drpai_output_buf = (float *)ei_malloc(drpai_address.data_out_size); + + return EI_IMPULSE_OK; +} + +EI_IMPULSE_ERROR drpai_run_classifier_image_quantized() { +#if EI_CLASSIFIER_COMPILED == 1 +#error "DRP-AI is not compatible with EON Compiler" +#endif + // output data from DRPAI model + drpai_data_t drpai_data; + // status used to query if any internal errors occured during inferencing + drpai_status_t drpai_status; + // descriptor used for checking if DRPAI is done inferencing + fd_set rfds; + // struct used to define DRPAI timeout + struct timespec tv; + // retval for drpai status + int ret_drpai; + // retval when querying drpai status + int inf_status = 0; + + // DRP-AI Output Memory Preparation + drpai_data.address = drpai_address.data_out_addr; + drpai_data.size = drpai_address.data_out_size; + + // Start DRP-AI driver + EI_LOGD("Start DRPAI inference\r\n"); + int ioret = ioctl(drpai_fd, DRPAI_START, &proc[0]); + if (0 != ioret) { + EI_LOGE("Failed to Start DRPAI Inference: %d\n", errno); + return EI_IMPULSE_DRPAI_RUNTIME_FAILED; + } + + // Settings For pselect - this is how DRPAI signals inferencing complete + FD_ZERO(&rfds); + FD_SET(drpai_fd, &rfds); + // Define a timeout for DRP-AI to complete + tv.tv_sec = DRPAI_TIMEOUT; + tv.tv_nsec = 0; + + // Wait until DRP-AI ends + EI_LOGD("Waiting on DRPAI inference results\r\n"); + ret_drpai = pselect(drpai_fd + 1, &rfds, NULL, NULL, &tv, NULL); + if (ret_drpai == 0) { + EI_LOGE("DRPAI Inference pselect() Timeout: %d\n", errno); + return EI_IMPULSE_DRPAI_RUNTIME_FAILED; + } else if (ret_drpai < 0) { + EI_LOGE("DRPAI Inference pselect() Error: %d\n", errno); + return EI_IMPULSE_DRPAI_RUNTIME_FAILED; + } + + // Checks for DRPAI inference status errors + EI_LOGD("Getting DRPAI Status\r\n"); + inf_status = ioctl(drpai_fd, DRPAI_GET_STATUS, &drpai_status); + if (inf_status != 0) { + EI_LOGE("DRPAI Internal Error: %d\n", errno); + return EI_IMPULSE_DRPAI_RUNTIME_FAILED; + } + + EI_LOGD("Getting inference results\r\n"); + if (ioctl(drpai_fd, DRPAI_ASSIGN, &drpai_data) != 0) { + EI_LOGE("Failed to Assign DRPAI data: %d\n", errno); + return EI_IMPULSE_DRPAI_RUNTIME_FAILED; + } + + if (read(drpai_fd, drpai_output_buf, drpai_data.size) < 0) { + EI_LOGE("Failed to read DRPAI output data: %d\n", errno); + return EI_IMPULSE_DRPAI_RUNTIME_FAILED; + } + return EI_IMPULSE_OK; +} + +// close the driver (reset file handles) +EI_IMPULSE_ERROR drpai_close(uint32_t input_frame_size) { + munmap(drpai_input_buf, input_frame_size); + free(drpai_output_buf); + if (drpai_fd > 0) { + if (0 != close(drpai_fd)) { + EI_LOGE("Failed to Close DRP-AI Driver: errno=%d\n", errno); + return EI_IMPULSE_DRPAI_RUNTIME_FAILED; 
+ } + drpai_fd = -1; + } + return EI_IMPULSE_OK; +} + +#if ((EI_CLASSIFIER_OBJECT_DETECTION == 1) && (EI_CLASSIFIER_OBJECT_DETECTION_LAST_LAYER == EI_CLASSIFIER_LAST_LAYER_YOLOV5_V5_DRPAI)) +EI_IMPULSE_ERROR drpai_run_yolov5_postprocessing( + const ei_impulse_t *impulse, + signal_t *signal, + ei_impulse_result_t *result, + bool debug = false) +{ + static std::unique_ptr model = nullptr; + static std::unique_ptr interpreter = nullptr; + + if (!model) { + model = tflite::FlatBufferModel::BuildFromBuffer((const char*)yolov5_part2, yolov5_part2_len); + if (!model) { + ei_printf("Failed to build TFLite model from buffer\n"); + return EI_IMPULSE_TFLITE_ERROR; + } + + tflite::ops::builtin::BuiltinOpResolver resolver; + tflite::InterpreterBuilder builder(*model, resolver); + builder(&interpreter); + + if (!interpreter) { + ei_printf("Failed to construct interpreter\n"); + return EI_IMPULSE_TFLITE_ERROR; + } + + if (interpreter->AllocateTensors() != kTfLiteOk) { + ei_printf("AllocateTensors failed\n"); + return EI_IMPULSE_TFLITE_ERROR; + } + + int hw_thread_count = (int)std::thread::hardware_concurrency(); + hw_thread_count -= 1; // leave one thread free for the other application + if (hw_thread_count < 1) { + hw_thread_count = 1; + } + + if (interpreter->SetNumThreads(hw_thread_count) != kTfLiteOk) { + ei_printf("SetNumThreads failed\n"); + return EI_IMPULSE_TFLITE_ERROR; + } + } + + const size_t drpai_buff_size = drpai_address.data_out_size / sizeof(float); + const size_t drpai_features = drpai_buff_size; + + const size_t els_per_grid = drpai_features / ((NUM_GRID_1 * NUM_GRID_1) + (NUM_GRID_2 * NUM_GRID_2) + (NUM_GRID_3 * NUM_GRID_3)); + + const size_t grid_1_offset = 0; + const size_t grid_1_size = (NUM_GRID_1 * NUM_GRID_1) * els_per_grid; + + const size_t grid_2_offset = grid_1_offset + grid_1_size; + const size_t grid_2_size = (NUM_GRID_2 * NUM_GRID_2) * els_per_grid; + + const size_t grid_3_offset = grid_2_offset + grid_2_size; + const size_t grid_3_size = (NUM_GRID_3 * NUM_GRID_3) * els_per_grid; + + // Now we don't know the exact tensor order for some reason + // so let's do that dynamically + for (size_t ix = 0; ix < 3; ix++) { + TfLiteTensor * tensor = interpreter->input_tensor(ix); + size_t tensor_size = 1; + for (size_t ix = 0; ix < tensor->dims->size; ix++) { + tensor_size *= tensor->dims->data[ix]; + } + + EI_LOGD("input tensor %d, tensor_size=%d\n", (int)ix, (int)tensor_size); + + float *input = interpreter->typed_input_tensor(ix); + + if (tensor_size == grid_1_size) { + memcpy(input, drpai_output_buf + grid_1_offset, grid_1_size * sizeof(float)); + } + else if (tensor_size == grid_2_size) { + memcpy(input, drpai_output_buf + grid_2_offset, grid_2_size * sizeof(float)); + } + else if (tensor_size == grid_3_size) { + memcpy(input, drpai_output_buf + grid_3_offset, grid_3_size * sizeof(float)); + } + else { + ei_printf("ERR: Cannot determine which grid to use for input tensor %d with %d tensor size\n", + (int)ix, (int)tensor_size); + return EI_IMPULSE_TFLITE_ERROR; + } + } + + uint64_t ctx_start_us = ei_read_timer_us(); + + interpreter->Invoke(); + + uint64_t ctx_end_us = ei_read_timer_us(); + + EI_LOGD("Invoke took %d ms.\n", (int)((ctx_end_us - ctx_start_us) / 1000)); + + float* out_data = interpreter->typed_output_tensor(0); + + const size_t out_size = impulse->tflite_output_features_count; + + if (debug) { + printf("First 20 bytes: "); + for (size_t ix = 0; ix < 20; ix++) { + ei_printf("%f ", out_data[ix]); + } + ei_printf("\n"); + } + + // printf("Last 5 bytes: "); + // for 
(size_t ix = out_size - 5; ix < out_size; ix++) { + // printf("%f ", out_data[ix]); + // } + // printf("\n"); + + return fill_result_struct_f32_yolov5(impulse, result, 5, out_data, out_size); +} +#endif + +/** + * @brief Do neural network inferencing over the processed feature matrix + * + * @param fmatrix Processed matrix + * @param result Output classifier results + * @param[in] debug Debug output enable + * + * @return The ei impulse error. + */ +EI_IMPULSE_ERROR run_nn_inference( + const ei_impulse_t *impulse, + ei_feature_t *fmatrix, + uint32_t* input_block_ids, + uint32_t input_block_ids_size, + ei_impulse_result_t *result, + void *config_ptr, + bool debug) +{ + // dummy, not used for DRPAI +} + +/** + * Special function to run the classifier on images, only works on TFLite models (either interpreter or EON or for tensaiflow) + * that allocates a lot less memory by quantizing in place. This only works if 'can_run_classifier_image_quantized' + * returns EI_IMPULSE_OK. + */ +EI_IMPULSE_ERROR run_nn_inference_image_quantized( + const ei_impulse_t *impulse, + signal_t *signal, + ei_impulse_result_t *result, + void *config_ptr, + bool debug = false) +{ + // this needs to be changed for multi-model, multi-impulse + static bool first_run = true; + uint64_t ctx_start_us; + uint64_t dsp_start_us = ei_read_timer_us(); + + if (first_run) { + // map memory regions to the DRP-AI UDMA. This is required for passing data + // to and from DRP-AI + int t = drpai_init_mem(impulse->nn_input_frame_size); + if (t != 0) { + return EI_IMPULSE_DRPAI_INIT_FAILED; + } + + EI_IMPULSE_ERROR ret = drpai_init_classifier(); + if (ret != EI_IMPULSE_OK) { + drpai_close(impulse->nn_input_frame_size); + return EI_IMPULSE_DRPAI_INIT_FAILED; + } + + EI_LOGI("Initialized input and output buffers:\r\n"); + EI_LOGI("input buf (addr: %p, size: 0x%x)\r\n", drpai_input_buf, drpai_address.data_in_size); + EI_LOGI("output buf (addr: %p, size: 0x%x)\r\n", drpai_output_buf, drpai_address.data_out_size); + EI_LOGI("udmabuf_addr: %p\n", udmabuf_address); + } + + EI_LOGD("Starting DSP...\n"); + int ret; + + EI_LOGD("fmatrix size == Bpp * signal.total_length ( %p == %p * %p = %p )\r\n", proc[DRPAI_INDEX_INPUT].size, 3, signal->total_length, 3 * signal->total_length); + // Creates a features matrix mapped to the DRP-AI UDMA input region + ei::matrix_u8_t features_matrix(1, proc[DRPAI_INDEX_INPUT].size, drpai_input_buf); + + // Grabs the raw image buffer from the signal, DRP-AI will automatically + // extract features + ret = extract_drpai_features_quantized( + signal, + &features_matrix, + impulse->dsp_blocks[0].config, + impulse->frequency); + if (ret != EIDSP_OK) { + ei_printf("ERR: Failed to run DSP process (%d)\n", ret); + return EI_IMPULSE_DSP_ERROR; + } + + if (ei_run_impulse_check_canceled() == EI_IMPULSE_CANCELED) { + return EI_IMPULSE_CANCELED; + } + + result->timing.dsp_us = ei_read_timer_us() - dsp_start_us; + result->timing.dsp = (int)(result->timing.dsp_us / 1000); + if (debug) { + ei_printf("Features (%d ms.): ", result->timing.dsp); + for (size_t ix = 0; ix < EI_CLASSIFIER_NN_INPUT_FRAME_SIZE; ix++) { + ei_printf("0x%hhx, ", drpai_input_buf[ix]); + } + ei_printf("\n"); + } + + ctx_start_us = ei_read_timer_us(); + + // Run DRP-AI inference, a static buffer is used to store the raw output + // results + ret = drpai_run_classifier_image_quantized(); + + // close driver to reset memory, file pointer + if (ret != EI_IMPULSE_OK) { + drpai_close(impulse->nn_input_frame_size); + first_run = true; + } + else { + // drpai_reset(); 
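/* The input-tensor routing in drpai_run_yolov5_postprocessing() above relies on
 * the DRP-AI output being the three YOLOv5 heads concatenated grid by grid, so
 * each slice can be identified purely by its element count. A minimal sketch of
 * that size arithmetic (struct and function names here are illustrative):
 *
 *   #include <cstddef>
 *
 *   struct GridSlice { size_t offset; size_t size; };  // in float elements
 *
 *   static void split_yolov5_output(size_t total_floats,
 *                                   size_t g1, size_t g2, size_t g3,
 *                                   GridSlice out[3]) {
 *       // els_per_cell corresponds to els_per_grid in the code above
 *       const size_t els_per_cell = total_floats / (g1 * g1 + g2 * g2 + g3 * g3);
 *       out[0] = { 0,                         g1 * g1 * els_per_cell };
 *       out[1] = { out[0].size,               g2 * g2 * els_per_cell };
 *       out[2] = { out[0].size + out[1].size, g3 * g3 * els_per_cell };
 *   }
 */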
+ first_run = false; + } + + EI_IMPULSE_ERROR fill_res = EI_IMPULSE_OK; + + if (impulse->object_detection) { + switch (impulse->object_detection_last_layer) { + case EI_CLASSIFIER_LAST_LAYER_FOMO: { + if (debug) { + ei_printf("DEBUG: raw drpai output"); + ei_printf("\n["); + for (uint32_t i = 0; i < impulse->tflite_output_features_count; i++) { + ei_printf_float(drpai_output_buf[i]); + ei_printf(" "); + } + ei_printf("]\n"); + } + + fill_res = fill_result_struct_f32_fomo( + impulse, + result, + drpai_output_buf, + impulse->fomo_output_size, + impulse->fomo_output_size); + break; + } + case EI_CLASSIFIER_LAST_LAYER_SSD: { + ei_printf("ERR: MobileNet SSD models are not implemented for DRP-AI (%d)\n", + impulse->object_detection_last_layer); + return EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE; + } + case EI_CLASSIFIER_LAST_LAYER_YOLOV5_V5_DRPAI: { + #if EI_CLASSIFIER_TFLITE_OUTPUT_QUANTIZED == 1 + ei_printf("ERR: YOLOv5 does not support quantized inference\n"); + return EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE; + #else + if (debug) { + ei_printf("DEBUG: raw drpai output"); + ei_printf("\n["); + // impulse->tflite_output_features_count can't be used here as this is not the final output + // so print only the first 10 values. + for (uint32_t i = 0; i < 10; i++) { + ei_printf_float(drpai_output_buf[i]); + ei_printf(" "); + } + ei_printf("]\n"); + } + +#if ((EI_CLASSIFIER_OBJECT_DETECTION == 1) && (EI_CLASSIFIER_OBJECT_DETECTION_LAST_LAYER == EI_CLASSIFIER_LAST_LAYER_YOLOV5_V5_DRPAI)) + // do post processing + fill_res = drpai_run_yolov5_postprocessing(impulse, signal, result, debug); +#endif + + #endif + + break; + } + default: { + ei_printf("ERR: Unsupported object detection last layer (%d)\n", + impulse->object_detection_last_layer); + return EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE; + } + } + } + else { + fill_res = fill_result_struct_f32(impulse, result, drpai_output_buf, debug); + } + + if (fill_res != EI_IMPULSE_OK) { + return fill_res; + } + + result->timing.classification_us = ei_read_timer_us() - ctx_start_us; + result->timing.classification = (int)(result->timing.classification_us / 1000); + return EI_IMPULSE_OK; +} + +#endif // #if (EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_DRPAI) +#endif // _EI_CLASSIFIER_INFERENCING_ENGINE_DRPAI_H_ diff --git a/edgeimpulse/edge-impulse-sdk/classifier/inferencing_engines/engines.h b/edgeimpulse/edge-impulse-sdk/classifier/inferencing_engines/engines.h new file mode 100644 index 0000000..eea5134 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/classifier/inferencing_engines/engines.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef _EI_CLASSIFIER_ENGINES_H_ +#define _EI_CLASSIFIER_ENGINES_H_ + +#include "edge-impulse-sdk/classifier/ei_model_types.h" + +EI_IMPULSE_ERROR run_kmeans_anomaly( + const ei_impulse_t *impulse, + ei_feature_t *fmatrix, + uint32_t* input_block_ids, + uint32_t input_block_ids_size, + ei_impulse_result_t *result, + void *config_ptr, + bool debug); + +EI_IMPULSE_ERROR run_gmm_anomaly( + const ei_impulse_t *impulse, + ei_feature_t *fmatrix, + uint32_t* input_block_ids, + uint32_t input_block_ids_size, + ei_impulse_result_t *result, + void *config_ptr, + bool debug); + +EI_IMPULSE_ERROR run_nn_inference( + const ei_impulse_t *impulse, + ei_feature_t *fmatrix, + uint32_t* input_block_ids, + uint32_t input_block_ids_size, + ei_impulse_result_t *result, + void *config_ptr, + bool debug); + +int extract_tflite_eon_features(signal_t *signal, matrix_t *output_matrix, + void *config_ptr, const float frequency); + +int extract_tflite_features(signal_t *signal, matrix_t *output_matrix, + void *config_ptr, const float frequency); + +#endif // _EI_CLASSIFIER_ENGINES_H_s \ No newline at end of file diff --git a/edgeimpulse/edge-impulse-sdk/classifier/inferencing_engines/memryx.h b/edgeimpulse/edge-impulse-sdk/classifier/inferencing_engines/memryx.h new file mode 100644 index 0000000..bf72583 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/classifier/inferencing_engines/memryx.h @@ -0,0 +1,475 @@ +/* Edge Impulse inferencing library + * Copyright (c) 2023 EdgeImpulse Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef EI_CLASSIFIER_INFERENCING_ENGINE_MEMRYX_H +#define EI_CLASSIFIER_INFERENCING_ENGINE_MEMRYX_H + +#if (EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_MEMRYX) + +/** + * @brief We can use a lot of space on linux and + * mx3 is capable of it + * + */ +#undef EI_CLASSIFIER_MAX_LABELS_COUNT +#define EI_CLASSIFIER_MAX_LABELS_COUNT 2000 + +/** + * @brief we are forcing SOFTWARE inference (simulation), + * beacuse use of hardware is not ready + * + */ +#ifndef EI_CLASSIFIER_USE_MEMRYX_SOFTWARE +#define EI_CLASSIFIER_USE_MEMRYX_HARDWARE 1 +#endif + +/** + * @brief Memryx accelerator can leverage up to four MX3 chips for inference. + * Specify here the number of chips to be used for acceleration, + * e.g. set to 4 in order to use all four chips of the M3X board. 
+ */ +#ifndef EI_CLASSIFIER_USE_MEMRYX_CHIPS_COUNT +#define EI_CLASSIFIER_USE_MEMRYX_CHIPS_COUNT 1 +#endif + +#include "model-parameters/model_metadata.h" +#if EI_CLASSIFIER_HAS_MODEL_VARIABLES == 1 +#include "model-parameters/model_variables.h" +#endif + +#include "edge-impulse-sdk/porting/ei_classifier_porting.h" +#include "edge-impulse-sdk/classifier/ei_fill_result_struct.h" +#include "tensorflow-lite/tensorflow/lite/kernels/internal/reference/softmax.h" +#include +#include +#include +#include +#include +#include +#ifdef EI_CLASSIFIER_USE_MEMRYX_SOFTWARE +#include "pybind11/embed.h" +#include "pybind11/numpy.h" +#include "pybind11/stl.h" +#else +#include "memx/memx.h" +#endif +/* Headers below help us bundle the DFP model with EIM in single binary */ +#include "memryx-model/memryx-model.h" +#include "utils/model_header_utils.h" + +/* Result delivered by memryx simulator contains 3 fields, indexes for print */ +#define MX_SIM_RES_OUTPUTS 0 +#define MX_SIM_RES_LATENCY 1 +#define MX_SIM_RES_FPS 2 + +std::stringstream engine_info; + +static bool memryx_initialized = false; + +#ifdef EI_CLASSIFIER_USE_MEMRYX_SOFTWARE +/* brings in the `_a` literals to set args to python API */ +using namespace pybind11::literals; +namespace py = pybind11; +/* PyBind variables for EIM with Simulator */ +static py::module_ memryx; +static py::module_ np; +static py::object zeroes; +static py::object Simulator; +static py::object model; +static py::object device; +static std::vector vec; +#endif + +#ifdef EI_CLASSIFIER_USE_MEMRYX_HARDWARE +/* Variables for EIM with Hardware */ +const uint8_t flow_id = 0; // flow port 0 +const uint8_t model_id = 0; // model 0 +const uint8_t group_id = 0; // MPU device group 0 +const int timeout = 0; // was 200 ms +int argmax = 0; // index with maximum score +#endif + +/* We need a workaround for softmax because + * the MX3+ is not coming out this year, and + * the MX3 does not support the SoftMax layer + */ +static tflite::RuntimeShape softmax_shape; +static tflite::SoftmaxParams dummy_params; + +static bool verbose_debug = 0; + +bool init_memryx(bool debug, const ei_impulse_t *impulse) +{ + /* Unpack DFP model to file system */ + std::string project_file_path = "/tmp/" + std::string(impulse->project_name) + "-" + std::to_string(impulse->project_id) + "-" + std::to_string(impulse->deploy_version); + create_project_if_not_exists(project_file_path, model_h_files, model_h_files_len); + + std::string proj_model_path = project_file_path + "/memryx_trained.dfp"; + const char * model_file_path = proj_model_path.c_str(); +#if (defined(EI_CLASSIFIER_USE_MEMRYX_HARDWARE) && (EI_CLASSIFIER_USE_MEMRYX_HARDWARE == 1)) +#warning "Building EIM for use with MemryX Hardware" + memx_status status = MEMX_STATUS_OK; + // 1. Bind MPU device group 0 as MX3:Cascade to model 0. + status = memx_open(model_id, group_id, MEMX_DEVICE_CASCADE); + if(memx_status_error(status)) { + return false; + } + ei_printf("Memryx device opened.\n"); + + // 2. Download model from a DFP file to MPU device group, input and + // output feature map shape is auto, configured after download complete. + status = memx_download_model(model_id, model_file_path, 0, // model_idx = 0 + MEMX_DOWNLOAD_TYPE_WTMEM_AND_MODEL); + if(memx_status_error(status)) { + return false; + } + ei_printf("Memryx model downloaded.\n"); + + // 3. Enable data transfer of this model to device. Set to no wait here + // since driver will go to data transfer state eventually. 
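/* Condensed view of the MX3 bring-up performed in steps 1-3 of init_memryx()
 * (same memx_* calls as the surrounding code; the helper name and the
 * hard-coded model/group ids are illustrative):
 *
 *   #include "memx/memx.h"
 *
 *   static bool mx3_bring_up(const char *dfp_path) {
 *       memx_status s = memx_open(0, 0, MEMX_DEVICE_CASCADE);  // model 0, device group 0
 *       if (memx_status_error(s)) return false;
 *       s = memx_download_model(0, dfp_path, 0, MEMX_DOWNLOAD_TYPE_WTMEM_AND_MODEL);
 *       if (memx_status_error(s)) return false;
 *       return !memx_status_error(memx_set_stream_enable(0, 0));  // no-wait enable
 *   }
 */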
+ status = memx_set_stream_enable(model_id, 0); + if(memx_status_error(status)) { + return false; + } + ei_printf("Data streaming to and from the MX3 board is enabled\n"); +#elif (defined(EI_CLASSIFIER_USE_MEMRYX_SOFTWARE) && (EI_CLASSIFIER_USE_MEMRYX_SOFTWARE == 1)) +#warning "MEMRYX model will be run in SIMULATION mode (not on real hardware)!" + py::list path; + // import Python's memryx module + try { + memryx = py::module_::import("memryx"); + if(debug) printf("Memryx PyModule init\n"); + } + catch (py::error_already_set &e) { + ei_printf("ERR: Importing 'memryx' library failed:\n%s\n", e.what()); + return false; + } + + Simulator = memryx.attr("Simulator"); + if(debug) printf("Simulator API init\n"); + + // load model + try { + model = Simulator("dfp"_a = model_file_path); + if(debug) printf("Model API init\n"); + } + catch (py::error_already_set &e) { + ei_printf("ERR: Can't load model file from %s\n", model_file_path); + return false; + } +#else +#error "Neither EI_CLASSIFIER_USE_MEMRYX_HARDWARE or EI_CLASSIFIER_USE_MEMRYX_SOFTWARE are defined or set to 1" +#endif + + // clear info + engine_info.str(""); + + return true; +} + + +/** + * @brief Do neural network inferencing over the processed feature matrix + * + * @param impulse Struct describing impulse architecture + * @param fmatrix Processed matrix + * @param result Output classifier results + * @param[in] debug Debug output enable + * + * @return The ei impulse error. + */ +#if (defined(EI_CLASSIFIER_USE_MEMRYX_HARDWARE) && (EI_CLASSIFIER_USE_MEMRYX_HARDWARE == 1)) +EI_IMPULSE_ERROR run_nn_inference( + const ei_impulse_t *impulse, + ei_feature_t *fmatrix, + uint32_t* input_block_ids, + uint32_t input_block_ids_size, + ei_impulse_result_t *result, + void *config_ptr, + bool debug = false) +{ + memx_status status = MEMX_STATUS_OK; + int32_t ifmap_height, ifmap_width, ifmap_channel_number, ifmap_format; + int32_t ofmap_height, ofmap_width, ofmap_channel_number, ofmap_format; + uint64_t ctx_start_us = 0; + uint64_t ctx_end_us = 0; + + // check if we've initialized the interpreter and device? + if (memryx_initialized == false) { + if(init_memryx(debug, impulse) == false) { + return EI_IMPULSE_MEMRYX_ERROR; + } + memryx_initialized = true; + } + + /* 4. get input shape - Not needed during runtime, available only for debugging */ + if(verbose_debug) { + status = memx_get_ifmap_size(model_id, flow_id, &ifmap_height, &ifmap_width, &ifmap_channel_number, &ifmap_format); + ei_printf("status = %d, ifmap shape = (%d, %d, %d), format = %d\n", + status, ifmap_height, ifmap_width, ifmap_channel_number, ifmap_format); + } + + // 5. get output shape + status = memx_get_ofmap_size(model_id, flow_id, &ofmap_height, &ofmap_width, &ofmap_channel_number, &ofmap_format); + if(debug) { + ei_printf("status = %d, ofmap shape = (%d, %d, %d), format = %d\n", + status, ofmap_height, ofmap_width, ofmap_channel_number, ofmap_format); + } + if(memx_status_error(status)) { + return EI_IMPULSE_MEMRYX_ERROR; + } + + // 6. 
Prepare input and output buffers + float* ofmap = new float [ofmap_width * ofmap_height * ofmap_channel_number]; + +#if EI_CLASSIFIER_SINGLE_FEATURE_INPUT == 0 + size_t mtx_size = impulse->dsp_blocks_size + impulse->learning_blocks_size; + ei::matrix_t* matrix = NULL; + + ei::matrix_t combined_matrix(1, impulse->nn_input_frame_size); + uint32_t buf_pos = 0; + + for (size_t i = 0; i < input_block_ids_size; i++) { + size_t cur_mtx = input_block_ids[i]; + + if (!find_mtx_by_idx(fmatrix, &matrix, cur_mtx, mtx_size)) { + ei_printf("ERR: Cannot find matrix with id %zu\n", cur_mtx); + return EI_IMPULSE_INVALID_SIZE; + } + + for (size_t ix = 0; ix < matrix->rows * matrix->cols; ix++) { + combined_matrix.buffer[buf_pos++] = matrix->buffer[ix]; + } + } + matrix = &combined_matrix; +#else + ei::matrix_t* matrix = fmatrix[0].matrix; +#endif + + float* ifmap = (float*)matrix->buffer; + + if(verbose_debug) { + for(int fidx = 0; fidx < (ofmap_width*ofmap_height); fidx++) { + ei_printf("%f\t", matrix->buffer[fidx]); + if(!(fidx % ofmap_width)) ei_printf("\n"); + } + } + + // TODO stream_ifmap only copies buffer to MX3 board, + // we need a different approach to measure latency + ctx_start_us = ei_read_timer_us(); + // 7. Stream inputs to device and start inference. + status = memx_stream_ifmap(model_id, 0, ifmap, timeout); + ctx_end_us = ei_read_timer_us(); + if(memx_status_error(status)) { + return EI_IMPULSE_MEMRYX_ERROR; + } + + result->timing.classification_us = ctx_end_us - ctx_start_us; + result->timing.classification = (int)(result->timing.classification_us / 1000); + + engine_info.str(""); + engine_info << "Inferences per second: " << (1000000 / result->timing.classification_us); + + // 6. Stream output results from device after inference + status = memx_stream_ofmap(model_id, 0, ofmap, timeout); + if(debug) { + ei_printf(" memx_stream_ofmap (status=%d)\n", status); + } + if(memx_status_error(status)) { + return EI_IMPULSE_MEMRYX_ERROR; + } + + // init softmax shape + std::vector output_shape = {static_cast(ofmap_height),static_cast(ofmap_width), + static_cast(ofmap_channel_number)}; + softmax_shape.BuildFrom(output_shape); + // dumy beta parameter for softmax purposes + dummy_params.beta = 1; + + // apply softmax, becuase MX3 does not support this operation + tflite::reference_ops::Softmax(dummy_params, softmax_shape, ofmap, softmax_shape, ofmap); + + // handle inference outputs + if (impulse->object_detection) { + switch (impulse->object_detection_last_layer) { + case EI_CLASSIFIER_LAST_LAYER_FOMO: { + ei_printf("FOMO executed on Memryx\n"); + fill_result_struct_f32_fomo( + impulse, + result, + ofmap, + impulse->fomo_output_size, + impulse->fomo_output_size); + break; + } + case EI_CLASSIFIER_LAST_LAYER_SSD: { + ei_printf("Mobilenet SSD is not implemented for Edge Impulse MemryX engine, please contact Edge Impulse Support\n"); + break; + } + default: { + ei_printf("ERR: Unsupported object detection last layer (%d)\n", + impulse->object_detection_last_layer); + return EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE; + } + } + } + else { + fill_result_struct_f32(impulse, result, ofmap, debug); + } + + delete ofmap; + // Device is closed only at EIM exit, therefore we do not use memx_close() + return EI_IMPULSE_OK; +} + +#elif (defined(EI_CLASSIFIER_USE_MEMRYX_SOFTWARE) && (EI_CLASSIFIER_USE_MEMRYX_SOFTWARE == 1)) +EI_IMPULSE_ERROR run_nn_inference( + const ei_impulse_t *impulse, + ei_feature_t *fmatrix, + uint32_t* inputBlockIds, + ei_impulse_result_t *result, + void *config_ptr, + bool debug = false) 
+{ + // init Python embedded interpreter (should be called once!) + static py::scoped_interpreter guard{}; + + // check if we've initialized the interpreter and device? + if (memryx_initialized == false) { + if(init_memryx(debug, impulse) == false) { + return EI_IMPULSE_MEMRYX_ERROR; + } + memryx_initialized = true; + } + + std::vector input_shape = {1, impulse->input_width, impulse->input_height, 3}; + py::array_t input_data(input_shape); // = zeroes(input_shape, 0); + + printf("impulse->w=%d h=%d\n", impulse->input_width, impulse->input_height); + + /* + * convert features data to the expected shape (4dim) + * For images RGB shape is (width, height, colors) + * For images BW shape is (width, height, 1) + * For Audio shape is (width, height, 1) - spectrogram + */ + auto r = input_data.mutable_unchecked<4>(); + + for (size_t i = 0; i < input_block_ids_size; i++) { + uint16_t cur_mtx = input_block_ids[i]; +#if EI_CLASSIFIER_SINGLE_FEATURE_INPUT == 0 + ei::matrix_t* matrix = NULL; + + if (!find_mtx_by_idx(fmatrix, &matrix, cur_mtx, mtx_size)) { + ei_printf("ERR: Cannot find matrix with id %zu\n", cur_mtx); + return EI_IMPULSE_INVALID_SIZE; + } +#else + ei::matrix_t* matrix = fmatrix[0].matrix; +#endif + for (py::ssize_t x = 0; x < r.shape(1); x++) { + for (py::ssize_t y = 0; y < r.shape(2); y++) { + for(py::ssize_t z = 0; z < r.shape(3); z++) { + r(0, x, y, z) = (float)(fmatrix.buffer[x * r.shape(2) * r.shape(3) + y * r.shape(3) + z]); + } + } + } + } + + py::object runmodel = model.attr("run"); + // result from mx_sim is {np array, float, float} + py::tuple args = py::make_tuple(py::none(), 0.00, 0.00); + // run inference in sumualtor + printf("start inference\n"); + uint64_t ctx_start_us = ei_read_timer_us(); + args = runmodel("inputs"_a=input_data,"frames"_a=1); + uint64_t ctx_end_us = ei_read_timer_us(); + printf("end of inference\n"); + + result->timing.classification_us = ctx_end_us - ctx_start_us; + result->timing.classification = (int)(result->timing.classification_us / 1000); + + engine_info.str(""); + engine_info << "Inferences per second: " << (1000000 / result->timing.classification_us); + + py::array outputs = py::list(args[0]); + py::array_t potentials; + std::vector potentials_v; + + potentials = outputs.squeeze().cast>(); + + if (impulse->object_detection == false) { + potentials_v = outputs.squeeze().cast>(); + } + else { + auto q = potentials.unchecked<>(); + for (py::ssize_t x = 0; x < q.shape(0); x++) { + for (py::ssize_t y = 0; y < q.shape(1); y++) { + for(py::ssize_t z = 0; z < q.shape(2); z++) { + potentials_v.push_back(q(x, y, z)); + } + } + } + } + + if(debug) { + std::string ret_str = py::str(potentials).cast(); + ei_printf("Memryx raw output:\n%s\n", ret_str.c_str()); + } + + if (impulse->object_detection) { + switch (impulse->object_detection_last_layer) { + case EI_CLASSIFIER_LAST_LAYER_FOMO: { + ei_printf("FOMO executed on Memryx\n"); + fill_result_struct_f32_fomo( + impulse, + result, + potentials_v.data(), + impulse->fomo_output_size, + impulse->fomo_output_size); + break; + } + case EI_CLASSIFIER_LAST_LAYER_SSD: { + ei_printf("Mobilenet SSD executed on Memryx\n"); + break; + } + default: { + ei_printf("ERR: Unsupported object detection last layer (%d)\n", + impulse->object_detection_last_layer); + return EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE; + } + } + } + else { + fill_result_struct_f32(impulse, result, potentials_v.data(), debug); + } + + return EI_IMPULSE_OK; +} +#else +#error "Neither EI_CLASSIFIER_USE_MEMRYX_HARDWARE or 
EI_CLASSIFIER_USE_MEMRYX_SOFTWARE are defined or set to 1" +#endif // USE_HARDWARE + +#endif // EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_MEMRYX + +#endif /* EI_CLASSIFIER_INFERENCING_ENGINE_MEMRYX_H */ diff --git a/edgeimpulse/edge-impulse-sdk/classifier/inferencing_engines/onnx_tidl.h b/edgeimpulse/edge-impulse-sdk/classifier/inferencing_engines/onnx_tidl.h new file mode 100644 index 0000000..54e4e1f --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/classifier/inferencing_engines/onnx_tidl.h @@ -0,0 +1,697 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef _EI_CLASSIFIER_INFERENCING_ENGINE_ONNX_TIDL_H_ +#define _EI_CLASSIFIER_INFERENCING_ENGINE_ONNX_TIDL_H_ + +#if (EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_ONNX_TIDL) && (EI_CLASSIFIER_COMPILED != 1) + +#include "model-parameters/model_metadata.h" +#if EI_CLASSIFIER_HAS_MODEL_VARIABLES == 1 +#include "model-parameters/model_variables.h" +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "itidl_rt.h" +#include +#include +#include + +#include +#include "edge-impulse-sdk/classifier/ei_aligned_malloc.h" +#include "edge-impulse-sdk/classifier/ei_fill_result_struct.h" +#include "edge-impulse-sdk/classifier/ei_model_types.h" + +#include "onnx-model/tidl-model.h" +#include "utils/model_header_utils.h" + +#define TI_PREPROC_DEFAULT_WIDTH 320 +#define TI_PREPROC_DEFAULT_HEIGHT 240 + +using namespace std; + +/** + * \brief returns time in micro sec + * @returns void + */ +double getUs(struct timeval t) +{ + return(t.tv_sec * 1000000 + t.tv_usec); +} + +/** + * \brief print tensor info + * \param session onnx session + * \param input_node_names input array node names + * @returns int status + */ +int printTensorInfo(Ort::Session *session, std::vector *input_node_names, std::vector *output_node_names) +{ + size_t num_input_nodes = (*session).GetInputCount(); + size_t num_output_nodes = (*session).GetOutputCount(); + Ort::TypeInfo type_info = (*session).GetInputTypeInfo(0); + auto tensor_info = type_info.GetTensorTypeAndShapeInfo(); + std::vector input_node_dims = tensor_info.GetShape(); + ei_printf("LOG_INFO: number of inputs:%d \n", num_input_nodes); + ei_printf("LOG_INFO: number of outputs: %d\n", num_output_nodes); + ei_printf("LOG_INFO: input(0) name: %s\n", (*input_node_names)[0]); + + Ort::TypeInfo type_info_out = (*session).GetOutputTypeInfo(0); + auto tensor_info_out = type_info_out.GetTensorTypeAndShapeInfo(); + std::vector output_node_dims = tensor_info_out.GetShape(); + /* iterate over all input nodes */ + for (int i = 0; i < num_input_nodes; i++) + { + /* print input node names */ + ei_printf("LOG_INFO: Input %d : name=%s\n", i, (*input_node_names)[i]); + + /* print input node types */ + Ort::TypeInfo type_info = (*session).GetInputTypeInfo(i); + auto tensor_info = type_info.GetTensorTypeAndShapeInfo(); + + ONNXTensorElementDataType type = 
tensor_info.GetElementType(); + ei_printf("LOG_INFO: Input %d : type=%d\n", i, type); + /* print input shapes/dims */ + input_node_dims = tensor_info.GetShape(); + ei_printf("LOG_INFO: Input %d : num_dims=%zu\n", i, input_node_dims.size()); + for (int j = 0; j < input_node_dims.size(); j++) + { + ei_printf("LOG_INFO: Input %d : dim %d=%jd\n", i, j, input_node_dims[j]); + } + } + if (num_input_nodes != 1) + { + ei_printf("LOG_INFO: supports only single input model \n"); + return EI_IMPULSE_ONNX_ERROR; + } + + for (int i = 0; i < num_output_nodes; i++) + { + /* print output node names */ + ei_printf("LOG_INFO: Output %d : name=%s\n", i, (*output_node_names)[i]); + + /* print output node types */ + Ort::TypeInfo type_info = (*session).GetOutputTypeInfo(i); + auto tensor_info = type_info.GetTensorTypeAndShapeInfo(); + + ONNXTensorElementDataType type = tensor_info.GetElementType(); + ei_printf("LOG_INFO: Output %d : type=%d\n", i, type); + /* print output shapes/dims */ + output_node_dims = tensor_info.GetShape(); + ei_printf("LOG_INFO: Output %d : num_dims=%zu\n", i, output_node_dims.size()); + for (int j = 0; j < output_node_dims.size(); j++) + { + ei_printf("LOG_INFO: Output %d : dim %d=%jd\n", i, j, output_node_dims[j]); + } + } + return EI_IMPULSE_OK; +} + +void * allocTensorMem(int size, int accel) +{ + void * ptr = NULL; + if (accel) + { + #ifdef DEVICE_AM62 + ei_printf("LOG_INFO: TIDL Delgate mode is not allowed on AM62 devices...\n"); + ei_printf("LOG_ERROR: Could not allocate memory for a Tensor of size %d \n ", size); + exit(0); + #else + ptr = TIDLRT_allocSharedMem(64, size); + #endif + } + else + { + ptr = malloc(size); + } + if (ptr == NULL) + { + ei_printf("LOG_ERROR: Could not allocate memory for a Tensor of size %d \n ", size); + exit(0); + } + return ptr; +} + +void freeTensorMem(void * ptr, int accel) +{ + if (accel) + { + #ifndef DEVICE_AM62 + TIDLRT_freeSharedMem(ptr); + #endif + } + else + { + free(ptr); + } +} + +/** + * Setup the ONNX runtime + * + * @param ctx_start_us Pointer to the start time + * @param input Pointer to input tensor + * @param output Pointer to output tensor + * @param micro_interpreter Pointer to interpreter (for non-compiled models) + * @param micro_tensor_arena Pointer to the arena that will be allocated + * + * @return EI_IMPULSE_OK if successful + */ +static EI_IMPULSE_ERROR inference_onnx_setup( + const ei_impulse_t *impulse, + uint64_t *ctx_start_us, + std::vector* input_tensors, + std::vector* output_tensors, + Ort::Session** session_ptr, + Ort::RunOptions** run_options_ptr, + Ort::IoBinding** binding_ptr) { + + static bool onnx_first_run = true; + // Nothing to do after first run + if (!onnx_first_run) { + return EI_IMPULSE_OK; + } + + std::string proj_artifacts_path = "/tmp/" + std::string(impulse->project_name) + "-" + std::to_string(impulse->project_id) + "-" + std::to_string(impulse->deploy_version); + + create_project_if_not_exists(proj_artifacts_path, model_h_files, model_h_files_len); + + std::string proj_model_path = proj_artifacts_path + "/model.onnx"; + + ei_printf("test onnx tidl: %s\n", __FUNCTION__); + #pragma message ( "test onnx tidl: run_nn_inference") + + /* Initialize enviroment, maintains thread pools and state info */ + Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "test"); + /* Initialize session options */ + Ort::SessionOptions session_options; + //TODO: from where do we load number of threads? 
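/* One way to resolve the thread-count TODO above: derive the intra-op thread
 * count from the host CPU, as the DRP-AI YOLOv5 post-processing interpreter
 * earlier in this patch does (leave one core free, never drop below one).
 * Illustrative sketch only, not what the setup code below currently does:
 *
 *   #include <thread>
 *
 *   static int pick_intra_op_threads() {
 *       int n = (int)std::thread::hardware_concurrency() - 1;
 *       return n < 1 ? 1 : n;
 *   }
 *
 *   // session_options.SetIntraOpNumThreads(pick_intra_op_threads());
 */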
+ session_options.SetIntraOpNumThreads(1); + + ei_printf("LOG_INFO: model accelerated \n"); + c_api_tidl_options *options = (c_api_tidl_options *)malloc(sizeof(c_api_tidl_options)); + OrtStatus *def_status = OrtSessionsOptionsSetDefault_Tidl(options); + ei_printf("LOG_INFO: artifacts: %s \n", proj_artifacts_path.c_str()); + strcpy(options->artifacts_folder, proj_artifacts_path.c_str()); + if(NULL == options){ + ei_printf("LOG_ERROR: faild to allocate c_api_tidl_options \n"); + return EI_IMPULSE_ONNX_ERROR; + } + OrtStatus *status = OrtSessionOptionsAppendExecutionProvider_Tidl(session_options, options); + + session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED); + Ort::AllocatorWithDefaultOptions allocator; + + /* ORT Session */ + Ort::Session* session = new Ort::Session(env, proj_model_path.c_str(), session_options); + *session_ptr = session; + ei_printf("LOG_INFO: Loaded model %s\n", proj_model_path.c_str()); + + /* Input information */ + size_t num_input_nodes = session->GetInputCount(); + std::vector input_node_names(num_input_nodes); + Ort::TypeInfo type_info = session->GetInputTypeInfo(0); + auto tensor_info = type_info.GetTensorTypeAndShapeInfo(); + std::vector input_node_dims = tensor_info.GetShape(); + ONNXTensorElementDataType input_tensor_type = tensor_info.GetElementType(); + + /* output information */ + size_t num_output_nodes = session->GetOutputCount(); + std::vector output_node_names(num_output_nodes); + for (int i = 0; i < num_output_nodes; i++) + { + output_node_names[i] = session->GetOutputName(i, allocator); + } + for (int i = 0; i < num_input_nodes; i++) + { + input_node_names[i] = session->GetInputName(i, allocator); + } + + type_info = session->GetOutputTypeInfo(0); + auto output_tensor_info = type_info.GetTensorTypeAndShapeInfo(); + std::vector output_node_dims = output_tensor_info.GetShape(); + size_t output_tensor_size = output_node_dims[1]; + + if (EI_IMPULSE_ONNX_ERROR == printTensorInfo(session, &input_node_names, &output_node_names)) { + ei_printf("LOG_ERROR: print tensor information failed!\n"); + return EI_IMPULSE_ONNX_ERROR; + } + + ssize_t input_tensor_size_bytes; + /* simplify ... 
using known dim values to calculate size */ + size_t input_tensor_size = impulse->nn_input_frame_size; + void *inData; + if (input_tensor_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT) + { + input_tensor_size_bytes = input_tensor_size * sizeof(float); + inData = allocTensorMem(input_tensor_size_bytes, true); + } + else if (input_tensor_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8) + { + input_tensor_size_bytes = input_tensor_size * sizeof(uint8_t); + inData = allocTensorMem(input_tensor_size_bytes, true); + } + else + { + ei_printf("LOG_ERROR: indata type not supported yet \n "); + return EI_IMPULSE_ONNX_ERROR; + } + auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); + + Ort::Value input_tensor = Ort::Value::CreateTensor(memory_info, inData, input_tensor_size_bytes, input_node_dims.data(), 4, input_tensor_type); + input_tensors->push_back(std::move(input_tensor)); + + Ort::RunOptions* run_options = new Ort::RunOptions(); + *run_options_ptr = run_options; + run_options->SetRunLogVerbosityLevel(2); + auto output_tensors_warm_up = session->Run(*run_options, input_node_names.data(), input_tensors->data(), 1, output_node_names.data(), num_output_nodes); + + //void *outData = allocTensorMem(output_tensor_size * sizeof(float), true); + Ort::IoBinding* binding = new Ort::IoBinding(*session); + *binding_ptr = binding; + binding->BindInput(input_node_names[0], (*input_tensors)[0]); + + for(int idx=0; idx < num_output_nodes; idx++) + { + auto node_dims = output_tensors_warm_up[idx].GetTypeInfo().GetTensorTypeAndShapeInfo().GetShape(); + size_t tensor_size = 1; + for(int j = node_dims.size()-1; j >= 0; j--) + { + tensor_size *= node_dims[j]; + } + ONNXTensorElementDataType tensor_type = output_tensors_warm_up[idx].GetTypeInfo().GetTensorTypeAndShapeInfo().GetElementType(); + if(tensor_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT) + { + tensor_size *= sizeof(float); + } + else if(tensor_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8) + { + tensor_size *= sizeof(uint8_t); + } + else if(tensor_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64) + { + tensor_size *= sizeof(int64_t); + } + else + { + ei_printf("LOG_ERROR: Un Supported output tensor_type\n"); + return EI_IMPULSE_ONNX_ERROR; + } + + void * outData = allocTensorMem(tensor_size, true); + auto output_tensor = Ort::Value::CreateTensor(memory_info, (void *)outData, tensor_size, node_dims.data(), node_dims.size(),tensor_type); + output_tensors->push_back(std::move(output_tensor)); + binding->BindOutput(output_node_names[idx], (*output_tensors)[idx]); + } + + onnx_first_run = false; + + return EI_IMPULSE_OK; +} + +/** + * Run ONNX model + * + * @param ctx_start_us Start time of the setup function (see above) + * @param output_tensors Output tensors + * @param session ONNX session + * @param run_options ONNX run options + * @param binding IO bindings + * @param debug Whether to print debug info + * + * @return EI_IMPULSE_OK if successful + */ +static EI_IMPULSE_ERROR inference_onnx_run(const ei_impulse_t *impulse, + uint64_t ctx_start_us, + std::vector* input_tensors, + std::vector* output_tensors, + Ort::Session* session, + Ort::RunOptions* run_options, + Ort::IoBinding* binding, + ei_impulse_result_t *result, + bool debug) { + + session->Run(*run_options, *binding); + + uint64_t ctx_end_us = ei_read_timer_us(); + result->timing.classification_us = ctx_end_us - ctx_start_us; + result->timing.classification = (int)(result->timing.classification_us / 1000); + + ONNXTensorElementDataType output_tensor_type = 
(*output_tensors).at(0).GetTypeInfo().GetTensorTypeAndShapeInfo().GetElementType(); + void *out_data = output_tensors->front().GetTensorMutableData(); + + // get output features count from model + auto node_dims = (*output_tensors).at(0).GetTypeInfo().GetTensorTypeAndShapeInfo().GetShape(); + size_t output_tensor_features_count = 1; + for(int j = node_dims.size()-1; j >= 0; j--) + { + output_tensor_features_count *= node_dims[j]; + } + + // Read the predicted y value from the model's output tensor + if (debug) { + ei_printf("Predictions (time: %d ms.):\n", result->timing.classification); + } + + EI_IMPULSE_ERROR fill_res = EI_IMPULSE_OK; + + // NOTE: for now only yolox object detection supported + if (impulse->object_detection) { + switch (impulse->object_detection_last_layer) { + case EI_CLASSIFIER_LAST_LAYER_YOLOX: { + #if EI_CLASSIFIER_TFLITE_OUTPUT_QUANTIZED == 1 + ei_printf("ERR: YOLOX does not support quantized inference\n"); + return EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE; + #else + + if (debug) { + ei_printf("YOLOX OUTPUT (%d ms.): ", result->timing.classification); + for (size_t ix = 0; ix < output_tensor_features_count; ix++) { + ei_printf_float(((float*)out_data)[ix]); + ei_printf(" "); + } + ei_printf("\n"); + } + fill_res = fill_result_struct_f32_yolox_detect( + impulse, + result, + (float*)out_data, + output_tensor_features_count); + #endif + break; + } + default: { + ei_printf("ERR: Unsupported object detection last layer (%d)\n", + impulse->object_detection_last_layer); + break; + } + } + } + else { +#if EI_CLASSIFIER_TFLITE_OUTPUT_QUANTIZED == 1 + + switch (output_tensor_type) { + case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8: { + fill_res = fill_result_struct_i8(impulse, result, (int8_t*)out_data, impulse->tflite_output_zeropoint, impulse->tflite_output_scale, debug); + break; + } + case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8: { + fill_res = fill_result_struct_i8(impulse, result, (int8_t*)out_data, impulse->tflite_output_zeropoint, impulse->tflite_output_scale, debug); + break; + } + default: { + ei_printf("ERR: Cannot handle output type (%d)\n", output_tensor_type); + return EI_IMPULSE_OUTPUT_TENSOR_WAS_NULL; + } + } + +#else + switch (output_tensor_type) { + case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT: { + fill_res = fill_result_struct_f32(impulse, result, (float*)out_data, debug); + break; + } + default: { + ei_printf("ERR: Cannot handle output type (%d)\n", output_tensor_type); + return EI_IMPULSE_OUTPUT_TENSOR_WAS_NULL; + } + } +#endif + } + + ///* freeing shared mem*/ + //for (size_t i = 0; i < output_tensors->size(); i++) + //{ + // void *ptr = (*output_tensors)[i].GetTensorMutableData(); + // freeTensorMem(ptr, true); + //} + //for (size_t i = 0; i < input_tensors->size(); i++) + //{ + // void *ptr = (*input_tensors)[i].GetTensorMutableData(); + // freeTensorMem(ptr, true); + //} + + if (fill_res != EI_IMPULSE_OK) { + return fill_res; + } + + return EI_IMPULSE_OK; +} + +/** + * @brief Do neural network inferencing over the processed feature matrix + * + * @param fmatrix Processed matrix >> features [array of features] this is input + * @param result Output classifier results >> output + * @param[in] debug Debug output enable + * + * @return The ei impulse error. 
+ */ +EI_IMPULSE_ERROR run_nn_inference( + const ei_impulse_t *impulse, + ei_feature_t *afmatrix, + uint32_t* input_block_ids, + uint32_t input_block_ids_size, + ei_impulse_result_t *result, + void *config_ptr, + bool debug = false) +{ + static std::vector input_tensors; + static std::vector output_tensors; + static Ort::Session* session; + static Ort::RunOptions* run_options; + static Ort::IoBinding* binding; + uint64_t ctx_start_us; + + ei_printf("test onnx tidl: %s\n", __FUNCTION__); + #pragma message ( "test onnx tidl: run_nn_inference") + + EI_IMPULSE_ERROR init_res = inference_onnx_setup(impulse, + &ctx_start_us, + &input_tensors, + &output_tensors, + &session, + &run_options, + &binding); + + if (init_res != EI_IMPULSE_OK || session == NULL || run_options == NULL || + binding == NULL) { + ei_printf("LOG_ERROR: ONNX inference setup failed!\n"); + return EI_IMPULSE_ONNX_ERROR; + } + + uint64_t dsp_chw_start_us; + dsp_chw_start_us = ei_read_timer_us(); + + /* + ** Convert to CHW from HWC + */ + // features matrix maps around the input tensor to not allocate any memory + float *input_buffer = input_tensors.front().GetTensorMutableData(); + ei::matrix_t fmatrix(1, impulse->nn_input_frame_size, input_buffer); + + ei_dsp_config_image_t *config = (ei_dsp_config_image_t *)impulse->dsp_blocks[0].config; + + size_t channels = strcmp(config->channels, "Grayscale") == 0 ? 1 : 3; + size_t height = impulse->input_height; + size_t width = impulse->input_width; + + ei::matrix_t* matrix = afmatrix[0].matrix; + + int dest_ix = 0; + for (size_t c=0; c < channels; c++) { + for (size_t h=0; h < height; h++) { + for (size_t w=0; w < width; w++) { + uint32_t src_ix = channels * width * h + w*channels + c; + fmatrix.buffer[dest_ix++] = matrix->buffer[src_ix]; + } + } + } + + uint64_t dsp_chw_end_us = ei_read_timer_us(); + result->timing.dsp_us += dsp_chw_end_us - dsp_chw_start_us; + result->timing.dsp = (int)(result->timing.dsp_us / 1000); + + if (debug) { + ei_printf("After Features (%ld us.): ", result->timing.dsp_us); + for (size_t ix = 0; ix < fmatrix.cols; ix++) { + ei_printf_float(fmatrix.buffer[ix]); + ei_printf(" "); + } + ei_printf("\n"); + } + + ctx_start_us = ei_read_timer_us(); + EI_IMPULSE_ERROR run_res = inference_onnx_run(impulse, + ctx_start_us, + &input_tensors, + &output_tensors, + session, + run_options, + binding, + result, debug); + + if (run_res != EI_IMPULSE_OK) { + return run_res; + } + + return EI_IMPULSE_OK; +} + +#if EI_CLASSIFIER_QUANTIZATION_ENABLED == 1 +/** + * Special function to run the classifier on images, only works on TFLite models (either interpreter or EON or for tensaiflow) + * that allocates a lot less memory by quantizing in place. This only works if 'can_run_classifier_image_quantized' + * returns EI_IMPULSE_OK. 
+ */ +EI_IMPULSE_ERROR run_nn_inference_image_quantized( + const ei_impulse_t *impulse, + signal_t *signal, + ei_impulse_result_t *result, + void *config_ptr, + bool debug = false) +{ + static std::vector input_tensors; + static std::vector output_tensors; + static Ort::Session* session; + static Ort::RunOptions* run_options; + static Ort::IoBinding* binding; + uint64_t ctx_start_us; + + ei_printf("test onnx tidl: %s\n", __FUNCTION__); + #pragma message ( "test onnx tidl: run_nn_inference_image_quantized") + + EI_IMPULSE_ERROR init_res = inference_onnx_setup(impulse, + &ctx_start_us, + &input_tensors, &output_tensors, + &session, + &run_options, + &binding); + + if (init_res != EI_IMPULSE_OK || session == NULL || run_options == NULL || + binding == NULL) { + ei_printf("LOG_ERROR: ONNX inference setup failed!\n"); + return EI_IMPULSE_ONNX_ERROR; + } + + ONNXTensorElementDataType input_tensor_type = input_tensors.at(0).GetTypeInfo().GetTensorTypeAndShapeInfo().GetElementType(); + if (input_tensor_type != ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8 && + input_tensor_type != ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8) { + return EI_IMPULSE_ONLY_SUPPORTED_FOR_IMAGES; + } + + uint64_t dsp_start_us = ei_read_timer_us(); + + ei::matrix_i8_t a_features_matrix(1, impulse->nn_input_frame_size); + + // run DSP process and quantize automatically + int ret = extract_image_features_quantized(impulse, signal, &a_features_matrix, impulse->dsp_blocks[0].config, impulse->frequency, + impulse->learning_blocks[0].image_scaling); + if (ret != EIDSP_OK) { + ei_printf("ERR: Failed to run DSP process (%d)\n", ret); + return EI_IMPULSE_DSP_ERROR; + } + + if (ei_run_impulse_check_canceled() == EI_IMPULSE_CANCELED) { + return EI_IMPULSE_CANCELED; + } + + if (debug) { + ei_printf("Before Features: "); + for (size_t ix = 0; ix < a_features_matrix.cols; ix++) { + ei_printf("%d", (uint8_t)a_features_matrix.buffer[ix]); + ei_printf(" "); + } + ei_printf("\n"); + } + + /* + ** Convert to CHW from HWC + */ + // features matrix maps around the input tensor to not allocate any memory + uint8_t *input_buffer = input_tensors.front().GetTensorMutableData(); + ei::matrix_i8_t features_matrix(1, impulse->nn_input_frame_size, (int8_t*) input_buffer); + + ei_dsp_config_image_t *config = (ei_dsp_config_image_t *)impulse->dsp_blocks[0].config; + + size_t channels = strcmp(config->channels, "Grayscale") == 0 ? 
1 : 3; + size_t height = impulse->input_height; + size_t width = impulse->input_width; + + int dest_ix = 0; + for (size_t c=0; c < channels; c++) { + for (size_t h=0; h < height; h++) { + for (size_t w=0; w < width; w++) { + uint32_t src_ix = channels * width * h + w*channels + c; + features_matrix.buffer[dest_ix++] = a_features_matrix.buffer[src_ix]; + } + } + } + + if (debug) { + ei_printf("After Features: "); + for (size_t ix = 0; ix < features_matrix.cols; ix++) { + ei_printf("%d", (uint8_t)features_matrix.buffer[ix]); + ei_printf(" "); + } + ei_printf("\n"); + } + + result->timing.dsp_us = ei_read_timer_us() - dsp_start_us; + result->timing.dsp = (int)(result->timing.dsp_us / 1000); + + if (debug) { + ei_printf("Features (%d ms.): ", result->timing.dsp); + for (size_t ix = 0; ix < features_matrix.cols; ix++) { + // expects scale of (1/255) and zeropoint of 0 + ei_printf_float(static_cast(((uint8_t)features_matrix.buffer[ix] - impulse->tflite_input_zeropoint) * impulse->tflite_input_scale)); + ei_printf(" "); + } + ei_printf("\n"); + } + + ctx_start_us = ei_read_timer_us(); + EI_IMPULSE_ERROR run_res = inference_onnx_run(impulse, + ctx_start_us, + &input_tensors, + &output_tensors, + session, + run_options, + binding, + result, debug); + + if (run_res != EI_IMPULSE_OK) { + return run_res; + } + + return EI_IMPULSE_OK; +} +#endif // EI_CLASSIFIER_QUANTIZATION_ENABLED == 1 + +#endif // #if (EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_ONNX_TIDL) && (EI_CLASSIFIER_COMPILED != 1) +#endif // _EI_CLASSIFIER_INFERENCING_ENGINE_ONNX_TIDL_H_ diff --git a/edgeimpulse/edge-impulse-sdk/classifier/inferencing_engines/tensaiflow.h b/edgeimpulse/edge-impulse-sdk/classifier/inferencing_engines/tensaiflow.h new file mode 100644 index 0000000..3d6b4b9 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/classifier/inferencing_engines/tensaiflow.h @@ -0,0 +1,215 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef _EI_CLASSIFIER_INFERENCING_ENGINE_TENSAILFOW_H_ +#define _EI_CLASSIFIER_INFERENCING_ENGINE_TENSAILFOW_H_ + +#if (EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_TENSAIFLOW) + +#include "model-parameters/model_metadata.h" +#include "edge-impulse-sdk/porting/ei_classifier_porting.h" +#include "edge-impulse-sdk/classifier/ei_fill_result_struct.h" +#include "edge-impulse-sdk/classifier/ei_run_dsp.h" + +#include "mcu.h" + +extern "C" void infer(const void *impulse_arg, uint32_t* time, uint32_t* cycles); +int8_t *processed_features; + +#ifdef EI_CLASSIFIER_NN_OUTPUT_COUNT +int8_t infer_result[EI_CLASSIFIER_NN_OUTPUT_COUNT]; +#else +int8_t infer_result[EI_CLASSIFIER_LABEL_COUNT]; +#endif + +extern "C" void get_data(const void *impulse_arg, int8_t *in_buf_0, uint16_t in_buf_0_dim_0, uint16_t in_buf_0_dim_1, uint16_t in_buf_0_dim_2) +{ + ei_impulse_t *impulse = (ei_impulse_t *) impulse_arg; + + if ((impulse->sensor == EI_CLASSIFIER_SENSOR_CAMERA) && + ((impulse->dsp_blocks_size == 1) || + (impulse->dsp_blocks[0].extract_fn == extract_image_features))) { + + memcpy(in_buf_0, processed_features, impulse->nn_input_frame_size); + } +} + +extern "C" void post_process(const void *impulse_arg, int8_t *out_buf_0, int8_t *out_buf_1) +{ + ei_impulse_t *impulse = (ei_impulse_t *) impulse_arg; + + #ifdef EI_CLASSIFIER_NN_OUTPUT_COUNT + memcpy(infer_result, out_buf_0, impulse->tflite_output_features_count); + #else + memcpy(infer_result, out_buf_0, impulse->label_count); + #endif +} + +/** + * @brief Do neural network inferencing over the processed feature matrix + * + * @param fmatrix Processed matrix + * @param result Output classifier results + * @param[in] debug Debug output enable + * + * @return The ei impulse error. + */ +EI_IMPULSE_ERROR run_nn_inference( + const ei_impulse_t *impulse, + ei_feature_t *fmatrix, + uint32_t* input_block_ids, + uint32_t input_block_ids_size, + ei_impulse_result_t *result, + void *config_ptr, + bool debug = false) +{ + ei_learning_block_config_tflite_graph_t *block_config = (ei_learning_block_config_tflite_graph_t*)config_ptr; + ei_config_tensaiflow_graph_t *graph_config = (ei_config_tensaiflow_graph_t*)block_config->graph_config; + + if (impulse->object_detection) { + ei_printf("ERR: Object detection models are not supported with TensaiFlow\n"); + return EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE; + } + + uint64_t ctx_start_us = ei_read_timer_us(); + uint32_t time, cycles; + + /* Run tensaiflow inference */ + infer((const void *)impulse, &time, &cycles); + + // Inference results returned by post_process() and copied into infer_results + + result->timing.classification_us = ei_read_timer_us() - ctx_start_us; + result->timing.classification = (int)(result->timing.classification_us / 1000); + + for (uint32_t ix = 0; ix < impulse->label_count; ix++) { + float value; + // Dequantize the output if it is int8 + value = static_cast(infer_result[ix] - graph_config->output_zeropoint) * + graph_config->output_scale; + + if (debug) { + ei_printf("%s:\t", impulse->categories[ix]); + ei_printf_float(value); + ei_printf("\n"); + } + result->classification[ix].label = impulse->categories[ix]; + result->classification[ix].value = value; + } + + return EI_IMPULSE_OK; + +} + +/** + * Special function to run the classifier on images, only works on TFLite models (either interpreter or EON or for tensaiflow) + * that allocates a lot less memory by quantizing in place. 
This only works if 'can_run_classifier_image_quantized' + * returns EI_IMPULSE_OK. + */ +EI_IMPULSE_ERROR run_nn_inference_image_quantized( + const ei_impulse_t *impulse, + signal_t *signal, + ei_impulse_result_t *result, + void *config_ptr, + bool debug = false) +{ + ei_learning_block_config_tflite_graph_t *block_config = (ei_learning_block_config_tflite_graph_t*)config_ptr; + ei_config_tensaiflow_graph_t *graph_config = (ei_config_tensaiflow_graph_t*)block_config->graph_config; + + uint64_t ctx_start_us; + uint64_t dsp_start_us = ei_read_timer_us(); + + ei::matrix_i8_t features_matrix(1, impulse->nn_input_frame_size); + processed_features = (int8_t *) features_matrix.buffer; + + // run DSP process and quantize automatically + int ret = extract_image_features_quantized(signal, &features_matrix, impulse->dsp_blocks[0].config, graph_config->input_scale, graph_config->input_zeropoint, + impulse->frequency, impulse->learning_blocks[0].image_scaling); + if (ret != EIDSP_OK) { + ei_printf("ERR: Failed to run DSP process (%d)\n", ret); + return EI_IMPULSE_DSP_ERROR; + } + + if (ei_run_impulse_check_canceled() == EI_IMPULSE_CANCELED) { + return EI_IMPULSE_CANCELED; + } + + result->timing.dsp_us = ei_read_timer_us() - dsp_start_us; + result->timing.dsp = (int)(result->timing.dsp_us / 1000); + + if (debug) { + ei_printf("Features (%d ms.): ", result->timing.dsp); + for (size_t ix = 0; ix < features_matrix.cols; ix++) { + ei_printf_float((features_matrix.buffer[ix] - graph_config->input_zeropoint) * graph_config->input_scale); + ei_printf(" "); + } + ei_printf("\n"); + } + + uint32_t time, cycles; + ctx_start_us = ei_read_timer_us(); + + /* Run tensaiflow inference */ + infer((const void *)impulse, &time, &cycles); + + // Inference results returned by post_process() and copied into infer_results + + EI_IMPULSE_ERROR fill_res = EI_IMPULSE_OK; + + if (impulse->object_detection) { + switch (impulse->object_detection_last_layer) { + case EI_CLASSIFIER_LAST_LAYER_FOMO: { + #if EI_CLASSIFIER_TFLITE_OUTPUT_QUANTIZED == 1 + fill_res = fill_result_struct_i8_fomo(impulse, result, infer_result, + graph_config->output_zeropoint, graph_config->output_scale, + impulse->fomo_output_size, impulse->fomo_output_size); + #else + ei_printf("ERR: TensaiFlow does not support float32 inference\n"); + return EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE; + #endif + break; + } + default: { + ei_printf("ERR: Unsupported object detection last layer (%d)\n", + impulse->object_detection_last_layer); + return EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE; + } + } + } + else { + #if EI_CLASSIFIER_TFLITE_OUTPUT_QUANTIZED == 1 + fill_res = fill_result_struct_i8(impulse, result, infer_result, + graph_config->output_zeropoint, graph_config->output_scale, debug); + #else + ei_printf("ERR: TensaiFlow does not support float32 inference\n"); + return EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE; + #endif + } + + if (fill_res != EI_IMPULSE_OK) { + return fill_res; + } + + + result->timing.classification_us = ei_read_timer_us() - ctx_start_us; + result->timing.classification = (int)(result->timing.classification_us / 1000); + return EI_IMPULSE_OK; + +} + +#endif // #if (EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_TENSAILFOW) +#endif // _EI_CLASSIFIER_INFERENCING_ENGINE_TENSAILFOW_H_ diff --git a/edgeimpulse/edge-impulse-sdk/classifier/inferencing_engines/tensorrt.h b/edgeimpulse/edge-impulse-sdk/classifier/inferencing_engines/tensorrt.h new file mode 100644 index 0000000..ed42a61 --- /dev/null +++ 
b/edgeimpulse/edge-impulse-sdk/classifier/inferencing_engines/tensorrt.h @@ -0,0 +1,232 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef _EI_CLASSIFIER_INFERENCING_ENGINE_TENSORRT_H_ +#define _EI_CLASSIFIER_INFERENCING_ENGINE_TENSORRT_H_ + +#if (EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_TENSORRT) + +#include "model-parameters/model_metadata.h" + +#include "edge-impulse-sdk/porting/ei_classifier_porting.h" +#include "edge-impulse-sdk/classifier/ei_fill_result_struct.h" + +#include +#include "tflite/linux-jetson-nano/libeitrt.h" + +EiTrt *ei_trt_handle = NULL; + +inline bool file_exists(char *model_file_name) +{ + if (FILE *file = fopen(model_file_name, "r")) { + fclose(file); + return true; + } + else { + return false; + } +} + +/** + * @brief Do neural network inferencing over the processed feature matrix + * + * @param fmatrix Processed matrix + * @param result Output classifier results + * @param[in] debug Debug output enable + * + * @return The ei impulse error. + */ +EI_IMPULSE_ERROR run_nn_inference( + const ei_impulse_t *impulse, + ei_feature_t *fmatrix, + uint32_t* input_block_ids, + uint32_t input_block_ids_size, + ei_impulse_result_t *result, + void *config_ptr, + bool debug = false) +{ + ei_learning_block_config_tflite_graph_t *block_config = (ei_learning_block_config_tflite_graph_t*)config_ptr; + ei_config_tflite_graph_t *graph_config = (ei_config_tflite_graph_t*)block_config->graph_config; + + #if EI_CLASSIFIER_QUANTIZATION_ENABLED == 1 + #error "TensorRT requires an unquantized network" + #endif + + static char model_file_name[128]; + snprintf( + model_file_name, + 128, + "/tmp/%s-%d-%d.engine", + impulse->project_name, + impulse->project_id, + impulse->deploy_version); + + static bool first_run = !file_exists(model_file_name); + if (first_run) { + ei_printf("INFO: Model file '%s' does not exist, creating now. 
\n", model_file_name); + + FILE *file = fopen(model_file_name, "w"); + if (!file) { + ei_printf("ERR: TensorRT init failed to open '%s'\n", model_file_name); + return EI_IMPULSE_TENSORRT_INIT_FAILED; + } + + if (fwrite(graph_config->model, graph_config->model_size, 1, file) != 1) { + ei_printf("ERR: TensorRT init fwrite failed.\n"); + return EI_IMPULSE_TENSORRT_INIT_FAILED; + } + + if (fclose(file) != 0) { + ei_printf("ERR: TensorRT init fclose failed.\n"); + return EI_IMPULSE_TENSORRT_INIT_FAILED; + } + } + + uint32_t out_data_size = 0; + + if (impulse->object_detection) { + switch (impulse->object_detection_last_layer) { + case EI_CLASSIFIER_LAST_LAYER_FOMO: + case EI_CLASSIFIER_LAST_LAYER_YOLOV5: { + out_data_size = impulse->tflite_output_features_count; + break; + } + case EI_CLASSIFIER_LAST_LAYER_SSD: { + ei_printf("ERR: SSD models are not supported using TensorRT \n"); + return EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE; + break; + } + default: { + ei_printf( + "ERR: Unsupported object detection last layer (%d)\n", + impulse->object_detection_last_layer); + return EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE; + } + } + } + else { + out_data_size = impulse->label_count; + } + + float *out_data = (float*)ei_malloc(out_data_size * sizeof(float)); + if (out_data == nullptr) { + ei_printf("ERR: Cannot allocate memory for output data \n"); + } + + // lazy initialize tensorRT context + if (ei_trt_handle == nullptr) { + ei_trt_handle = libeitrt::create_EiTrt(model_file_name, debug); + } + +#if EI_CLASSIFIER_SINGLE_FEATURE_INPUT == 0 + size_t mtx_size = impulse->dsp_blocks_size + impulse->learning_blocks_size; + ei::matrix_t* matrix = NULL; + + ei::matrix_t combined_matrix(1, impulse->nn_input_frame_size); + uint32_t buf_pos = 0; + + for (size_t i = 0; i < input_block_ids_size; i++) { + size_t cur_mtx = input_block_ids[i]; + + if (!find_mtx_by_idx(fmatrix, &matrix, cur_mtx, mtx_size)) { + ei_printf("ERR: Cannot find matrix with id %zu\n", cur_mtx); + return EI_IMPULSE_INVALID_SIZE; + } + + for (size_t ix = 0; ix < matrix->rows * matrix->cols; ix++) { + combined_matrix.buffer[buf_pos++] = matrix->buffer[ix]; + } + } + matrix = &combined_matrix; +#else + ei::matrix_t* matrix = fmatrix[0].matrix; +#endif + + uint64_t ctx_start_us = ei_read_timer_us(); + + libeitrt::infer(ei_trt_handle, matrix->buffer, out_data, out_data_size); + + uint64_t ctx_end_us = ei_read_timer_us(); + + result->timing.classification_us = ctx_end_us - ctx_start_us; + result->timing.classification = (int)(result->timing.classification_us / 1000); + + EI_IMPULSE_ERROR fill_res = EI_IMPULSE_OK; + + if (impulse->object_detection) { + switch (impulse->object_detection_last_layer) { + case EI_CLASSIFIER_LAST_LAYER_FOMO: { + fill_res = fill_result_struct_f32_fomo( + impulse, + result, + out_data, + impulse->fomo_output_size, + impulse->fomo_output_size); + break; + } + case EI_CLASSIFIER_LAST_LAYER_SSD: { + ei_printf("ERR: SSD models are not supported using TensorRT \n"); + return EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE; + break; + } + case EI_CLASSIFIER_LAST_LAYER_YOLOV5: { + fill_res = fill_result_struct_f32_yolov5( + impulse, + result, + 6, + out_data, + impulse->tflite_output_features_count); + break; + } + default: { + ei_printf( + "ERR: Unsupported object detection last layer (%d)\n", + impulse->object_detection_last_layer); + return EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE; + } + } + } + else { + fill_res = fill_result_struct_f32(impulse, result, out_data, debug); + } + + ei_free(out_data); + + if (fill_res != EI_IMPULSE_OK) 
{ + return fill_res; + } + + return EI_IMPULSE_OK; +} + +/** + * Special function to run the classifier on images, only works on TFLite models (either interpreter or EON or for tensaiflow) + * that allocates a lot less memory by quantizing in place. This only works if 'can_run_classifier_image_quantized' + * returns EI_IMPULSE_OK. + */ +EI_IMPULSE_ERROR run_nn_inference_image_quantized( + const ei_impulse_t *impulse, + signal_t *signal, + ei_impulse_result_t *result, + void *config_ptr, + bool debug = false) +{ + return EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE; +} + +#endif // #if (EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_TENSORRT) +#endif // _EI_CLASSIFIER_INFERENCING_ENGINE_TENSORRT_H_ diff --git a/edgeimpulse/edge-impulse-sdk/classifier/inferencing_engines/tflite_eon.h b/edgeimpulse/edge-impulse-sdk/classifier/inferencing_engines/tflite_eon.h new file mode 100644 index 0000000..e58ab8b --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/classifier/inferencing_engines/tflite_eon.h @@ -0,0 +1,403 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef _EI_CLASSIFIER_INFERENCING_ENGINE_TFLITE_EON_H_ +#define _EI_CLASSIFIER_INFERENCING_ENGINE_TFLITE_EON_H_ + +#if (EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_TFLITE) && (EI_CLASSIFIER_COMPILED == 1) + +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/classifier/ei_aligned_malloc.h" +#include "edge-impulse-sdk/classifier/ei_fill_result_struct.h" +#include "edge-impulse-sdk/classifier/ei_model_types.h" +#include "edge-impulse-sdk/classifier/inferencing_engines/tflite_helper.h" +#include "edge-impulse-sdk/classifier/ei_run_dsp.h" + +/** + * Setup the TFLite runtime + * + * @param ctx_start_us Pointer to the start time + * @param input Pointer to input tensor + * @param output Pointer to output tensor + * @param micro_tensor_arena Pointer to the arena that will be allocated + * + * @return EI_IMPULSE_OK if successful + */ +static EI_IMPULSE_ERROR inference_tflite_setup( + ei_learning_block_config_tflite_graph_t *block_config, + uint64_t *ctx_start_us, + TfLiteTensor* input, + TfLiteTensor* output, + TfLiteTensor* output_labels, + TfLiteTensor* output_scores, + ei_unique_ptr_t& p_tensor_arena) { + + ei_config_tflite_eon_graph_t *graph_config = (ei_config_tflite_eon_graph_t*)block_config->graph_config; + + *ctx_start_us = ei_read_timer_us(); + + TfLiteStatus init_status = graph_config->model_init(ei_aligned_calloc); + if (init_status != kTfLiteOk) { + ei_printf("Failed to initialize the model (error code %d)\n", init_status); + return EI_IMPULSE_TFLITE_ARENA_ALLOC_FAILED; + } + + TfLiteStatus status; + + status = graph_config->model_input(0, input); + if (status != kTfLiteOk) { + return EI_IMPULSE_TFLITE_ERROR; + } + status = graph_config->model_output(block_config->output_data_tensor, output); + if (status != kTfLiteOk) { + 
return EI_IMPULSE_TFLITE_ERROR; + } + + if (block_config->object_detection_last_layer == EI_CLASSIFIER_LAST_LAYER_SSD) { + status = graph_config->model_output(block_config->output_score_tensor, output_scores); + if (status != kTfLiteOk) { + return EI_IMPULSE_TFLITE_ERROR; + } + status = graph_config->model_output(block_config->output_labels_tensor, output_labels); + if (status != kTfLiteOk) { + return EI_IMPULSE_TFLITE_ERROR; + } + } + + return EI_IMPULSE_OK; +} + +/** + * Run TFLite model + * + * @param ctx_start_us Start time of the setup function (see above) + * @param output Output tensor + * @param interpreter TFLite interpreter (non-compiled models) + * @param tensor_arena Allocated arena (will be freed) + * @param result Struct for results + * @param debug Whether to print debug info + * + * @return EI_IMPULSE_OK if successful + */ +static EI_IMPULSE_ERROR inference_tflite_run( + const ei_impulse_t *impulse, + ei_learning_block_config_tflite_graph_t *block_config, + uint64_t ctx_start_us, + TfLiteTensor* output, + TfLiteTensor* labels_tensor, + TfLiteTensor* scores_tensor, + uint8_t* tensor_arena, + ei_impulse_result_t *result, + bool debug) { + + ei_config_tflite_eon_graph_t *graph_config = (ei_config_tflite_eon_graph_t*)block_config->graph_config; + + if(graph_config->model_invoke() != kTfLiteOk) { + return EI_IMPULSE_TFLITE_ERROR; + } + + uint64_t ctx_end_us = ei_read_timer_us(); + + result->timing.classification_us = ctx_end_us - ctx_start_us; + result->timing.classification = (int)(result->timing.classification_us / 1000); + + // Read the predicted y value from the model's output tensor + if (debug) { + ei_printf("Predictions (time: %d ms.):\n", result->timing.classification); + } + + EI_IMPULSE_ERROR fill_res = fill_result_struct_from_output_tensor_tflite( + impulse, output, labels_tensor, scores_tensor, result, debug); + + if (fill_res != EI_IMPULSE_OK) { + return fill_res; + } + + if (ei_run_impulse_check_canceled() == EI_IMPULSE_CANCELED) { + return EI_IMPULSE_CANCELED; + } + + return EI_IMPULSE_OK; +} + +/** + * @brief Do neural network inferencing over a signal (from the DSP) + * + * @param fmatrix Processed matrix + * @param result Output classifier results + * @param[in] debug Debug output enable + * + * @return The ei impulse error. 
+ */ +EI_IMPULSE_ERROR run_nn_inference_from_dsp( + ei_learning_block_config_tflite_graph_t *block_config, + signal_t *signal, + matrix_t *output_matrix) +{ + TfLiteTensor input; + TfLiteTensor output; + TfLiteTensor output_scores; + TfLiteTensor output_labels; + uint64_t ctx_start_us = ei_read_timer_us(); + ei_unique_ptr_t p_tensor_arena(nullptr, ei_aligned_free); + ei_config_tflite_eon_graph_t *graph_config = (ei_config_tflite_eon_graph_t*)block_config->graph_config; + + EI_IMPULSE_ERROR init_res = inference_tflite_setup( + block_config, + &ctx_start_us, + &input, + &output, + &output_labels, + &output_scores, + p_tensor_arena); + + if (init_res != EI_IMPULSE_OK) { + return init_res; + } + + auto input_res = fill_input_tensor_from_signal(signal, &input); + if (input_res != EI_IMPULSE_OK) { + return input_res; + } + + // invoke the model + if (graph_config->model_invoke() != kTfLiteOk) { + return EI_IMPULSE_TFLITE_ERROR; + } + + auto output_res = fill_output_matrix_from_tensor(&output, output_matrix); + if (output_res != EI_IMPULSE_OK) { + return output_res; + } + + if (graph_config->model_reset(ei_aligned_free) != kTfLiteOk) { + return EI_IMPULSE_TFLITE_ERROR; + } + + return EI_IMPULSE_OK; +} + +/** + * @brief Do neural network inferencing over a feature matrix + * + * @param fmatrix Processed matrix + * @param result Output classifier results + * @param[in] debug Debug output enable + * + * @return The ei impulse error. + */ +EI_IMPULSE_ERROR run_nn_inference( + const ei_impulse_t *impulse, + ei_feature_t *fmatrix, + uint32_t* input_block_ids, + uint32_t input_block_ids_size, + ei_impulse_result_t *result, + void *config_ptr, + bool debug = false) +{ + ei_learning_block_config_tflite_graph_t *block_config = (ei_learning_block_config_tflite_graph_t*)config_ptr; + ei_config_tflite_eon_graph_t *graph_config = (ei_config_tflite_eon_graph_t*)block_config->graph_config; + + TfLiteTensor input; + TfLiteTensor output; + TfLiteTensor output_scores; + TfLiteTensor output_labels; + + uint64_t ctx_start_us = ei_read_timer_us(); + ei_unique_ptr_t p_tensor_arena(nullptr, ei_aligned_free); + + EI_IMPULSE_ERROR init_res = inference_tflite_setup( + block_config, + &ctx_start_us, + &input, + &output, + &output_labels, + &output_scores, + p_tensor_arena); + + if (init_res != EI_IMPULSE_OK) { + return init_res; + } + + uint8_t* tensor_arena = static_cast(p_tensor_arena.get()); + + size_t mtx_size = impulse->dsp_blocks_size + impulse->learning_blocks_size; + auto input_res = fill_input_tensor_from_matrix(fmatrix, &input, input_block_ids, input_block_ids_size, mtx_size); + if (input_res != EI_IMPULSE_OK) { + return input_res; + } + + EI_IMPULSE_ERROR run_res = inference_tflite_run( + impulse, + block_config, + ctx_start_us, + &output, + &output_labels, + &output_scores, + tensor_arena, result, debug); + + if (result->copy_output) { + auto output_res = fill_output_matrix_from_tensor(&output, fmatrix[impulse->dsp_blocks_size].matrix); + if (output_res != EI_IMPULSE_OK) { + return output_res; + } + } + + graph_config->model_reset(ei_aligned_free); + + result->timing.classification_us = ei_read_timer_us() - ctx_start_us; + + if (run_res != EI_IMPULSE_OK) { + return run_res; + } + + return EI_IMPULSE_OK; +} + +#if EI_CLASSIFIER_QUANTIZATION_ENABLED == 1 +/** + * Special function to run the classifier on images, only works on TFLite models (either interpreter or EON or for tensaiflow) + * that allocates a lot less memory by quantizing in place. 
This only works if 'can_run_classifier_image_quantized' + * returns EI_IMPULSE_OK. + */ +EI_IMPULSE_ERROR run_nn_inference_image_quantized( + const ei_impulse_t *impulse, + signal_t *signal, + ei_impulse_result_t *result, + void *config_ptr, + bool debug = false) { + + ei_learning_block_config_tflite_graph_t *block_config = (ei_learning_block_config_tflite_graph_t*)config_ptr; + ei_config_tflite_eon_graph_t *graph_config = (ei_config_tflite_eon_graph_t*)block_config->graph_config; + + memset(result, 0, sizeof(ei_impulse_result_t)); + + uint64_t ctx_start_us; + TfLiteTensor input; + TfLiteTensor output; + TfLiteTensor output_scores; + TfLiteTensor output_labels; + + ei_unique_ptr_t p_tensor_arena(nullptr, ei_aligned_free); + + EI_IMPULSE_ERROR init_res = inference_tflite_setup( + block_config, + &ctx_start_us, + &input, &output, + &output_labels, + &output_scores, + p_tensor_arena); + + if (init_res != EI_IMPULSE_OK) { + return init_res; + } + + if (input.type != TfLiteType::kTfLiteInt8 && input.type != TfLiteType::kTfLiteUInt8) { + return EI_IMPULSE_ONLY_SUPPORTED_FOR_IMAGES; + } + + uint64_t dsp_start_us = ei_read_timer_us(); + + // features matrix maps around the input tensor to not allocate any memory + ei::matrix_i8_t features_matrix(1, impulse->nn_input_frame_size, input.data.int8); + + // run DSP process and quantize automatically + int ret = extract_image_features_quantized(signal, &features_matrix, impulse->dsp_blocks[0].config, input.params.scale, input.params.zero_point, + impulse->frequency, impulse->learning_blocks[0].image_scaling); + + if (ret != EIDSP_OK) { + ei_printf("ERR: Failed to run DSP process (%d)\n", ret); + return EI_IMPULSE_DSP_ERROR; + } + + if (ei_run_impulse_check_canceled() == EI_IMPULSE_CANCELED) { + return EI_IMPULSE_CANCELED; + } + + result->timing.dsp_us = ei_read_timer_us() - dsp_start_us; + result->timing.dsp = (int)(result->timing.dsp_us / 1000); + + if (debug) { + ei_printf("Features (%d ms.): ", result->timing.dsp); + for (size_t ix = 0; ix < features_matrix.cols; ix++) { + ei_printf_float((features_matrix.buffer[ix] - input.params.zero_point) * input.params.scale); + ei_printf(" "); + } + ei_printf("\n"); + } + + ctx_start_us = ei_read_timer_us(); + + EI_IMPULSE_ERROR run_res = inference_tflite_run( + impulse, + block_config, + ctx_start_us, + &output, + &output_labels, + &output_scores, + static_cast(p_tensor_arena.get()), + result, + debug); + + graph_config->model_reset(ei_aligned_free); + + if (run_res != EI_IMPULSE_OK) { + return run_res; + } + + result->timing.classification_us = ei_read_timer_us() - ctx_start_us; + + return EI_IMPULSE_OK; +} +#endif // EI_CLASSIFIER_QUANTIZATION_ENABLED == 1 + +__attribute__((unused)) int extract_tflite_eon_features(signal_t *signal, matrix_t *output_matrix, void *config_ptr, const float frequency) { + ei_dsp_config_tflite_eon_t *dsp_config = (ei_dsp_config_tflite_eon_t*)config_ptr; + + ei_config_tflite_eon_graph_t ei_config_tflite_graph_0 = { + .implementation_version = 1, + .model_init = dsp_config->init_fn, + .model_invoke = dsp_config->invoke_fn, + .model_reset = dsp_config->reset_fn, + .model_input = dsp_config->input_fn, + .model_output = dsp_config->output_fn, + }; + + ei_learning_block_config_tflite_graph_t ei_learning_block_config = { + .implementation_version = 1, + .block_id = dsp_config->block_id, + .object_detection = false, + .object_detection_last_layer = EI_CLASSIFIER_LAST_LAYER_UNKNOWN, + .output_data_tensor = 0, + .output_labels_tensor = 255, + .output_score_tensor = 255, + .quantized = 0, + 
.compiled = 1, + .graph_config = &ei_config_tflite_graph_0 + }; + + auto x = run_nn_inference_from_dsp(&ei_learning_block_config, signal, output_matrix); + if (x != 0) { + return x; + } + + return EIDSP_OK; +} + +#endif // (EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_TFLITE) && (EI_CLASSIFIER_COMPILED == 1) +#endif // _EI_CLASSIFIER_INFERENCING_ENGINE_TFLITE_EON_H_ diff --git a/edgeimpulse/edge-impulse-sdk/classifier/inferencing_engines/tflite_full.h b/edgeimpulse/edge-impulse-sdk/classifier/inferencing_engines/tflite_full.h new file mode 100644 index 0000000..6b2e3cb --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/classifier/inferencing_engines/tflite_full.h @@ -0,0 +1,242 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef _EI_CLASSIFIER_INFERENCING_ENGINE_TFLITE_FULL_H_ +#define _EI_CLASSIFIER_INFERENCING_ENGINE_TFLITE_FULL_H_ + +#if (EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_TFLITE_FULL) + +#include "model-parameters/model_metadata.h" +#include "tflite-model/trained_model_ops_define.h" + +#include <thread> +#include "tensorflow-lite/tensorflow/lite/c/common.h" +#include "tensorflow-lite/tensorflow/lite/interpreter.h" +#include "tensorflow-lite/tensorflow/lite/kernels/register.h" +#include "tensorflow-lite/tensorflow/lite/model.h" +#include "tensorflow-lite/tensorflow/lite/optional_debug_tools.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/tree_ensemble_classifier.h" +#include "edge-impulse-sdk/classifier/ei_fill_result_struct.h" +#include "edge-impulse-sdk/classifier/ei_model_types.h" +#include "edge-impulse-sdk/classifier/inferencing_engines/tflite_helper.h" + +typedef struct { + std::unique_ptr<tflite::FlatBufferModel> model; + std::unique_ptr<tflite::Interpreter> interpreter; +} ei_tflite_state_t; + +std::map<uint32_t, ei_tflite_state_t*> ei_tflite_instances; + +/** + * Construct a tflite interpreter (creates it if needed) + */ +static EI_IMPULSE_ERROR get_interpreter(ei_learning_block_config_tflite_graph_t *block_config, tflite::Interpreter **interpreter) { + // not in the map yet...
+ if (!ei_tflite_instances.count(block_config->block_id)) { + ei_config_tflite_graph_t *graph_config = (ei_config_tflite_graph_t*)block_config->graph_config; + ei_tflite_state_t *new_state = new ei_tflite_state_t(); + + auto new_model = tflite::FlatBufferModel::BuildFromBuffer((const char*)graph_config->model, graph_config->model_size); + new_state->model = std::move(new_model); + if (!new_state->model) { + ei_printf("Failed to build TFLite model from buffer\n"); + return EI_IMPULSE_TFLITE_ERROR; + } + + tflite::ops::builtin::BuiltinOpResolver resolver; +#if EI_CLASSIFIER_HAS_TREE_ENSEMBLE_CLASSIFIER + resolver.AddCustom("TreeEnsembleClassifier", + tflite::ops::custom::Register_TREE_ENSEMBLE_CLASSIFIER()); +#endif + tflite::InterpreterBuilder builder(*new_state->model, resolver); + builder(&new_state->interpreter); + + if (!new_state->interpreter) { + ei_printf("Failed to construct interpreter\n"); + return EI_IMPULSE_TFLITE_ERROR; + } + + if (new_state->interpreter->AllocateTensors() != kTfLiteOk) { + ei_printf("AllocateTensors failed\n"); + return EI_IMPULSE_TFLITE_ERROR; + } + + int hw_thread_count = (int)std::thread::hardware_concurrency(); + hw_thread_count -= 1; // leave one thread free for the other application + if (hw_thread_count < 1) { + hw_thread_count = 1; + } + + if (new_state->interpreter->SetNumThreads(hw_thread_count) != kTfLiteOk) { + ei_printf("SetNumThreads failed\n"); + return EI_IMPULSE_TFLITE_ERROR; + } + + ei_tflite_instances.insert(std::make_pair(block_config->block_id, new_state)); + } + + auto tflite_state = ei_tflite_instances[block_config->block_id]; + *interpreter = tflite_state->interpreter.get(); + return EI_IMPULSE_OK; +} + +extern "C" EI_IMPULSE_ERROR run_nn_inference_from_dsp( + ei_learning_block_config_tflite_graph_t *block_config, + signal_t *signal, + matrix_t *output_matrix) +{ + tflite::Interpreter *interpreter; + auto interpreter_ret = get_interpreter(block_config, &interpreter); + if (interpreter_ret != EI_IMPULSE_OK) { + return interpreter_ret; + } + + TfLiteTensor *input = interpreter->input_tensor(0); + TfLiteTensor *output = interpreter->output_tensor(0); + + if (!input) { + return EI_IMPULSE_INPUT_TENSOR_WAS_NULL; + } + if (!output) { + return EI_IMPULSE_OUTPUT_TENSOR_WAS_NULL; + } + + auto input_res = fill_input_tensor_from_signal(signal, input); + if (input_res != EI_IMPULSE_OK) { + return input_res; + } + + TfLiteStatus status = interpreter->Invoke(); + if (status != kTfLiteOk) { + ei_printf("ERR: interpreter->Invoke() failed with %d\n", status); + return EI_IMPULSE_TFLITE_ERROR; + } + + auto output_res = fill_output_matrix_from_tensor(output, output_matrix); + if (output_res != EI_IMPULSE_OK) { + return output_res; + } + + // on Linux we're not worried about free'ing (for now) + + return EI_IMPULSE_OK; +} + +EI_IMPULSE_ERROR run_nn_inference( + const ei_impulse_t *impulse, + ei_feature_t *fmatrix, + uint32_t* input_block_ids, + uint32_t input_block_ids_size, + ei_impulse_result_t *result, + void *config_ptr, + bool debug = false) +{ + ei_learning_block_config_tflite_graph_t *block_config = (ei_learning_block_config_tflite_graph_t*)config_ptr; + + tflite::Interpreter *interpreter; + auto interpreter_ret = get_interpreter(block_config, &interpreter); + if (interpreter_ret != EI_IMPULSE_OK) { + return interpreter_ret; + } + + TfLiteTensor *input = interpreter->input_tensor(0); + TfLiteTensor *output = interpreter->output_tensor(block_config->output_data_tensor); + + if (!input) { + return EI_IMPULSE_INPUT_TENSOR_WAS_NULL; + } + if (!output) { 
+ return EI_IMPULSE_OUTPUT_TENSOR_WAS_NULL; + } + + size_t mtx_size = impulse->dsp_blocks_size + impulse->learning_blocks_size; + auto input_res = fill_input_tensor_from_matrix(fmatrix, input, input_block_ids, input_block_ids_size, mtx_size); + if (input_res != EI_IMPULSE_OK) { + return input_res; + } + + uint64_t ctx_start_us = ei_read_timer_us(); + + TfLiteStatus status = interpreter->Invoke(); + if (status != kTfLiteOk) { + ei_printf("ERR: interpreter->Invoke() failed with %d\n", status); + return EI_IMPULSE_TFLITE_ERROR; + } + + uint64_t ctx_end_us = ei_read_timer_us(); + + result->timing.classification_us = ctx_end_us - ctx_start_us; + result->timing.classification = (int)(result->timing.classification_us / 1000); + + if (result->copy_output) { + auto output_res = fill_output_matrix_from_tensor(output, fmatrix[impulse->dsp_blocks_size].matrix); + if (output_res != EI_IMPULSE_OK) { + return output_res; + } + } + + if (debug) { + ei_printf("Predictions (time: %d ms.):\n", result->timing.classification); + } + + TfLiteTensor *scores_tensor = interpreter->output_tensor(block_config->output_score_tensor); + TfLiteTensor *labels_tensor = interpreter->output_tensor(block_config->output_labels_tensor); + + EI_IMPULSE_ERROR fill_res = fill_result_struct_from_output_tensor_tflite( + impulse, output, labels_tensor, scores_tensor, result, debug); + + if (fill_res != EI_IMPULSE_OK) { + return fill_res; + } + + // on Linux we're not worried about free'ing (for now) + + return EI_IMPULSE_OK; +} + +__attribute__((unused)) int extract_tflite_features(signal_t *signal, matrix_t *output_matrix, void *config_ptr, const float frequency) { + + ei_dsp_config_tflite_t *dsp_config = (ei_dsp_config_tflite_t*)config_ptr; + + ei_config_tflite_graph_t ei_config_tflite_graph_0 = { + .implementation_version = 1, + .model = dsp_config->model, + .model_size = dsp_config->model_size, + .arena_size = dsp_config->arena_size + }; + + ei_learning_block_config_tflite_graph_t ei_learning_block_config = { + .implementation_version = 1, + .block_id = dsp_config->block_id, + .object_detection = false, + .object_detection_last_layer = EI_CLASSIFIER_LAST_LAYER_UNKNOWN, + .output_data_tensor = 0, + .output_labels_tensor = 255, + .output_score_tensor = 255, + .graph_config = &ei_config_tflite_graph_0 + }; + + auto x = run_nn_inference_from_dsp(&ei_learning_block_config, signal, output_matrix); + if (x != 0) { + return x; + } + + return EIDSP_OK; +} + +#endif // (EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_TFLITE_FULL) +#endif // _EI_CLASSIFIER_INFERENCING_ENGINE_TFLITE_FULL_H_ diff --git a/edgeimpulse/edge-impulse-sdk/classifier/inferencing_engines/tflite_helper.h b/edgeimpulse/edge-impulse-sdk/classifier/inferencing_engines/tflite_helper.h new file mode 100644 index 0000000..cd827f2 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/classifier/inferencing_engines/tflite_helper.h @@ -0,0 +1,422 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef _EI_CLASSIFIER_INFERENCING_ENGINE_TFLITE_HELPER_H_ +#define _EI_CLASSIFIER_INFERENCING_ENGINE_TFLITE_HELPER_H_ + +#include "edge-impulse-sdk/classifier/ei_quantize.h" +#if (EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_TFLITE_FULL) || (EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_TFLITE) + +#if EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_TFLITE_FULL +#include +#include "tensorflow-lite/tensorflow/lite/c/common.h" +#include "tensorflow-lite/tensorflow/lite/interpreter.h" +#include "tensorflow-lite/tensorflow/lite/kernels/register.h" +#include "tensorflow-lite/tensorflow/lite/model.h" +#include "tensorflow-lite/tensorflow/lite/optional_debug_tools.h" +#endif // EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_TFLITE_FULL + +#if EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_TFLITE +#include +#include "edge-impulse-sdk/tensorflow/lite/micro/all_ops_resolver.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_interpreter.h" +#include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h" +#endif // EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_TFLITE + +EI_IMPULSE_ERROR fill_input_tensor_from_matrix( + ei_feature_t *fmatrix, + TfLiteTensor *input, + uint32_t* input_block_ids, + uint32_t input_block_ids_size, + size_t mtx_size +) { + size_t matrix_els = 0; + uint32_t input_idx = 0; + + for (size_t i = 0; i < input_block_ids_size; i++) { +#if EI_CLASSIFIER_SINGLE_FEATURE_INPUT == 0 + size_t cur_mtx = input_block_ids[i]; + ei::matrix_t* matrix = NULL; + + if (!find_mtx_by_idx(fmatrix, &matrix, cur_mtx, mtx_size)) { + ei_printf("ERR: Cannot find matrix with id %zu\n", cur_mtx); + return EI_IMPULSE_INVALID_SIZE; + } +#else + ei::matrix_t* matrix = fmatrix[0].matrix; +#endif + + matrix_els += matrix->rows * matrix->cols; + + switch (input->type) { + case kTfLiteFloat32: { + for (size_t ix = 0; ix < matrix->rows * matrix->cols; ix++) { + input->data.f[input_idx++] = matrix->buffer[ix]; + } + break; + } + case kTfLiteInt8: { + for (size_t ix = 0; ix < matrix->rows * matrix->cols; ix++) { + float val = (float)matrix->buffer[ix]; + input->data.int8[input_idx++] = static_cast<int8_t>( + pre_cast_quantize(val, input->params.scale, input->params.zero_point, true)); + } + break; + } + case kTfLiteUInt8: { + for (size_t ix = 0; ix < matrix->rows * matrix->cols; ix++) { + float val = (float)matrix->buffer[ix]; + input->data.uint8[input_idx++] = static_cast<uint8_t>( + pre_cast_quantize(val, input->params.scale, input->params.zero_point, false)); } + break; + } + default: { + ei_printf("ERR: Cannot handle input type (%d)\n", input->type); + return EI_IMPULSE_INPUT_TENSOR_WAS_NULL; + } + } + } + + if (input->bytes / 4 != matrix_els && input->bytes != matrix_els) { + ei_printf("ERR: input tensor has size %d bytes, but input matrix has size %d bytes\n", + (int)input->bytes, (int)matrix_els); + return EI_IMPULSE_INVALID_SIZE; + } + + return EI_IMPULSE_OK; +} + +EI_IMPULSE_ERROR fill_input_tensor_from_signal( + signal_t *signal, + TfLiteTensor *input +) { + switch (input->type) { + case kTfLiteFloat32: { + if (input->bytes / 4 != signal->total_length) { + ei_printf("ERR: input tensor has size %d, but signal has size %d\n", + (int)input->bytes / 4, (int)signal->total_length); + return EI_IMPULSE_INVALID_SIZE; + } + + auto x = signal->get_data(0, signal->total_length, input->data.f); + if (x != EIDSP_OK) { + return EI_IMPULSE_DSP_ERROR; + } + break; + } + case kTfLiteInt8: + case kTfLiteUInt8: { + // we don't have a good signaling way
here (this is DSP blocks where + // we don't understand the input very well; guess whether this is an RGB input) + bool is_rgb = input->bytes / 3 == signal->total_length; + + if (!is_rgb) { + // otherwise expect an exact match in length + if (input->bytes != signal->total_length) { + ei_printf("ERR: input tensor has size %d, but signal has size %d\n", + (int)input->bytes, (int)signal->total_length); + return EI_IMPULSE_INVALID_SIZE; + } + } + + float scale = input->params.scale; + int zero_point = input->params.zero_point; + if (scale == 0.0f) { // not quantized? + if (is_rgb) { + scale = 0.003921568859368563f; + } + else { + scale = 1.0f; + } + + if (input->type == kTfLiteInt8 && zero_point == 0) { + zero_point = -128; + } + } + + size_t output_ix = 0; + const size_t page_size = 1024; + + // buffered read from the signal + size_t bytes_left = signal->total_length; + for (size_t ix = 0; ix < signal->total_length; ix += page_size) { + size_t elements_to_read = bytes_left > page_size ? page_size : bytes_left; + + matrix_t input_matrix(elements_to_read, 1); + if (!input_matrix.buffer) { + return EI_IMPULSE_ALLOC_FAILED; + } + signal->get_data(ix, elements_to_read, input_matrix.buffer); + + for (size_t jx = 0; jx < elements_to_read; jx++) { + if (is_rgb) { + uint32_t value = static_cast<uint32_t>(input_matrix.buffer[jx]); + + // fast code path + if (scale == 0.003921568859368563f && zero_point == -128) { + int32_t r = static_cast<int32_t>(value >> 16 & 0xff); + int32_t g = static_cast<int32_t>(value >> 8 & 0xff); + int32_t b = static_cast<int32_t>(value & 0xff); + + if (input->type == kTfLiteInt8) { + input->data.int8[output_ix++] = static_cast<int8_t>(r + zero_point); + input->data.int8[output_ix++] = static_cast<int8_t>(g + zero_point); + input->data.int8[output_ix++] = static_cast<int8_t>(b + zero_point); + } + else { + input->data.uint8[output_ix++] = static_cast<uint8_t>(r + zero_point); + input->data.uint8[output_ix++] = static_cast<uint8_t>(g + zero_point); + input->data.uint8[output_ix++] = static_cast<uint8_t>(b + zero_point); + } + } + // slow code path + else { + float r = static_cast<float>(value >> 16 & 0xff) / 255.0f; + float g = static_cast<float>(value >> 8 & 0xff) / 255.0f; + float b = static_cast<float>(value & 0xff) / 255.0f; + + if (input->type == kTfLiteInt8) { + input->data.int8[output_ix++] = static_cast<int8_t>(round(r / scale) + zero_point); + input->data.int8[output_ix++] = static_cast<int8_t>(round(g / scale) + zero_point); + input->data.int8[output_ix++] = static_cast<int8_t>(round(b / scale) + zero_point); + } + else { + input->data.uint8[output_ix++] = static_cast<uint8_t>(round(r / scale) + zero_point); + input->data.uint8[output_ix++] = static_cast<uint8_t>(round(g / scale) + zero_point); + input->data.uint8[output_ix++] = static_cast<uint8_t>(round(b / scale) + zero_point); + } + } + } + else { + float value = input_matrix.buffer[jx]; + if (input->type == kTfLiteInt8) { + input->data.int8[output_ix++] = static_cast<int8_t>(round(value / scale) + zero_point); + } + else { // uint8 + input->data.uint8[output_ix++] = static_cast<uint8_t>((value / scale) + zero_point); + } + } + } + } + break; + } + default: { + ei_printf("ERR: Cannot handle input type (%d)\n", input->type); + return EI_IMPULSE_INPUT_TENSOR_WAS_NULL; + } + } + + return EI_IMPULSE_OK; +} + +EI_IMPULSE_ERROR fill_output_matrix_from_tensor( + TfLiteTensor *output, + matrix_t *output_matrix +) { + const size_t matrix_els = output_matrix->rows * output_matrix->cols; + + switch (output->type) { + case kTfLiteFloat32: { + if (output->bytes / 4 != matrix_els) { + ei_printf("ERR: output tensor has size %d, but input matrix has size %d\n", + (int)output->bytes / 4,
(int)matrix_els); + return EI_IMPULSE_INVALID_SIZE; + } + + memcpy(output_matrix->buffer, output->data.f, output->bytes); + break; + } + case kTfLiteInt8: { + if (output->bytes != matrix_els) { + ei_printf("ERR: output tensor has size %d, but input matrix has has size %d\n", + (int)output->bytes, (int)matrix_els); + return EI_IMPULSE_INVALID_SIZE; + } + + for (size_t ix = 0; ix < output->bytes; ix++) { + float value = static_cast(output->data.int8[ix] - output->params.zero_point) * output->params.scale; + output_matrix->buffer[ix] = value; + } + break; + } + case kTfLiteUInt8: { + if (output->bytes != matrix_els) { + ei_printf("ERR: output tensor has size %d, but input matrix has has size %d\n", + (int)output->bytes, (int)matrix_els); + return EI_IMPULSE_INVALID_SIZE; + } + + for (size_t ix = 0; ix < output->bytes; ix++) { + float value = static_cast(output->data.uint8[ix] - output->params.zero_point) * output->params.scale; + output_matrix->buffer[ix] = value; + } + break; + } + default: { + ei_printf("ERR: Cannot handle output type (%d)\n", output->type); + return EI_IMPULSE_OUTPUT_TENSOR_WAS_NULL; + } + } + + return EI_IMPULSE_OK; +} + +EI_IMPULSE_ERROR fill_result_struct_from_output_tensor_tflite( + const ei_impulse_t *impulse, + TfLiteTensor* output, + TfLiteTensor* labels_tensor, + TfLiteTensor* scores_tensor, + ei_impulse_result_t *result, + bool debug +) { + + EI_IMPULSE_ERROR fill_res = EI_IMPULSE_OK; + + if (impulse->object_detection) { + switch (impulse->object_detection_last_layer) { + case EI_CLASSIFIER_LAST_LAYER_FOMO: { + bool int8_output = output->type == TfLiteType::kTfLiteInt8; + if (int8_output) { + fill_res = fill_result_struct_i8_fomo(impulse, result, output->data.int8, output->params.zero_point, output->params.scale, + impulse->fomo_output_size, impulse->fomo_output_size); + } + else { + fill_res = fill_result_struct_f32_fomo(impulse, result, output->data.f, + impulse->fomo_output_size, impulse->fomo_output_size); + } + break; + } +#if EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_TFLITE_FULL + case EI_CLASSIFIER_LAST_LAYER_SSD: { + if (!scores_tensor->data.f) { + return EI_IMPULSE_SCORE_TENSOR_WAS_NULL; + } + if (!labels_tensor->data.f) { + return EI_IMPULSE_LABEL_TENSOR_WAS_NULL; + } + if (output->type == kTfLiteFloat32) { + fill_res = fill_result_struct_f32_object_detection(impulse, result, output->data.f, scores_tensor->data.f, labels_tensor->data.f, debug); + } + else { + ei_printf("ERR: MobileNet SSD does not support quantized inference\n"); + return EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE; + } + break; + } +#else + case EI_CLASSIFIER_LAST_LAYER_SSD: { + ei_printf("ERR: MobileNet SSD is not supported in EON or TensorFlow Lite Micro\n"); + return EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE; + } +#endif // EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_TFLITE_FULL + case EI_CLASSIFIER_LAST_LAYER_YOLOV5: + case EI_CLASSIFIER_LAST_LAYER_YOLOV5_V5_DRPAI: { + int version = impulse->object_detection_last_layer == EI_CLASSIFIER_LAST_LAYER_YOLOV5_V5_DRPAI ? 
+ 5 : 6; + + if (output->type == kTfLiteInt8) { + fill_res = fill_result_struct_quantized_yolov5( + impulse, + result, + version, + output->data.int8, + output->params.zero_point, + output->params.scale, + impulse->tflite_output_features_count); + } + else if (output->type == kTfLiteUInt8) { + fill_res = fill_result_struct_quantized_yolov5( + impulse, + result, + version, + output->data.uint8, + output->params.zero_point, + output->params.scale, + impulse->tflite_output_features_count); + } + else if (output->type == kTfLiteFloat32) { + fill_res = fill_result_struct_f32_yolov5( + impulse, + result, + version, + output->data.f, + impulse->tflite_output_features_count); + } + else { + ei_printf("ERR: Invalid output type (%d) for YOLOv5 last layer\n", output->type); + return EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE; + } + break; + } + case EI_CLASSIFIER_LAST_LAYER_YOLOX: { + #if EI_CLASSIFIER_TFLITE_OUTPUT_QUANTIZED == 1 + ei_printf("ERR: YOLOX does not support quantized inference\n"); + return EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE; + #else + fill_res = fill_result_struct_f32_yolox( + impulse, + result, + output->data.f, + impulse->tflite_output_features_count); + #endif + break; + } + case EI_CLASSIFIER_LAST_LAYER_YOLOV7: { + #if EI_CLASSIFIER_TFLITE_OUTPUT_QUANTIZED == 1 + ei_printf("ERR: YOLOV7 does not support quantized inference\n"); + return EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE; + #else + size_t output_feature_count = 1; + for (int ix = 0; ix < output->dims->size; ix++) { + output_feature_count *= output->dims->data[ix]; + } + fill_res = fill_result_struct_f32_yolov7( + impulse, + result, + output->data.f, + output_feature_count); + #endif + break; + } + default: { + ei_printf("ERR: Unsupported object detection last layer (%d)\n", + impulse->object_detection_last_layer); + return EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE; + } + } + } + else if (impulse->has_anomaly == 3 && !result->copy_output) + { + fill_res = fill_result_visual_ad_struct_f32(impulse, result, output->data.f, debug); + } + // if we copy the output, we don't need to process it as classification + else if (!result->copy_output) + { + bool int8_output = output->type == TfLiteType::kTfLiteInt8; + if (int8_output) { + fill_res = fill_result_struct_i8(impulse, result, output->data.int8, output->params.zero_point, output->params.scale, debug); + } + else { + fill_res = fill_result_struct_f32(impulse, result, output->data.f, debug); + } + } + + return fill_res; +} +#endif // #if (EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_TFLITE_FULL) || (EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_TFLITE) + +#endif // _EI_CLASSIFIER_INFERENCING_ENGINE_TFLITE_HELPER_H_ diff --git a/edgeimpulse/edge-impulse-sdk/classifier/inferencing_engines/tflite_micro.h b/edgeimpulse/edge-impulse-sdk/classifier/inferencing_engines/tflite_micro.h new file mode 100644 index 0000000..2fc5abe --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/classifier/inferencing_engines/tflite_micro.h @@ -0,0 +1,466 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. 
See the License for the specific language + * governing permissions and limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef _EI_CLASSIFIER_INFERENCING_ENGINE_TFLITE_MICRO_H_ +#define _EI_CLASSIFIER_INFERENCING_ENGINE_TFLITE_MICRO_H_ + +#if (EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_TFLITE) && (EI_CLASSIFIER_COMPILED != 1) + +#include "model-parameters/model_metadata.h" + +#include +#include "edge-impulse-sdk/tensorflow/lite/micro/all_ops_resolver.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_interpreter.h" +#include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h" +#include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated_full.h" +#include "edge-impulse-sdk/classifier/ei_aligned_malloc.h" +#include "edge-impulse-sdk/classifier/ei_fill_result_struct.h" +#include "edge-impulse-sdk/classifier/ei_model_types.h" +#include "edge-impulse-sdk/classifier/inferencing_engines/tflite_helper.h" + +#if defined(EI_CLASSIFIER_HAS_TFLITE_OPS_RESOLVER) && EI_CLASSIFIER_HAS_TFLITE_OPS_RESOLVER == 1 +#include "tflite-model/tflite-resolver.h" +#endif // EI_CLASSIFIER_HAS_TFLITE_OPS_RESOLVER + +#ifdef EI_CLASSIFIER_ALLOCATION_STATIC +#if defined __GNUC__ +#define ALIGN(X) __attribute__((aligned(X))) +#elif defined _MSC_VER +#define ALIGN(X) __declspec(align(X)) +#elif defined __TASKING__ +#define ALIGN(X) __align(X) +#endif +#endif + +/** + * Setup the TFLite runtime + * + * @param ctx_start_us Pointer to the start time + * @param input Pointer to input tensor + * @param output Pointer to output tensor + * @param micro_interpreter Pointer to interpreter (for non-compiled models) + * @param micro_tensor_arena Pointer to the arena that will be allocated + * + * @return EI_IMPULSE_OK if successful + */ +static EI_IMPULSE_ERROR inference_tflite_setup( + ei_learning_block_config_tflite_graph_t *block_config, + uint64_t *ctx_start_us, + TfLiteTensor** input, + TfLiteTensor** output, + TfLiteTensor** output_labels, + TfLiteTensor** output_scores, + tflite::MicroInterpreter** micro_interpreter, + ei_unique_ptr_t& p_tensor_arena) { + + *ctx_start_us = ei_read_timer_us(); + + ei_config_tflite_graph_t *graph_config = (ei_config_tflite_graph_t*)block_config->graph_config; + +#ifdef EI_CLASSIFIER_ALLOCATION_STATIC + // Assign a no-op lambda to the "free" function in case of static arena + static uint8_t tensor_arena[EI_CLASSIFIER_TFLITE_ARENA_SIZE] ALIGN(16); + p_tensor_arena = ei_unique_ptr_t(tensor_arena, [](void*){}); +#else + // Create an area of memory to use for input, output, and intermediate arrays. + uint8_t *tensor_arena = (uint8_t*)ei_aligned_calloc(16, graph_config->arena_size); + if (tensor_arena == NULL) { + ei_printf("Failed to allocate TFLite arena (%zu bytes)\n", graph_config->arena_size); + return EI_IMPULSE_TFLITE_ARENA_ALLOC_FAILED; + } + p_tensor_arena = ei_unique_ptr_t(tensor_arena, ei_aligned_free); +#endif + + static bool tflite_first_run = true; + static uint8_t *model_arr = NULL; + + if (model_arr != graph_config->model) { + tflite_first_run = true; + model_arr = (uint8_t*)graph_config->model; + } + + static const tflite::Model* model = nullptr; + + // ====== + // Initialization code start + // This part can be run once, but that would require the TFLite arena + // to be allocated at all times, which is not ideal (e.g. when doing MFCC) + // ====== + if (tflite_first_run) { + // Map the model into a usable data structure. This doesn't involve any + // copying or parsing, it's a very lightweight operation. 
+ model = tflite::GetModel(graph_config->model); + if (model->version() != TFLITE_SCHEMA_VERSION) { + ei_printf( + "Model provided is schema version %d not equal " + "to supported version %d.", + model->version(), TFLITE_SCHEMA_VERSION); + return EI_IMPULSE_TFLITE_ERROR; + } + tflite_first_run = false; + } + +#ifdef EI_TFLITE_RESOLVER + EI_TFLITE_RESOLVER +#else + static tflite::AllOpsResolver resolver; // needs static to match the life of the interpreter +#endif + + // Build an interpreter to run the model with. + tflite::MicroInterpreter *interpreter = new tflite::MicroInterpreter( + model, resolver, tensor_arena, graph_config->arena_size); + + *micro_interpreter = interpreter; + + // Allocate memory from the tensor_arena for the model's tensors. + TfLiteStatus allocate_status = interpreter->AllocateTensors(true); + if (allocate_status != kTfLiteOk) { + ei_printf("AllocateTensors() failed"); + return EI_IMPULSE_TFLITE_ERROR; + } + + // Obtain pointers to the model's input and output tensors. + *input = interpreter->input(0); + *output = interpreter->output(block_config->output_data_tensor); + + if (block_config->object_detection_last_layer == EI_CLASSIFIER_LAST_LAYER_SSD) { + *output_scores = interpreter->output(block_config->output_score_tensor); + *output_labels = interpreter->output(block_config->output_labels_tensor); + } + + if (tflite_first_run) { + tflite_first_run = false; + } + + return EI_IMPULSE_OK; +} + +/** + * Run TFLite model + * + * @param ctx_start_us Start time of the setup function (see above) + * @param output Output tensor + * @param interpreter TFLite interpreter (non-compiled models) + * @param tensor_arena Allocated arena (will be freed) + * @param result Struct for results + * @param debug Whether to print debug info + * + * @return EI_IMPULSE_OK if successful + */ +static EI_IMPULSE_ERROR inference_tflite_run( + const ei_impulse_t *impulse, + ei_learning_block_config_tflite_graph_t *config, + uint64_t ctx_start_us, + TfLiteTensor* output, + TfLiteTensor* labels_tensor, + TfLiteTensor* scores_tensor, + tflite::MicroInterpreter* interpreter, + uint8_t* tensor_arena, + ei_impulse_result_t *result, + bool debug) { + + // Run inference, and report any error + TfLiteStatus invoke_status = interpreter->Invoke(); + if (invoke_status != kTfLiteOk) { + delete interpreter; + ei_printf("Invoke failed (%d)\n", invoke_status); + return EI_IMPULSE_TFLITE_ERROR; + } + + uint64_t ctx_end_us = ei_read_timer_us(); + + result->timing.classification_us = ctx_end_us - ctx_start_us; + result->timing.classification = (int)(result->timing.classification_us / 1000); + + // Read the predicted y value from the model's output tensor + if (debug) { + ei_printf("Predictions (time: %d ms.):\n", result->timing.classification); + } + + EI_IMPULSE_ERROR fill_res = fill_result_struct_from_output_tensor_tflite( + impulse, output, labels_tensor, scores_tensor, result, debug); + + delete interpreter; + + if (fill_res != EI_IMPULSE_OK) { + return fill_res; + } + + if (ei_run_impulse_check_canceled() == EI_IMPULSE_CANCELED) { + return EI_IMPULSE_CANCELED; + } + + return EI_IMPULSE_OK; +} + + +/** + * @brief Do neural network inferencing over a signal (from the DSP) + * + * @param fmatrix Processed matrix + * @param result Output classifier results + * @param[in] debug Debug output enable + * + * @return The ei impulse error. 
+ */ +EI_IMPULSE_ERROR run_nn_inference_from_dsp( + ei_learning_block_config_tflite_graph_t *config, + signal_t *signal, + matrix_t *output_matrix) +{ + TfLiteTensor* input; + TfLiteTensor* output; + TfLiteTensor* output_scores; + TfLiteTensor* output_labels; + uint64_t ctx_start_us = ei_read_timer_us(); + ei_unique_ptr_t p_tensor_arena(nullptr, ei_aligned_free); + + tflite::MicroInterpreter* interpreter; + EI_IMPULSE_ERROR init_res = inference_tflite_setup( + config, + &ctx_start_us, + &input, &output, + &output_labels, + &output_scores, + &interpreter, p_tensor_arena); + + if (init_res != EI_IMPULSE_OK) { + return init_res; + } + + auto input_res = fill_input_tensor_from_signal(signal, input); + if (input_res != EI_IMPULSE_OK) { + return input_res; + } + + // Run inference, and report any error + TfLiteStatus invoke_status = interpreter->Invoke(); + if (invoke_status != kTfLiteOk) { + ei_printf("Invoke failed (%d)\n", invoke_status); + return EI_IMPULSE_TFLITE_ERROR; + } + + auto output_res = fill_output_matrix_from_tensor(output, output_matrix); + if (output_res != EI_IMPULSE_OK) { + return output_res; + } + + delete interpreter; + + return EI_IMPULSE_OK; +} + +/** + * @brief Do neural network inferencing over the processed feature matrix + * + * @param fmatrix Processed matrix + * @param result Output classifier results + * @param[in] debug Debug output enable + * + * @return The ei impulse error. + */ +EI_IMPULSE_ERROR run_nn_inference( + const ei_impulse_t *impulse, + ei_feature_t *fmatrix, + uint32_t* input_block_ids, + uint32_t input_block_ids_size, + ei_impulse_result_t *result, + void *config_ptr, + bool debug = false) +{ + ei_learning_block_config_tflite_graph_t *block_config = (ei_learning_block_config_tflite_graph_t*)config_ptr; + + TfLiteTensor* input; + TfLiteTensor* output; + TfLiteTensor* output_scores; + TfLiteTensor* output_labels; + uint64_t ctx_start_us = ei_read_timer_us(); + ei_unique_ptr_t p_tensor_arena(nullptr, ei_aligned_free); + + tflite::MicroInterpreter* interpreter; + EI_IMPULSE_ERROR init_res = inference_tflite_setup( + block_config, + &ctx_start_us, + &input, &output, + &output_labels, + &output_scores, + &interpreter, + p_tensor_arena); + + if (init_res != EI_IMPULSE_OK) { + return init_res; + } + + uint8_t* tensor_arena = static_cast(p_tensor_arena.get()); + + size_t mtx_size = impulse->dsp_blocks_size + impulse->learning_blocks_size; + auto input_res = fill_input_tensor_from_matrix(fmatrix, input, input_block_ids, input_block_ids_size, mtx_size); + if (input_res != EI_IMPULSE_OK) { + return input_res; + } + + EI_IMPULSE_ERROR run_res = inference_tflite_run( + impulse, + block_config, + ctx_start_us, + output, + output_labels, + output_scores, + interpreter, tensor_arena, result, debug); + + if (result->copy_output) { + auto output_res = fill_output_matrix_from_tensor(output, fmatrix[impulse->dsp_blocks_size].matrix); + if (output_res != EI_IMPULSE_OK) { + return output_res; + } + } + + result->timing.classification_us = ei_read_timer_us() - ctx_start_us; + + if (run_res != EI_IMPULSE_OK) { + return run_res; + } + + return EI_IMPULSE_OK; +} + +#if EI_CLASSIFIER_QUANTIZATION_ENABLED == 1 +/** + * Special function to run the classifier on images, only works on TFLite models (either interpreter or EON or for tensaiflow) + * that allocates a lot less memory by quantizing in place. This only works if 'can_run_classifier_image_quantized' + * returns EI_IMPULSE_OK. 
+ */ +EI_IMPULSE_ERROR run_nn_inference_image_quantized( + const ei_impulse_t *impulse, + signal_t *signal, + ei_impulse_result_t *result, + void *config_ptr, + bool debug = false) +{ + ei_learning_block_config_tflite_graph_t *block_config = (ei_learning_block_config_tflite_graph_t*)config_ptr; + + memset(result, 0, sizeof(ei_impulse_result_t)); + + uint64_t ctx_start_us; + TfLiteTensor* input; + TfLiteTensor* output; + TfLiteTensor* output_scores; + TfLiteTensor* output_labels; + ei_unique_ptr_t p_tensor_arena(nullptr, ei_aligned_free); + + tflite::MicroInterpreter* interpreter; + EI_IMPULSE_ERROR init_res = inference_tflite_setup( + block_config, + &ctx_start_us, + &input, &output, + &output_labels, + &output_scores, + &interpreter, + p_tensor_arena); + + if (init_res != EI_IMPULSE_OK) { + return init_res; + } + + if (input->type != TfLiteType::kTfLiteInt8 && input->type != TfLiteType::kTfLiteUInt8) { + return EI_IMPULSE_ONLY_SUPPORTED_FOR_IMAGES; + } + + uint64_t dsp_start_us = ei_read_timer_us(); + + // features matrix maps around the input tensor to not allocate any memory + ei::matrix_i8_t features_matrix(1, impulse->nn_input_frame_size, input->data.int8); + + // run DSP process and quantize automatically + int ret = extract_image_features_quantized(signal, &features_matrix, impulse->dsp_blocks[0].config, input->params.scale, input->params.zero_point, + impulse->frequency, impulse->learning_blocks[0].image_scaling); + if (ret != EIDSP_OK) { + ei_printf("ERR: Failed to run DSP process (%d)\n", ret); + return EI_IMPULSE_DSP_ERROR; + } + + if (ei_run_impulse_check_canceled() == EI_IMPULSE_CANCELED) { + return EI_IMPULSE_CANCELED; + } + + result->timing.dsp_us = ei_read_timer_us() - dsp_start_us; + result->timing.dsp = (int)(result->timing.dsp_us / 1000); + + if (debug) { + ei_printf("Features (%d ms.): ", result->timing.dsp); + for (size_t ix = 0; ix < features_matrix.cols; ix++) { + ei_printf_float((features_matrix.buffer[ix] - input->params.zero_point) * input->params.scale); + ei_printf(" "); + } + ei_printf("\n"); + } + + ctx_start_us = ei_read_timer_us(); + + EI_IMPULSE_ERROR run_res = inference_tflite_run(impulse, + block_config, + ctx_start_us, + output, + output_labels, + output_scores, + interpreter, + static_cast(p_tensor_arena.get()), + result, debug); + + if (run_res != EI_IMPULSE_OK) { + return run_res; + } + + result->timing.classification_us = ei_read_timer_us() - ctx_start_us; + + return EI_IMPULSE_OK; +} +#endif // EI_CLASSIFIER_QUANTIZATION_ENABLED == 1 + +__attribute__((unused)) int extract_tflite_features(signal_t *signal, matrix_t *output_matrix, void *config_ptr, const float frequency) { + ei_dsp_config_tflite_t *dsp_config = (ei_dsp_config_tflite_t*)config_ptr; + + ei_config_tflite_graph_t ei_config_tflite_graph_0 = { + .implementation_version = 1, + .model = dsp_config->model, + .model_size = dsp_config->model_size, + .arena_size = dsp_config->arena_size + }; + + ei_learning_block_config_tflite_graph_t ei_learning_block_config = { + .implementation_version = 1, + .block_id = dsp_config->block_id, + .object_detection = false, + .object_detection_last_layer = EI_CLASSIFIER_LAST_LAYER_UNKNOWN, + .output_data_tensor = 0, + .output_labels_tensor = 255, + .output_score_tensor = 255, + .quantized = 0, + .compiled = 0, + .graph_config = &ei_config_tflite_graph_0 + }; + + auto x = run_nn_inference_from_dsp(&ei_learning_block_config, signal, output_matrix); + if (x != 0) { + return x; + } + + return EIDSP_OK; +} + +#endif // (EI_CLASSIFIER_INFERENCING_ENGINE == 
EI_CLASSIFIER_TFLITE) && (EI_CLASSIFIER_COMPILED != 1) +#endif // _EI_CLASSIFIER_INFERENCING_ENGINE_TFLITE_MICRO_H_ diff --git a/edgeimpulse/edge-impulse-sdk/classifier/inferencing_engines/tflite_tidl.h b/edgeimpulse/edge-impulse-sdk/classifier/inferencing_engines/tflite_tidl.h new file mode 100644 index 0000000..fa5aa6e --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/classifier/inferencing_engines/tflite_tidl.h @@ -0,0 +1,365 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef _EI_CLASSIFIER_INFERENCING_ENGINE_TFLITE_TIDL_H_ +#define _EI_CLASSIFIER_INFERENCING_ENGINE_TFLITE_TIDL_H_ + +#if (EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_TFLITE_TIDL) + +#include "model-parameters/model_metadata.h" + +#include +#include "tensorflow-lite/tensorflow/lite/c/common.h" +#include "tensorflow-lite/tensorflow/lite/interpreter.h" +#include "tensorflow-lite/tensorflow/lite/kernels/register.h" +#include "tensorflow-lite/tensorflow/lite/model.h" +#include "tensorflow-lite/tensorflow/lite/optional_debug_tools.h" +#include "edge-impulse-sdk/classifier/ei_fill_result_struct.h" +#include "edge-impulse-sdk/classifier/ei_model_types.h" + +#include "itidl_rt.h" +#if ARMNN_ENABLE +#include "DelegateOptions.hpp" +#include "armnn_delegate.hpp" +#endif + +#include + +// old models don't have this, add this here +#ifndef EI_CLASSIFIER_TFLITE_OUTPUT_DATA_TENSOR +#define EI_CLASSIFIER_TFLITE_OUTPUT_DATA_TENSOR 0 +#endif // not defined EI_CLASSIFIER_TFLITE_OUTPUT_DATA_TENSOR + +#include "tflite-model/tidl-model.h" +#include "utils/model_header_utils.h" + +void *in_ptrs[16] = {NULL}; +void *out_ptrs[16] = {NULL}; + +EI_IMPULSE_ERROR run_nn_inference( + const ei_impulse_t *impulse, + ei_feature_t *fmatrix, + uint32_t* input_block_ids, + uint32_t input_block_ids_size, + ei_impulse_result_t *result, + void *config_ptr, + bool debug) +{ + ei_learning_block_config_tflite_graph_t *config = (ei_learning_block_config_tflite_graph_t*)config_ptr; + + static std::unique_ptr model = nullptr; + static std::unique_ptr interpreter = nullptr; + static std::vector inputs; + static std::vector outputs; + + if (!model) { + + std::string proj_artifacts_path = "/tmp/" + std::string(impulse->project_name) + "-" + std::to_string(impulse->project_id) + "-" + std::to_string(impulse->deploy_version); + + create_project_if_not_exists(proj_artifacts_path, model_h_files, model_h_files_len); + + std::string proj_model_path = proj_artifacts_path + "/trained.tflite"; + + model = tflite::FlatBufferModel::BuildFromFile(proj_model_path.c_str()); + if (!model) { + ei_printf("Failed to build TFLite model from buffer\n"); + return EI_IMPULSE_TFLITE_ERROR; + } + + tflite::ops::builtin::BuiltinOpResolver resolver; + tflite::InterpreterBuilder builder(*model, resolver); + builder(&interpreter); + + if (!interpreter) { + ei_printf("Failed to construct interpreter\n"); + return EI_IMPULSE_TFLITE_ERROR; + } + + /* This part creates the dlg_ptr */ + 
ei_printf("TIDL delegate mode\n"); + typedef TfLiteDelegate *(*tflite_plugin_create_delegate)(char **, char **, size_t, void (*report_error)(const char *)); + tflite_plugin_create_delegate tflite_plugin_dlg_create; + char *keys[] = {(char *)"artifacts_folder", (char *)"num_tidl_subgraphs", (char *)"debug_level"}; + char *values[] = {(char *)proj_artifacts_path.c_str(), (char *)"16", (char *)"0"}; + void *lib = dlopen("libtidl_tfl_delegate.so", RTLD_NOW); + assert(lib); + tflite_plugin_dlg_create = (tflite_plugin_create_delegate)dlsym(lib, "tflite_plugin_create_delegate"); + TfLiteDelegate *dlg_ptr = tflite_plugin_dlg_create(keys, values, 3, NULL); + interpreter->ModifyGraphWithDelegate(dlg_ptr); + ei_printf("ModifyGraphWithDelegate - Done \n"); + + + if (interpreter->AllocateTensors() != kTfLiteOk) { + ei_printf("AllocateTensors failed\n"); + return EI_IMPULSE_TFLITE_ERROR; + } + + int hw_thread_count = (int)std::thread::hardware_concurrency(); + hw_thread_count -= 1; // leave one thread free for the other application + if (hw_thread_count < 1) { + hw_thread_count = 1; + } + + if (interpreter->SetNumThreads(hw_thread_count) != kTfLiteOk) { + ei_printf("SetNumThreads failed\n"); + return EI_IMPULSE_TFLITE_ERROR; + } + } + + inputs = interpreter->inputs(); + outputs = interpreter->outputs(); + + ei_printf("device mem enabled\n"); + for (uint32_t i = 0; i < inputs.size(); i++) + { + const TfLiteTensor *tensor = interpreter->input_tensor(i); + in_ptrs[i] = TIDLRT_allocSharedMem(tflite::kDefaultTensorAlignment, tensor->bytes); + if (in_ptrs[i] == NULL) + { + ei_printf("Could not allocate Memory for input: %s\n", tensor->name); + } + interpreter->SetCustomAllocationForTensor(inputs[i], {in_ptrs[i], tensor->bytes}); + } + for (uint32_t i = 0; i < outputs.size(); i++) + { + const TfLiteTensor *tensor = interpreter->output_tensor(i); + out_ptrs[i] = TIDLRT_allocSharedMem(tflite::kDefaultTensorAlignment, tensor->bytes); + if (out_ptrs[i] == NULL) + { + ei_printf("Could not allocate Memory for ouput: %s\n", tensor->name); + } + interpreter->SetCustomAllocationForTensor(outputs[i], {out_ptrs[i], tensor->bytes}); + } + + // Obtain pointers to the model's input and output tensors. 
+#if EI_CLASSIFIER_QUANTIZATION_ENABLED == 1 + int8_t* input = interpreter->typed_input_tensor(0); +#else + float* input = interpreter->typed_input_tensor(0); +#endif + + if (!input) { + return EI_IMPULSE_INPUT_TENSOR_WAS_NULL; + } + + size_t mtx_size = impulse->dsp_blocks_size + impulse->learning_blocks_size; + + for (size_t i = 0; i < input_block_ids_size; i++) { + uint16_t cur_mtx = input_block_ids[i]; +#if EI_CLASSIFIER_SINGLE_FEATURE_INPUT == 0 + ei::matrix_t* matrix = NULL; + + if (!find_mtx_by_idx(fmatrix, &matrix, cur_mtx, mtx_size)) { + ei_printf("ERR: Cannot find matrix with id %zu\n", cur_mtx); + return EI_IMPULSE_INVALID_SIZE; + } +#else + ei::matrix_t* matrix = fmatrix[0].matrix; +#endif + + for (uint32_t ix = 0; ix < matrix->rows * matrix->cols; ix++) { + if (impulse->object_detection) { +#if EI_CLASSIFIER_QUANTIZATION_ENABLED == 1 + float pixel = (float)matrix->buffer[ix]; + input[ix] = static_cast((pixel / input->tflite_input_scale) + input->tflite_input_zeropoint); +#else + input[ix] = matrix->buffer[ix]; +#endif + } + else { +#if EI_CLASSIFIER_QUANTIZATION_ENABLED == 1 + input[ix] = static_cast(round(matrix->buffer[ix] / input->tflite_input_scale) + input->tflite_input_zeropoint); +#else + input[ix] = matrix->buffer[ix]; +#endif + } + } + } + + uint64_t ctx_start_us = ei_read_timer_us(); + + interpreter->Invoke(); + + uint64_t ctx_end_us = ei_read_timer_us(); + + result->timing.classification_us = ctx_end_us - ctx_start_us; + result->timing.classification = (int)(result->timing.classification_us / 1000); + +#if EI_CLASSIFIER_TFLITE_OUTPUT_QUANTIZED == 1 + int8_t* out_data = interpreter->typed_output_tensor(config->output_data_tensor); +#else + float* out_data = interpreter->typed_output_tensor(config->output_data_tensor); +#endif + + if (debug) { + ei_printf("LOG_INFO tensors size: %ld \n", interpreter->tensors_size()); + ei_printf("LOG_INFO nodes size: %ld\n", interpreter->nodes_size()); + ei_printf("LOG_INFO number of inputs: %ld\n", inputs.size()); + ei_printf("LOG_INFO number of outputs: %ld\n", outputs.size()); + ei_printf("LOG_INFO input(0) name: %s\n", interpreter->GetInputName(0)); + + int t_size = interpreter->tensors_size(); + for (int i = 0; i < t_size; i++) + { + if (interpreter->tensor(i)->name) { + ei_printf("LOG_INFO %d: %s,%ld,%d,%f,%d,size(", i, interpreter->tensor(i)->name, + interpreter->tensor(i)->bytes, + interpreter->tensor(i)->type, + interpreter->tensor(i)->params.scale, + interpreter->tensor(i)->params.zero_point); + + for (int k=0; k < interpreter->tensor(i)->dims->size; k++) { + if (k == interpreter->tensor(i)->dims->size - 1) { + ei_printf("%d", interpreter->tensor(i)->dims->data[k]); + } else { + ei_printf("%d,", interpreter->tensor(i)->dims->data[k]); + } + } + ei_printf(")\n"); + } + + } + } + + if (!out_data) { + return EI_IMPULSE_OUTPUT_TENSOR_WAS_NULL; + } + + if (debug) { + ei_printf("Predictions (time: %d ms.):\n", result->timing.classification); + } + + EI_IMPULSE_ERROR fill_res = EI_IMPULSE_OK; + + if (impulse->object_detection) { + switch (impulse->object_detection_last_layer) { + case EI_CLASSIFIER_LAST_LAYER_FOMO: { + #if EI_CLASSIFIER_TFLITE_OUTPUT_QUANTIZED == 1 + fill_res = fill_result_struct_i8_fomo(impulse, result, out_data, out_data->tflite_output_zeropoint, out_data->tflite_output_scale, + impulse->fomo_output_size, impulse->fomo_output_size); + #else + fill_res = fill_result_struct_f32_fomo(impulse, result, out_data, + impulse->fomo_output_size, impulse->fomo_output_size); + #endif + break; + } + case 
EI_CLASSIFIER_LAST_LAYER_SSD: { + float *scores_tensor = interpreter->typed_output_tensor(config->output_score_tensor); + float *label_tensor = interpreter->typed_output_tensor(config->output_labels_tensor); + if (!scores_tensor) { + return EI_IMPULSE_SCORE_TENSOR_WAS_NULL; + } + if (!label_tensor) { + return EI_IMPULSE_LABEL_TENSOR_WAS_NULL; + } + #if EI_CLASSIFIER_TFLITE_OUTPUT_QUANTIZED == 1 + ei_printf("ERR: MobileNet SSD does not support quantized inference\n"); + return EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE; + #else + fill_res = fill_result_struct_f32_object_detection(impulse, result, out_data, scores_tensor, label_tensor, debug); + #endif + break; + } + case EI_CLASSIFIER_LAST_LAYER_YOLOV5: + case EI_CLASSIFIER_LAST_LAYER_YOLOV5_V5_DRPAI: { + #if EI_CLASSIFIER_TFLITE_OUTPUT_QUANTIZED == 1 + ei_printf("ERR: YOLOv5 does not support quantized inference\n"); + return EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE; + #else + int version = impulse->object_detection_last_layer == EI_CLASSIFIER_LAST_LAYER_YOLOV5_V5_DRPAI ? + 5 : 6; + fill_res = fill_result_struct_f32_yolov5( + impulse, + result, + version, + out_data, + impulse->tflite_output_features_count); + #endif + break; + } + case EI_CLASSIFIER_LAST_LAYER_YOLOX: { + #if EI_CLASSIFIER_TFLITE_OUTPUT_QUANTIZED == 1 + ei_printf("ERR: YOLOX does not support quantized inference\n"); + return EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE; + #else + fill_res = fill_result_struct_f32_yolox( + impulse, + result, + out_data, + impulse->tflite_output_features_count); + #endif + break; + } + case EI_CLASSIFIER_LAST_LAYER_YOLOV7: { + #if EI_CLASSIFIER_TFLITE_OUTPUT_QUANTIZED == 1 + ei_printf("ERR: YOLOV7 does not support quantized inference\n"); + return EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE; + #else + TfLiteTensor *output = interpreter->output_tensor(0); + size_t output_feature_count = 1; + for (int ix = 0; ix < output->dims->size; ix++) { + output_feature_count *= output->dims->data[ix]; + } + fill_res = fill_result_struct_f32_yolov7( + impulse, + result, + output->data.f, + output_feature_count); + #endif + break; + } + default: { + ei_printf("ERR: Unsupported object detection last layer (%d)\n", + impulse->object_detection_last_layer); + break; + } + } + } + else { +#if EI_CLASSIFIER_TFLITE_OUTPUT_QUANTIZED == 1 + fill_res = fill_result_struct_i8(impulse, result, out_data, out_data->tflite_output_zeropoint, out_data->tflite_output_scale, debug); +#else + fill_res = fill_result_struct_f32(impulse, result, out_data, debug); +#endif + } + + for (uint32_t i = 0; i < inputs.size(); i++) + { + if (in_ptrs[i]) + { + TIDLRT_freeSharedMem(in_ptrs[i]); + } + } + for (uint32_t i = 0; i < outputs.size(); i++) + { + if (out_ptrs[i]) + { + TIDLRT_freeSharedMem(out_ptrs[i]); + } + } + + if (fill_res != EI_IMPULSE_OK) { + return fill_res; + } + + // on Linux we're not worried about free'ing (for now) + + return EI_IMPULSE_OK; +} + +#endif // (EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_TFLITE_TIDL) +#endif // _EI_CLASSIFIER_INFERENCING_ENGINE_TFLITE_TIDL_H_ diff --git a/edgeimpulse/edge-impulse-sdk/dsp/README.md b/edgeimpulse/edge-impulse-sdk/dsp/README.md new file mode 100644 index 0000000..6bf484f --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/dsp/README.md @@ -0,0 +1,3 @@ +See notes in the various block folders in studio/dsp-pipeline + +studio/dsp-pipeline/mfcc/README.md \ No newline at end of file diff --git a/edgeimpulse/edge-impulse-sdk/dsp/config.hpp b/edgeimpulse/edge-impulse-sdk/dsp/config.hpp new file mode 100644 index 0000000..86f638c --- 
/dev/null +++ b/edgeimpulse/edge-impulse-sdk/dsp/config.hpp @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef _EIDSP_CPP_CONFIG_H_ +#define _EIDSP_CPP_CONFIG_H_ + +// clang-format off +#ifndef EIDSP_USE_CMSIS_DSP // __ARM_ARCH_PROFILE is a predefine of arm-gcc. __TARGET_* is armcc +#if defined(__MBED__) || __ARM_ARCH_PROFILE == 'M' || defined(__TARGET_CPU_CORTEX_M0) || defined(__TARGET_CPU_CORTEX_M0PLUS) || defined(__TARGET_CPU_CORTEX_M3) || defined(__TARGET_CPU_CORTEX_M4) || defined(__TARGET_CPU_CORTEX_M7) || defined(USE_HAL_DRIVER) || defined(ARDUINO_NRF52_ADAFRUIT) + // Mbed OS versions before 5.7 are not based on CMSIS5, disable CMSIS-DSP and CMSIS-NN instructions + #if defined(__MBED__) + #include "mbed_version.h" + #if (MBED_VERSION < MBED_ENCODE_VERSION((5), (7), (0))) + #define EIDSP_USE_CMSIS_DSP 0 + #else + #define EIDSP_USE_CMSIS_DSP 1 + #endif // Mbed OS 5.7 version check + + // Arduino on Mbed targets prior to Mbed OS 6.0.0 ship their own CMSIS-DSP sources + #if defined(ARDUINO) && (MBED_VERSION < MBED_ENCODE_VERSION((6), (0), (0))) + #define EIDSP_LOAD_CMSIS_DSP_SOURCES 0 + #else + #define EIDSP_LOAD_CMSIS_DSP_SOURCES 1 + #endif // Mbed OS 6.0 version check + #else + #define EIDSP_USE_CMSIS_DSP 1 + #define EIDSP_LOAD_CMSIS_DSP_SOURCES 1 + #endif +#else + #define EIDSP_USE_CMSIS_DSP 0 +#endif // Mbed / ARM Core check +#endif // ifndef EIDSP_USE_CMSIS_DSP + +#if EIDSP_USE_CMSIS_DSP == 1 +#define EIDSP_i32 int32_t +#define EIDSP_i16 int16_t +#define EIDSP_i8 q7_t +#define ARM_MATH_ROUNDING 1 +#else +#define EIDSP_i32 int32_t +#define EIDSP_i16 int16_t +#define EIDSP_i8 int8_t +#endif // EIDSP_USE_CMSIS_DSP + +#ifndef EIDSP_USE_ASSERTS +#define EIDSP_USE_ASSERTS 0 +#endif // EIDSP_USE_ASSERTS + +#if EIDSP_USE_ASSERTS == 1 +#include +#define EIDSP_ERR(err_code) ei_printf("ERR: %d (%s)\n", err_code, #err_code); assert(false) +#else // EIDSP_USE_ASSERTS == 0 +#define EIDSP_ERR(err_code) return(err_code) +#endif + +// To save memory you can quantize the filterbanks, +// this has an effect on runtime speed as CMSIS-DSP does not have optimized instructions +// for q7 matrix multiplication and matrix transformation... 
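+// All of the EIDSP_* options below are wrapped in #ifndef guards, so any
+// definition made before this header is included takes precedence, e.g. via
+// compiler flags such as -DEIDSP_QUANTIZE_FILTERBANK=0 -DEIDSP_TRACK_ALLOCATIONS=1
+// (illustrative flags; adjust to your toolchain).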
+#ifndef EIDSP_QUANTIZE_FILTERBANK +#define EIDSP_QUANTIZE_FILTERBANK 1 +#endif // EIDSP_QUANTIZE_FILTERBANK + +// prints buffer allocations to stdout, useful when debugging +#ifndef EIDSP_TRACK_ALLOCATIONS +#define EIDSP_TRACK_ALLOCATIONS 0 +#endif // EIDSP_TRACK_ALLOCATIONS + +// set EIDSP_TRACK_ALLOCATIONS=1 and EIDSP_PRINT_ALLOCATIONS=0 +// to track but not print allocations +#ifndef EIDSP_PRINT_ALLOCATIONS +#define EIDSP_PRINT_ALLOCATIONS 1 +#endif + +#ifndef EIDSP_SIGNAL_C_FN_POINTER +#define EIDSP_SIGNAL_C_FN_POINTER 0 +#endif // EIDSP_SIGNAL_C_FN_POINTER + +// clang-format on +#endif // _EIDSP_CPP_CONFIG_H_ diff --git a/edgeimpulse/edge-impulse-sdk/dsp/dct/.clang-format b/edgeimpulse/edge-impulse-sdk/dsp/dct/.clang-format new file mode 100644 index 0000000..20ffce8 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/dsp/dct/.clang-format @@ -0,0 +1,2 @@ +"DisableFormat": true +"SortIncludes": false diff --git a/edgeimpulse/edge-impulse-sdk/dsp/dct/LICENSE b/edgeimpulse/edge-impulse-sdk/dsp/dct/LICENSE new file mode 100644 index 0000000..08a96e9 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/dsp/dct/LICENSE @@ -0,0 +1,18 @@ +Copyright (c) 2018 Project Nayuki. (MIT License) +https://www.nayuki.io/page/fast-discrete-cosine-transform-algorithms + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: +- The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. +- The Software is provided "as is", without warranty of any kind, express or + implied, including but not limited to the warranties of merchantability, + fitness for a particular purpose and noninfringement. In no event shall the + authors or copyright holders be liable for any claim, damages or other + liability, whether in an action of contract, tort or otherwise, arising from, + out of or in connection with the Software or the use or other dealings in the + Software. diff --git a/edgeimpulse/edge-impulse-sdk/dsp/dct/README.md b/edgeimpulse/edge-impulse-sdk/dsp/dct/README.md new file mode 100644 index 0000000..90be487 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/dsp/dct/README.md @@ -0,0 +1,3 @@ +# Fast discrete cosine transform algorithms (C) + +DCT type 2 and type 3 algorithms based on https://www.nayuki.io/page/fast-discrete-cosine-transform-algorithms. These are modified to use KissFFT or hardware accelerated RFFT support with CMSIS-DSP. diff --git a/edgeimpulse/edge-impulse-sdk/dsp/dct/fast-dct-fft.cpp b/edgeimpulse/edge-impulse-sdk/dsp/dct/fast-dct-fft.cpp new file mode 100644 index 0000000..27420ca --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/dsp/dct/fast-dct-fft.cpp @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2022 Project Nayuki. (MIT License) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include +#include +#include +#include "fast-dct-fft.h" +#include "../returntypes.hpp" +#include "../numpy.hpp" +#include "../memory.hpp" + +#ifndef M_PI +#define M_PI 3.14159265358979323846264338327950288 +#endif // M_PI + +// DCT type II, unscaled +int ei::dct::transform(float vector[], size_t len) { + const size_t fft_data_out_size = (len / 2 + 1) * sizeof(ei::fft_complex_t); + const size_t fft_data_in_size = len * sizeof(float); + + // Allocate KissFFT input / output buffer + fft_complex_t *fft_data_out = + (ei::fft_complex_t*)ei_dsp_calloc(fft_data_out_size, 1); + if (!fft_data_out) { + return ei::EIDSP_OUT_OF_MEM; + } + + float *fft_data_in = (float*)ei_dsp_calloc(fft_data_in_size, 1); + if (!fft_data_in) { + ei_dsp_free(fft_data_out, fft_data_out_size); + return ei::EIDSP_OUT_OF_MEM; + } + + // Preprocess the input buffer with the data from the vector + size_t halfLen = len / 2; + for (size_t i = 0; i < halfLen; i++) { + fft_data_in[i] = vector[i * 2]; + fft_data_in[len - 1 - i] = vector[i * 2 + 1]; + } + if (len % 2 == 1) { + fft_data_in[halfLen] = vector[len - 1]; + } + + int r = ei::numpy::rfft(fft_data_in, len, fft_data_out, (len / 2 + 1), len); + if (r != 0) { + ei_dsp_free(fft_data_in, fft_data_in_size); + ei_dsp_free(fft_data_out, fft_data_out_size); + return r; + } + + size_t i = 0; + for (; i < len / 2 + 1; i++) { + float temp = i * M_PI / (len * 2); + vector[i] = fft_data_out[i].r * cos(temp) + fft_data_out[i].i * sin(temp); + } + //take advantage of hermetian symmetry to calculate remainder of signal + for (; i < len; i++) { + float temp = i * M_PI / (len * 2); + int conj_idx = len-i; + // second half bins not calculated would have just been the conjugate of the first half (note minus of imag) + vector[i] = fft_data_out[conj_idx].r * cos(temp) - fft_data_out[conj_idx].i * sin(temp); + } + ei_dsp_free(fft_data_in, fft_data_in_size); + ei_dsp_free(fft_data_out, fft_data_out_size); + + return 0; +} \ No newline at end of file diff --git a/edgeimpulse/edge-impulse-sdk/dsp/dct/fast-dct-fft.h b/edgeimpulse/edge-impulse-sdk/dsp/dct/fast-dct-fft.h new file mode 100644 index 0000000..e31efe1 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/dsp/dct/fast-dct-fft.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2022 Project Nayuki. (MIT License) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef __FAST_DCT_FFT__H__ +#define __FAST_DCT_FFT__H__ + + +#include +#include +#include "../kissfft/kiss_fft.h" + +namespace ei { +namespace dct { + +int transform(float vector[], size_t len); +int inverse_transform(float vector[], size_t len); + +} // namespace dct +} // namespace ei + +#endif //!__FAST-DCT-FFT__H__ \ No newline at end of file diff --git a/edgeimpulse/edge-impulse-sdk/dsp/ei_alloc.h b/edgeimpulse/edge-impulse-sdk/dsp/ei_alloc.h new file mode 100644 index 0000000..6690570 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/dsp/ei_alloc.h @@ -0,0 +1,79 @@ +/* Edge Impulse inferencing library + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __EI_ALLOC__H__ +#define __EI_ALLOC__H__ + +#include "memory.hpp" + +#if EIDSP_TRACK_ALLOCATIONS +#include +#endif + +namespace ei { + +template +struct EiAlloc +{ + typedef T value_type; + EiAlloc() = default; + template + constexpr EiAlloc(const EiAlloc &) noexcept {} + + T *allocate(size_t n) + { + auto bytes = n * sizeof(T); + auto ptr = ei_dsp_malloc(bytes); +#if EIDSP_TRACK_ALLOCATIONS + get_allocs()[ptr] = bytes; +#endif + return (T *)ptr; + } + + void deallocate(T *p, size_t n) noexcept + { +#if EIDSP_TRACK_ALLOCATIONS + auto size_p = get_allocs().find(p); + ei_dsp_free(p,size_p->second); + get_allocs().erase(size_p); +#else + ei_dsp_free(p,0); +#endif + } +#if EIDSP_TRACK_ALLOCATIONS + private: + // [address] -> size requested + typedef std::map map_t; + static map_t& get_allocs() { + static map_t allocs; + return allocs; + } +#endif +}; + +template +bool operator==(const EiAlloc &, const EiAlloc &) { return true; } +template +bool operator!=(const EiAlloc &, const EiAlloc &) { return false; } +} + +#endif //!__EI_ALLOC__H__ diff --git a/edgeimpulse/edge-impulse-sdk/dsp/ei_profiler.h b/edgeimpulse/edge-impulse-sdk/dsp/ei_profiler.h new file mode 100644 index 0000000..365d9e0 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/dsp/ei_profiler.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef __EIPROFILER__H__ +#define __EIPROFILER__H__ + +#include "edge-impulse-sdk/porting/ei_classifier_porting.h" + +class EiProfiler { +public: + EiProfiler() + { + reset(); + } + void reset() + { + timestamp = ei_read_timer_ms(); + } + void report(const char *message) + { + ei_printf("%s took %llu\r\n", message, ei_read_timer_ms() - timestamp); + timestamp = ei_read_timer_ms(); //read again to not count printf time + } + +private: + uint64_t timestamp; +}; + +#endif //!__EIPROFILER__H__ diff --git a/edgeimpulse/edge-impulse-sdk/dsp/ei_utils.h b/edgeimpulse/edge-impulse-sdk/dsp/ei_utils.h new file mode 100644 index 0000000..2a2c5e8 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/dsp/ei_utils.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef __EI_UTILS__H__ +#define __EI_UTILS__H__ + +#define ARRAY_LENGTH(array) (sizeof((array))/sizeof((array)[0])) + +// Stringify +#define ei_xstr(a) ei_str(a) +#define ei_str(a) #a + +// Bit manipulation + +//Set bit y (0-indexed) of x to '1' by generating a a mask with a '1' in the proper bit location and ORing x with the mask. +#define SET_BIT_POS(x,y) (x |= (1 << y)) + +//Set bit y (0-indexed) of x to '0' by generating a mask with a '0' in the y position and 1's elsewhere then ANDing the mask with x. +#define CLEAR_BIT_POS(x,y) (x &= ~(1<< y)) + +//Return '1' if the bit value at position y within x is '1' and '0' if it's 0 by ANDing x with a bit mask where the bit in y's position is '1' and '0' elsewhere and comparing it to all 0's. Returns '1' in least significant bit position if the value of the bit is '1', '0' if it was '0'. +#define TEST_BIT_POS(x,y) ((0u == (x & (1< + +template +using ei_vector = std::vector>; + +#endif //!__EI_VECTOR__H__ \ No newline at end of file diff --git a/edgeimpulse/edge-impulse-sdk/dsp/image/image.hpp b/edgeimpulse/edge-impulse-sdk/dsp/image/image.hpp new file mode 100644 index 0000000..12c0da4 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/dsp/image/image.hpp @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef _EIDSP_IMAGE_H_ +#define _EIDSP_IMAGE_H_ + +#include "edge-impulse-sdk/dsp/image/processing.hpp" + +#endif \ No newline at end of file diff --git a/edgeimpulse/edge-impulse-sdk/dsp/image/processing.cpp b/edgeimpulse/edge-impulse-sdk/dsp/image/processing.cpp new file mode 100644 index 0000000..5ff30c6 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/dsp/image/processing.cpp @@ -0,0 +1,409 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef __EIDSP_IMAGE_PROCESSING__H__ +#define __EIDSP_IMAGE_PROCESSING__H__ + +#include "edge-impulse-sdk/dsp/ei_utils.h" +#include "edge-impulse-sdk/porting/ei_classifier_porting.h" +#include "edge-impulse-sdk/dsp/returntypes.hpp" +#include "edge-impulse-sdk/dsp/image/processing.hpp" + +namespace ei { namespace image { namespace processing { + +enum YUV_OPTIONS +{ + BIG_ENDIAN_ORDER = 1, //RGB reading from low to high memory. Otherwise, uses native encoding + PAD_4B = 2, // pad 0x00 on the high B. ie 0x00RRGGBB +}; + +/** + * @brief Convert YUV to RGB + * + * @param rgb_out Output buffer (can be the same as yuv_in if big enough) + * @param yuv_in Input buffer + * @param in_size_B Size of input image in B + * @param opts Note, only BIG_ENDIAN_ORDER supported presently + */ +int yuv422_to_rgb888( + unsigned char *rgb_out, + unsigned const char *yuv_in, + unsigned int in_size_B, + YUV_OPTIONS opts) +{ + + // Clamp out of range values + #define EI_CLAMP(t) (((t) > 255) ? 255 : (((t) < 0) ? 0 : (t))) + + // Color space conversion for RGB + #define EI_GET_R_FROM_YUV(y, u, v) ((298 * y + 409 * v + 128) >> 8) + #define EI_GET_G_FROM_YUV(y, u, v) ((298 * y - 100 * u - 208 * v + 128) >> 8) + #define EI_GET_B_FROM_YUV(y, u, v) ((298 * y + 516 * u + 128) >> 8) + + unsigned int in_size_pixels = in_size_B / 4; + yuv_in += in_size_B - 1; + + int rgb_end = TEST_BIT_MASK(opts, PAD_4B) ? 2 * in_size_B : (6 * in_size_B) / 4; + rgb_out += rgb_end - 1; + + // Going backwards probably looks strange, but + // This allows us to do the algorithm in place! 
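+    // (Every 4 bytes of YUV422 expand to 6 bytes of RGB888, or 8 with PAD_4B,
+    // so a forward pass would overwrite source bytes before they are read.)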
+ // User needs to put the YUV image into a larger buffer than necessary + // But going backwards means we don't overwrite the YUV bytes + // until we don't need them anymore + for (unsigned int i = 0; i < in_size_pixels; ++i) { + int y2 = *yuv_in-- - 16; + int v = *yuv_in-- - 128; + int y0 = *yuv_in-- - 16; + int u0 = *yuv_in-- - 128; + + if (TEST_BIT_MASK(opts, BIG_ENDIAN_ORDER)) { + *rgb_out-- = EI_CLAMP(EI_GET_B_FROM_YUV(y2, u0, v)); + *rgb_out-- = EI_CLAMP(EI_GET_G_FROM_YUV(y2, u0, v)); + *rgb_out-- = EI_CLAMP(EI_GET_R_FROM_YUV(y2, u0, v)); + if (TEST_BIT_MASK(opts, PAD_4B)) { + *rgb_out-- = 0; + } + + *rgb_out-- = EI_CLAMP(EI_GET_B_FROM_YUV(y0, u0, v)); + *rgb_out-- = EI_CLAMP(EI_GET_G_FROM_YUV(y0, u0, v)); + *rgb_out-- = EI_CLAMP(EI_GET_R_FROM_YUV(y0, u0, v)); + if (TEST_BIT_MASK(opts, PAD_4B)) { + *rgb_out-- = 0; + } + } + else { + // not yet supported + return EIDSP_NOT_SUPPORTED; + } + } + return EIDSP_OK; +} + +/** + * @brief Crops an image. Can be in-place. 4B alignment for best performance + * (Alignment is tested, will fall back to B by B movement) + * + * @param srcWidth X dimension in pixels + * @param srcHeight Y dimension in pixels + * @param srcImage Input buffer + * @param startX X coord of first pixel to keep + * @param startY Y coord of the first pixel to keep + * @param dstWidth Desired X dimension in pixels (should be smaller than srcWidth) + * @param dstHeight Desired Y dimension in pixels (should be smaller than srcHeight) + * @param dstImage Output buffer, can be the same as srcImage + * @param iBpp 8 or 16 for bits per pixel + */ +int cropImage( + const uint8_t *srcImage, + int srcWidth, + int srcHeight, + int startX, + int startY, + uint8_t *dstImage, + int dstWidth, + int dstHeight, + int iBpp) +{ + uint32_t *s32, *d32; + int x, y; + + if (startX < 0 || startX >= srcWidth || startY < 0 || startY >= srcHeight || + (startX + dstWidth) > srcWidth || (startY + dstHeight) > srcHeight) { + return EIDSP_PARAMETER_INVALID; // invalid parameters + } + if (iBpp != 8 && iBpp != 16) { + return EIDSP_PARAMETER_INVALID; + } + + if (iBpp == 8) { + const uint8_t *s; + uint8_t *d; + for (y = 0; y < dstHeight; y++) { + s = &srcImage[srcWidth * (y + startY) + startX]; + d = &dstImage[(dstWidth * y)]; + x = 0; + if ((intptr_t)s & 3 || (intptr_t)d & 3) { // either src or dst pointer is not aligned + for (; x < dstWidth; x++) { + *d++ = *s++; // have to do it byte-by-byte + } + } + else { + // move 4 bytes at a time if aligned or alignment not enforced + s32 = (uint32_t *)s; + d32 = (uint32_t *)d; + for (; x < dstWidth - 3; x += 4) { + *d32++ = *s32++; + } + // any remaining stragglers? + s = (uint8_t *)s32; + d = (uint8_t *)d32; + for (; x < dstWidth; x++) { + *d++ = *s++; + } + } + } // for y + } // 8-bpp + else { + uint16_t *s, *d; + for (y = 0; y < dstHeight; y++) { + s = (uint16_t *)&srcImage[2 * srcWidth * (y + startY) + startX * 2]; + d = (uint16_t *)&dstImage[(dstWidth * y * 2)]; + x = 0; + if ((intptr_t)s & 2 || (intptr_t)d & 2) { // either src or dst pointer is not aligned + for (; x < dstWidth; x++) { + *d++ = *s++; // have to do it 16-bits at a time + } + } + else { + // move 4 bytes at a time if aligned or alignment no enforced + s32 = (uint32_t *)s; + d32 = (uint32_t *)d; + for (; x < dstWidth - 1; x += 2) { // we can move 2 pixels at a time + *d32++ = *s32++; + } + // any remaining stragglers? 
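+                // (at most one trailing pixel is left here, since the loop
+                // above moved two 16-bit pixels per 32-bit word)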
+ s = (uint16_t *)s32; + d = (uint16_t *)d32; + for (; x < dstWidth; x++) { + *d++ = *s++; + } + } + } // for y + } // 16-bpp case + + return EIDSP_OK; +} /* cropImage() */ + +/** + * @copydoc cropImage( + int srcWidth, + int srcHeight, + const uint8_t *srcImage, + int startX, + int startY, + int dstWidth, + int dstHeight, + uint8_t *dstImage, + int iBpp) + */ +int crop_image_rgb888_packed( + const uint8_t *srcImage, + int srcWidth, + int srcHeight, + int startX, + int startY, + uint8_t *dstImage, + int dstWidth, + int dstHeight) +{ + // use 8 bpp mode, but do everything *3 for RGB + return cropImage( + srcImage, + srcWidth * 3, + srcHeight, + startX * 3, + startY, + dstImage, + dstWidth * 3, + dstHeight, + 8); +} + +/** + * @brief Resize an image using interpolation + * Can be used to resize the image smaller or larger + * If resizing much smaller than 1/3 size, then a more rubust algorithm should average all of the pixels + * This algorithm uses bilinear interpolation - averages a 2x2 region to generate each new pixel + * + * @param srcWidth Input image width in pixels + * @param srcHeight Input image height in pixels + * @param srcImage Input buffer + * @param dstWidth Output image width in pixels + * @param dstHeight Output image height in pixels + * @param dstImage Output buffer, can be same as input buffer + * @param pixel_size_B Size of pixels in Bytes. 3 for RGB, 1 for mono + */ +int resize_image( + const uint8_t *srcImage, + int srcWidth, + int srcHeight, + uint8_t *dstImage, + int dstWidth, + int dstHeight, + int pixel_size_B) +{ +// Copied from ei_camera.cpp in firmware-eta-compute +// Modified for RGB888 +// This needs to be < 16 or it won't fit. Cortex-M4 only has SIMD for signed multiplies + constexpr int FRAC_BITS = 14; + constexpr int FRAC_VAL = (1 << FRAC_BITS); + constexpr int FRAC_MASK = (FRAC_VAL - 1); + + uint32_t src_x_accum, src_y_accum; // accumulators and fractions for scaling the image + uint32_t x_frac, nx_frac, y_frac, ny_frac; + int x, y, ty; + + if (srcHeight < 2) { + return EIDSP_PARAMETER_INVALID; + } + + // start at 1/2 pixel in to account for integer downsampling which might miss pixels + src_y_accum = FRAC_VAL / 2; + const uint32_t src_x_frac = (srcWidth * FRAC_VAL) / dstWidth; + const uint32_t src_y_frac = (srcHeight * FRAC_VAL) / dstHeight; + + //from here out, *3 b/c RGB + srcWidth *= pixel_size_B; + //srcHeight not used for indexing + //dstWidth still needed as is + //dstHeight shouldn't be scaled + + const uint8_t *s; + uint8_t *d; + + for (y = 0; y < dstHeight; y++) { + // do indexing computations + ty = src_y_accum >> FRAC_BITS; // src y + y_frac = src_y_accum & FRAC_MASK; + src_y_accum += src_y_frac; + ny_frac = FRAC_VAL - y_frac; // y fraction and 1.0 - y fraction + + s = &srcImage[ty * srcWidth]; + d = &dstImage[y * dstWidth * pixel_size_B]; //not scaled above + // start at 1/2 pixel in to account for integer downsampling which might miss pixels + src_x_accum = FRAC_VAL / 2; + for (x = 0; x < dstWidth; x++) { + uint32_t tx, p00, p01, p10, p11; + // do indexing computations + tx = (src_x_accum >> FRAC_BITS) * pixel_size_B; + x_frac = src_x_accum & FRAC_MASK; + nx_frac = FRAC_VAL - x_frac; // x fraction and 1.0 - x fraction + src_x_accum += src_x_frac; + + //interpolate and write out + for (int color = 0; color < pixel_size_B; + color++) // do pixel_size_B times for pixel_size_B colors + { + p00 = s[tx]; + p10 = s[tx + pixel_size_B]; + p01 = s[tx + srcWidth]; + p11 = s[tx + srcWidth + pixel_size_B]; + p00 = ((p00 * nx_frac) + (p10 * x_frac) + 
FRAC_VAL / 2) >> FRAC_BITS; // top line + p01 = ((p01 * nx_frac) + (p11 * x_frac) + FRAC_VAL / 2) >> FRAC_BITS; // bottom line + p00 = ((p00 * ny_frac) + (p01 * y_frac) + FRAC_VAL / 2) >> FRAC_BITS; //top + bottom + *d++ = (uint8_t)p00; // store new pixel + //ready next loop + tx++; + } + } // for x + } // for y + return EIDSP_OK; +} // resizeImage() + +/** + * @brief Calculate new dims that match the aspect ratio of destination + * This prevents a squashed look + * The smallest axis is held constant + * + * @param srcWidth Input width in pixels + * @param srcHeight Input height in pixels + * @param dstWidth Ultimate width in pixels + * @param dstHeight Ultimate height in pixels + * @param[out] cropWidth Width in pixels that matches the aspect ratio + * @param[out] cropHeight Height in pixels that matches the aspect ratio + */ +void calculate_crop_dims( + int srcWidth, + int srcHeight, + int dstWidth, + int dstHeight, + int &cropWidth, + int &cropHeight) +{ + //first, trim the largest axis to match destination aspect ratio + //calculate by fixing the smaller axis + if (srcWidth > srcHeight) { + cropWidth = (uint32_t)(dstWidth * srcHeight) / dstHeight; //cast in case int is small + cropHeight = srcHeight; + } + else { + cropHeight = (uint32_t)(dstHeight * srcWidth) / dstWidth; + cropWidth = srcWidth; + } +} + +int crop_and_interpolate_rgb888( + const uint8_t *srcImage, + int srcWidth, + int srcHeight, + uint8_t *dstImage, + int dstWidth, + int dstHeight) +{ + int cropWidth, cropHeight; + // What are dimensions that maintain aspect ratio? + calculate_crop_dims(srcWidth, srcHeight, dstWidth, dstHeight, cropWidth, cropHeight); + // Now crop to that dimension + int res = crop_image_rgb888_packed( + srcImage, + srcWidth, + srcHeight, + (srcWidth - cropWidth) / 2, + (srcHeight - cropHeight) / 2, + dstImage, + cropWidth, + cropHeight); + + if( res != EIDSP_OK) { return res; } + // Finally, interpolate down to desired dimensions, in place + return resize_image(dstImage, cropWidth, cropHeight, dstImage, dstWidth, dstHeight, 3); +} + +int crop_and_interpolate_image( + const uint8_t *srcImage, + int srcWidth, + int srcHeight, + uint8_t *dstImage, + int dstWidth, + int dstHeight, + int pixel_size_B) +{ + int cropWidth, cropHeight; + // What are dimensions that maintain aspect ratio? + calculate_crop_dims(srcWidth, srcHeight, dstWidth, dstHeight, cropWidth, cropHeight); + + // Now crop to that dimension + int res = cropImage( + srcImage, + srcWidth * pixel_size_B, + srcHeight, + ((srcWidth - cropWidth) / 2) * pixel_size_B, + (srcHeight - cropHeight) / 2, + dstImage, + cropWidth * pixel_size_B, + cropHeight, + 8); + + if( res != EIDSP_OK) { return res; } + + // Finally, interpolate down to desired dimensions, in place + return resize_image(dstImage, cropWidth, cropHeight, dstImage, dstWidth, dstHeight, pixel_size_B); +} + +}}} //namespaces +#endif //!__EI_IMAGE_PROCESSING__H__ \ No newline at end of file diff --git a/edgeimpulse/edge-impulse-sdk/dsp/image/processing.hpp b/edgeimpulse/edge-impulse-sdk/dsp/image/processing.hpp new file mode 100644 index 0000000..de8a3be --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/dsp/image/processing.hpp @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef __EIDSP_IMAGE_PROCESSING__H__ +#define __EIDSP_IMAGE_PROCESSING__H__ + +#include "edge-impulse-sdk/dsp/ei_utils.h" +#include "edge-impulse-sdk/porting/ei_classifier_porting.h" +#include "edge-impulse-sdk/dsp/returntypes.hpp" + +namespace ei { namespace image { namespace processing { + +enum YUV_OPTIONS +{ + BIG_ENDIAN_ORDER = 1, //RGB reading from low to high memory. Otherwise, uses native encoding + PAD_4B = 2, // pad 0x00 on the high B. ie 0x00RRGGBB +}; + +/** + * @brief Convert YUV to RGB + * + * @param rgb_out Output buffer (can be the same as yuv_in if big enough) + * @param yuv_in Input buffer + * @param in_size_B Size of input image in B + * @param opts Note, only BIG_ENDIAN_ORDER supported presently + */ +int yuv422_to_rgb888( + unsigned char *rgb_out, + unsigned const char *yuv_in, + unsigned int in_size_B, + YUV_OPTIONS opts); + +/** + * @brief Crops an image. Can be in-place. 4B alignment for best performance + * (Alignment is tested, will fall back to B by B movement) + * + * @param srcWidth X dimension in pixels + * @param srcHeight Y dimension in pixels + * @param srcImage Input buffer + * @param startX X coord of first pixel to keep + * @param startY Y coord of the first pixel to keep + * @param dstWidth Desired X dimension in pixels (should be smaller than srcWidth) + * @param dstHeight Desired Y dimension in pixels (should be smaller than srcHeight) + * @param dstImage Output buffer, can be the same as srcImage + * @param iBpp 8 or 16 for bits per pixel + */ +int cropImage( + const uint8_t *srcImage, + int srcWidth, + int srcHeight, + int startX, + int startY, + uint8_t *dstImage, + int dstWidth, + int dstHeight, + int iBpp); + +/** + * @copydoc cropImage( + int srcWidth, + int srcHeight, + const uint8_t *srcImage, + int startX, + int startY, + int dstWidth, + int dstHeight, + uint8_t *dstImage, + int iBpp) + */ +void crop_image_rgb888_packed( + const uint8_t *srcImage, + int srcWidth, + int srcHeight, + int startX, + int startY, + uint8_t *dstImage, + int dstWidth, + int dstHeight); + +constexpr int RGB888_B_SIZE = 3; +constexpr int MONO_B_SIZE = 1; + +/** + * @brief Resize an image using interpolation + * Can be used to resize the image smaller or larger + * If resizing much smaller than 1/3 size, then a more rubust algorithm should average all of the pixels + * This algorithm uses bilinear interpolation - averages a 2x2 region to generate each new pixel + * + * @param srcWidth Input image width in pixels + * @param srcHeight Input image height in pixels + * @param srcImage Input buffer + * @param dstWidth Output image width in pixels + * @param dstHeight Output image height in pixels + * @param dstImage Output buffer, can be same as input buffer + * @param pixel_size_B Size of pixels in Bytes. 
3 for RGB, 1 for mono + */ +void resize_image( + const uint8_t *srcImage, + int srcWidth, + int srcHeight, + uint8_t *dstImage, + int dstWidth, + int dstHeight, + int pixel_size_B); + +/** + * @brief Calculate new dims that match the aspect ratio of destination + * This prevents a squashed look + * The smallest axis is held constant + * + * @param srcWidth Input width in pixels + * @param srcHeight Input height in pixels + * @param dstWidth Ultimate width in pixels + * @param dstHeight Ultimate height in pixels + * @param[out] cropWidth Width in pixels that matches the aspect ratio + * @param[out] cropHeight Height in pixels that matches the aspect ratio + */ +void calculate_crop_dims( + int srcWidth, + int srcHeight, + int dstWidth, + int dstHeight, + int &cropWidth, + int &cropHeight); + +/** + * @brief Crops, then interpolates to a desired new image size + * Can be done in place (set srcImage == dstImage) + * + * @param srcImage Input image buffer + * @param srcWidth Input width in pixels + * @param srcHeight Input height in pixels + * @param dstImage Output image buffer, can be same as input buffer + * @param dstWidth Desired new width in pixels + * @param dstHeight Desired new height in pixels + */ +int crop_and_interpolate_rgb888( + const uint8_t *srcImage, + int srcWidth, + int srcHeight, + uint8_t *dstImage, + int dstWidth, + int dstHeight); + +/** + * @brief Crops, then interpolates to a desired new image size + * Can be done in place (set srcImage == dstImage) + * A more beneric version of the previously used + * crop_and_interpolate_rgb888 + * + * @param srcImage Input image buffer + * @param srcWidth Input width in pixels + * @param srcHeight Input height in pixels + * @param dstImage Output image buffer, can be same as input buffer + * @param dstWidth Desired new width in pixels + * @param dstHeight Desired new height in pixels + * @param pixel_size_B Size of pixels in Bytes. 3 for RGB, 1 for mono + */ +int crop_and_interpolate_image( + const uint8_t *srcImage, + int srcWidth, + int srcHeight, + uint8_t *dstImage, + int dstWidth, + int dstHeight, + int pixel_size_B); + +}}} //namespaces +#endif //!__EI_IMAGE_PROCESSING__H__ \ No newline at end of file diff --git a/edgeimpulse/edge-impulse-sdk/dsp/kissfft/.clang-format b/edgeimpulse/edge-impulse-sdk/dsp/kissfft/.clang-format new file mode 100644 index 0000000..20ffce8 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/dsp/kissfft/.clang-format @@ -0,0 +1,2 @@ +"DisableFormat": true +"SortIncludes": false diff --git a/edgeimpulse/edge-impulse-sdk/dsp/kissfft/LICENSE b/edgeimpulse/edge-impulse-sdk/dsp/kissfft/LICENSE new file mode 100644 index 0000000..d95f124 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/dsp/kissfft/LICENSE @@ -0,0 +1,4 @@ +Revised BSD License, see COPYING for verbiage. +Basically, "free to use&change, give credit where due, no guarantees" +Note this license is compatible with GPL at one end of the spectrum and closed, commercial software at +the other end. See http://www.fsf.org/licensing/licenses diff --git a/edgeimpulse/edge-impulse-sdk/dsp/kissfft/README.md b/edgeimpulse/edge-impulse-sdk/dsp/kissfft/README.md new file mode 100644 index 0000000..333e1fe --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/dsp/kissfft/README.md @@ -0,0 +1,3 @@ +# KissFFT + +Software FFT library used for devices that do not have hardware accelerated RFFT, or where we want to use mixed-radix FFT. Based off of https://github.com/mborgerding/kissfft. 
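+
+A minimal forward-FFT sketch (illustrative only — the include path is an assumption based on this repository layout, and the `ei_free` call matches `KISS_FFT_MALLOC`/`KISS_FFT_FREE` mapping to `ei_malloc`/`ei_free` in this port):
+
+```cpp
+#include "edge-impulse-sdk/dsp/kissfft/kiss_fft.h"
+
+void fft_demo(void) {
+    const int nfft = 64;
+    kiss_fft_cpx in[64] = {};            // fill .r / .i with your samples
+    kiss_fft_cpx out[64];
+
+    // 0 = forward transform, no caller-supplied memory
+    kiss_fft_cfg cfg = kiss_fft_alloc(nfft, 0, NULL, NULL);
+    if (cfg) {
+        kiss_fft(cfg, in, out);          // out[k] holds the k-th complex bin
+        ei_free(cfg);
+    }
+}
+```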
diff --git a/edgeimpulse/edge-impulse-sdk/dsp/kissfft/_kiss_fft_guts.h b/edgeimpulse/edge-impulse-sdk/dsp/kissfft/_kiss_fft_guts.h new file mode 100755 index 0000000..754896a --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/dsp/kissfft/_kiss_fft_guts.h @@ -0,0 +1,161 @@ +/* + * Copyright (c) 2003-2010, Mark Borgerding. All rights reserved. + * This file is part of KISS FFT - https://github.com/mborgerding/kissfft + * + * SPDX-License-Identifier: BSD-3-Clause + * See COPYING file for more information. + */ + +/* kiss_fft.h + defines kiss_fft_scalar as either short or a float type + and defines + typedef struct { kiss_fft_scalar r; kiss_fft_scalar i; }kiss_fft_cpx; */ +#pragma once + +#include "kiss_fft.h" +#include + +#define MAXFACTORS 32 +/* e.g. an fft of length 128 has 4 factors + as far as kissfft is concerned + 4*4*4*2 + */ + +struct kiss_fft_state{ + int nfft; + int inverse; + int factors[2*MAXFACTORS]; + kiss_fft_cpx twiddles[1]; +}; + +/* + Explanation of macros dealing with complex math: + + C_MUL(m,a,b) : m = a*b + C_FIXDIV( c , div ) : if a fixed point impl., c /= div. noop otherwise + C_SUB( res, a,b) : res = a - b + C_SUBFROM( res , a) : res -= a + C_ADDTO( res , a) : res += a + * */ +#ifdef FIXED_POINT +#include +#if (FIXED_POINT==32) +# define FRACBITS 31 +# define SAMPPROD int64_t +#define SAMP_MAX INT32_MAX +#define SAMP_MIN INT32_MIN +#else +# define FRACBITS 15 +# define SAMPPROD int32_t +#define SAMP_MAX INT16_MAX +#define SAMP_MIN INT16_MIN +#endif + +#if defined(CHECK_OVERFLOW) +# define CHECK_OVERFLOW_OP(a,op,b) \ + if ( (SAMPPROD)(a) op (SAMPPROD)(b) > SAMP_MAX || (SAMPPROD)(a) op (SAMPPROD)(b) < SAMP_MIN ) { \ + fprintf(stderr,"WARNING:overflow @ " __FILE__ "(%d): (%d " #op" %d) = %ld\n",__LINE__,(a),(b),(SAMPPROD)(a) op (SAMPPROD)(b) ); } +#endif + + +# define smul(a,b) ( (SAMPPROD)(a)*(b) ) +# define sround( x ) (kiss_fft_scalar)( ( (x) + (1<<(FRACBITS-1)) ) >> FRACBITS ) + +# define S_MUL(a,b) sround( smul(a,b) ) + +# define C_MUL(m,a,b) \ + do{ (m).r = sround( smul((a).r,(b).r) - smul((a).i,(b).i) ); \ + (m).i = sround( smul((a).r,(b).i) + smul((a).i,(b).r) ); }while(0) + +# define DIVSCALAR(x,k) \ + (x) = sround( smul( x, SAMP_MAX/k ) ) + +# define C_FIXDIV(c,div) \ + do { DIVSCALAR( (c).r , div); \ + DIVSCALAR( (c).i , div); }while (0) + +# define C_MULBYSCALAR( c, s ) \ + do{ (c).r = sround( smul( (c).r , s ) ) ;\ + (c).i = sround( smul( (c).i , s ) ) ; }while(0) + +#else /* not FIXED_POINT*/ + +# define S_MUL(a,b) ( (a)*(b) ) +#define C_MUL(m,a,b) \ + do{ (m).r = (a).r*(b).r - (a).i*(b).i;\ + (m).i = (a).r*(b).i + (a).i*(b).r; }while(0) +# define C_FIXDIV(c,div) /* NOOP */ +# define C_MULBYSCALAR( c, s ) \ + do{ (c).r *= (s);\ + (c).i *= (s); }while(0) +#endif + +#ifndef CHECK_OVERFLOW_OP +# define CHECK_OVERFLOW_OP(a,op,b) /* noop */ +#endif + +#define C_ADD( res, a,b)\ + do { \ + CHECK_OVERFLOW_OP((a).r,+,(b).r)\ + CHECK_OVERFLOW_OP((a).i,+,(b).i)\ + (res).r=(a).r+(b).r; (res).i=(a).i+(b).i; \ + }while(0) +#define C_SUB( res, a,b)\ + do { \ + CHECK_OVERFLOW_OP((a).r,-,(b).r)\ + CHECK_OVERFLOW_OP((a).i,-,(b).i)\ + (res).r=(a).r-(b).r; (res).i=(a).i-(b).i; \ + }while(0) +#define C_ADDTO( res , a)\ + do { \ + CHECK_OVERFLOW_OP((res).r,+,(a).r)\ + CHECK_OVERFLOW_OP((res).i,+,(a).i)\ + (res).r += (a).r; (res).i += (a).i;\ + }while(0) + +#define C_SUBFROM( res , a)\ + do {\ + CHECK_OVERFLOW_OP((res).r,-,(a).r)\ + CHECK_OVERFLOW_OP((res).i,-,(a).i)\ + (res).r -= (a).r; (res).i -= (a).i; \ + }while(0) + + +#ifdef FIXED_POINT +# define KISS_FFT_COS(phase) 
floor(.5+SAMP_MAX * cos (phase)) +# define KISS_FFT_SIN(phase) floor(.5+SAMP_MAX * sin (phase)) +# define HALF_OF(x) ((x)>>1) +#elif defined(USE_SIMD) +# define KISS_FFT_COS(phase) _mm_set1_ps( cos(phase) ) +# define KISS_FFT_SIN(phase) _mm_set1_ps( sin(phase) ) +# define HALF_OF(x) ((x)*_mm_set1_ps(.5)) +#else +# define KISS_FFT_COS(phase) (kiss_fft_scalar) cos(phase) +# define KISS_FFT_SIN(phase) (kiss_fft_scalar) sin(phase) +# define HALF_OF(x) ((x)*.5) +#endif + +#define kf_cexp(x,phase) \ + do{ \ + (x)->r = KISS_FFT_COS(phase);\ + (x)->i = KISS_FFT_SIN(phase);\ + }while(0) + + +/* a debugging function */ +#define pcpx(c)\ + fprintf(stderr,"%g + %gi\n",(double)((c)->r),(double)((c)->i) ) + + +#ifdef KISS_FFT_USE_ALLOCA +// define this to allow use of alloca instead of malloc for temporary buffers +// Temporary buffers are used in two case: +// 1. FFT sizes that have "bad" factors. i.e. not 2,3 and 5 +// 2. "in-place" FFTs. Notice the quotes, since kissfft does not really do an in-place transform. +#include +#define KISS_FFT_TMP_ALLOC(nbytes) alloca(nbytes) +#define KISS_FFT_TMP_FREE(ptr) +#else +#define KISS_FFT_TMP_ALLOC(nbytes) KISS_FFT_MALLOC(nbytes) +#define KISS_FFT_TMP_FREE(ptr) KISS_FFT_FREE(ptr) +#endif diff --git a/edgeimpulse/edge-impulse-sdk/dsp/kissfft/kiss_fft.cpp b/edgeimpulse/edge-impulse-sdk/dsp/kissfft/kiss_fft.cpp new file mode 100755 index 0000000..9393357 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/dsp/kissfft/kiss_fft.cpp @@ -0,0 +1,413 @@ +/* + * Copyright (c) 2003-2010, Mark Borgerding. All rights reserved. + * This file is part of KISS FFT - https://github.com/mborgerding/kissfft + * + * SPDX-License-Identifier: BSD-3-Clause + * See COPYING file for more information. + */ + + +#include "edge-impulse-sdk/dsp/kissfft/_kiss_fft_guts.h" +/* The guts header contains all the multiplication and addition macros that are defined for + fixed or floating point complex numbers. It also delares the kf_ internal functions. 
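+ (kf_bfly2/3/4/5 below are hard-coded radix butterflies, kf_bfly_generic handles
+ any other prime factor, and kf_work walks the factor list produced by kf_factor.)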
+ */ + +static void kf_bfly2( + kiss_fft_cpx * Fout, + const size_t fstride, + const kiss_fft_cfg st, + int m + ) +{ + kiss_fft_cpx * Fout2; + kiss_fft_cpx * tw1 = st->twiddles; + kiss_fft_cpx t; + Fout2 = Fout + m; + do{ + C_FIXDIV(*Fout,2); C_FIXDIV(*Fout2,2); + + C_MUL (t, *Fout2 , *tw1); + tw1 += fstride; + C_SUB( *Fout2 , *Fout , t ); + C_ADDTO( *Fout , t ); + ++Fout2; + ++Fout; + }while (--m); +} + +static void kf_bfly4( + kiss_fft_cpx * Fout, + const size_t fstride, + const kiss_fft_cfg st, + const size_t m + ) +{ + kiss_fft_cpx *tw1,*tw2,*tw3; + kiss_fft_cpx scratch[6]; + size_t k=m; + const size_t m2=2*m; + const size_t m3=3*m; + + + tw3 = tw2 = tw1 = st->twiddles; + + do { + C_FIXDIV(*Fout,4); C_FIXDIV(Fout[m],4); C_FIXDIV(Fout[m2],4); C_FIXDIV(Fout[m3],4); + + C_MUL(scratch[0],Fout[m] , *tw1 ); + C_MUL(scratch[1],Fout[m2] , *tw2 ); + C_MUL(scratch[2],Fout[m3] , *tw3 ); + + C_SUB( scratch[5] , *Fout, scratch[1] ); + C_ADDTO(*Fout, scratch[1]); + C_ADD( scratch[3] , scratch[0] , scratch[2] ); + C_SUB( scratch[4] , scratch[0] , scratch[2] ); + C_SUB( Fout[m2], *Fout, scratch[3] ); + tw1 += fstride; + tw2 += fstride*2; + tw3 += fstride*3; + C_ADDTO( *Fout , scratch[3] ); + + if(st->inverse) { + Fout[m].r = scratch[5].r - scratch[4].i; + Fout[m].i = scratch[5].i + scratch[4].r; + Fout[m3].r = scratch[5].r + scratch[4].i; + Fout[m3].i = scratch[5].i - scratch[4].r; + }else{ + Fout[m].r = scratch[5].r + scratch[4].i; + Fout[m].i = scratch[5].i - scratch[4].r; + Fout[m3].r = scratch[5].r - scratch[4].i; + Fout[m3].i = scratch[5].i + scratch[4].r; + } + ++Fout; + }while(--k); +} + +static void kf_bfly3( + kiss_fft_cpx * Fout, + const size_t fstride, + const kiss_fft_cfg st, + size_t m + ) +{ + size_t k=m; + const size_t m2 = 2*m; + kiss_fft_cpx *tw1,*tw2; + kiss_fft_cpx scratch[5]; + kiss_fft_cpx epi3; + epi3 = st->twiddles[fstride*m]; + + tw1=tw2=st->twiddles; + + do{ + C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3); + + C_MUL(scratch[1],Fout[m] , *tw1); + C_MUL(scratch[2],Fout[m2] , *tw2); + + C_ADD(scratch[3],scratch[1],scratch[2]); + C_SUB(scratch[0],scratch[1],scratch[2]); + tw1 += fstride; + tw2 += fstride*2; + + Fout[m].r = Fout->r - HALF_OF(scratch[3].r); + Fout[m].i = Fout->i - HALF_OF(scratch[3].i); + + C_MULBYSCALAR( scratch[0] , epi3.i ); + + C_ADDTO(*Fout,scratch[3]); + + Fout[m2].r = Fout[m].r + scratch[0].i; + Fout[m2].i = Fout[m].i - scratch[0].r; + + Fout[m].r -= scratch[0].i; + Fout[m].i += scratch[0].r; + + ++Fout; + }while(--k); +} + +static void kf_bfly5( + kiss_fft_cpx * Fout, + const size_t fstride, + const kiss_fft_cfg st, + int m + ) +{ + kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; + int u; + kiss_fft_cpx scratch[13]; + kiss_fft_cpx * twiddles = st->twiddles; + kiss_fft_cpx *tw; + kiss_fft_cpx ya,yb; + ya = twiddles[fstride*m]; + yb = twiddles[fstride*2*m]; + + Fout0=Fout; + Fout1=Fout0+m; + Fout2=Fout0+2*m; + Fout3=Fout0+3*m; + Fout4=Fout0+4*m; + + tw=st->twiddles; + for ( u=0; ur += scratch[7].r + scratch[8].r; + Fout0->i += scratch[7].i + scratch[8].i; + + scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r); + scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r); + + scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i); + scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i); + + C_SUB(*Fout1,scratch[5],scratch[6]); + C_ADD(*Fout4,scratch[5],scratch[6]); + + scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r); + 
scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r); + scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i); + scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i); + + C_ADD(*Fout2,scratch[11],scratch[12]); + C_SUB(*Fout3,scratch[11],scratch[12]); + + ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4; + } +} + +/* perform the butterfly for one stage of a mixed radix FFT */ +static void kf_bfly_generic( + kiss_fft_cpx * Fout, + const size_t fstride, + const kiss_fft_cfg st, + int m, + int p + ) +{ + int u,k,q1,q; + kiss_fft_cpx * twiddles = st->twiddles; + kiss_fft_cpx t; + int Norig = st->nfft; + + kiss_fft_cpx * scratch = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC(sizeof(kiss_fft_cpx)*p); + + for ( u=0; u=Norig) twidx-=Norig; + C_MUL(t,scratch[q] , twiddles[twidx] ); + C_ADDTO( Fout[ k ] ,t); + } + k += m; + } + } + KISS_FFT_TMP_FREE(scratch); +} + +static +void kf_work( + kiss_fft_cpx * Fout, + const kiss_fft_cpx * f, + const size_t fstride, + int in_stride, + int * factors, + const kiss_fft_cfg st + ) +{ + kiss_fft_cpx * Fout_beg=Fout; + const int p=*factors++; /* the radix */ + const int m=*factors++; /* stage's fft length/p */ + const kiss_fft_cpx * Fout_end = Fout + p*m; + +#ifdef _OPENMP + // use openmp extensions at the + // top-level (not recursive) + if (fstride==1 && p<=5) + { + int k; + + // execute the p different work units in different threads +# pragma omp parallel for + for (k=0;k floor_sqrt) + p = n; /* no more factors, skip to end */ + } + n /= p; + *facbuf++ = p; + *facbuf++ = n; + } while (n > 1); +} + +/* + * + * User-callable function to allocate all necessary storage space for the fft. + * + * The return value is a contiguous block of memory, allocated with malloc. As such, + * It can be freed with free(), rather than a kiss_fft-specific function. + * */ +kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem,size_t * memallocated ) +{ + kiss_fft_cfg st=NULL; + size_t memneeded = sizeof(struct kiss_fft_state) + + sizeof(kiss_fft_cpx)*(nfft-1); /* twiddle factors*/ + + if ( lenmem==NULL ) { + st = ( kiss_fft_cfg)KISS_FFT_MALLOC( memneeded ); + }else{ + if (mem != NULL && *lenmem >= memneeded) + st = (kiss_fft_cfg)mem; + *lenmem = memneeded; + } + if (st) { + int i; + st->nfft=nfft; + st->inverse = inverse_fft; + if (inverse_fft) + { + for (i=0;itwiddles+i, phase ); + } + } else { + for (i=0;itwiddles+i, phase ); + } + } + + kf_factor(nfft,st->factors); + } + + if (memallocated != NULL) { + *memallocated = memneeded; + } + + return st; +} + + +void kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride) +{ + if (fin == fout) { + //NOTE: this is not really an in-place FFT algorithm. 
+ //It just performs an out-of-place FFT into a temp buffer + kiss_fft_cpx * tmpbuf = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC( sizeof(kiss_fft_cpx)*st->nfft); + kf_work(tmpbuf,fin,1,in_stride, st->factors,st); + memcpy(fout,tmpbuf,sizeof(kiss_fft_cpx)*st->nfft); + KISS_FFT_TMP_FREE(tmpbuf); + }else{ + kf_work( fout, fin, 1,in_stride, st->factors,st ); + } +} + +void kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) +{ + kiss_fft_stride(cfg,fin,fout,1); +} + + +void kiss_fft_cleanup(void) +{ + // nothing needed any more +} + +int kiss_fft_next_fast_size(int n) +{ + while(1) { + int m=n; + while ( (m%2) == 0 ) m/=2; + while ( (m%3) == 0 ) m/=3; + while ( (m%5) == 0 ) m/=5; + if (m<=1) + break; /* n is completely factorable by twos, threes, and fives */ + n++; + } + return n; +} diff --git a/edgeimpulse/edge-impulse-sdk/dsp/kissfft/kiss_fft.h b/edgeimpulse/edge-impulse-sdk/dsp/kissfft/kiss_fft.h new file mode 100755 index 0000000..301687a --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/dsp/kissfft/kiss_fft.h @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2003-2010, Mark Borgerding. All rights reserved. + * This file is part of KISS FFT - https://github.com/mborgerding/kissfft + * + * SPDX-License-Identifier: BSD-3-Clause + * See COPYING file for more information. + */ + +#ifndef KISS_FFT_H +#define KISS_FFT_H + +#include +#include +#include +#include +#include "../../porting/ei_classifier_porting.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* + ATTENTION! + If you would like a : + -- a utility that will handle the caching of fft objects + -- real-only (no imaginary time component ) FFT + -- a multi-dimensional FFT + -- a command-line utility to perform ffts + -- a command-line utility to perform fast-convolution filtering + + Then see kfc.h kiss_fftr.h kiss_fftnd.h fftutil.c kiss_fastfir.c + in the tools/ directory. +*/ + +#ifdef USE_SIMD +# include +# define kiss_fft_scalar __m128 +#define KISS_FFT_MALLOC(nbytes) _mm_malloc(nbytes,16) +#define KISS_FFT_FREE _mm_free +#else +#define KISS_FFT_MALLOC ei_malloc +#define KISS_FFT_FREE ei_free +#endif + + +#ifdef FIXED_POINT +#include +# if (FIXED_POINT == 32) +# define kiss_fft_scalar int32_t +# else +# define kiss_fft_scalar int16_t +# endif +#else +# ifndef kiss_fft_scalar +/* default is float */ +# define kiss_fft_scalar float +# endif +#endif + +typedef struct { + kiss_fft_scalar r; + kiss_fft_scalar i; +}kiss_fft_cpx; + +typedef struct kiss_fft_state* kiss_fft_cfg; + +/* + * kiss_fft_alloc + * + * Initialize a FFT (or IFFT) algorithm's cfg/state buffer. + * + * typical usage: kiss_fft_cfg mycfg=kiss_fft_alloc(1024,0,NULL,NULL); + * + * The return value from fft_alloc is a cfg buffer used internally + * by the fft routine or NULL. + * + * If lenmem is NULL, then kiss_fft_alloc will allocate a cfg buffer using malloc. + * The returned value should be free()d when done to avoid memory leaks. + * + * The state can be placed in a user supplied buffer 'mem': + * If lenmem is not NULL and mem is not NULL and *lenmem is large enough, + * then the function places the cfg in mem and the size used in *lenmem + * and returns mem. + * + * If lenmem is not NULL and ( mem is NULL or *lenmem is not large enough), + * then the function returns NULL and places the minimum cfg + * buffer size in *lenmem. + * */ + +kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem,size_t * memallocated = NULL); + +/* + * kiss_fft(cfg,in_out_buf) + * + * Perform an FFT on a complex input buffer. 
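+ *
+ * A minimal end-to-end sketch (the size 1024 and the stack buffers are
+ * illustrative only; error checking elided):
+ *
+ *   kiss_fft_cfg cfg = kiss_fft_alloc(1024, 0, NULL, NULL);
+ *   kiss_fft_cpx in[1024], out[1024];
+ *   // ... fill in[k].r and in[k].i ...
+ *   kiss_fft(cfg, in, out);
+ *   kiss_fft_free(cfg);
+ *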
+ * for a forward FFT, + * fin should be f[0] , f[1] , ... ,f[nfft-1] + * fout will be F[0] , F[1] , ... ,F[nfft-1] + * Note that each element is complex and can be accessed like + f[k].r and f[k].i + * */ +void kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout); + +/* + A more generic version of the above function. It reads its input from every Nth sample. + * */ +void kiss_fft_stride(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int fin_stride); + +/* If kiss_fft_alloc allocated a buffer, it is one contiguous + buffer and can be simply free()d when no longer needed*/ +#define kiss_fft_free KISS_FFT_FREE + +/* + Cleans up some memory that gets managed internally. Not necessary to call, but it might clean up + your compiler output to call this before you exit. +*/ +void kiss_fft_cleanup(void); + + +/* + * Returns the smallest integer k, such that k>=n and k has only "fast" factors (2,3,5) + */ +int kiss_fft_next_fast_size(int n); + +/* for real ffts, we need an even size */ +#define kiss_fftr_next_fast_size_real(n) \ + (kiss_fft_next_fast_size( ((n)+1)>>1)<<1) + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/edgeimpulse/edge-impulse-sdk/dsp/kissfft/kiss_fftr.cpp b/edgeimpulse/edge-impulse-sdk/dsp/kissfft/kiss_fftr.cpp new file mode 100644 index 0000000..b448730 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/dsp/kissfft/kiss_fftr.cpp @@ -0,0 +1,162 @@ +/* + * Copyright (c) 2003-2010, Mark Borgerding. All rights reserved. + * This file is part of KISS FFT - https://github.com/mborgerding/kissfft + * + * SPDX-License-Identifier: BSD-3-Clause + * See COPYING file for more information. + */ + +#include "edge-impulse-sdk/dsp/kissfft/kiss_fftr.h" +#include "edge-impulse-sdk/dsp/kissfft/_kiss_fft_guts.h" + +struct kiss_fftr_state{ + kiss_fft_cfg substate; + kiss_fft_cpx * tmpbuf; + kiss_fft_cpx * super_twiddles; +#ifdef USE_SIMD + void * pad; +#endif +}; + +kiss_fftr_cfg kiss_fftr_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem,size_t * memallocated) +{ + int i; + kiss_fftr_cfg st = NULL; + size_t subsize = 0, memneeded; + + if (nfft & 1) { + ei_printf("FFT length must be even\n"); + return NULL; + } + nfft >>= 1; + + kiss_fft_alloc (nfft, inverse_fft, NULL, &subsize); + memneeded = sizeof(struct kiss_fftr_state) + subsize + sizeof(kiss_fft_cpx) * ( nfft * 3 / 2); + + if (lenmem == NULL) { + st = (kiss_fftr_cfg) KISS_FFT_MALLOC (memneeded); + } else { + if (*lenmem >= memneeded) + st = (kiss_fftr_cfg) mem; + *lenmem = memneeded; + } + if (!st) + return NULL; + + st->substate = (kiss_fft_cfg) (st + 1); /*just beyond kiss_fftr_state struct */ + st->tmpbuf = (kiss_fft_cpx *) (((char *) st->substate) + subsize); + st->super_twiddles = st->tmpbuf + nfft; + kiss_fft_alloc(nfft, inverse_fft, st->substate, &subsize); + + if (inverse_fft) { + for (i = 0; i < nfft/2; ++i) { + double phase = + 3.14159265358979323846264338327 * ((double) (i+1) / nfft + .5); + kf_cexp (st->super_twiddles+i,phase); + } + } else { + for (i = 0; i < nfft/2; ++i) { + double phase = + -3.14159265358979323846264338327 * ((double) (i+1) / nfft + .5); + kf_cexp (st->super_twiddles+i,phase); + } + } + + if (memallocated != NULL) { + *memallocated = memneeded; + } + + return st; +} + +void kiss_fftr(kiss_fftr_cfg st,const kiss_fft_scalar *timedata,kiss_fft_cpx *freqdata) +{ + /* input buffer timedata is stored row-wise */ + int k,ncfft; + kiss_fft_cpx fpnk,fpk,f1k,f2k,tw,tdc; + + if ( st->substate->inverse) { + ei_printf("kiss fft usage error: improper alloc\n"); + } + + ncfft = 
st->substate->nfft; + + /*perform the parallel fft of two real signals packed in real,imag*/ + kiss_fft( st->substate , (const kiss_fft_cpx*)timedata, st->tmpbuf ); + /* The real part of the DC element of the frequency spectrum in st->tmpbuf + * contains the sum of the even-numbered elements of the input time sequence + * The imag part is the sum of the odd-numbered elements + * + * The sum of tdc.r and tdc.i is the sum of the input time sequence. + * yielding DC of input time sequence + * The difference of tdc.r - tdc.i is the sum of the input (dot product) [1,-1,1,-1... + * yielding Nyquist bin of input time sequence + */ + + tdc.r = st->tmpbuf[0].r; + tdc.i = st->tmpbuf[0].i; + C_FIXDIV(tdc,2); + CHECK_OVERFLOW_OP(tdc.r ,+, tdc.i); + CHECK_OVERFLOW_OP(tdc.r ,-, tdc.i); + freqdata[0].r = tdc.r + tdc.i; + freqdata[ncfft].r = tdc.r - tdc.i; +#ifdef USE_SIMD + freqdata[ncfft].i = freqdata[0].i = _mm_set1_ps(0); +#else + freqdata[ncfft].i = freqdata[0].i = 0; +#endif + + for ( k=1;k <= ncfft/2 ; ++k ) { + fpk = st->tmpbuf[k]; + fpnk.r = st->tmpbuf[ncfft-k].r; + fpnk.i = - st->tmpbuf[ncfft-k].i; + C_FIXDIV(fpk,2); + C_FIXDIV(fpnk,2); + + C_ADD( f1k, fpk , fpnk ); + C_SUB( f2k, fpk , fpnk ); + C_MUL( tw , f2k , st->super_twiddles[k-1]); + + freqdata[k].r = HALF_OF(f1k.r + tw.r); + freqdata[k].i = HALF_OF(f1k.i + tw.i); + freqdata[ncfft-k].r = HALF_OF(f1k.r - tw.r); + freqdata[ncfft-k].i = HALF_OF(tw.i - f1k.i); + } +} + +void kiss_fftri(kiss_fftr_cfg st,const kiss_fft_cpx *freqdata,kiss_fft_scalar *timedata) +{ + /* input buffer timedata is stored row-wise */ + int k, ncfft; + + if (st->substate->inverse == 0) { + ei_printf("kiss fft usage error: improper alloc\n"); + } + + ncfft = st->substate->nfft; + + st->tmpbuf[0].r = freqdata[0].r + freqdata[ncfft].r; + st->tmpbuf[0].i = freqdata[0].r - freqdata[ncfft].r; + C_FIXDIV(st->tmpbuf[0],2); + + for (k = 1; k <= ncfft / 2; ++k) { + kiss_fft_cpx fk, fnkc, fek, fok, tmp; + fk = freqdata[k]; + fnkc.r = freqdata[ncfft - k].r; + fnkc.i = -freqdata[ncfft - k].i; + C_FIXDIV( fk , 2 ); + C_FIXDIV( fnkc , 2 ); + + C_ADD (fek, fk, fnkc); + C_SUB (tmp, fk, fnkc); + C_MUL (fok, tmp, st->super_twiddles[k-1]); + C_ADD (st->tmpbuf[k], fek, fok); + C_SUB (st->tmpbuf[ncfft - k], fek, fok); +#ifdef USE_SIMD + st->tmpbuf[ncfft - k].i *= _mm_set1_ps(-1.0); +#else + st->tmpbuf[ncfft - k].i *= -1; +#endif + } + kiss_fft (st->substate, st->tmpbuf, (kiss_fft_cpx *) timedata); +} diff --git a/edgeimpulse/edge-impulse-sdk/dsp/kissfft/kiss_fftr.h b/edgeimpulse/edge-impulse-sdk/dsp/kissfft/kiss_fftr.h new file mode 100644 index 0000000..49f0fd9 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/dsp/kissfft/kiss_fftr.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2003-2010, Mark Borgerding. All rights reserved. + * This file is part of KISS FFT - https://github.com/mborgerding/kissfft + * + * SPDX-License-Identifier: BSD-3-Clause + * See COPYING file for more information. + */ + +#ifndef KISS_FTR_H +#define KISS_FTR_H + +#include "kiss_fft.h" +#ifdef __cplusplus +extern "C" { +#endif + + +/* + + Real optimized version can save about 45% cpu time vs. complex fft of a real seq. 
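+
+ A minimal usage sketch for the real-input path (nfft must be even; the size
+ 512 and the stack buffers are illustrative only; error checking elided):
+
+     kiss_fftr_cfg cfg = kiss_fftr_alloc(512, 0, NULL, NULL);
+     kiss_fft_scalar timedata[512];         // 512 real input samples
+     kiss_fft_cpx    freqdata[512/2 + 1];   // 257 complex output bins
+     kiss_fftr(cfg, timedata, freqdata);
+     kiss_fftr_free(cfg);
+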
+ + + + */ + +typedef struct kiss_fftr_state *kiss_fftr_cfg; + + +kiss_fftr_cfg kiss_fftr_alloc(int nfft,int inverse_fft,void * mem, size_t * lenmem, size_t * memallocated = NULL); +/* + nfft must be even + + If you don't care to allocate space, use mem = lenmem = NULL +*/ + + +void kiss_fftr(kiss_fftr_cfg cfg,const kiss_fft_scalar *timedata,kiss_fft_cpx *freqdata); +/* + input timedata has nfft scalar points + output freqdata has nfft/2+1 complex points +*/ + +void kiss_fftri(kiss_fftr_cfg cfg,const kiss_fft_cpx *freqdata,kiss_fft_scalar *timedata); +/* + input freqdata has nfft/2+1 complex points + output timedata has nfft scalar points +*/ + +#define kiss_fftr_free KISS_FFT_FREE + +#ifdef __cplusplus +} +#endif +#endif diff --git a/edgeimpulse/edge-impulse-sdk/dsp/kissfft/kissfft.h b/edgeimpulse/edge-impulse-sdk/dsp/kissfft/kissfft.h new file mode 100644 index 0000000..3619c00 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/dsp/kissfft/kissfft.h @@ -0,0 +1,361 @@ +/* + * Copyright (c) 2003-2010, Mark Borgerding. All rights reserved. + * This file is part of KISS FFT - https://github.com/mborgerding/kissfft + * + * SPDX-License-Identifier: BSD-3-Clause + * See COPYING file for more information. + */ + +#ifndef KISSFFT_CLASS_HH +#define KISSFFT_CLASS_HH +#include +#include +#include "edge-impulse-sdk/dsp/ei_vector.h" + + +template +class kissfft +{ + public: + + typedef std::complex cpx_t; + + kissfft( const std::size_t nfft, + const bool inverse ) + :_nfft(nfft) + ,_inverse(inverse) + { + // fill twiddle factors + _twiddles.resize(_nfft); + const scalar_t phinc = (_inverse?2:-2)* std::acos( (scalar_t) -1) / _nfft; + for (std::size_t i=0;i<_nfft;++i) + _twiddles[i] = std::exp( cpx_t(0,i*phinc) ); + + //factorize + //start factoring out 4's, then 2's, then 3,5,7,9,... + std::size_t n= _nfft; + std::size_t p=4; + do { + while (n % p) { + switch (p) { + case 4: p = 2; break; + case 2: p = 3; break; + default: p += 2; break; + } + if (p*p>n) + p = n;// no more factors + } + n /= p; + _stageRadix.push_back(p); + _stageRemainder.push_back(n); + }while(n>1); + } + + + /// Changes the FFT-length and/or the transform direction. + /// + /// @post The @c kissfft object will be in the same state as if it + /// had been newly constructed with the passed arguments. + /// However, the implementation may be faster than constructing a + /// new fft object. + void assign( const std::size_t nfft, + const bool inverse ) + { + if ( nfft != _nfft ) + { + kissfft tmp( nfft, inverse ); // O(n) time. + std::swap( tmp, *this ); // this is O(1) in C++11, O(n) otherwise. + } + else if ( inverse != _inverse ) + { + // conjugate the twiddle factors. + for ( typename ei_vector::iterator it = _twiddles.begin(); + it != _twiddles.end(); ++it ) + it->imag( -it->imag() ); + } + } + + /// Calculates the complex Discrete Fourier Transform. + /// + /// The size of the passed arrays must be passed in the constructor. + /// The sum of the squares of the absolute values in the @c dst + /// array will be @c N times the sum of the squares of the absolute + /// values in the @c src array, where @c N is the size of the array. + /// In other words, the l_2 norm of the resulting array will be + /// @c sqrt(N) times as big as the l_2 norm of the input array. + /// This is also the case when the inverse flag is set in the + /// constructor. Hence when applying the same transform twice, but with + /// the inverse flag changed the second time, then the result will + /// be equal to the original input times @c N. 
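+    ///
+    /// A minimal sketch of a forward complex transform (the size 1024 and the
+    /// use of std::vector here are illustrative only):
+    ///
+    ///   kissfft<float> fft(1024, false);
+    ///   std::vector<std::complex<float>> in(1024), out(1024);
+    ///   fft.transform(in.data(), out.data());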
+ void transform(const cpx_t * fft_in, cpx_t * fft_out, const std::size_t stage = 0, const std::size_t fstride = 1, const std::size_t in_stride = 1) const + { + const std::size_t p = _stageRadix[stage]; + const std::size_t m = _stageRemainder[stage]; + cpx_t * const Fout_beg = fft_out; + cpx_t * const Fout_end = fft_out + p*m; + + if (m==1) { + do{ + *fft_out = *fft_in; + fft_in += fstride*in_stride; + }while(++fft_out != Fout_end ); + }else{ + do{ + // recursive call: + // DFT of size m*p performed by doing + // p instances of smaller DFTs of size m, + // each one takes a decimated version of the input + transform(fft_in, fft_out, stage+1, fstride*p,in_stride); + fft_in += fstride*in_stride; + }while( (fft_out += m) != Fout_end ); + } + + fft_out=Fout_beg; + + // recombine the p smaller DFTs + switch (p) { + case 2: kf_bfly2(fft_out,fstride,m); break; + case 3: kf_bfly3(fft_out,fstride,m); break; + case 4: kf_bfly4(fft_out,fstride,m); break; + case 5: kf_bfly5(fft_out,fstride,m); break; + default: kf_bfly_generic(fft_out,fstride,m,p); break; + } + } + + /// Calculates the Discrete Fourier Transform (DFT) of a real input + /// of size @c 2*N. + /// + /// The 0-th and N-th value of the DFT are real numbers. These are + /// stored in @c dst[0].real() and @c dst[1].imag() respectively. + /// The remaining DFT values up to the index N-1 are stored in + /// @c dst[1] to @c dst[N-1]. + /// The other half of the DFT values can be calculated from the + /// symmetry relation + /// @code + /// DFT(src)[2*N-k] == conj( DFT(src)[k] ); + /// @endcode + /// The same scaling factors as in @c transform() apply. + /// + /// @note For this to work, the types @c scalar_t and @c cpx_t + /// must fulfill the following requirements: + /// + /// For any object @c z of type @c cpx_t, + /// @c reinterpret_cast(z)[0] is the real part of @c z and + /// @c reinterpret_cast(z)[1] is the imaginary part of @c z. + /// For any pointer to an element of an array of @c cpx_t named @c p + /// and any valid array index @c i, @c reinterpret_cast(p)[2*i] + /// is the real part of the complex number @c p[i], and + /// @c reinterpret_cast(p)[2*i+1] is the imaginary part of the + /// complex number @c p[i]. + /// + /// Since C++11, these requirements are guaranteed to be satisfied for + /// @c scalar_ts being @c float, @c double or @c long @c double + /// together with @c cpx_t being @c std::complex. + void transform_real( const scalar_t * const src, + cpx_t * const dst ) const + { + const std::size_t N = _nfft; + if ( N == 0 ) + return; + + // perform complex FFT + transform( reinterpret_cast(src), dst ); + + // post processing for k = 0 and k = N + dst[0] = cpx_t( dst[0].real() + dst[0].imag(), + dst[0].real() - dst[0].imag() ); + + // post processing for all the other k = 1, 2, ..., N-1 + const scalar_t pi = std::acos( (scalar_t) -1); + const scalar_t half_phi_inc = ( _inverse ? pi : -pi ) / N; + const cpx_t twiddle_mul = std::exp( cpx_t(0, half_phi_inc) ); + for ( std::size_t k = 1; 2*k < N; ++k ) + { + const cpx_t w = (scalar_t)0.5 * cpx_t( + dst[k].real() + dst[N-k].real(), + dst[k].imag() - dst[N-k].imag() ); + const cpx_t z = (scalar_t)0.5 * cpx_t( + dst[k].imag() + dst[N-k].imag(), + -dst[k].real() + dst[N-k].real() ); + const cpx_t twiddle = + k % 2 == 0 ? 
+ _twiddles[k/2] : + _twiddles[k/2] * twiddle_mul; + dst[ k] = w + twiddle * z; + dst[N-k] = std::conj( w - twiddle * z ); + } + if ( N % 2 == 0 ) + dst[N/2] = std::conj( dst[N/2] ); + } + + private: + + void kf_bfly2( cpx_t * Fout, const size_t fstride, const std::size_t m) const + { + for (std::size_t k=0;k _scratchbuf.size()) _scratchbuf.resize(p); + + for ( std::size_t u=0; u=_nfft) + twidx-=_nfft; + Fout[ k ] += _scratchbuf[q] * twiddles[twidx]; + } + k += m; + } + } + } + + std::size_t _nfft; + bool _inverse; + ei_vector _twiddles; + ei_vector _stageRadix; + ei_vector _stageRemainder; + mutable ei_vector _scratchbuf; +}; +#endif diff --git a/edgeimpulse/edge-impulse-sdk/dsp/memory.cpp b/edgeimpulse/edge-impulse-sdk/dsp/memory.cpp new file mode 100644 index 0000000..299694a --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/dsp/memory.cpp @@ -0,0 +1,21 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "memory.hpp" + +size_t ei_memory_in_use = 0; +size_t ei_memory_peak_use = 0; diff --git a/edgeimpulse/edge-impulse-sdk/dsp/memory.hpp b/edgeimpulse/edge-impulse-sdk/dsp/memory.hpp new file mode 100644 index 0000000..2ce95a7 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/dsp/memory.hpp @@ -0,0 +1,178 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef _EIDSP_MEMORY_H_ +#define _EIDSP_MEMORY_H_ + +// clang-format off +#include +#include +#include "../porting/ei_classifier_porting.h" +#include "edge-impulse-sdk/classifier/ei_aligned_malloc.h" +#include "config.hpp" + +extern size_t ei_memory_in_use; +extern size_t ei_memory_peak_use; + +#if EIDSP_PRINT_ALLOCATIONS == 1 +#define ei_dsp_printf printf +#else +#define ei_dsp_printf (void) +#endif + +typedef std::unique_ptr ei_unique_ptr_t; +#define EI_ALLOCATE_AUTO_POINTER(ptr, size) \ + ptr = static_cast(ei_calloc(size,sizeof(*ptr))); \ + ei_unique_ptr_t __ptr__(ptr,ei_free); + +#define EI_ERR_AND_RETURN_ON_NULL(ptr,code) \ + if( ! (ptr) ) { \ + ei_printf("Null check failed\n"); \ + return code; \ + } + +namespace ei { + +/** + * These are macros used to track allocations when running DSP processes. + * Enable memory tracking through the EIDSP_TRACK_ALLOCATIONS macro. + */ + +#if EIDSP_TRACK_ALLOCATIONS + /** + * Register a manual allocation (malloc or calloc). 
+ * Typically you want to use ei::matrix_t types, as they keep track automatically. + * @param bytes Number of bytes allocated + */ + #define ei_dsp_register_alloc_internal(fn, file, line, bytes, ptr) \ + ei_memory_in_use += bytes; \ + if (ei_memory_in_use > ei_memory_peak_use) { \ + ei_memory_peak_use = ei_memory_in_use; \ + } \ + ei_dsp_printf("alloc %lu bytes (in_use=%lu, peak=%lu) (%s@ %s:%d) %p\n", \ + (unsigned long)bytes, (unsigned long)ei_memory_in_use, (unsigned long)ei_memory_peak_use, fn, file, line, ptr); + + /** + * Register a matrix allocation. Don't call this function yourself, + * matrices already track this automatically. + * @param rows Number of rows + * @param cols Number of columns + * @param type_size Size of the data type + */ + #define ei_dsp_register_matrix_alloc_internal(fn, file, line, rows, cols, type_size, ptr) \ + ei_memory_in_use += (rows * cols * type_size); \ + if (ei_memory_in_use > ei_memory_peak_use) { \ + ei_memory_peak_use = ei_memory_in_use; \ + } \ + ei_dsp_printf("alloc matrix %lu x %lu = %lu bytes (in_use=%lu, peak=%lu) (%s@ %s:%d) %p\n", \ + (unsigned long)rows, (unsigned long)cols, (unsigned long)(rows * cols * type_size), (unsigned long)ei_memory_in_use, \ + (unsigned long)ei_memory_peak_use, fn, file, line, ptr); + + /** + * Register free'ing manually allocated memory (allocated through malloc/calloc) + * @param bytes Number of bytes free'd + */ + #define ei_dsp_register_free_internal(fn, file, line, bytes, ptr) \ + ei_memory_in_use -= bytes; \ + ei_dsp_printf("free %lu bytes (in_use=%lu, peak=%lu) (%s@ %s:%d) %p\n", \ + (unsigned long)bytes, (unsigned long)ei_memory_in_use, (unsigned long)ei_memory_peak_use, fn, file, line, ptr); + + /** + * Register a matrix free. Don't call this function yourself, + * matrices already track this automatically. + * @param rows Number of rows + * @param cols Number of columns + * @param type_size Size of the data type + */ + #define ei_dsp_register_matrix_free_internal(fn, file, line, rows, cols, type_size, ptr) \ + ei_memory_in_use -= (rows * cols * type_size); \ + ei_dsp_printf("free matrix %lu x %lu = %lu bytes (in_use=%lu, peak=%lu) (%s@ %s:%d) %p\n", \ + (unsigned long)rows, (unsigned long)cols, (unsigned long)(rows * cols * type_size), \ + (unsigned long)ei_memory_in_use, (unsigned long)ei_memory_peak_use, fn, file, line, ptr); + + #define ei_dsp_register_alloc(...) ei_dsp_register_alloc_internal(__func__, __FILE__, __LINE__, __VA_ARGS__) + #define ei_dsp_register_matrix_alloc(...) ei_dsp_register_matrix_alloc_internal(__func__, __FILE__, __LINE__, __VA_ARGS__) + #define ei_dsp_register_free(...) ei_dsp_register_free_internal(__func__, __FILE__, __LINE__, __VA_ARGS__) + #define ei_dsp_register_matrix_free(...) ei_dsp_register_matrix_free_internal(__func__, __FILE__, __LINE__, __VA_ARGS__) + #define ei_dsp_malloc(...) memory::ei_wrapped_malloc(__func__, __FILE__, __LINE__, __VA_ARGS__) + #define ei_dsp_calloc(...) memory::ei_wrapped_calloc(__func__, __FILE__, __LINE__, __VA_ARGS__) + #define ei_dsp_free(...) memory::ei_wrapped_free(__func__, __FILE__, __LINE__, __VA_ARGS__) + #define EI_DSP_MATRIX(name, ...) matrix_t name(__VA_ARGS__, NULL, __func__, __FILE__, __LINE__); if (!name.buffer) { EIDSP_ERR(EIDSP_OUT_OF_MEM); } + #define EI_DSP_MATRIX_B(name, ...) matrix_t name(__VA_ARGS__, __func__, __FILE__, __LINE__); if (!name.buffer) { EIDSP_ERR(EIDSP_OUT_OF_MEM); } + #define EI_DSP_QUANTIZED_MATRIX(name, ...) 
quantized_matrix_t name(__VA_ARGS__, NULL, __func__, __FILE__, __LINE__); if (!name.buffer) { EIDSP_ERR(EIDSP_OUT_OF_MEM); } + #define EI_DSP_QUANTIZED_MATRIX_B(name, ...) quantized_matrix_t name(__VA_ARGS__, __func__, __FILE__, __LINE__); if (!name.buffer) { EIDSP_ERR(EIDSP_OUT_OF_MEM); } +#else + #define ei_dsp_register_alloc(...) (void)0 + #define ei_dsp_register_matrix_alloc(...) (void)0 + #define ei_dsp_register_free(...) (void)0 + #define ei_dsp_register_matrix_free(...) (void)0 + #define ei_dsp_malloc ei_malloc + #define ei_dsp_calloc ei_calloc + #define ei_dsp_free(ptr, size) ei_free(ptr) + #define EI_DSP_MATRIX(name, ...) matrix_t name(__VA_ARGS__); if (!name.buffer) { EIDSP_ERR(EIDSP_OUT_OF_MEM); } + #define EI_DSP_MATRIX_B(name, ...) matrix_t name(__VA_ARGS__); if (!name.buffer) { EIDSP_ERR(EIDSP_OUT_OF_MEM); } + #define EI_DSP_QUANTIZED_MATRIX(name, ...) quantized_matrix_t name(__VA_ARGS__); if (!name.buffer) { EIDSP_ERR(EIDSP_OUT_OF_MEM); } + #define EI_DSP_QUANTIZED_MATRIX_B(name, ...) quantized_matrix_t name(__VA_ARGS__); if (!name.buffer) { EIDSP_ERR(EIDSP_OUT_OF_MEM); } +#endif + +#if EIDSP_TRACK_ALLOCATIONS +class memory { + + +public: + /** + * Allocate a new block of memory + * @param size The size of the memory block, in bytes. + */ + static void *ei_wrapped_malloc(const char *fn, const char *file, int line, size_t size) { + void *ptr = ei_malloc(size); + if (ptr) { + ei_dsp_register_alloc_internal(fn, file, line, size, ptr); + } + return ptr; + } + + /** + * Allocates a block of memory for an array of num elements, each of them size bytes long, + * and initializes all its bits to zero. + * @param num Number of elements to allocate + * @param size Size of each element + */ + static void *ei_wrapped_calloc(const char *fn, const char *file, int line, size_t num, size_t size) { + void *ptr = ei_calloc(num, size); + if (ptr) { + ei_dsp_register_alloc_internal(fn, file, line, num * size, ptr); + } + return ptr; + } + + /** + * Deallocate memory previously allocated by a call to calloc, malloc, or realloc. + * @param ptr Pointer to a memory block previously allocated with malloc, calloc or realloc. + * @param size Size of the block of memory previously allocated. + */ + static void ei_wrapped_free(const char *fn, const char *file, int line, void *ptr, size_t size) { + ei_free(ptr); + ei_dsp_register_free_internal(fn, file, line, size, ptr); + } +}; +#endif // #if EIDSP_TRACK_ALLOCATIONS + +} // namespace ei + +// clang-format on +#endif // _EIDSP_MEMORY_H_ diff --git a/edgeimpulse/edge-impulse-sdk/dsp/numpy.hpp b/edgeimpulse/edge-impulse-sdk/dsp/numpy.hpp new file mode 100644 index 0000000..7bf1f73 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/dsp/numpy.hpp @@ -0,0 +1,2633 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef _EIDSP_NUMPY_H_ +#define _EIDSP_NUMPY_H_ + +// it's valid to include the SDK without a model, but there's information that we need +// in model_metadata.h (like the FFT tables used). +// if the compiler does not support the __has_include directive we'll assume that the +// file exists. +#ifndef __has_include +#define __has_include 1 +#endif // __has_include + +#include +#include +#include +#include +#include "ei_vector.h" +#include +#include "numpy_types.h" +#include "config.hpp" +#include "returntypes.hpp" +#include "memory.hpp" +#include "ei_utils.h" +#include "dct/fast-dct-fft.h" +#include "kissfft/kiss_fftr.h" +#if __has_include("model-parameters/model_metadata.h") +#include "model-parameters/model_metadata.h" +#endif +#if EIDSP_USE_CMSIS_DSP +#include "edge-impulse-sdk/CMSIS/DSP/Include/arm_math.h" +#include "edge-impulse-sdk/CMSIS/DSP/Include/arm_const_structs.h" +#endif + +// For the following CMSIS includes, we want to use the C fallback, so include whether or not we set the CMSIS flag +#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/statistics_functions.h" + +#ifdef __MBED__ +#include "mbed.h" +#else +#include +#endif // __MBED__ + +#define EI_MAX_UINT16 65535 + +namespace ei { + +using fvec = ei_vector; +using ivec = ei_vector; + +// clang-format off +// lookup table for quantized values between 0.0f and 1.0f +static constexpr float quantized_values_one_zero[] = { (0.0f / 1.0f), (1.0f / 100.0f), (2.0f / 100.0f), (3.0f / 100.0f), (4.0f / 100.0f), (1.0f / 22.0f), (1.0f / 21.0f), (1.0f / 20.0f), (1.0f / 19.0f), (1.0f / 18.0f), (1.0f / 17.0f), (6.0f / 100.0f), (1.0f / 16.0f), (1.0f / 15.0f), (7.0f / 100.0f), (1.0f / 14.0f), (1.0f / 13.0f), (8.0f / 100.0f), (1.0f / 12.0f), (9.0f / 100.0f), (1.0f / 11.0f), (2.0f / 21.0f), (1.0f / 10.0f), (2.0f / 19.0f), (11.0f / 100.0f), (1.0f / 9.0f), (2.0f / 17.0f), (12.0f / 100.0f), (1.0f / 8.0f), (13.0f / 100.0f), (2.0f / 15.0f), (3.0f / 22.0f), (14.0f / 100.0f), (1.0f / 7.0f), (3.0f / 20.0f), (2.0f / 13.0f), (3.0f / 19.0f), (16.0f / 100.0f), (1.0f / 6.0f), (17.0f / 100.0f), (3.0f / 17.0f), (18.0f / 100.0f), (2.0f / 11.0f), (3.0f / 16.0f), (19.0f / 100.0f), (4.0f / 21.0f), (1.0f / 5.0f), (21.0f / 100.0f), (4.0f / 19.0f), (3.0f / 14.0f), (22.0f / 100.0f), (2.0f / 9.0f), (5.0f / 22.0f), (23.0f / 100.0f), (3.0f / 13.0f), (4.0f / 17.0f), (5.0f / 21.0f), (24.0f / 100.0f), (1.0f / 4.0f), (26.0f / 100.0f), (5.0f / 19.0f), (4.0f / 15.0f), (27.0f / 100.0f), (3.0f / 11.0f), (5.0f / 18.0f), (28.0f / 100.0f), (2.0f / 7.0f), (29.0f / 100.0f), (5.0f / 17.0f), (3.0f / 10.0f), (4.0f / 13.0f), (31.0f / 100.0f), (5.0f / 16.0f), (6.0f / 19.0f), (7.0f / 22.0f), (32.0f / 100.0f), (33.0f / 100.0f), (1.0f / 3.0f), (34.0f / 100.0f), (7.0f / 20.0f), (6.0f / 17.0f), (5.0f / 14.0f), (36.0f / 100.0f), (4.0f / 11.0f), (7.0f / 19.0f), (37.0f / 100.0f), (3.0f / 8.0f), (38.0f / 100.0f), (8.0f / 21.0f), (5.0f / 13.0f), (7.0f / 18.0f), (39.0f / 100.0f), (2.0f / 5.0f), (9.0f / 22.0f), (41.0f / 100.0f), (7.0f / 17.0f), (5.0f / 12.0f), (42.0f / 100.0f), (8.0f / 19.0f), (3.0f / 7.0f), (43.0f / 100.0f), (7.0f / 16.0f), (44.0f / 100.0f), (4.0f / 9.0f), (9.0f / 20.0f), (5.0f / 11.0f), (46.0f / 100.0f), (6.0f / 13.0f), (7.0f / 15.0f), (47.0f / 100.0f), (8.0f / 17.0f), (9.0f / 19.0f), (10.0f / 21.0f), (48.0f / 100.0f), (49.0f / 100.0f), (1.0f / 2.0f), (51.0f / 100.0f), (52.0f / 100.0f), (11.0f / 21.0f), (10.0f / 19.0f), (9.0f / 17.0f), (53.0f / 100.0f), (8.0f / 15.0f), (7.0f / 13.0f), (54.0f / 100.0f), (6.0f / 11.0f), (11.0f / 
20.0f), (5.0f / 9.0f), (56.0f / 100.0f), (9.0f / 16.0f), (57.0f / 100.0f), (4.0f / 7.0f), (11.0f / 19.0f), (58.0f / 100.0f), (7.0f / 12.0f), (10.0f / 17.0f), (59.0f / 100.0f), (13.0f / 22.0f), (3.0f / 5.0f), (61.0f / 100.0f), (11.0f / 18.0f), (8.0f / 13.0f), (13.0f / 21.0f), (62.0f / 100.0f), (5.0f / 8.0f), (63.0f / 100.0f), (12.0f / 19.0f), (7.0f / 11.0f), (64.0f / 100.0f), (9.0f / 14.0f), (11.0f / 17.0f), (13.0f / 20.0f), (66.0f / 100.0f), (2.0f / 3.0f), (67.0f / 100.0f), (68.0f / 100.0f), (15.0f / 22.0f), (13.0f / 19.0f), (11.0f / 16.0f), (69.0f / 100.0f), (9.0f / 13.0f), (7.0f / 10.0f), (12.0f / 17.0f), (71.0f / 100.0f), (5.0f / 7.0f), (72.0f / 100.0f), (13.0f / 18.0f), (8.0f / 11.0f), (73.0f / 100.0f), (11.0f / 15.0f), (14.0f / 19.0f), (74.0f / 100.0f), (3.0f / 4.0f), (76.0f / 100.0f), (16.0f / 21.0f), (13.0f / 17.0f), (10.0f / 13.0f), (77.0f / 100.0f), (17.0f / 22.0f), (7.0f / 9.0f), (78.0f / 100.0f), (11.0f / 14.0f), (15.0f / 19.0f), (79.0f / 100.0f), (4.0f / 5.0f), (17.0f / 21.0f), (81.0f / 100.0f), (13.0f / 16.0f), (9.0f / 11.0f), (82.0f / 100.0f), (14.0f / 17.0f), (83.0f / 100.0f), (5.0f / 6.0f), (84.0f / 100.0f), (16.0f / 19.0f), (11.0f / 13.0f), (17.0f / 20.0f), (6.0f / 7.0f), (86.0f / 100.0f), (19.0f / 22.0f), (13.0f / 15.0f), (87.0f / 100.0f), (7.0f / 8.0f), (88.0f / 100.0f), (15.0f / 17.0f), (8.0f / 9.0f), (89.0f / 100.0f), (17.0f / 19.0f), (9.0f / 10.0f), (19.0f / 21.0f), (10.0f / 11.0f), (91.0f / 100.0f), (11.0f / 12.0f), (92.0f / 100.0f), (12.0f / 13.0f), (13.0f / 14.0f), (93.0f / 100.0f), (14.0f / 15.0f), (15.0f / 16.0f), (94.0f / 100.0f), (16.0f / 17.0f), (17.0f / 18.0f), (18.0f / 19.0f), (19.0f / 20.0f), (20.0f / 21.0f), (21.0f / 22.0f), (96.0f / 100.0f), (97.0f / 100.0f), (98.0f / 100.0f), (99.0f / 100.0f), (1.0f / 1.0f) , + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f }; +// clang-format on + +class numpy { +public: + + static float sqrt(float x) { +#if EIDSP_USE_CMSIS_DSP + float temp; + arm_sqrt_f32(x, &temp); + return temp; +#else + return sqrtf(x); +#endif + } + + /** + * Roll array elements along a given axis. + * Elements that roll beyond the last position are re-introduced at the first. + * @param input_array + * @param input_array_size + * @param shift The number of places by which elements are shifted. + * @returns EIDSP_OK if OK + */ + static int roll(float *input_array, size_t input_array_size, int shift) { + if (shift < 0) { + shift = input_array_size + shift; + } + + if (shift == 0) { + return EIDSP_OK; + } + + // so we need to allocate a buffer of the size of shift... + EI_DSP_MATRIX(shift_matrix, 1, shift); + + // we copy from the end of the buffer into the shift buffer + memcpy(shift_matrix.buffer, input_array + input_array_size - shift, shift * sizeof(float)); + + // now we do a memmove to shift the array + memmove(input_array + shift, input_array, (input_array_size - shift) * sizeof(float)); + + // and copy the shift buffer back to the beginning of the array + memcpy(input_array, shift_matrix.buffer, shift * sizeof(float)); + + return EIDSP_OK; + } + + /** + * Roll array elements along a given axis. + * Elements that roll beyond the last position are re-introduced at the first. + * @param input_array + * @param input_array_size + * @param shift The number of places by which elements are shifted. 
+ * @returns EIDSP_OK if OK + */ + static int roll(int *input_array, size_t input_array_size, int shift) { + if (shift < 0) { + shift = input_array_size + shift; + } + + if (shift == 0) { + return EIDSP_OK; + } + + // so we need to allocate a buffer of the size of shift... + EI_DSP_MATRIX(shift_matrix, 1, shift); + + // we copy from the end of the buffer into the shift buffer + memcpy(shift_matrix.buffer, input_array + input_array_size - shift, shift * sizeof(int)); + + // now we do a memmove to shift the array + memmove(input_array + shift, input_array, (input_array_size - shift) * sizeof(int)); + + // and copy the shift buffer back to the beginning of the array + memcpy(input_array, shift_matrix.buffer, shift * sizeof(int)); + + return EIDSP_OK; + } + + /** + * Roll array elements along a given axis. + * Elements that roll beyond the last position are re-introduced at the first. + * @param input_array + * @param input_array_size + * @param shift The number of places by which elements are shifted. + * @returns EIDSP_OK if OK + */ + static int roll(int16_t *input_array, size_t input_array_size, int shift) { + if (shift < 0) { + shift = input_array_size + shift; + } + + if (shift == 0) { + return EIDSP_OK; + } + + // so we need to allocate a buffer of the size of shift... + EI_DSP_MATRIX(shift_matrix, 1, shift); + + // we copy from the end of the buffer into the shift buffer + memcpy(shift_matrix.buffer, input_array + input_array_size - shift, shift * sizeof(int16_t)); + + // now we do a memmove to shift the array + memmove(input_array + shift, input_array, (input_array_size - shift) * sizeof(int16_t)); + + // and copy the shift buffer back to the beginning of the array + memcpy(input_array, shift_matrix.buffer, shift * sizeof(int16_t)); + + return EIDSP_OK; + } + + static float sum(float *input_array, size_t input_array_size) { + float res = 0.0f; + for (size_t ix = 0; ix < input_array_size; ix++) { + res += input_array[ix]; + } + return res; + } + + /** + * Multiply two matrices (MxN * NxK matrix) + * @param matrix1 Pointer to matrix1 (MxN) + * @param matrix2 Pointer to matrix2 (NxK) + * @param out_matrix Pointer to out matrix (MxK) + * @returns EIDSP_OK if OK + */ + static int dot(matrix_t *matrix1, matrix_t *matrix2, matrix_t *out_matrix) { + if (matrix1->cols != matrix2->rows) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + // no. 
of rows in matrix1 determines the + if (matrix1->rows != out_matrix->rows || matrix2->cols != out_matrix->cols) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + +#if EIDSP_USE_CMSIS_DSP + if (matrix1->rows > EI_MAX_UINT16 || matrix1->cols > EI_MAX_UINT16 || matrix2->rows > EI_MAX_UINT16 || + matrix2->cols > EI_MAX_UINT16 || out_matrix->rows > EI_MAX_UINT16 || out_matrix->cols > EI_MAX_UINT16) { + return EIDSP_NARROWING; + } + + const arm_matrix_instance_f32 m1 = { static_cast(matrix1->rows), static_cast(matrix1->cols), matrix1->buffer }; + const arm_matrix_instance_f32 m2 = { static_cast(matrix2->rows), static_cast(matrix2->cols), matrix2->buffer }; + arm_matrix_instance_f32 mo = { static_cast(out_matrix->rows), static_cast(out_matrix->cols), out_matrix->buffer }; + int status = arm_mat_mult_f32(&m1, &m2, &mo); + if (status != ARM_MATH_SUCCESS) { + EIDSP_ERR(status); + } +#else + memset(out_matrix->buffer, 0, out_matrix->rows * out_matrix->cols * sizeof(float)); + + for (size_t i = 0; i < matrix1->rows; i++) { + dot_by_row(i, + matrix1->buffer + (i * matrix1->cols), + matrix1->cols, + matrix2, + out_matrix); + } +#endif + + return EIDSP_OK; + } + + /** + * Multiply two matrices (MxN * NxK matrix) + * @param matrix1 Pointer to matrix1 (MxN) + * @param matrix2 Pointer to quantized matrix2 (NxK) + * @param out_matrix Pointer to out matrix (MxK) + * @returns EIDSP_OK if OK + */ + static int dot(matrix_t *matrix1, + quantized_matrix_t *matrix2, + matrix_t *out_matrix) + { + if (matrix1->cols != matrix2->rows) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + // no. of rows in matrix1 determines the + if (matrix1->rows != out_matrix->rows || matrix2->cols != out_matrix->cols) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + memset(out_matrix->buffer, 0, out_matrix->rows * out_matrix->cols * sizeof(float)); + + for (size_t i = 0; i < matrix1->rows; i++) { + dot_by_row(i, + matrix1->buffer + (i * matrix1->cols), + matrix1->cols, + matrix2, + out_matrix); + } + + return EIDSP_OK; + } + + /** + * Multiply two matrices lazily per row in matrix 1 (MxN * NxK matrix) + * @param i matrix1 row index + * @param row matrix1 row + * @param matrix1_cols matrix1 row size (1xN) + * @param matrix2 Pointer to matrix2 (NxK) + * @param out_matrix Pointer to out matrix (MxK) + * @returns EIDSP_OK if OK + */ + static int dot_by_row(int i, float *row, uint32_t matrix1_cols, matrix_t *matrix2, matrix_t *out_matrix) { + if (matrix1_cols != matrix2->rows) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + +#if EIDSP_USE_CMSIS_DSP + if (matrix1_cols > EI_MAX_UINT16 || matrix2->rows > EI_MAX_UINT16 || matrix2->cols > EI_MAX_UINT16 || + out_matrix->cols > EI_MAX_UINT16) { + return EIDSP_NARROWING; + } + + const arm_matrix_instance_f32 m1 = { 1, static_cast(matrix1_cols), row }; + const arm_matrix_instance_f32 m2 = { static_cast(matrix2->rows), static_cast(matrix2->cols), matrix2->buffer }; + arm_matrix_instance_f32 mo = { 1, static_cast(out_matrix->cols), out_matrix->buffer + (i * out_matrix->cols) }; + int status = arm_mat_mult_f32(&m1, &m2, &mo); + if (status != ARM_MATH_SUCCESS) { + EIDSP_ERR(status); + } +#else + for (size_t j = 0; j < matrix2->cols; j++) { + float tmp = 0.0f; + for (size_t k = 0; k < matrix1_cols; k++) { + tmp += row[k] * matrix2->buffer[k * matrix2->cols + j]; + } + out_matrix->buffer[i * matrix2->cols + j] += tmp; + } +#endif + + return EIDSP_OK; + } + + /** + * Multiply two matrices lazily per row in matrix 1 (MxN * NxK matrix) + * @param i matrix1 row index + * @param row matrix1 row + * @param 
matrix1_cols matrix1 row size + * @param matrix2 Pointer to matrix2 (NxK) + * @param out_matrix Pointer to out matrix (MxK) + * @returns EIDSP_OK if OK + */ + static int dot_by_row(int i, float *row, size_t matrix1_cols, + quantized_matrix_t *matrix2, matrix_t *out_matrix) + { + if (matrix1_cols != matrix2->rows) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + for (uint16_t j = 0; j < matrix2->cols; j++) { + float tmp = 0.0; + for (uint16_t k = 0; k < matrix1_cols; k++) { + uint8_t u8 = matrix2->buffer[k * matrix2->cols + j]; + if (u8) { // this matrix appears to be very sparsely populated + tmp += row[k] * quantized_values_one_zero[u8]; + } + } + out_matrix->buffer[i * matrix2->cols + j] = tmp; + } + + return EIDSP_OK; + } + + static void transpose_in_place(matrix_t *matrix) { + size_t size = matrix->cols * matrix->rows - 1; + float temp; // temp for swap + size_t next; // next item to swap + size_t cycleBegin; // index of start of cycle + size_t i; // location in matrix + size_t all_done_mark = 1; + ei_vector done(size+1,false); + + i = 1; // Note that matrix[0] and last element of matrix won't move + while (1) + { + cycleBegin = i; + temp = matrix->buffer[i]; + do + { + size_t col = i % matrix->cols; + size_t row = i / matrix->cols; + // swap row and col to make new idx, b/c we want to know where in the transposed matrix + next = col*matrix->rows + row; + float temp2 = matrix->buffer[next]; + matrix->buffer[next] = temp; + temp = temp2; + done[next] = true; + i = next; + } + while (i != cycleBegin); + + // start next cycle by find next not done + for (i = all_done_mark; done[i]; i++) { + all_done_mark++; // move the high water mark so we don't look again + if(i>=size) { goto LOOP_END; } + } + } + LOOP_END: + // finally, swap the row and column dimensions + std::swap(matrix->rows, matrix->cols); + } + + /** + * Transpose an array, souce is destination (from MxN to NxM) + * Note: this temporary allocates a copy of the matrix on the heap. 
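+     * A typical call site (the features matrix name is illustrative):
+     *
+     *   numpy::transpose_in_place(&features);   // preferred replacement
+     *   numpy::transpose(&features);            // this overload; allocates a temporary copy
+     *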
+ * @param matrix + * @param rows + * @param columns + * @deprecated You probably want to use transpose_in_place + * @returns EIDSP_OK if OK + */ + static int transpose(matrix_t *matrix) { + int r = transpose(matrix->buffer, matrix->cols, matrix->rows); + if (r != 0) { + return r; + } + + uint16_t old_rows = matrix->rows; + uint16_t old_cols = matrix->cols; + + matrix->rows = old_cols; + matrix->cols = old_rows; + + return EIDSP_OK; + } + + /** + * Transpose an array, source is destination (from MxN to NxM) + * @param matrix + * @param rows + * @param columns + * @deprecated You probably want to use transpose_in_place + * @returns EIDSP_OK if OK + */ + static int transpose(float *matrix, int rows, int columns) { + EI_DSP_MATRIX(temp_matrix, rows, columns); + if (!temp_matrix.buffer) { + EIDSP_ERR(EIDSP_OUT_OF_MEM); + } + +#if EIDSP_USE_CMSIS_DSP + if (rows > EI_MAX_UINT16 || columns > EI_MAX_UINT16) { + return EIDSP_NARROWING; + } + + const arm_matrix_instance_f32 i_m = { + static_cast(columns), + static_cast(rows), + matrix + }; + arm_matrix_instance_f32 o_m = { + static_cast(rows), + static_cast(columns), + temp_matrix.buffer + }; + arm_status status = arm_mat_trans_f32(&i_m, &o_m); + if (status != ARM_MATH_SUCCESS) { + return status; + } +#else + for (int j = 0; j < rows; j++){ + for (int i = 0; i < columns; i++){ + temp_matrix.buffer[j * columns + i] = matrix[i * rows + j]; + } + } +#endif + + memcpy(matrix, temp_matrix.buffer, rows * columns * sizeof(float)); + + return EIDSP_OK; + } + + /** + * Transpose an array in place (from MxN to NxM) + * Note: this temporary allocates a copy of the matrix on the heap. + * @param matrix + * @param rows + * @param columns + * @returns EIDSP_OK if OK + */ + static int transpose(quantized_matrix_t *matrix) { + int r = transpose(matrix->buffer, matrix->cols, matrix->rows); + if (r != 0) { + return r; + } + + uint16_t old_rows = matrix->rows; + uint16_t old_cols = matrix->cols; + + matrix->rows = old_cols; + matrix->cols = old_rows; + + return EIDSP_OK; + } + + /** + * Transpose an array in place (from MxN to NxM) + * @param matrix + * @param rows + * @param columns + * @returns EIDSP_OK if OK + */ + static int transpose(uint8_t *matrix, int rows, int columns) { + // dequantization function is not used actually... + EI_DSP_QUANTIZED_MATRIX(temp_matrix, rows, columns, &dequantize_zero_one); + if (!temp_matrix.buffer) { + EIDSP_ERR(EIDSP_OUT_OF_MEM); + } + + for (int j = 0; j < rows; j++){ + for (int i = 0; i < columns; i++){ + temp_matrix.buffer[j * columns + i] = matrix[i * rows + j]; + } + } + + memcpy(matrix, temp_matrix.buffer, rows * columns * sizeof(uint8_t)); + + return EIDSP_OK; + } + + /** + * Return the Discrete Cosine Transform of arbitrary type sequence 2. + * @param input Input array (of size N) + * @param N number of items in input and output array + * @returns EIDSP_OK if OK + */ + static int dct2(float *input, size_t N, DCT_NORMALIZATION_MODE normalization = DCT_NORMALIZATION_NONE) { + if (N == 0) { + return EIDSP_OK; + } + + int ret = ei::dct::transform(input, N); + if (ret != EIDSP_OK) { + EIDSP_ERR(ret); + } + + // for some reason the output is 2x too low... + for (size_t ix = 0; ix < N; ix++) { + input[ix] *= 2; + } + + if (normalization == DCT_NORMALIZATION_ORTHO) { + input[0] = input[0] * sqrt(1.0f / static_cast(4 * N)); + for (size_t ix = 1; ix < N; ix++) { + input[ix] = input[ix] * sqrt(1.0f / static_cast(2 * N)); + } + } + + return EIDSP_OK; + } + + /** + * Discrete Cosine Transform of arbitrary type sequence 2 on a matrix. 
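+     * Each row is transformed in place by the one-dimensional dct2() above.
+     * A minimal usage sketch (the 1x32 shape is illustrative; EI_DSP_MATRIX is
+     * the allocation helper from memory.hpp):
+     *
+     *   EI_DSP_MATRIX(features, 1, 32);
+     *   // ... fill features.buffer with 32 values ...
+     *   numpy::dct2(&features, DCT_NORMALIZATION_ORTHO);
+     *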
+ * @param matrix + * @returns EIDSP_OK if OK + */ + static int dct2(matrix_t *matrix, DCT_NORMALIZATION_MODE normalization = DCT_NORMALIZATION_NONE) { + for (size_t row = 0; row < matrix->rows; row++) { + int r = dct2(matrix->buffer + (row * matrix->cols), matrix->cols, normalization); + if (r != EIDSP_OK) { + return r; + } + } + + return EIDSP_OK; + } + + /** + * Quantize a float value between zero and one + * @param value Float value + */ + static uint8_t quantize_zero_one(float value) { + const size_t length = sizeof(quantized_values_one_zero) / sizeof(float); + + // look in the table + for (size_t ix = 0; ix < length; ix++) { + if (quantized_values_one_zero[ix] == value) return ix; + } + + // no match? + + if (value < quantized_values_one_zero[0]) { + return quantized_values_one_zero[0]; + } + if (value > quantized_values_one_zero[length - 1]) { + return quantized_values_one_zero[length - 1]; + } + + int lo = 0; + int hi = length - 1; + + while (lo <= hi) { + int mid = (hi + lo) / 2; + + if (value < quantized_values_one_zero[mid]) { + hi = mid - 1; + } else if (value > quantized_values_one_zero[mid]) { + lo = mid + 1; + } else { + return quantized_values_one_zero[mid]; + } + } + + // lo == hi + 1 + return (quantized_values_one_zero[lo] - value) < (value - quantized_values_one_zero[hi]) ? + lo : + hi; + } + + /** + * Dequantize a float value between zero and one + * @param value + */ + static float dequantize_zero_one(uint8_t value) { + return quantized_values_one_zero[value]; + } + + /** + * Pad an array. + * Pads with the reflection of the vector mirrored along the edge of the array. + * @param input Input matrix (MxN) + * @param output Output matrix of size (M+pad_before+pad_after x N) + * @param pad_before Number of items to pad before + * @param pad_after Number of items to pad after + * @returns 0 if OK + */ + static int pad_1d_symmetric(matrix_t *input, matrix_t *output, uint16_t pad_before, uint16_t pad_after) { + if (output->cols != input->cols) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + if (output->rows != input->rows + pad_before + pad_after) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + if (input->rows == 0) { + EIDSP_ERR(EIDSP_INPUT_MATRIX_EMPTY); + } + + uint32_t pad_before_index = 0; + bool pad_before_direction_up = true; + + for (int32_t ix = pad_before - 1; ix >= 0; ix--) { + memcpy(output->buffer + (input->cols * ix), + input->buffer + (pad_before_index * input->cols), + input->cols * sizeof(float)); + + if (pad_before_index == 0 && !pad_before_direction_up) { + pad_before_direction_up = true; + } + else if (pad_before_index == input->rows - 1 && pad_before_direction_up) { + pad_before_direction_up = false; + } + else if (pad_before_direction_up) { + pad_before_index++; + } + else { + pad_before_index--; + } + } + + memcpy(output->buffer + (input->cols * pad_before), + input->buffer, + input->rows * input->cols * sizeof(float)); + + int32_t pad_after_index = input->rows - 1; + bool pad_after_direction_up = false; + + for (int32_t ix = 0; ix < pad_after; ix++) { + memcpy(output->buffer + (input->cols * (ix + pad_before + input->rows)), + input->buffer + (pad_after_index * input->cols), + input->cols * sizeof(float)); + + if (pad_after_index == 0 && !pad_after_direction_up) { + pad_after_direction_up = true; + } + else if (pad_after_index == static_cast(input->rows) - 1 && pad_after_direction_up) { + pad_after_direction_up = false; + } + else if (pad_after_direction_up) { + pad_after_index++; + } + else { + pad_after_index--; + } + } + + return EIDSP_OK; + 
} + + /** + * Scale a matrix in place + * @param matrix + * @param scale + * @returns 0 if OK + */ + static int scale(matrix_t *matrix, float scale) { + if (scale == 1.0f) return EIDSP_OK; + +#if EIDSP_USE_CMSIS_DSP + if (matrix->rows > EI_MAX_UINT16 || matrix->cols > EI_MAX_UINT16) { + return EIDSP_NARROWING; + } + + const arm_matrix_instance_f32 mi = { static_cast(matrix->rows), static_cast(matrix->cols), matrix->buffer }; + arm_matrix_instance_f32 mo = { static_cast(matrix->rows), static_cast(matrix->cols), matrix->buffer }; + int status = arm_mat_scale_f32(&mi, scale, &mo); + if (status != ARM_MATH_SUCCESS) { + return status; + } +#else + for (size_t ix = 0; ix < matrix->rows * matrix->cols; ix++) { + matrix->buffer[ix] *= scale; + } +#endif + return EIDSP_OK; + } + + + /** + * Scale a matrix in place, per row + * @param matrix Input matrix (MxN) + * @param scale_matrix Scale matrix (Mx1) + * @returns 0 if OK + */ + static int scale(matrix_t *matrix, matrix_t *scale_matrix) { + if (matrix->rows != scale_matrix->rows) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + if (scale_matrix->cols != 1) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + for (size_t row = 0; row < matrix->rows; row++) { + EI_DSP_MATRIX_B(temp, 1, matrix->cols, matrix->buffer + (row * matrix->cols)); + int ret = scale(&temp, scale_matrix->buffer[row]); + if (ret != EIDSP_OK) { + EIDSP_ERR(ret); + } + } + + return EIDSP_OK; + } + + /** + * Add on matrix in place + * @param matrix + * @param addition + * @returns 0 if OK + */ + static int add(matrix_t *matrix, float addition) { + for (uint32_t ix = 0; ix < matrix->rows * matrix->cols; ix++) { + matrix->buffer[ix] += addition; + } + return EIDSP_OK; + } + + /** + * Add on a matrix in place, per row + * @param matrix Input matrix (MxN) + * @param add Scale matrix (Mx1) + * @returns 0 if OK + */ + static int add(matrix_t *matrix, matrix_t *add_matrix) { + if (matrix->rows != add_matrix->rows) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + if (add_matrix->cols != 1) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + for (size_t row = 0; row < matrix->rows; row++) { + EI_DSP_MATRIX_B(temp, 1, matrix->cols, matrix->buffer + (row * matrix->cols)); + int ret = add(&temp, add_matrix->buffer[row]); + if (ret != EIDSP_OK) { + EIDSP_ERR(ret); + } + } + + return EIDSP_OK; + } + + /** + * Subtract from matrix in place + * @param matrix + * @param subtraction + * @returns 0 if OK + */ + static int subtract(matrix_t *matrix, float subtraction) { + for (uint32_t ix = 0; ix < matrix->rows * matrix->cols; ix++) { + matrix->buffer[ix] -= subtraction; + } + return EIDSP_OK; + } + + /** + * Add on a matrix in place, per row + * @param matrix Input matrix (MxN) + * @param add Scale matrix (Mx1) + * @returns 0 if OK + */ + static int subtract(matrix_t *matrix, matrix_t *subtract_matrix) { + if (matrix->rows != subtract_matrix->rows) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + if (subtract_matrix->cols != 1) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + for (size_t row = 0; row < matrix->rows; row++) { + EI_DSP_MATRIX_B(temp, 1, matrix->cols, matrix->buffer + (row * matrix->cols)); + int ret = subtract(&temp, subtract_matrix->buffer[row]); + if (ret != EIDSP_OK) { + EIDSP_ERR(ret); + } + } + + return EIDSP_OK; + } + + /** + * Calculate the root mean square of a matrix, one per row + * @param matrix Matrix of size (MxN) + * @param output_matrix Matrix of size (Mx1) + * @returns 0 if OK + */ + static int rms(matrix_t *matrix, matrix_t *output_matrix) { + if (matrix->rows != 
output_matrix->rows) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + if (output_matrix->cols != 1) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + for (size_t row = 0; row < matrix->rows; row++) { +#if EIDSP_USE_CMSIS_DSP + float rms_result; + arm_rms_f32(matrix->buffer + (row * matrix->cols), matrix->cols, &rms_result); + output_matrix->buffer[row] = rms_result; +#else + float sum = 0.0; + for(size_t ix = 0; ix < matrix->cols; ix++) { + float v = matrix->buffer[(row * matrix->cols) + ix]; + sum += v * v; + } + output_matrix->buffer[row] = sqrt(sum / static_cast(matrix->cols)); +#endif + } + + return EIDSP_OK; + } + + /** + * Calculate the mean over a matrix per row + * @param input_matrix Input matrix (MxN) + * @param output_matrix Output matrix (Mx1) + */ + static int mean(matrix_t *input_matrix, matrix_t *output_matrix) { + if (input_matrix->rows != output_matrix->rows) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + if (output_matrix->cols != 1) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + for (size_t row = 0; row < input_matrix->rows; row++) { +#if EIDSP_USE_CMSIS_DSP + float mean; + arm_mean_f32(input_matrix->buffer + (row * input_matrix->cols), input_matrix->cols, &mean); + output_matrix->buffer[row] = mean; +#else + float sum = 0.0f; + + for (size_t col = 0; col < input_matrix->cols; col++) { + sum += input_matrix->buffer[( row * input_matrix->cols ) + col]; + } + + output_matrix->buffer[row] = sum / input_matrix->cols; +#endif + } + + return EIDSP_OK; + } + + /** + * Calculate the mean over a matrix on axis 0 + * @param input_matrix Input matrix (MxN) + * @param output_matrix Output matrix (Nx1) + * @returns 0 if OK + */ + static int mean_axis0(matrix_t *input_matrix, matrix_t *output_matrix) { + if (input_matrix->cols != output_matrix->rows) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + if (output_matrix->cols != 1) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + for (size_t col = 0; col < input_matrix->cols; col++) { + // Note - not using CMSIS-DSP here + // gathering up the current columnand moving it into sequential memory to use + // SIMD to calculate the mean would take more time than the simple loop + // so disable this case. 
The alternative is to use 2 transposes and on a "big" ARM + // platform that will take more time + + float sum = 0.0f; + + for (size_t row = 0; row < input_matrix->rows; row++) { + sum += input_matrix->buffer[( row * input_matrix->cols ) + col]; + } + + output_matrix->buffer[col] = sum / input_matrix->rows; + } + + return EIDSP_OK; + } + + /** + * Calculate the standard deviation over a matrix on axis 0 + * @param input_matrix Input matrix (MxN) + * @param output_matrix Output matrix (Nx1) + * @returns 0 if OK + */ + static int std_axis0(matrix_t *input_matrix, matrix_t *output_matrix) { +#if EIDSP_USE_CMSIS_DSP + return std_axis0_CMSIS(input_matrix, output_matrix); +#else + + if (input_matrix->cols != output_matrix->rows) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + if (output_matrix->cols != 1) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + for (size_t col = 0; col < input_matrix->cols; col++) { + float sum = 0.0f; + + for (size_t row = 0; row < input_matrix->rows; row++) { + sum += input_matrix->buffer[(row * input_matrix->cols) + col]; + } + + float mean = sum / input_matrix->rows; + + float std = 0.0f; + float tmp; + for (size_t row = 0; row < input_matrix->rows; row++) { + tmp = input_matrix->buffer[(row * input_matrix->cols) + col] - mean; + std += tmp * tmp; + } + + output_matrix->buffer[col] = sqrt(std / input_matrix->rows); + } + + return EIDSP_OK; +#endif + } + + /** + * Get the minimum value in a matrix per row + * @param input_matrix Input matrix (MxN) + * @param output_matrix Output matrix (Mx1) + */ + static int min(matrix_t *input_matrix, matrix_t *output_matrix) { + if (input_matrix->rows != output_matrix->rows) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + if (output_matrix->cols != 1) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + for (size_t row = 0; row < input_matrix->rows; row++) { +#if EIDSP_USE_CMSIS_DSP + float min; + uint32_t ix; + arm_min_f32(input_matrix->buffer + (row * input_matrix->cols), input_matrix->cols, &min, &ix); + output_matrix->buffer[row] = min; +#else + float min = FLT_MAX; + + for (size_t col = 0; col < input_matrix->cols; col++) { + float v = input_matrix->buffer[( row * input_matrix->cols ) + col]; + if (v < min) { + min = v; + } + } + + output_matrix->buffer[row] = min; +#endif + } + + return EIDSP_OK; + } + + /** + * Get the maximum value in a matrix per row + * @param input_matrix Input matrix (MxN) + * @param output_matrix Output matrix (Mx1) + */ + static int max(matrix_t *input_matrix, matrix_t *output_matrix) { + if (input_matrix->rows != output_matrix->rows) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + if (output_matrix->cols != 1) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + for (size_t row = 0; row < input_matrix->rows; row++) { +#if EIDSP_USE_CMSIS_DSP + float max; + uint32_t ix; + arm_max_f32(input_matrix->buffer + (row * input_matrix->cols), input_matrix->cols, &max, &ix); + output_matrix->buffer[row] = max; +#else + float max = -FLT_MAX; + + for (size_t col = 0; col < input_matrix->cols; col++) { + float v = input_matrix->buffer[( row * input_matrix->cols ) + col]; + if (v > max) { + max = v; + } + } + + output_matrix->buffer[row] = max; +#endif + } + + return EIDSP_OK; + } + + /** + * Get the stdev value in a matrix per row + * @param input_matrix Input matrix (MxN) + * @param output_matrix Output matrix (Mx1) + */ + static int stdev(matrix_t *input_matrix, matrix_t *output_matrix) { + if (input_matrix->rows != output_matrix->rows) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + if (output_matrix->cols 
!= 1) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + for (size_t row = 0; row < input_matrix->rows; row++) { +#if EIDSP_USE_CMSIS_DSP + float std; + float var; + cmsis_arm_variance(&input_matrix->buffer[(row * input_matrix->cols)], input_matrix->cols, &var); + arm_sqrt_f32(var, &std); + output_matrix->buffer[row] = std; +#else + float sum = 0.0f; + + for (size_t col = 0; col < input_matrix->cols; col++) { + sum += input_matrix->buffer[(row * input_matrix->cols) + col]; + } + + float mean = sum / input_matrix->cols; + + float std = 0.0f; + + for (size_t col = 0; col < input_matrix->cols; col++) { + float diff; + diff = input_matrix->buffer[(row * input_matrix->cols) + col] - mean; + std += diff * diff; + } + + output_matrix->buffer[row] = sqrt(std / input_matrix->cols); +#endif + } + + return EIDSP_OK; + } + + /** + * Get the skewness value in a matrix per row + * @param input_matrix Input matrix (MxN) + * @param output_matrix Output matrix (Mx1) + */ + static int skew(matrix_t *input_matrix, matrix_t *output_matrix) { + if (input_matrix->rows != output_matrix->rows) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + if (output_matrix->cols != 1) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + for (size_t row = 0; row < input_matrix->rows; row++) { +#if EIDSP_USE_CMSIS_DSP + float mean; + float var; + + // Calculate the mean & variance + arm_mean_f32(input_matrix->buffer + (row * input_matrix->cols), input_matrix->cols, &mean); + cmsis_arm_variance(&input_matrix->buffer[(row * input_matrix->cols)], input_matrix->cols, &var); + + // Calculate m_3 + float m_3; + cmsis_arm_third_moment(&input_matrix->buffer[(row * input_matrix->cols)], input_matrix->cols, mean, &m_3); + + // Calculate (variance)^(3/2) + arm_sqrt_f32(var * var * var, &var); + + // Calculate skew = (m_3) / (variance)^(3/2) + if (var == 0.0f) { + output_matrix->buffer[row] = 0.0f; + } else { + output_matrix->buffer[row] = m_3 / var; + } +#else + float sum = 0.0f; + float mean; + + // Calculate the mean + for (size_t col = 0; col < input_matrix->cols; col++) { + sum += input_matrix->buffer[( row * input_matrix->cols ) + col]; + } + mean = sum / input_matrix->cols; + + // Calculate the m values + float m_3 = 0.0f; + float m_2 = 0.0f; + + for (size_t col = 0; col < input_matrix->cols; col++) { + float diff; + diff = input_matrix->buffer[( row * input_matrix->cols ) + col] - mean; + m_3 += diff * diff * diff; + m_2 += diff * diff; + } + m_3 = m_3 / input_matrix->cols; + m_2 = m_2 / input_matrix->cols; + + // Calculate (m_2)^(3/2) + m_2 = sqrt(m_2 * m_2 * m_2); + + // Calculate skew = (m_3) / (m_2)^(3/2) + if (m_2 == 0.0f) { + output_matrix->buffer[row] = 0.0f; + } else { + output_matrix->buffer[row] = m_3 / m_2; + } +#endif + } + + return EIDSP_OK; + } + + /** + * Get the kurtosis value in a matrix per row + * @param input_matrix Input matrix (MxN) + * @param output_matrix Output matrix (Mx1) + */ + static int kurtosis(matrix_t *input_matrix, matrix_t *output_matrix) { + if (input_matrix->rows != output_matrix->rows) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + if (output_matrix->cols != 1) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + for (size_t row = 0; row < input_matrix->rows; row++) { +#if EIDSP_USE_CMSIS_DSP + float mean; + float var; + + // Calculate mean & variance + arm_mean_f32(input_matrix->buffer + (row * input_matrix->cols), input_matrix->cols, &mean); + cmsis_arm_variance(&input_matrix->buffer[(row * input_matrix->cols)], input_matrix->cols, &var); + + // Calculate m_4 + float m_4; + 
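+            // m_4 below is the fourth central moment, i.e. the mean of (x - mean)^4 over
+            // the row (see cmsis_arm_fourth_moment() further down). Dividing it by
+            // variance^2 and subtracting 3 gives Fisher (excess) kurtosis, which is 0
+            // for a normal distribution.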
cmsis_arm_fourth_moment(&input_matrix->buffer[(row * input_matrix->cols)], input_matrix->cols, mean, &m_4); + + // Calculate Fisher kurtosis = (m_4 / variance^2) - 3 + var = var * var; + if (var == 0.0f) { + output_matrix->buffer[row] = -3.0f; + } else { + output_matrix->buffer[row] = (m_4 / var) - 3.0f; + } +#else + // Calculate the mean + float mean = 0.0f; + float sum = 0.0f; + + for (size_t col = 0; col < input_matrix->cols; col++) { + sum += input_matrix->buffer[( row * input_matrix->cols ) + col]; + } + mean = sum / input_matrix->cols; + + // Calculate m_4 & variance + float m_4 = 0.0f; + float variance = 0.0f; + + for (size_t col = 0; col < input_matrix->cols; col++) { + float diff; + diff = input_matrix->buffer[(row * input_matrix->cols) + col] - mean; + float square_diff = diff * diff; + variance += square_diff; + m_4 += square_diff * square_diff; + } + m_4 = m_4 / input_matrix->cols; + variance = variance / input_matrix->cols; + + // Square the variance + variance = variance * variance; + // Calculate Fisher kurtosis = (m_4 / variance^2) - 3 + if (variance == 0.0f) { + output_matrix->buffer[row] = -3.0f; + } else { + output_matrix->buffer[row] = (m_4 / variance) - 3.0f; + } +#endif + } + + return EIDSP_OK; + } + + + /** + * Compute the one-dimensional discrete Fourier Transform for real input. + * This function computes the one-dimensional n-point discrete Fourier Transform (DFT) of + * a real-valued array by means of an efficient algorithm called the Fast Fourier Transform (FFT). + * @param src Source buffer + * @param src_size Size of the source buffer + * @param output Output buffer + * @param output_size Size of the output buffer, should be n_fft / 2 + 1 + * @returns 0 if OK + */ + static int rfft(const float *src, size_t src_size, float *output, size_t output_size, size_t n_fft) { + size_t n_fft_out_features = (n_fft / 2) + 1; + if (output_size != n_fft_out_features) { + EIDSP_ERR(EIDSP_BUFFER_SIZE_MISMATCH); + } + + // truncate if needed + if (src_size > n_fft) { + src_size = n_fft; + } + + // declare input and output arrays + EI_DSP_MATRIX(fft_input, 1, n_fft); + if (!fft_input.buffer) { + EIDSP_ERR(EIDSP_OUT_OF_MEM); + } + + // copy from src to fft_input + memcpy(fft_input.buffer, src, src_size * sizeof(float)); + // pad to the rigth with zeros + memset(fft_input.buffer + src_size, 0, (n_fft - src_size) * sizeof(kiss_fft_scalar)); + +#if EIDSP_USE_CMSIS_DSP + if (n_fft != 32 && n_fft != 64 && n_fft != 128 && n_fft != 256 && + n_fft != 512 && n_fft != 1024 && n_fft != 2048 && n_fft != 4096) { + int ret = software_rfft(fft_input.buffer, output, n_fft, n_fft_out_features); + if (ret != EIDSP_OK) { + EIDSP_ERR(ret); + } + } + else { + // hardware acceleration only works for the powers above... 
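+            // The CMSIS path below produces the packed complex spectrum: DC in slot 0,
+            // the Nyquist bin in slot 1, then interleaved re/im pairs. arm_rms_f32 over a
+            // re/im pair returns sqrt((re^2 + im^2) / 2), so multiplying by sqrt(2) below
+            // yields the bin magnitude sqrt(re^2 + im^2).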
+ arm_rfft_fast_instance_f32 rfft_instance; + int status = cmsis_rfft_init_f32(&rfft_instance, n_fft); + if (status != ARM_MATH_SUCCESS) { + return status; + } + + EI_DSP_MATRIX(fft_output, 1, n_fft); + if (!fft_output.buffer) { + EIDSP_ERR(EIDSP_OUT_OF_MEM); + } + + arm_rfft_fast_f32(&rfft_instance, fft_input.buffer, fft_output.buffer, 0); + + output[0] = fft_output.buffer[0]; + output[n_fft_out_features - 1] = fft_output.buffer[1]; + + size_t fft_output_buffer_ix = 2; + for (size_t ix = 1; ix < n_fft_out_features - 1; ix += 1) { + float rms_result; + arm_rms_f32(fft_output.buffer + fft_output_buffer_ix, 2, &rms_result); + output[ix] = rms_result * sqrt(2); + + fft_output_buffer_ix += 2; + } + } +#else + int ret = software_rfft(fft_input.buffer, output, n_fft, n_fft_out_features); + if (ret != EIDSP_OK) { + EIDSP_ERR(ret); + } +#endif + + return EIDSP_OK; + } + + + /** + * Compute the one-dimensional discrete Fourier Transform for real input. + * This function computes the one-dimensional n-point discrete Fourier Transform (DFT) of + * a real-valued array by means of an efficient algorithm called the Fast Fourier Transform (FFT). + * @param src Source buffer + * @param src_size Size of the source buffer + * @param output Output buffer + * @param output_size Size of the output buffer, should be n_fft / 2 + 1 + * @returns 0 if OK + */ + static int rfft(const float *src, size_t src_size, fft_complex_t *output, size_t output_size, size_t n_fft) { + size_t n_fft_out_features = (n_fft / 2) + 1; + if (output_size != n_fft_out_features) { + EIDSP_ERR(EIDSP_BUFFER_SIZE_MISMATCH); + } + + // truncate if needed + if (src_size > n_fft) { + src_size = n_fft; + } + + // declare input and output arrays + float *fft_input_buffer = NULL; + if (src_size == n_fft) { + fft_input_buffer = (float*)src; + } + + EI_DSP_MATRIX_B(fft_input, 1, n_fft, fft_input_buffer); + if (!fft_input.buffer) { + EIDSP_ERR(EIDSP_OUT_OF_MEM); + } + + if (!fft_input_buffer) { + // copy from src to fft_input + memcpy(fft_input.buffer, src, src_size * sizeof(float)); + // pad to the rigth with zeros + memset(fft_input.buffer + src_size, 0, (n_fft - src_size) * sizeof(float)); + } + +#if EIDSP_USE_CMSIS_DSP + if (n_fft != 32 && n_fft != 64 && n_fft != 128 && n_fft != 256 && + n_fft != 512 && n_fft != 1024 && n_fft != 2048 && n_fft != 4096) { + int ret = software_rfft(fft_input.buffer, output, n_fft, n_fft_out_features); + if (ret != EIDSP_OK) { + EIDSP_ERR(ret); + } + } + else { + // hardware acceleration only works for the powers above... + arm_rfft_fast_instance_f32 rfft_instance; + int status = cmsis_rfft_init_f32(&rfft_instance, n_fft); + if (status != ARM_MATH_SUCCESS) { + return status; + } + + EI_DSP_MATRIX(fft_output, 1, n_fft); + if (!fft_output.buffer) { + EIDSP_ERR(EIDSP_OUT_OF_MEM); + } + + arm_rfft_fast_f32(&rfft_instance, fft_input.buffer, fft_output.buffer, 0); + + output[0].r = fft_output.buffer[0]; + output[0].i = 0.0f; + output[n_fft_out_features - 1].r = fft_output.buffer[1]; + output[n_fft_out_features - 1].i = 0.0f; + + size_t fft_output_buffer_ix = 2; + for (size_t ix = 1; ix < n_fft_out_features - 1; ix += 1) { + output[ix].r = fft_output.buffer[fft_output_buffer_ix]; + output[ix].i = fft_output.buffer[fft_output_buffer_ix + 1]; + + fft_output_buffer_ix += 2; + } + } +#else + int ret = software_rfft(fft_input.buffer, output, n_fft, n_fft_out_features); + if (ret != EIDSP_OK) { + EIDSP_ERR(ret); + } +#endif + + return EIDSP_OK; + } + + + /** + * Return evenly spaced numbers over a specified interval. 
+     * Returns num evenly spaced samples, calculated over the interval [start, stop].
+     * The endpoint of the interval is always included by this implementation.
+     *
+     * Based on https://github.com/ntessore/algo/blob/master/linspace.c
+     * Licensed in public domain (see LICENSE in repository above)
+     *
+     * @param start The starting value of the sequence.
+     * @param stop The end value of the sequence.
+     * @param number Number of samples to generate.
+     * @param out Out array, with size `number`
+     * @returns 0 if OK
+     */
+    static int linspace(float start, float stop, uint32_t number, float *out)
+    {
+        if (number < 1 || !out) {
+            EIDSP_ERR(EIDSP_PARAMETER_INVALID);
+        }
+
+        if (number == 1) {
+            out[0] = start;
+            return EIDSP_OK;
+        }
+
+        // step size
+        float step = (stop - start) / (number - 1);
+
+        // do steps
+        for (uint32_t ix = 0; ix < number - 1; ix++) {
+            out[ix] = start + ix * step;
+        }
+
+        // last entry always stop
+        out[number - 1] = stop;
+
+        return EIDSP_OK;
+    }
+
+    /**
+     * Return evenly spaced q31 numbers over a specified interval.
+     * Returns num evenly spaced samples, calculated over the interval [start, stop].
+     * The endpoint of the interval is always included by this implementation.
+     *
+     * Based on https://github.com/ntessore/algo/blob/master/linspace.c
+     * Licensed in public domain (see LICENSE in repository above)
+     *
+     * @param start The starting value of the sequence.
+     * @param stop The end value of the sequence.
+     * @param number Number of samples to generate.
+     * @param out Out array, with size `number`
+     * @returns 0 if OK
+     */
+    static int linspace(EIDSP_i32 start, EIDSP_i32 stop, uint32_t number, EIDSP_i32 *out)
+    {
+        if (number < 1 || !out) {
+            EIDSP_ERR(EIDSP_PARAMETER_INVALID);
+        }
+
+        if (number == 1) {
+            out[0] = start;
+            return EIDSP_OK;
+        }
+
+        // step size
+        EIDSP_i32 step = (stop - start) / (number - 1);
+
+        // do steps
+        for (uint32_t ix = 0; ix < number - 1; ix++) {
+            out[ix] = start + ix * step;
+        }
+
+        // last entry always stop
+        out[number - 1] = stop;
+
+        return EIDSP_OK;
+    }
+
+    /**
+     * Convert an int16_t buffer into a float buffer, maps to -1..1
+     * @param input
+     * @param output
+     * @param length
+     * @returns 0 if OK
+     */
+    static int int16_to_float(const EIDSP_i16 *input, float *output, size_t length) {
+        for (size_t ix = 0; ix < length; ix++) {
+            // scale from the int16 range to the documented -1..1 range
+            output[ix] = static_cast<float>(input[ix]) / 32768.0f;
+        }
+        return EIDSP_OK;
+    }
+
+#if EIDSP_SIGNAL_C_FN_POINTER == 0
+    /**
+     * Create a signal structure from a buffer.
+     * This is useful for data that you keep in memory anyway. If you need to load from
+     * flash, then create the structure yourself.
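+     *
+     * A minimal usage sketch (editor's addition; the buffer name and size are illustrative):
+     *
+     *     static float samples[128];
+     *     ei::signal_t signal;
+     *     if (ei::numpy::signal_from_buffer(samples, 128, &signal) == ei::EIDSP_OK) {
+     *         // signal.get_data(offset, length, out_ptr) now copies from `samples`
+     *     }
+     *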
+ * @param data Buffer, make sure to keep this pointer alive + * @param data_size Size of the buffer + * @param signal Output signal + * @returns EIDSP_OK if ok + */ + static int signal_from_buffer(const float *data, size_t data_size, signal_t *signal) + { + signal->total_length = data_size; +#ifdef __MBED__ + signal->get_data = mbed::callback(&numpy::signal_get_data, data); +#else + signal->get_data = [data](size_t offset, size_t length, float *out_ptr) { + return numpy::signal_get_data(data, offset, length, out_ptr); + }; +#endif + return EIDSP_OK; + } + +#endif + +#if defined ( __GNUC__ ) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wstrict-aliasing" +#endif + /** + * > 50% faster then the math.h log() function + * in return for a small loss in accuracy (0.00001 average diff with log()) + * From: https://stackoverflow.com/questions/39821367/very-fast-approximate-logarithm-natural-log-function-in-c/39822314#39822314 + * Licensed under the CC BY-SA 3.0 + * @param a Input number + * @returns Natural log value of a + */ + __attribute__((always_inline)) static inline float log(float a) + { + int32_t g = (int32_t) * ((int32_t *)&a); + int32_t e = (g - 0x3f2aaaab) & 0xff800000; + g = g - e; + float m = (float) * ((float *)&g); + float i = (float)e * 1.19209290e-7f; // 0x1.0p-23 + /* m in [2/3, 4/3] */ + float f = m - 1.0f; + float s = f * f; + /* Compute log1p(f) for f in [-1/3, 1/3] */ + float r = fmaf(0.230836749f, f, -0.279208571f); // 0x1.d8c0f0p-3, -0x1.1de8dap-2 + float t = fmaf(0.331826031f, f, -0.498910338f); // 0x1.53ca34p-2, -0x1.fee25ap-2 + r = fmaf(r, s, t); + r = fmaf(r, s, f); + r = fmaf(i, 0.693147182f, r); // 0x1.62e430p-1 // log(2) + + return r; + } + + /** + * Fast log10 and log2 functions, significantly faster than the ones from math.h (~6x for log10 on M4F) + * From https://community.arm.com/developer/tools-software/tools/f/armds-forum/4292/cmsis-dsp-new-functionality-proposal/22621#22621 + * @param a Input number + * @returns Log2 value of a + */ + __attribute__((always_inline)) static inline float log2(float a) + { + int e; + float f = frexpf(fabsf(a), &e); + float y = 1.23149591368684f; + y *= f; + y += -4.11852516267426f; + y *= f; + y += 6.02197014179219f; + y *= f; + y += -3.13396450166353f; + y += e; + return y; + } + + /** + * Fast log10 and log2 functions, significantly faster than the ones from math.h (~6x for log10 on M4F) + * From https://community.arm.com/developer/tools-software/tools/f/armds-forum/4292/cmsis-dsp-new-functionality-proposal/22621#22621 + * @param a Input number + * @returns Log10 value of a + */ + __attribute__((always_inline)) static inline float log10(float a) + { + return numpy::log2(a) * 0.3010299956639812f; + } +#if defined ( __GNUC__ ) +#pragma GCC diagnostic pop +#endif + + /** + * Calculate the natural log value of a matrix. Does an in-place replacement. + * @param matrix Matrix (MxN) + * @returns 0 if OK + */ + static int log(matrix_t *matrix) + { + for (uint32_t ix = 0; ix < matrix->rows * matrix->cols; ix++) { + matrix->buffer[ix] = numpy::log(matrix->buffer[ix]); + } + + return EIDSP_OK; + } + + /** + * Calculate the log10 of a matrix. Does an in-place replacement. 
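+     * Uses the fast scalar numpy::log10 above, i.e. log2(x) * log10(2).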
+ * @param matrix Matrix (MxN) + * @returns 0 if OK + */ + static int log10(matrix_t *matrix) + { + for (uint32_t ix = 0; ix < matrix->rows * matrix->cols; ix++) { + matrix->buffer[ix] = numpy::log10(matrix->buffer[ix]); + } + + return EIDSP_OK; + } + + /** + * @brief Signed Saturate + * + * @param[in] val The value to be saturated + * @param[in] sat Bit position to saturate to (1..32) + * + * @return Saturated value + */ + static int32_t saturate(int64_t val, uint32_t sat) + { + if ((sat >= 1U) && (sat <= 32U)) { + int64_t max = (int64_t)((1U << (sat - 1U)) - 1U); + int64_t min = -1 - max; + if (val > max) { + return (int32_t)max; + } else if (val < min) { + return (int32_t)min; + } + } + return (int32_t)val; + } + + /** + * Normalize a matrix to 0..1. Does an in-place replacement. + * Normalization done per row. + * @param matrix + */ + static int normalize(matrix_t *matrix) { + // Python implementation: + // matrix = (matrix - np.min(matrix)) / (np.max(matrix) - np.min(matrix)) + int r; + + matrix_t temp_matrix(1, matrix->rows * matrix->cols, matrix->buffer); + + matrix_t min_matrix(1, 1); + if (!min_matrix.buffer) { + EIDSP_ERR(EIDSP_OUT_OF_MEM); + } + r = min(&temp_matrix, &min_matrix); + if (r != EIDSP_OK) { + EIDSP_ERR(r); + } + + matrix_t max_matrix(1, 1); + if (!max_matrix.buffer) { + EIDSP_ERR(EIDSP_OUT_OF_MEM); + } + r = max(&temp_matrix, &max_matrix); + if (r != EIDSP_OK) { + EIDSP_ERR(r); + } + + float min_max_diff = (max_matrix.buffer[0] - min_matrix.buffer[0]); + /* Prevent divide by 0 by setting minimum value for divider */ + float row_scale = min_max_diff < 0.001 ? 1.0f : 1.0f / min_max_diff; + + r = subtract(&temp_matrix, min_matrix.buffer[0]); + if (r != EIDSP_OK) { + EIDSP_ERR(r); + } + + r = scale(&temp_matrix, row_scale); + if (r != EIDSP_OK) { + EIDSP_ERR(r); + } + + return EIDSP_OK; + } + + /** + * Clip (limit) the values in an array. Does an in-place replacement. + * Values outside the interval are clipped to the interval edges. + * For example, if an interval of [0, 1] is specified, values smaller than 0 become 0, + * and values larger than 1 become 1. + * @param matrix + * @param min Min value to be clipped + * @param max Max value to be clipped + */ + static int clip(matrix_t *matrix, float min, float max) { + if (max < min) { + EIDSP_ERR(EIDSP_PARAMETER_INVALID); + } + + for (size_t ix = 0; ix < matrix->rows * matrix->cols; ix++) { + if (matrix->buffer[ix] < min) { + matrix->buffer[ix] = min; + } + else if (matrix->buffer[ix] > max) { + matrix->buffer[ix] = max; + } + } + + return EIDSP_OK; + } + + /** + * Cut the data behind the comma on a matrix. Does an in-place replacement. + * E.g. 
around([ 3.01, 4.89 ]) becomes [3, 4] + * @param matrix + */ + static int round(matrix_t *matrix) { + for (size_t ix = 0; ix < matrix->rows * matrix->cols; ix++) { + matrix->buffer[ix] = ::round(matrix->buffer[ix]); + } + + return EIDSP_OK; + } + + static int software_rfft(float *fft_input, float *output, size_t n_fft, size_t n_fft_out_features) { + kiss_fft_cpx *fft_output = (kiss_fft_cpx*)ei_dsp_malloc(n_fft_out_features * sizeof(kiss_fft_cpx)); + if (!fft_output) { + EIDSP_ERR(EIDSP_OUT_OF_MEM); + } + + size_t kiss_fftr_mem_length; + + // create fftr context + kiss_fftr_cfg cfg = kiss_fftr_alloc(n_fft, 0, NULL, NULL, &kiss_fftr_mem_length); + if (!cfg) { + ei_dsp_free(fft_output, n_fft_out_features * sizeof(kiss_fft_cpx)); + EIDSP_ERR(EIDSP_OUT_OF_MEM); + } + + ei_dsp_register_alloc(kiss_fftr_mem_length, cfg); + + // execute the rfft operation + kiss_fftr(cfg, fft_input, fft_output); + + // and write back to the output + for (size_t ix = 0; ix < n_fft_out_features; ix++) { + output[ix] = sqrt(pow(fft_output[ix].r, 2) + pow(fft_output[ix].i, 2)); + } + + ei_dsp_free(cfg, kiss_fftr_mem_length); + ei_dsp_free(fft_output, n_fft_out_features * sizeof(kiss_fft_cpx)); + + return EIDSP_OK; + } + + static int software_rfft(float *fft_input, fft_complex_t *output, size_t n_fft, size_t n_fft_out_features) + { + // create fftr context + size_t kiss_fftr_mem_length; + + kiss_fftr_cfg cfg = kiss_fftr_alloc(n_fft, 0, NULL, NULL, &kiss_fftr_mem_length); + if (!cfg) { + EIDSP_ERR(EIDSP_OUT_OF_MEM); + } + + ei_dsp_register_alloc(kiss_fftr_mem_length, cfg); + + // execute the rfft operation + kiss_fftr(cfg, fft_input, (kiss_fft_cpx*)output); + + ei_dsp_free(cfg, kiss_fftr_mem_length); + + return EIDSP_OK; + } + + static int signal_get_data(const float *in_buffer, size_t offset, size_t length, float *out_ptr) + { + memcpy(out_ptr, in_buffer + offset, length * sizeof(float)); + return 0; + } + + static int signal_get_data_i16(int16_t *in_buffer, size_t offset, size_t length, int16_t *out_ptr) + { + memcpy(out_ptr, in_buffer + offset, length * sizeof(int16_t)); + return 0; + } + +#if EIDSP_USE_CMSIS_DSP + /** + * @brief The CMSIS std variance function with the same behaviour as the NumPy + * implementation + * @details Variance in CMSIS version is calculated using fSum / (float32_t)(blockSize - 1) + * @param[in] pSrc Pointer to float block + * @param[in] blockSize Number of floats in block + * @param pResult The variance + */ + static void cmsis_arm_variance(const float32_t *pSrc, uint32_t blockSize, float32_t *pResult) + { + uint32_t blkCnt; + float32_t sum = 0.0f; + float32_t fSum = 0.0f; + float32_t fMean, fValue; + const float32_t *pInput = pSrc; + + if (blockSize <= 1U) { + *pResult = 0; + return; + } + blkCnt = blockSize >> 2U; + + while (blkCnt > 0U) { + sum += *pInput++; + sum += *pInput++; + sum += *pInput++; + sum += *pInput++; + blkCnt--; + } + + /* Loop unrolling: Compute remaining outputs */ + blkCnt = blockSize % 0x4U; + + while (blkCnt > 0U) { + sum += *pInput++; + blkCnt--; + } + + fMean = sum / (float32_t)blockSize; + + pInput = pSrc; + + /* Loop unrolling: Compute 4 outputs at a time */ + blkCnt = blockSize >> 2U; + + while (blkCnt > 0U) { + fValue = *pInput++ - fMean; + fSum += fValue * fValue; + fValue = *pInput++ - fMean; + fSum += fValue * fValue; + fValue = *pInput++ - fMean; + fSum += fValue * fValue; + fValue = *pInput++ - fMean; + fSum += fValue * fValue; + blkCnt--; + } + + /* Loop unrolling: Compute remaining outputs */ + blkCnt = blockSize % 0x4U; + + while (blkCnt > 0U) { + 
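+            // tail loop: accumulate the squared deviations of the 1..3 samples left over
+            // when blockSize is not a multiple of 4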
fValue = *pInput++ - fMean; + fSum += fValue * fValue; + blkCnt--; + } + + /* Variance */ + *pResult = fSum / (float32_t)(blockSize); + } + + /** + * @brief Copy of the numpy version explicitely using the CMSIS lib + * for STD and Matrix transpose + * @param input_matrix The input matrix + * @param output_matrix The output matrix + * + * @return EIDSP error + */ + static int std_axis0_CMSIS(matrix_t *input_matrix, matrix_t *output_matrix) + { + arm_matrix_instance_f32 arm_in_matrix, arm_transposed_matrix; + + if (input_matrix->cols != output_matrix->rows) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + if (output_matrix->cols != 1) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + /* Copy input matrix to arm matrix */ + arm_in_matrix.numRows = input_matrix->rows; + arm_in_matrix.numCols = input_matrix->cols; + arm_in_matrix.pData = &input_matrix->buffer[0]; + /* Create transposed matrix */ + arm_transposed_matrix.numRows = input_matrix->cols; + arm_transposed_matrix.numCols = input_matrix->rows; + arm_transposed_matrix.pData = (float *)ei_calloc(input_matrix->cols * input_matrix->rows * sizeof(float), 1); + + if (arm_transposed_matrix.pData == NULL) { + EIDSP_ERR(EIDSP_OUT_OF_MEM); + } + + int ret = arm_mat_trans_f32(&arm_in_matrix, &arm_transposed_matrix); + if (ret != EIDSP_OK) { + EIDSP_ERR(ret); + } + + for (size_t row = 0; row < arm_transposed_matrix.numRows; row++) { + float std; + float var; + + cmsis_arm_variance(arm_transposed_matrix.pData + (row * arm_transposed_matrix.numCols), + arm_transposed_matrix.numCols, &var); + arm_sqrt_f32(var, &std); + + output_matrix->buffer[row] = std; + } + + ei_free(arm_transposed_matrix.pData); + + return EIDSP_OK; + } + + /** + * @brief A copy of the CMSIS power function, adapted to calculate the third central moment + * @details Calculates the sum of cubes of a block with the mean value subtracted. + * @param[in] pSrc Pointer to float block + * @param[in] blockSize Number of floats in block + * @param[in] mean The mean to subtract from each value before cubing + * @param pResult The third central moment of the input + */ + static void cmsis_arm_third_moment(const float32_t * pSrc, uint32_t blockSize, float32_t mean, float32_t * pResult) + { + uint32_t blkCnt; + float32_t sum = 0.0f; + float32_t in; + + /* Loop unrolling: Compute 4 outputs at a time */ + blkCnt = blockSize >> 2U; + + while (blkCnt > 0U) { + + /* Compute Power and store result in a temporary variable, sum. */ + in = *pSrc++; + in = in - mean; + sum += in * in * in; + + in = *pSrc++; + in = in - mean; + sum += in * in * in; + + in = *pSrc++; + in = in - mean; + sum += in * in * in; + + in = *pSrc++; + in = in - mean; + sum += in * in * in; + + /* Decrement loop counter */ + blkCnt--; + } + + /* Loop unrolling: Compute remaining outputs */ + blkCnt = blockSize % 0x4U; + + while (blkCnt > 0U) { + /* Compute Power and store result in a temporary variable, sum. */ + in = *pSrc++; + in = in - mean; + sum += in * in * in; + + /* Decrement loop counter */ + blkCnt--; + } + + sum = sum / blockSize; + /* Store result to destination */ + *pResult = sum; + } + + /** + * @brief A copy of the CMSIS power function, adapted to calculate the fourth central moment + * @details Calculates the sum of fourth powers of a block with the mean value subtracted. 
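+     *          In other words, pResult = sum((pSrc[i] - mean)^4) / blockSize, the fourth central moment.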
+ * @param[in] pSrc Pointer to float block + * @param[in] blockSize Number of floats in block + * @param[in] mean The mean to subtract from each value before calculating fourth power + * @param pResult The fourth central moment of the input + */ + static void cmsis_arm_fourth_moment(const float32_t * pSrc, uint32_t blockSize, float32_t mean, float32_t * pResult) + { + uint32_t blkCnt; + float32_t sum = 0.0f; + float32_t in; + + /* Loop unrolling: Compute 4 outputs at a time */ + blkCnt = blockSize >> 2U; + + while (blkCnt > 0U) { + + /* Compute Power and store result in a temporary variable, sum. */ + in = *pSrc++; + in = in - mean; + float square; + square = in * in; + sum += square * square; + + in = *pSrc++; + in = in - mean; + square = in * in; + sum += square * square; + + in = *pSrc++; + in = in - mean; + square = in * in; + sum += square * square; + + in = *pSrc++; + in = in - mean; + square = in * in; + sum += square * square; + + /* Decrement loop counter */ + blkCnt--; + } + + /* Loop unrolling: Compute remaining outputs */ + blkCnt = blockSize % 0x4U; + + while (blkCnt > 0U) { + /* Compute Power and store result in a temporary variable, sum. */ + in = *pSrc++; + in = in - mean; + float square; + square = in * in; + sum += square * square; + + /* Decrement loop counter */ + blkCnt--; + } + + sum = sum / blockSize; + /* Store result to destination */ + *pResult = sum; + } +#endif // EIDSP_USE_CMSIS_DSP + + static uint8_t count_leading_zeros(uint32_t data) + { + if (data == 0U) { return 32U; } + + uint32_t count = 0U; + uint32_t mask = 0x80000000U; + + while ((data & mask) == 0U) + { + count += 1U; + mask = mask >> 1U; + } + return count; + } + +#if EIDSP_USE_CMSIS_DSP + /** + * Initialize a CMSIS-DSP fast rfft structure + * We do it this way as this means we can compile out fast_init calls which hints the compiler + * to which tables can be removed + */ + static int cmsis_rfft_init_f32(arm_rfft_fast_instance_f32 *rfft_instance, const size_t n_fft) + { +// ARM cores (ex M55) with Helium extensions (MVEF) need special treatment (Issue 2843) +#if EI_CLASSIFIER_HAS_FFT_INFO == 1 && !defined(ARM_MATH_MVEF) && !defined(EI_CLASSIFIER_LOAD_ALL_FFTS) + arm_status status; + switch (n_fft) { +#if EI_CLASSIFIER_LOAD_FFT_32 == 1 + case 32: { + arm_cfft_instance_f32 *S = &(rfft_instance->Sint); + S->fftLen = 16U; + S->pTwiddle = NULL; + S->bitRevLength = arm_cfft_sR_f32_len16.bitRevLength; + S->pBitRevTable = arm_cfft_sR_f32_len16.pBitRevTable; + S->pTwiddle = arm_cfft_sR_f32_len16.pTwiddle; + rfft_instance->fftLenRFFT = 32U; + rfft_instance->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_32; + status = ARM_MATH_SUCCESS; + break; + } +#endif +#if EI_CLASSIFIER_LOAD_FFT_64 == 1 + case 64: { + arm_cfft_instance_f32 *S = &(rfft_instance->Sint); + S->fftLen = 32U; + S->pTwiddle = NULL; + S->bitRevLength = arm_cfft_sR_f32_len32.bitRevLength; + S->pBitRevTable = arm_cfft_sR_f32_len32.pBitRevTable; + S->pTwiddle = arm_cfft_sR_f32_len32.pTwiddle; + rfft_instance->fftLenRFFT = 64U; + rfft_instance->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_64; + status = ARM_MATH_SUCCESS; + break; + } +#endif +#if EI_CLASSIFIER_LOAD_FFT_128 == 1 + case 128: { + arm_cfft_instance_f32 *S = &(rfft_instance->Sint); + S->fftLen = 64U; + S->pTwiddle = NULL; + S->bitRevLength = arm_cfft_sR_f32_len64.bitRevLength; + S->pBitRevTable = arm_cfft_sR_f32_len64.pBitRevTable; + S->pTwiddle = arm_cfft_sR_f32_len64.pTwiddle; + rfft_instance->fftLenRFFT = 128U; + rfft_instance->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_128; + 
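+            // Every case follows the same pattern: an N-point real FFT reuses the
+            // N/2-point complex FFT tables (wired into Sint) plus the length-N real
+            // twiddle table, so only the FFT sizes enabled above are linked in.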
status = ARM_MATH_SUCCESS; + break; + } +#endif +#if EI_CLASSIFIER_LOAD_FFT_256 == 1 + case 256: { + arm_cfft_instance_f32 *S = &(rfft_instance->Sint); + S->fftLen = 128U; + S->pTwiddle = NULL; + S->bitRevLength = arm_cfft_sR_f32_len128.bitRevLength; + S->pBitRevTable = arm_cfft_sR_f32_len128.pBitRevTable; + S->pTwiddle = arm_cfft_sR_f32_len128.pTwiddle; + rfft_instance->fftLenRFFT = 256U; + rfft_instance->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_256; + status = ARM_MATH_SUCCESS; + break; + } +#endif +#if EI_CLASSIFIER_LOAD_FFT_512 == 1 + case 512: { + arm_cfft_instance_f32 *S = &(rfft_instance->Sint); + S->fftLen = 256U; + S->pTwiddle = NULL; + S->bitRevLength = arm_cfft_sR_f32_len256.bitRevLength; + S->pBitRevTable = arm_cfft_sR_f32_len256.pBitRevTable; + S->pTwiddle = arm_cfft_sR_f32_len256.pTwiddle; + rfft_instance->fftLenRFFT = 512U; + rfft_instance->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_512; + status = ARM_MATH_SUCCESS; + break; + } +#endif +#if EI_CLASSIFIER_LOAD_FFT_1024 == 1 + case 1024: { + arm_cfft_instance_f32 *S = &(rfft_instance->Sint); + S->fftLen = 512U; + S->pTwiddle = NULL; + S->bitRevLength = arm_cfft_sR_f32_len512.bitRevLength; + S->pBitRevTable = arm_cfft_sR_f32_len512.pBitRevTable; + S->pTwiddle = arm_cfft_sR_f32_len512.pTwiddle; + rfft_instance->fftLenRFFT = 1024U; + rfft_instance->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_1024; + status = ARM_MATH_SUCCESS; + break; + } +#endif +#if EI_CLASSIFIER_LOAD_FFT_2048 == 1 + case 2048: { + arm_cfft_instance_f32 *S = &(rfft_instance->Sint); + S->fftLen = 1024U; + S->pTwiddle = NULL; + S->bitRevLength = arm_cfft_sR_f32_len1024.bitRevLength; + S->pBitRevTable = arm_cfft_sR_f32_len1024.pBitRevTable; + S->pTwiddle = arm_cfft_sR_f32_len1024.pTwiddle; + rfft_instance->fftLenRFFT = 2048U; + rfft_instance->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_2048; + status = ARM_MATH_SUCCESS; + break; + } +#endif +#if EI_CLASSIFIER_LOAD_FFT_4096 == 1 + case 4096: { + arm_cfft_instance_f32 *S = &(rfft_instance->Sint); + S->fftLen = 2048U; + S->pTwiddle = NULL; + S->bitRevLength = arm_cfft_sR_f32_len2048.bitRevLength; + S->pBitRevTable = arm_cfft_sR_f32_len2048.pBitRevTable; + S->pTwiddle = arm_cfft_sR_f32_len2048.pTwiddle; + rfft_instance->fftLenRFFT = 4096U; + rfft_instance->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_4096; + status = ARM_MATH_SUCCESS; + break; + } +#endif + default: + return EIDSP_FFT_TABLE_NOT_LOADED; + } + + return status; +#else + return arm_rfft_fast_init_f32(rfft_instance, n_fft); +#endif + } +#endif // #if EIDSP_USE_CMSIS_DSP + + /** + * Power spectrum of a frame + * @param frame Row of a frame + * @param frame_size Size of the frame + * @param out_buffer Out buffer, size should be fft_points + * @param out_buffer_size Buffer size + * @param fft_points (int): The length of FFT. If fft_length is greater than frame_len, the frames will be zero-padded. 
+ * @returns EIDSP_OK if OK + */ + static int power_spectrum( + float *frame, + size_t frame_size, + float *out_buffer, + size_t out_buffer_size, + uint16_t fft_points) + { + if (out_buffer_size != static_cast(fft_points / 2 + 1)) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + int r = numpy::rfft(frame, frame_size, out_buffer, out_buffer_size, fft_points); + if (r != EIDSP_OK) { + return r; + } + + for (size_t ix = 0; ix < out_buffer_size; ix++) { + out_buffer[ix] = (1.0 / static_cast(fft_points)) * + (out_buffer[ix] * out_buffer[ix]); + } + + return EIDSP_OK; + } + + static int welch_max_hold( + float *input, + size_t input_size, + float *output, + size_t start_bin, + size_t stop_bin, + size_t fft_points, + bool do_overlap) + { + // save off one point to put back, b/c we're going to calculate in place + float saved_point = 0; + bool do_saved_point = false; + size_t fft_out_size = fft_points / 2 + 1; + float *fft_out; + ei_unique_ptr_t p_fft_out(nullptr, ei_free); + if (input_size < fft_points) { + fft_out = (float *)ei_calloc(fft_out_size, sizeof(float)); + p_fft_out.reset(fft_out); + } + else { + // set input as output for in place operation + fft_out = input; + // save off one point to put back, b/c we're going to calculate in place + saved_point = input[fft_points / 2]; + do_saved_point = true; + } + + // init the output to zeros + memset(output, 0, sizeof(float) * (stop_bin - start_bin)); + int input_ix = 0; + while (input_ix < (int)input_size) { + // Figure out if we need any zero padding + size_t n_input_points = input_ix + fft_points <= input_size ? fft_points + : input_size - input_ix; + EI_TRY(power_spectrum( + input + input_ix, + n_input_points, + fft_out, + fft_points / 2 + 1, + fft_points)); + int j = 0; + // keep the max of the last frame and everything before + for (size_t i = start_bin; i < stop_bin; i++) { + output[j] = std::max(output[j], fft_out[i]); + j++; + } + if (do_overlap) { + if (do_saved_point) { + // This step only matters first time through + input[fft_points / 2] = saved_point; + do_saved_point = false; + } + input_ix += fft_points / 2; + } + else { + input_ix += fft_points; + } + } + + return EIDSP_OK; + } + + static float variance(float *input, size_t size) + { + // Use CMSIS either way. Will fall back to straight C when needed + float temp; +#if EIDSP_USE_CMSIS_DSP + arm_var_f32(input, size, &temp); +#else + float mean = 0.0f; + for (size_t i = 0; i < size; i++) { + mean += input[i]; + } + mean /= size; + + temp = 0.0f; + for (size_t i = 0; i < size; i++) { + temp += (input[i] - mean) * (input[i] - mean); + } + temp /= (size - 1); +#endif + return temp; + } + + /** + * This function handle the issue with zero values if the are exposed + * to become an argument for any log function. + * @param input Array + * @param input_size Size of array + * @returns void + */ + static void zero_handling(float *input, size_t input_size) + { + for (size_t ix = 0; ix < input_size; ix++) { + if (input[ix] == 0) { + input[ix] = 1e-10; + } + } + } + + /** + * This function handle the issue with zero values if the are exposed + * to become an argument for any log function. + * @param input Matrix + * @returns void + */ + static void zero_handling(matrix_t *input) + { + zero_handling(input->buffer, input->rows * input->cols); + } + + /** + * This function handle the underflow float values. 
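+     * Values whose absolute value is below `epsilon` are flushed to 0.0f.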
+     * @param input Array
+     * @param input_size Size of array
+     * @param epsilon Smallest valid non-zero value
+     * @returns void
+     */
+    static void underflow_handling(float* input, size_t input_size, float epsilon = 1e-07f)
+    {
+        for (size_t ix = 0; ix < input_size; ix++) {
+            if (fabs(input[ix]) < epsilon) {
+                input[ix] = 0.0f;
+            }
+        }
+    }
+
+    __attribute__((unused)) static void scale(fvec& v, float scale) {
+        for (auto& x : v) {
+            x *= scale;
+        }
+    }
+
+    __attribute__((unused)) static void sub(fvec& v, float b) {
+        for (auto& x : v) {
+            x -= b;
+        }
+    }
+
+    __attribute__((unused)) static void mul(float* y, const float* x, float* b, size_t n) {
+        for (size_t i = 0; i < n; i++) {
+            y[i] = x[i] * b[i];
+        }
+    }
+
+    __attribute__((unused)) static fvec diff(const float* v, size_t n) {
+        fvec d(n - 1);
+        for (size_t i = 0; i < d.size(); i++) {
+            d[i] = v[i + 1] - v[i];
+        }
+        return d;
+    }
+
+    __attribute__((unused)) static float sum(const float* v, size_t n) {
+        float sum = 0;
+        for (size_t i = 0; i < n; i++) {
+            sum += v[i];
+        }
+        return sum;
+    }
+
+    static float mean(const fvec& v) {
+        float mean = 0;
+        for (auto x : v) {
+            mean += x;
+        }
+        mean /= v.size();
+        return mean;
+    }
+
+    static float mean(const float* v, size_t n) {
+        float mean = 0;
+        for (size_t i = 0; i < n; i++) {
+            mean += v[i];
+        }
+        mean /= n;
+        return mean;
+    }
+
+    static float median(const float* v, size_t n) {
+        fvec vc(n);
+        std::copy(v, v + n, vc.begin());
+        std::sort(vc.begin(), vc.end());
+        if (vc.size() % 2 == 0) {
+            return (vc[vc.size() / 2 - 1] + vc[vc.size() / 2]) / 2;
+        }
+        return vc[vc.size() / 2];
+    }
+
+    __attribute__((unused)) static float median(const fvec& v) {
+        return median(v.data(), v.size());
+    }
+
+    static float stddev(const float* v, size_t n, float m /* mean */, int ddof = 0) {
+        float var = 0;
+        for (size_t i = 0; i < n; i++) {
+            var += (v[i] - m) * (v[i] - m);
+        }
+        var /= n - ddof;
+        return sqrt(var);
+    }
+
+    __attribute__((unused)) static float stddev(const float* v, size_t n) {
+        return stddev(v, n, mean(v, n), 0);
+    }
+
+    __attribute__((unused)) static float stddev(const float* v, size_t n, int ddof) {
+        return stddev(v, n, mean(v, n), ddof);
+    }
+
+    __attribute__((unused)) static float stddev(const fvec& v, int ddof = 0) {
+        return stddev(v.data(), v.size(), mean(v), ddof);
+    }
+
+    static float rms(const float* v, size_t n) {
+        float rms = 0;
+        for (size_t i = 0; i < n; i++) {
+            rms += v[i] * v[i];
+        }
+        rms /= n;
+        return sqrt(rms);
+    }
+
+    __attribute__((unused)) static float rms(const fvec& v) {
+        return rms(v.data(), v.size());
+    }
+
+    template <typename T>
+    static float max(const ei_vector<T>& v) {
+        return *std::max_element(v.begin(), v.end());
+    }
+
+    __attribute__((unused)) static float max(const float* v, size_t n) {
+        return *std::max_element(v, v + n);
+    }
+
+    template <typename T>
+    static float min(const ei_vector<T>& v) {
+        return *std::min_element(v.begin(), v.end());
+    }
+
+    __attribute__((unused)) static float min(const float* v, size_t n) {
+        return *std::min_element(v, v + n);
+    }
+
+    __attribute__((unused)) static int argmax(const fvec& v, int start, int end) {
+        return std::max_element(v.begin() + start, v.begin() + end) - v.begin();
+    }
+
+    __attribute__((unused)) static fvec divide(float num, const float* den, size_t n) {
+        fvec v(n);
+        for (size_t i = 0; i < n; i++) {
+            v[i] = num / den[i];
+        }
+        return v;
+    }
+
+    __attribute__((unused)) static ivec histogram(const float* x, size_t n, int a, int b, int inc) {
+        int num_bins = (b - a) / inc;
+        ivec bins(num_bins, 0);
+        for (size_t i = 0; i < n; i++) {
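+            // bin index for a histogram over [a, b) with bin width `inc`; samples that
+            // fall outside that range are skipped by the bounds check below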
int bin = (int)((x[i] - a) / inc); + if (bin >= 0 && bin < num_bins) { + bins[bin]++; + } + } + return bins; + } + + __attribute__((unused)) static fvec cumsum(const float* v, size_t n) { + fvec c(n); + c[0] = v[0]; + for (size_t i = 1; i < n; i++) { + c[i] = c[i - 1] + v[i]; + } + return c; + } + + __attribute__((unused)) static fvec arange(float start, float end, float step) { + assert(start < end); + assert(step > 0); + fvec v(::round((end - start) / step)); + for (size_t i = 0; i < v.size(); i++) { + v[i] = start + i * step; + } + return v; + } + + __attribute__((unused)) static void add(fvec& v, fvec& b) { + for (size_t i = 0; i < v.size(); i++) { + v[i] += b[i]; + } + } + + __attribute__((unused)) static float trapz(const fvec& x, const fvec& y, size_t lo, size_t hi) { + float area = 0; + for (size_t i = lo; i < hi; i++) { + area += (x[i + 1] - x[i]) * (y[i + 1] + y[i]) / 2; + } + return area; + } + + __attribute__((unused)) static fvec quantile(const fvec& v, size_t start, size_t end, const fvec& q) { + end = std::min(end, v.size()); + fvec vc(end - start); + std::copy(v.begin() + start, v.begin() + end, vc.begin()); + std::sort(vc.begin(), vc.end()); + fvec res(q.size()); + for (size_t i = 0; i < q.size(); i++) { + res[i] = vc[q[i] * vc.size()]; + } + return res; + } + + __attribute__((unused)) static fvec quantile(const float* v, size_t n, const fvec& q) { + fvec vc(n); + std::copy(v, v + n, vc.begin()); + std::sort(vc.begin(), vc.end()); + fvec res(q.size()); + for (size_t i = 0; i < q.size(); i++) { + res[i] = vc[q[i] * vc.size()]; + } + return res; + } + + static float dot(const float* x, const float* y, size_t n) { + float res = 0; + for (size_t i = 0; i < n; i++) { + res += x[i] * y[i]; + } + return res; + } + + + __attribute__((unused)) static float cosine_similarity(const fvec& x, const fvec& y) { + float xy = dot(x.data(), y.data(), x.size()); + float magx = dot(x.data(), x.data(), x.size()); + float magy = dot(y.data(), y.data(), y.size()); + xy /= sqrt(magx * magy); + return xy; + } + + __attribute__((unused)) static void ln(fvec& v) { + for (auto& x : v) { + x = log(x); + } + } + + static size_t next_power_of_2(size_t x) { + size_t res = 1; + while (res < x) { + res *= 2; + } + return res; + } + + static void detrend(float* data, size_t n) { + // Calculate the mean of the data points + float mean = 0.0; + for (size_t i = 0; i < n; i++) { + mean += data[i]; + } + mean /= n; + + // Calculate the slope of the best-fit line + float x_mean = (n + 1) / 2.0; + float y_mean = mean; + float numerator = 0.0; + float denominator = 0.0; + for (size_t i = 0; i < n; i++) { + numerator += (i + 1 - x_mean) * (data[i] - y_mean); + denominator += (i + 1 - x_mean) * (i + 1 - x_mean); + } + float slope = numerator / denominator; + + // Subtract the best-fit line from the data points to get the detrended data + for (size_t i = 0; i < n; i++) { + data[i] = data[i] - (slope * (i + 1)); + } + + // Calculate the mean of the detrended data + float detrended_mean = 0.0; + for (size_t i = 0; i < n; i++) { + detrended_mean += data[i]; + } + detrended_mean /= n; + + // Subtract the mean of the detrended data from each element + for (size_t i = 0; i < n; i++) { + data[i] -= detrended_mean; + } + } + + static fvec detrend(const fvec& data) { + auto ret = data; + detrend(ret.data(), ret.size()); + return ret; + } + +}; + +struct fmat { + ei_matrix* mat = nullptr; + fmat(size_t rows, size_t cols) { + mat = new ei_matrix(rows, cols); + assert(mat); + } + + ~fmat() { + delete mat; + } + + void 
resize(size_t rows, size_t cols) { + delete mat; + mat = new ei_matrix(rows, cols); + } + + float* operator[](size_t i) { + if (mat == nullptr || i >= mat->rows) { + return nullptr; + } + return mat->get_row_ptr(i); + } + + void fill(float x) { + if (mat == nullptr) { + return; + } + for (size_t i = 0; i < mat->rows; i++) { + for (size_t j = 0; j < mat->cols; j++) { + (*this)[i][j] = x; + } + } + } + + void fill_col(size_t col, float x) { + if (mat == nullptr) { + return; + } + for (size_t i = 0; i < mat->rows; i++) { + (*this)[i][col] = x; + } + } + + void fill_row(size_t row, float x) { + if (mat == nullptr) { + return; + } + for (size_t i = 0; i < mat->cols; i++) { + (*this)[row][i] = x; + } + } +}; +} // namespace ei + +#endif // _EIDSP_NUMPY_H_ diff --git a/edgeimpulse/edge-impulse-sdk/dsp/numpy_types.h b/edgeimpulse/edge-impulse-sdk/dsp/numpy_types.h new file mode 100644 index 0000000..4fda745 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/dsp/numpy_types.h @@ -0,0 +1,597 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef _EIDSP_NUMPY_TYPES_H_ +#define _EIDSP_NUMPY_TYPES_H_ + +#include +#include +#include +#include +#ifdef __cplusplus +#include +#include "edge-impulse-sdk/dsp/ei_vector.h" +#ifdef __MBED__ +#include "mbed.h" +#endif // __MBED__ +#endif // __cplusplus +#include "config.hpp" + +#include "../porting/ei_classifier_porting.h" + + +#if EIDSP_TRACK_ALLOCATIONS +#include "memory.hpp" +#endif + +#ifdef __cplusplus +namespace ei { +#endif // __cplusplus + +typedef struct { + float r; + float i; +} fft_complex_t; + +typedef struct { + int32_t r; + int32_t i; +} fft_complex_i32_t; +/** + * A matrix structure that allocates a matrix on the **heap**. + * Freeing happens by calling `delete` on the object or letting the object go out of scope. 
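+ *
+ * A minimal usage sketch (editor's addition):
+ *
+ *     ei::matrix_t m(2, 3);                // 2x3 matrix, buffer allocated with ei_calloc
+ *     if (!m.buffer) { return; }           // allocation can fail on small targets
+ *     m.buffer[0 * m.cols + 2] = 1.0f;     // row-major indexing: row * cols + col
+ *
+ *     float backing[6];
+ *     ei::matrix_t wrapped(2, 3, backing); // wraps an existing buffer, which is not freed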
+ */ +typedef struct ei_matrix { + float *buffer; + uint32_t rows; + uint32_t cols; + bool buffer_managed_by_me; + +#if EIDSP_TRACK_ALLOCATIONS + const char *_fn; + const char *_file; + int _line; + uint32_t _originally_allocated_rows; + uint32_t _originally_allocated_cols; +#endif + +#ifdef __cplusplus + /** + * Create a new matrix + * @param n_rows Number of rows + * @param n_cols Number of columns + * @param a_buffer Buffer, if not provided we'll alloc on the heap + */ + ei_matrix( + uint32_t n_rows, + uint32_t n_cols, + float *a_buffer = NULL +#if EIDSP_TRACK_ALLOCATIONS + , + const char *fn = NULL, + const char *file = NULL, + int line = 0 +#endif + ) + { + if (a_buffer) { + buffer = a_buffer; + buffer_managed_by_me = false; + } + else { + buffer = (float*)ei_calloc(n_rows * n_cols * sizeof(float), 1); + buffer_managed_by_me = true; + } + rows = n_rows; + cols = n_cols; + + if (!a_buffer) { +#if EIDSP_TRACK_ALLOCATIONS + _fn = fn; + _file = file; + _line = line; + _originally_allocated_rows = rows; + _originally_allocated_cols = cols; + if (_fn) { + ei_dsp_register_matrix_alloc_internal(fn, file, line, rows, cols, sizeof(float), buffer); + } + else { + ei_dsp_register_matrix_alloc(rows, cols, sizeof(float), buffer); + } +#endif + } + } + + ~ei_matrix() { + if (buffer && buffer_managed_by_me) { + ei_free(buffer); + +#if EIDSP_TRACK_ALLOCATIONS + if (_fn) { + ei_dsp_register_matrix_free_internal(_fn, _file, _line, _originally_allocated_rows, + _originally_allocated_cols, sizeof(float), buffer); + } + else { + ei_dsp_register_matrix_free(_originally_allocated_rows, _originally_allocated_cols, + sizeof(float), buffer); + } +#endif + } + } + + /** + * @brief Get a pointer to the buffer advanced by n rows + * + * @param row Numer of rows to advance the returned buffer pointer + * @return float* Pointer to the buffer at the start of row n + */ + float *get_row_ptr(size_t row) + { + return buffer + row * cols; + } + + ei_matrix(ei_vector &in) : ei_matrix(1, in.size(), in.data()) { + } +#endif // #ifdef __cplusplus +} matrix_t; + + +/** + * A matrix structure that allocates a matrix on the **heap**. + * Freeing happens by calling `delete` on the object or letting the object go out of scope. 
+ */ +typedef struct ei_matrix_i8 { + int8_t *buffer; + uint32_t rows; + uint32_t cols; + bool buffer_managed_by_me; + +#if EIDSP_TRACK_ALLOCATIONS + const char *_fn; + const char *_file; + int _line; + uint32_t _originally_allocated_rows; + uint32_t _originally_allocated_cols; +#endif + +#ifdef __cplusplus + /** + * Create a new matrix + * @param n_rows Number of rows + * @param n_cols Number of columns + * @param a_buffer Buffer, if not provided we'll alloc on the heap + */ + ei_matrix_i8( + uint32_t n_rows, + uint32_t n_cols, + int8_t *a_buffer = NULL +#if EIDSP_TRACK_ALLOCATIONS + , + const char *fn = NULL, + const char *file = NULL, + int line = 0 +#endif + ) + { + if (a_buffer) { + buffer = a_buffer; + buffer_managed_by_me = false; + } + else { + buffer = (int8_t*)ei_calloc(n_rows * n_cols * sizeof(int8_t), 1); + buffer_managed_by_me = true; + } + rows = n_rows; + cols = n_cols; + + if (!a_buffer) { +#if EIDSP_TRACK_ALLOCATIONS + _fn = fn; + _file = file; + _line = line; + _originally_allocated_rows = rows; + _originally_allocated_cols = cols; + if (_fn) { + ei_dsp_register_matrix_alloc_internal(fn, file, line, rows, cols, sizeof(int8_t), buffer); + } + else { + ei_dsp_register_matrix_alloc(rows, cols, sizeof(int8_t), buffer); + } +#endif + } + } + + ~ei_matrix_i8() { + if (buffer && buffer_managed_by_me) { + ei_free(buffer); + +#if EIDSP_TRACK_ALLOCATIONS + if (_fn) { + ei_dsp_register_matrix_free_internal(_fn, _file, _line, _originally_allocated_rows, + _originally_allocated_cols, sizeof(int8_t), buffer); + } + else { + ei_dsp_register_matrix_free(_originally_allocated_rows, _originally_allocated_cols, + sizeof(int8_t), buffer); + } +#endif + } + } + + /** + * @brief Get a pointer to the buffer advanced by n rows + * + * @param row Numer of rows to advance the returned buffer pointer + * @return float* Pointer to the buffer at the start of row n + */ + int8_t *get_row_ptr(size_t row) + { + return buffer + row * cols; + } + +#endif // #ifdef __cplusplus +} matrix_i8_t; + +/** + * A matrix structure that allocates a matrix on the **heap**. + * Freeing happens by calling `delete` on the object or letting the object go out of scope. 
+ */ +typedef struct ei_matrix_i32 { + int32_t *buffer; + uint32_t rows; + uint32_t cols; + bool buffer_managed_by_me; + +#if EIDSP_TRACK_ALLOCATIONS + const char *_fn; + const char *_file; + int _line; + uint32_t _originally_allocated_rows; + uint32_t _originally_allocated_cols; +#endif + +#ifdef __cplusplus + /** + * Create a new matrix + * @param n_rows Number of rows + * @param n_cols Number of columns + * @param a_buffer Buffer, if not provided we'll alloc on the heap + */ + ei_matrix_i32( + uint32_t n_rows, + uint32_t n_cols, + int32_t *a_buffer = NULL +#if EIDSP_TRACK_ALLOCATIONS + , + const char *fn = NULL, + const char *file = NULL, + int line = 0 +#endif + ) + { + if (a_buffer) { + buffer = a_buffer; + buffer_managed_by_me = false; + } + else { + buffer = (int32_t*)ei_calloc(n_rows * n_cols * sizeof(int32_t), 1); + buffer_managed_by_me = true; + } + rows = n_rows; + cols = n_cols; + + if (!a_buffer) { +#if EIDSP_TRACK_ALLOCATIONS + _fn = fn; + _file = file; + _line = line; + _originally_allocated_rows = rows; + _originally_allocated_cols = cols; + if (_fn) { + ei_dsp_register_matrix_alloc_internal(fn, file, line, rows, cols, sizeof(int32_t), buffer); + } + else { + ei_dsp_register_matrix_alloc(rows, cols, sizeof(int32_t), buffer); + } +#endif + } + } + + ~ei_matrix_i32() { + if (buffer && buffer_managed_by_me) { + ei_free(buffer); + +#if EIDSP_TRACK_ALLOCATIONS + if (_fn) { + ei_dsp_register_matrix_free_internal(_fn, _file, _line, _originally_allocated_rows, + _originally_allocated_cols, sizeof(int32_t), buffer); + } + else { + ei_dsp_register_matrix_free(_originally_allocated_rows, _originally_allocated_cols, + sizeof(int32_t), buffer); + } +#endif + } + } + + /** + * @brief Get a pointer to the buffer advanced by n rows + * + * @param row Numer of rows to advance the returned buffer pointer + * @return float* Pointer to the buffer at the start of row n + */ + int32_t *get_row_ptr(size_t row) + { + return buffer + row * cols; + } + +#endif // #ifdef __cplusplus +} matrix_i32_t; + +/** + * Another matrix structure that allocates a matrix on the **heap**. + * Freeing happens by calling `delete` on the object or letting the object go out of scope. + * We use this for the filterbanks, as we quantize these operations to save memory. 
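+ * Values are stored as uint8_t and are mapped back to float through the user-supplied
+ * `dequantization_fn`.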
+ */ +typedef struct ei_quantized_matrix { + uint8_t *buffer; + uint32_t rows; + uint32_t cols; + bool buffer_managed_by_me; +#ifdef __MBED__ + mbed::Callback dequantization_fn; +#else + float (*dequantization_fn)(uint8_t); +#endif + +#if EIDSP_TRACK_ALLOCATIONS + const char *_fn; + const char *_file; + int _line; + uint32_t _originally_allocated_rows; + uint32_t _originally_allocated_cols; +#endif + +#ifdef __cplusplus + /** + * Create a quantized matrix + * @param n_rows Number of rows + * @param n_cols Number of columns + * @param a_dequantization_fn How to dequantize the values in this matrix + * @param a_buffer Optional: a buffer, if set we won't allocate memory ourselves + */ + ei_quantized_matrix(uint32_t n_rows, + uint32_t n_cols, +#ifdef __MBED__ + mbed::Callback a_dequantization_fn, +#else + float (*a_dequantization_fn)(uint8_t), +#endif + uint8_t *a_buffer = NULL +#if EIDSP_TRACK_ALLOCATIONS + , + const char *fn = NULL, + const char *file = NULL, + int line = 0 +#endif + ) + { + if (a_buffer) { + buffer = a_buffer; + buffer_managed_by_me = false; + } + else { + buffer = (uint8_t*)ei_calloc(n_rows * n_cols * sizeof(uint8_t), 1); + buffer_managed_by_me = true; + } + rows = n_rows; + cols = n_cols; + dequantization_fn = a_dequantization_fn; + if (!a_buffer) { +#if EIDSP_TRACK_ALLOCATIONS + _fn = fn; + _file = file; + _line = line; + _originally_allocated_rows = rows; + _originally_allocated_cols = cols; + if (_fn) { + ei_dsp_register_matrix_alloc_internal(fn, file, line, rows, cols, sizeof(uint8_t), buffer); + } + else { + ei_dsp_register_matrix_alloc(rows, cols, sizeof(uint8_t), buffer); + } +#endif + } + } + + ~ei_quantized_matrix() { + if (buffer && buffer_managed_by_me) { + ei_free(buffer); + +#if EIDSP_TRACK_ALLOCATIONS + if (_fn) { + ei_dsp_register_matrix_free_internal(_fn, _file, _line, _originally_allocated_rows, + _originally_allocated_cols, sizeof(uint8_t), buffer); + } + else { + ei_dsp_register_matrix_free(_originally_allocated_rows, _originally_allocated_cols, + sizeof(uint8_t), buffer); + } +#endif + } + } + + /** + * @brief Get a pointer to the buffer advanced by n rows + * + * @param row Numer of rows to advance the returned buffer pointer + * @return float* Pointer to the buffer at the start of row n + */ + uint8_t *get_row_ptr(size_t row) + { + return buffer + row * cols; + } + +#endif // #ifdef __cplusplus +} quantized_matrix_t; + +/** + * A matrix structure that allocates a matrix on the **heap**. + * Freeing happens by calling `delete` on the object or letting the object go out of scope. 
+ */ +typedef struct ei_matrix_u8 { + uint8_t *buffer; + uint32_t rows; + uint32_t cols; + bool buffer_managed_by_me; + +#if EIDSP_TRACK_ALLOCATIONS + const char *_fn; + const char *_file; + int _line; + uint32_t _originally_allocated_rows; + uint32_t _originally_allocated_cols; +#endif + +#ifdef __cplusplus + /** + * Create a new matrix + * @param n_rows Number of rows + * @param n_cols Number of columns + * @param a_buffer Buffer, if not provided we'll alloc on the heap + */ + ei_matrix_u8( + uint32_t n_rows, + uint32_t n_cols, + uint8_t *a_buffer = NULL +#if EIDSP_TRACK_ALLOCATIONS + , + const char *fn = NULL, + const char *file = NULL, + int line = 0 +#endif + ) + { + if (a_buffer) { + buffer = a_buffer; + buffer_managed_by_me = false; + } + else { + buffer = (uint8_t*)ei_calloc(n_rows * n_cols * sizeof(uint8_t), 1); + buffer_managed_by_me = true; + } + rows = n_rows; + cols = n_cols; + + if (!a_buffer) { +#if EIDSP_TRACK_ALLOCATIONS + _fn = fn; + _file = file; + _line = line; + _originally_allocated_rows = rows; + _originally_allocated_cols = cols; + if (_fn) { + ei_dsp_register_matrix_alloc_internal(fn, file, line, rows, cols, sizeof(uint8_t), buffer); + } + else { + ei_dsp_register_matrix_alloc(rows, cols, sizeof(uint8_t), buffer); + } +#endif + } + } + + ~ei_matrix_u8() { + if (buffer && buffer_managed_by_me) { + ei_free(buffer); + +#if EIDSP_TRACK_ALLOCATIONS + if (_fn) { + ei_dsp_register_matrix_free_internal(_fn, _file, _line, _originally_allocated_rows, + _originally_allocated_cols, sizeof(uint8_t), buffer); + } + else { + ei_dsp_register_matrix_free(_originally_allocated_rows, _originally_allocated_cols, + sizeof(uint8_t), buffer); + } +#endif + } + } + + /** + * @brief Get a pointer to the buffer advanced by n rows + * + * @param row Numer of rows to advance the returned buffer pointer + * @return float* Pointer to the buffer at the start of row n + */ + uint8_t *get_row_ptr(size_t row) + { + return buffer + row * cols; + } + +#endif // #ifdef __cplusplus +} matrix_u8_t; + +/** + * Size of a matrix + */ +typedef struct { + uint32_t rows; + uint32_t cols; +} matrix_size_t; + +typedef enum { + DCT_NORMALIZATION_NONE, + DCT_NORMALIZATION_ORTHO +} DCT_NORMALIZATION_MODE; + +/** + * Sensor signal structure + */ +typedef struct ei_signal_t { + /** + * A function to retrieve part of the sensor signal + * No bytes will be requested outside of the `total_length`. + * @param offset The offset in the signal + * @param length The total length of the signal + * @param out_ptr An out buffer to set the signal data + */ +#if EIDSP_SIGNAL_C_FN_POINTER == 1 + int (*get_data)(size_t, size_t, float *); +#else +#ifdef __MBED__ + mbed::Callback get_data; +#else + std::function get_data; +#endif // __MBED__ +#endif // EIDSP_SIGNAL_C_FN_POINTER == 1 + + size_t total_length; +} signal_t; + +#ifdef __cplusplus +} // namespace ei { +#endif // __cplusplus + +// required on Adafruit nRF52 +#if defined(__cplusplus) && defined(ARDUINO_NRF52_ADAFRUIT) +namespace std { + __attribute__((weak)) void __throw_bad_function_call() { while(1); }; + __attribute__((weak)) void __throw_length_error(char const*) { while(1); }; +} +#endif // __cplusplus + +#endif // _EIDSP_NUMPY_TYPES_H_ diff --git a/edgeimpulse/edge-impulse-sdk/dsp/returntypes.hpp b/edgeimpulse/edge-impulse-sdk/dsp/returntypes.hpp new file mode 100644 index 0000000..01cdbf6 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/dsp/returntypes.hpp @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef _EIDSP_RETURN_TYPES_H_ +#define _EIDSP_RETURN_TYPES_H_ + +#include + +namespace ei { + +typedef enum { + EIDSP_OK = 0, + EIDSP_OUT_OF_MEM = -1002, + EIDSP_SIGNAL_SIZE_MISMATCH = -1003, + EIDSP_MATRIX_SIZE_MISMATCH = -1004, + EIDSP_DCT_ERROR = -1005, + EIDSP_INPUT_MATRIX_EMPTY = -1006, + EIDSP_BUFFER_SIZE_MISMATCH = -1007, + EIDSP_PARAMETER_INVALID = -1008, + EIDSP_UNEXPECTED_NEXT_OFFSET = -1009, + EIDSP_OUT_OF_BOUNDS = -1010, + EIDSP_UNSUPPORTED_FILTER_CONFIG = -1011, + EIDSP_NARROWING = -1012, + EIDSP_BLOCK_VERSION_INCORRECT = -1013, + EIDSP_NOT_SUPPORTED = -1014, + EIDSP_REQUIRES_CMSIS_DSP = -1015, + EIDSP_FFT_TABLE_NOT_LOADED = -1016, + EIDSP_INFERENCE_ERROR = -1017 +} EIDSP_RETURN_T; + +} // namespace ei + +#endif // _EIDSP_RETURN_TYPES_H_ diff --git a/edgeimpulse/edge-impulse-sdk/dsp/spectral/feature.hpp b/edgeimpulse/edge-impulse-sdk/dsp/spectral/feature.hpp new file mode 100644 index 0000000..aada87e --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/dsp/spectral/feature.hpp @@ -0,0 +1,679 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef _EIDSP_SPECTRAL_FEATURE_H_ +#define _EIDSP_SPECTRAL_FEATURE_H_ + +#include +#include "processing.hpp" +#include "wavelet.hpp" +#include "signal.hpp" +#include "edge-impulse-sdk/dsp/ei_utils.h" +#include "model-parameters/model_metadata.h" + +namespace ei { +namespace spectral { + +typedef enum { + filter_none = 0, + filter_lowpass = 1, + filter_highpass = 2 +} filter_t; + +class feature { +public: + + /** + * Calculate the spectral features over a signal. + * @param out_features Output matrix. Use `calculate_spectral_buffer_size` to calculate + * the size required. Needs as many rows as `raw_data`. 
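+     *                     Each row is filled as [RMS, peak 1 freq, peak 1 amplitude, ...,
+     *                     peak N freq, peak N amplitude, spectral power edge buckets / 10],
+     *                     i.e. 1 + 2 * fft_peaks + (number of edges - 1) values per axis.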
+ * @param input_matrix Signal, with one row per axis + * @param sampling_freq Sampling frequency of the signal + * @param filter_type Filter type + * @param filter_cutoff Filter cutoff frequency + * @param filter_order Filter order + * @param fft_length Length of the FFT signal + * @param fft_peaks Number of FFT peaks to find + * @param fft_peaks_threshold Minimum threshold + * @param edges_matrix Spectral power edges + * @returns 0 if OK + */ + static int spectral_analysis( + matrix_t *out_features, + matrix_t *input_matrix, + float sampling_freq, + filter_t filter_type, + float filter_cutoff, + uint8_t filter_order, + uint16_t fft_length, + uint8_t fft_peaks, + float fft_peaks_threshold, + matrix_t *edges_matrix_in + ) { + if (out_features->rows != input_matrix->rows) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + if (out_features->cols != calculate_spectral_buffer_size(true, fft_peaks, edges_matrix_in->rows)) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + if (edges_matrix_in->cols != 1) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + int ret; + + size_t axes = input_matrix->rows; + + EI_TRY(processing::subtract_mean(input_matrix) ); + + // apply filter + if (filter_type == filter_lowpass) { + ret = spectral::processing::butterworth_lowpass_filter( + input_matrix, sampling_freq, filter_cutoff, filter_order); + if (ret != EIDSP_OK) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + } + else if (filter_type == filter_highpass) { + ret = spectral::processing::butterworth_highpass_filter( + input_matrix, sampling_freq, filter_cutoff, filter_order); + if (ret != EIDSP_OK) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + } + + // calculate RMS + EI_DSP_MATRIX(rms_matrix, axes, 1); + ret = numpy::rms(input_matrix, &rms_matrix); + if (ret != EIDSP_OK) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + // find peaks in FFT + EI_DSP_MATRIX(peaks_matrix, axes, fft_peaks * 2); + + for (size_t row = 0; row < input_matrix->rows; row++) { + // per axis code + + // get a slice of the current axis + EI_DSP_MATRIX_B(axis_matrix, 1, input_matrix->cols, input_matrix->buffer + (row * input_matrix->cols)); + + // calculate FFT + EI_DSP_MATRIX(fft_matrix, 1, fft_length / 2 + 1); + ret = numpy::rfft(axis_matrix.buffer, axis_matrix.cols, fft_matrix.buffer, fft_matrix.cols, fft_length); + if (ret != EIDSP_OK) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + // multiply by 2/N + numpy::scale(&fft_matrix, (2.0f / static_cast(fft_length))); + + // we're now using the FFT matrix to calculate peaks etc. 
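+            // Note: this per-axis peaks_matrix (fft_peaks x 2, one [frequency, amplitude]
+            // pair per row) appears to shadow the axes x (fft_peaks * 2) matrix declared
+            // before the loop; only the inner one is read back below.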
+ EI_DSP_MATRIX(peaks_matrix, fft_peaks, 2); + ret = spectral::processing::find_fft_peaks(&fft_matrix, &peaks_matrix, + sampling_freq, fft_peaks_threshold, fft_length); + if (ret != EIDSP_OK) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + // calculate periodogram for spectral power buckets + EI_DSP_MATRIX(period_fft_matrix, 1, fft_length / 2 + 1); + EI_DSP_MATRIX(period_freq_matrix, 1, fft_length / 2 + 1); + ret = spectral::processing::periodogram(&axis_matrix, + &period_fft_matrix, &period_freq_matrix, sampling_freq, fft_length); + if (ret != EIDSP_OK) { + EIDSP_ERR(ret); + } + + EI_DSP_MATRIX(edges_matrix_out, edges_matrix_in->rows - 1, 1); + ret = spectral::processing::spectral_power_edges( + &period_fft_matrix, + &period_freq_matrix, + edges_matrix_in, + &edges_matrix_out, + sampling_freq); + if (ret != EIDSP_OK) { + EIDSP_ERR(ret); + } + + float *features_row = out_features->buffer + (row * out_features->cols); + + size_t fx = 0; + + features_row[fx++] = rms_matrix.buffer[row]; + for (size_t peak_row = 0; peak_row < peaks_matrix.rows; peak_row++) { + features_row[fx++] = peaks_matrix.buffer[peak_row * peaks_matrix.cols + 0]; + features_row[fx++] = peaks_matrix.buffer[peak_row * peaks_matrix.cols + 1]; + } + for (size_t edge_row = 0; edge_row < edges_matrix_out.rows; edge_row++) { + features_row[fx++] = edges_matrix_out.buffer[edge_row * edges_matrix_out.cols] / 10.0f; + } + } + + return EIDSP_OK; + } + + + /** + * Calculate the buffer size for Spectral Analysis + * @param rms: Whether to calculate the RMS as part of the features + * @param peaks_count: Number of FFT peaks + * @param spectral_edges_count: Number of spectral edges + */ + static size_t calculate_spectral_buffer_size( + bool rms, size_t peaks_count, size_t spectral_edges_count) + { + size_t count = 0; + if (rms) count++; + count += (peaks_count * 2); + if (spectral_edges_count > 0) { + count += (spectral_edges_count - 1); + } + return count; + } + + static int extract_spectral_analysis_features_v1( + matrix_t *input_matrix, + matrix_t *output_matrix, + ei_dsp_config_spectral_analysis_t *config_ptr, + const float sampling_freq) + { + // scale the signal + int ret = numpy::scale(input_matrix, config_ptr->scale_axes); + if (ret != EIDSP_OK) { + ei_printf("ERR: Failed to scale signal (%d)\n", ret); + EIDSP_ERR(ret); + } + + // transpose the matrix so we have one row per axis (nifty!) 
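+        // e.g. N samples of a 3-axis accelerometer arrive as an N x 3 matrix
+        // (one row per sample); after the transpose it is 3 x N, one row per
+        // axis, which is the shape spectral_analysis() expects.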
+ ret = numpy::transpose(input_matrix); + if (ret != EIDSP_OK) { + ei_printf("ERR: Failed to transpose matrix (%d)\n", ret); + EIDSP_ERR(ret); + } + + // the spectral edges that we want to calculate + matrix_t edges_matrix_in(64, 1); + size_t edge_matrix_ix = 0; + + char spectral_str[128] = { 0 }; + if (strlen(config_ptr->spectral_power_edges) > sizeof(spectral_str) - 1) { + EIDSP_ERR(EIDSP_PARAMETER_INVALID); + } + memcpy( + spectral_str, + config_ptr->spectral_power_edges, + strlen(config_ptr->spectral_power_edges)); + + // convert spectral_power_edges (string) into float array + char *spectral_ptr = spectral_str; + while (spectral_ptr != NULL) { + while ((*spectral_ptr) == ' ') { + spectral_ptr++; + } + + edges_matrix_in.buffer[edge_matrix_ix++] = atof(spectral_ptr); + + // find next (spectral) delimiter (or '\0' character) + while ((*spectral_ptr != ',')) { + spectral_ptr++; + if (*spectral_ptr == '\0') + break; + } + + if (*spectral_ptr == '\0') { + spectral_ptr = NULL; + } + else { + spectral_ptr++; + } + } + edges_matrix_in.rows = edge_matrix_ix; + + // calculate how much room we need for the output matrix + size_t output_matrix_cols = spectral::feature::calculate_spectral_buffer_size( + true, + config_ptr->spectral_peaks_count, + edges_matrix_in.rows); + // ei_printf("output_matrix_size %hux%zu\n", input_matrix.rows, output_matrix_cols); + if (output_matrix->cols * output_matrix->rows != + static_cast(output_matrix_cols * config_ptr->axes)) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + output_matrix->cols = output_matrix_cols; + output_matrix->rows = config_ptr->axes; + + spectral::filter_t filter_type; + if (strcmp(config_ptr->filter_type, "low") == 0) { + filter_type = spectral::filter_lowpass; + } + else if (strcmp(config_ptr->filter_type, "high") == 0) { + filter_type = spectral::filter_highpass; + } + else { + filter_type = spectral::filter_none; + } + + ret = spectral::feature::spectral_analysis( + output_matrix, + input_matrix, + sampling_freq, + filter_type, + config_ptr->filter_cutoff, + config_ptr->filter_order, + config_ptr->fft_length, + config_ptr->spectral_peaks_count, + config_ptr->spectral_peaks_threshold, + &edges_matrix_in); + if (ret != EIDSP_OK) { + ei_printf("ERR: Failed to calculate spectral features (%d)\n", ret); + EIDSP_ERR(ret); + } + + // flatten again + output_matrix->cols = config_ptr->axes * output_matrix_cols; + output_matrix->rows = 1; + + return EIDSP_OK; + } + + static void get_start_stop_bin( + float sampling_freq, + size_t fft_length, + float filter_cutoff, + size_t *start_bin, + size_t *stop_bin, + bool is_high_pass) + { + // we want to find n such that fcutoff < sample_f / fft * n ( or > for high pass ) + // also, + - half bin width (sample_f/(fft*2)) for high / low pass + if (filter_cutoff > sampling_freq / 2) { + filter_cutoff = sampling_freq / 2; + } + float bin = filter_cutoff * fft_length / sampling_freq; + if (is_high_pass) { + *start_bin = static_cast(bin - 0.5) + 1; // add one b/c we want to always round up + // don't use the DC bin b/c it's zero + *start_bin = *start_bin == 0 ? 1 : *start_bin; + *stop_bin = fft_length / 2 + 1; // go one past + } + else { + *start_bin = 1; + *stop_bin = static_cast(bin + 0.5) + 1; // go one past + } + } + + /** + * @brief Calculates the spectral analysis features. 
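+     * For every input row (axis) the output is, in order: RMS, skewness,
+     * kurtosis, then for implementation_version == 4 the skewness and kurtosis
+     * of the Welch FFT, followed by the retained FFT bins
+     * (start_bin..stop_bin, log10-scaled when `do_log` is set).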
+ * + * @return the number of features calculated + */ + static size_t extract_spec_features( + matrix_t *input_matrix, + matrix_t *output_matrix, + ei_dsp_config_spectral_analysis_t *config, + const float sampling_freq, + const bool remove_mean = true, + const bool transpose_and_scale_input = true) + { + if (transpose_and_scale_input) { + // transpose the matrix so we have one row per axis + numpy::transpose_in_place(input_matrix); + + // func tests for scale of 1 and does a no op in that case + EI_TRY(numpy::scale(input_matrix, config->scale_axes)); + } + + bool do_filter = false; + bool is_high_pass; + + // apply filter, if enabled + // "zero" order filter allowed. will still remove unwanted fft bins later + if (strcmp(config->filter_type, "low") == 0) { + if( config->filter_order ) { + EI_TRY(spectral::processing::butterworth_lowpass_filter( + input_matrix, + sampling_freq, + config->filter_cutoff, + config->filter_order)); + } + do_filter = true; + is_high_pass = false; + } + else if (strcmp(config->filter_type, "high") == 0) { + if( config->filter_order ) { + EI_TRY(spectral::processing::butterworth_highpass_filter( + input_matrix, + sampling_freq, + config->filter_cutoff, + config->filter_order)); + } + do_filter = true; + is_high_pass = true; + } + + if (remove_mean){ + EI_TRY(processing::subtract_mean(input_matrix)); + } + + // Figure bins we remove based on filter cutoff + size_t start_bin, stop_bin; + if (do_filter) { + get_start_stop_bin( + sampling_freq, + config->fft_length, + config->filter_cutoff, + &start_bin, + &stop_bin, + is_high_pass); + } + else { + start_bin = 1; + stop_bin = config->fft_length / 2 + 1; + } + size_t num_bins = stop_bin - start_bin; + + float *feature_out = output_matrix->buffer; + const float *feature_out_ori = feature_out; + for (size_t row = 0; row < input_matrix->rows; row++) { + float *data_window = input_matrix->get_row_ptr(row); + size_t data_size = input_matrix->cols; + + matrix_t rms_in_matrix(1, data_size, data_window); + matrix_t rms_out_matrix(1, 1, feature_out); + EI_TRY(numpy::rms(&rms_in_matrix, &rms_out_matrix)); + + feature_out++; + + // Standard Deviation + float stddev = *(feature_out-1); //= sqrt(numpy::variance(data_window, data_size)); + if (stddev == 0.0f) { + stddev = 1e-10f; + } + // Don't add std dev as a feature b/c it's the same as RMS + // Skew and Kurtosis w/ shortcut: + // See definition at https://en.wikipedia.org/wiki/Skewness + // See definition at https://en.wikipedia.org/wiki/Kurtosis + // Substitute 0 for mean (b/c it is subtracted out above) + // Skew becomes: mean(X^3) / stddev^3 + // Kurtosis becomes: mean(X^4) / stddev^4 + // Note, this is the Fisher definition of Kurtosis, so subtract 3 + // (see https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.kurtosis.html) + float s_sum = 0; + float k_sum = 0; + float temp; + for (size_t i = 0; i < data_size; i++) { + temp = data_window[i] * data_window[i] * data_window[i]; + s_sum += temp; + k_sum += temp * data_window[i]; + } + // Skewness out + temp = stddev * stddev * stddev; + *feature_out++ = (s_sum / data_size) / temp; + // Kurtosis out + *feature_out++ = ((k_sum / data_size) / (temp * stddev)) - 3; + + if (config->implementation_version == 4) { + + size_t fft_out_size = config->fft_length / 2 + 1; + ei_vector fft_out(fft_out_size); + EI_TRY(numpy::welch_max_hold( + data_window, + data_size, + fft_out.data(), + 0, + fft_out_size, + config->fft_length, + config->do_fft_overlap)); + + matrix_t x(1, fft_out.size(), const_cast(fft_out.data())); + matrix_t 
out(1, 1); + + *feature_out++ = (numpy::skew(&x, &out) == EIDSP_OK) ? (out.get_row_ptr(0)[0]) : 0.0f; + *feature_out++ = (numpy::kurtosis(&x, &out) == EIDSP_OK) ? (out.get_row_ptr(0)[0]) : 0.0f; + + for (size_t i = start_bin; i < stop_bin; i++) { + feature_out[i - start_bin] = fft_out[i]; + } + } else { + EI_TRY(numpy::welch_max_hold( + data_window, + data_size, + feature_out, + start_bin, + stop_bin, + config->fft_length, + config->do_fft_overlap)); + } + if (config->do_log) { + numpy::zero_handling(feature_out, num_bins); + ei_matrix temp(num_bins, 1, feature_out); + numpy::log10(&temp); + } + feature_out += num_bins; + } + size_t num_features = feature_out - feature_out_ori; + return num_features; + } + + static int extract_spectral_analysis_features_v2( + matrix_t *input_matrix, + matrix_t *output_matrix, + ei_dsp_config_spectral_analysis_t *config, + const float sampling_freq) + { + size_t n_features = + extract_spec_features(input_matrix, output_matrix, config, sampling_freq); + return n_features == output_matrix->cols ? EIDSP_OK : EIDSP_MATRIX_SIZE_MISMATCH; + } + + static int extract_spectral_analysis_features_v3( + matrix_t *input_matrix, + matrix_t *output_matrix, + ei_dsp_config_spectral_analysis_t *config, + const float sampling_freq) + { + if (strcmp(config->analysis_type, "Wavelet") == 0) { + return wavelet::extract_wavelet_features(input_matrix, output_matrix, config, sampling_freq); + } else { + return extract_spectral_analysis_features_v2(input_matrix, output_matrix, config, sampling_freq); + } + } + + static ei_vector get_ratio_combo(int r) + { + if (r == 1 || r == 3 || r == 10) { + return {r}; + } else if (r == 30) { + return {3, 10}; + } else if (r == 100) { + return {10, 10}; + } else if (r == 1000) { + return {10, 10, 10}; + } else { + assert(0); + } + return {0}; // to make linter happy + } + + // can do in-place or out-of-place + static size_t _decimate(matrix_t *input_matrix, matrix_t *output_matrix, size_t ratio) + { + // generated by build_sav4_header in prepare.py + static float sos_deci_3[] = { + 3.4799547399084973e-05f, 6.959909479816995e-05f, 3.4799547399084973e-05f, 1.0f, -1.416907422639627f, 0.5204552955670066f, 1.0f, 2.0f, 1.0f, 1.0f, -1.3342748248687593f, 0.594631953081447f, 1.0f, 2.0f, 1.0f, 1.0f, -1.237675162600336f, 0.7259326611233617f, 1.0f, 2.0f, 1.0f, 1.0f, -1.2180861262950025f, 0.8987833581253264}; + static float sos_zi_deci_3[] = { 0.0013094887094341828f, -0.0006648423946383296f, + 0.0193087012128479f, -0.010936639208493802f, + 0.1485445305451165f, -0.10217301649013415f, + 0.8250625539381586f, -0.7244268881025758 }; + static float sos_deci_10[] = { 3.5863243209995215e-09f, + 7.172648641999043e-09f, + 3.5863243209995215e-09f, + 1.0f, + -1.8204968644767618f, + 0.8308597403796137f, + 1.0f, + 2.0f, + 1.0f, + 1.0f, + -1.8289505620176847f, + 0.8553173710387741f, + 1.0f, + 2.0f, + 1.0f, + 1.0f, + -1.8517334482627625f, + 0.9015161055713813f, + 1.0f, + 2.0f, + 1.0f, + 1.0f, + -1.8965395961864169f, + 0.9644245584642932 }; + static float sos_zi_deci_10[] = { 1.38071060429997e-06f, -1.146570262401316e-06f, + 0.00020862168862901534f, -0.0001782374705409433f, + 0.016663820918116152f, -0.015002020730727955f, + 0.9773862470492868f, -0.9420150059170858 }; + + assert(ratio == 3 || ratio == 10); + + float* sos = ratio == 3 ? sos_deci_3 : sos_deci_10; + float* sos_zi = ratio == 3 ? 
sos_zi_deci_3 : sos_zi_deci_10; + + const size_t out_size = signal::get_decimated_size(input_matrix->cols, ratio); + + for (size_t row = 0; row < input_matrix->rows; row++) { + const float *x = input_matrix->get_row_ptr(row); + float *y = output_matrix->get_row_ptr(row); + signal::sosfilt sosfilt(sos, sos_zi, 4); + signal::decimate_simple( + x, + input_matrix->cols, + y, + output_matrix->cols, + ratio, + sosfilt); + } + + return out_size; + } + + static int extract_spectral_analysis_features_v4( + matrix_t *input_matrix, + matrix_t *output_matrix, + ei_dsp_config_spectral_analysis_t *config_p, + const float sampling_freq) + { + auto config_copy = *config_p; + auto config = &config_copy; + if (strcmp(config->analysis_type, "Wavelet") == 0) { + return wavelet::extract_wavelet_features(input_matrix, output_matrix, config, sampling_freq); + } + else if (config->extra_low_freq == false && config->input_decimation_ratio == 1) { + size_t n_features = + extract_spec_features(input_matrix, output_matrix, config, sampling_freq); + return n_features == output_matrix->cols ? EIDSP_OK : EIDSP_MATRIX_SIZE_MISMATCH; + } + else { + numpy::transpose_in_place(input_matrix); + EI_TRY(numpy::scale(input_matrix, config->scale_axes)); + + if (config->input_decimation_ratio > 1) { + ei_vector ratio_combo = get_ratio_combo(config->input_decimation_ratio); + size_t out_size = input_matrix->cols; + for (int r : ratio_combo) { + out_size = _decimate(input_matrix, input_matrix, r); + } + + // rearrange input matrix to be in the right shape after decimation + float* out = input_matrix->get_row_ptr(0) + out_size; + for(uint32_t r = 1; r < input_matrix->rows; r++) { + float *row = input_matrix->get_row_ptr(r); + for(size_t c = 0; c < out_size; c++) { + *out++ = row[c]; + } + } + input_matrix->cols = out_size; + } + + float new_sampling_freq = sampling_freq / config->input_decimation_ratio; + + // filter here, before decimating, instead of inside extract_spec_features + if (strcmp(config->filter_type, "low") == 0) { + if( config->filter_order ) { + EI_TRY(spectral::processing::butterworth_lowpass_filter( + input_matrix, + new_sampling_freq, + config->filter_cutoff, + config->filter_order)); + } + } + else if (strcmp(config->filter_type, "high") == 0) { + if( config->filter_order ) { + EI_TRY(spectral::processing::butterworth_highpass_filter( + input_matrix, + new_sampling_freq, + config->filter_cutoff, + config->filter_order)); + } + } + + // set the filter order to 0, so that we won't double filter + config->filter_order = 0; + + // do this before extract_spec_features because extract_spec_features modifies the matrix + constexpr size_t decimation = 10; + const size_t decimated_size = + signal::get_decimated_size(input_matrix->cols, decimation); + matrix_t lf_signal(input_matrix->rows, decimated_size); + _decimate(input_matrix, &lf_signal, decimation); + + size_t n_features = extract_spec_features( + input_matrix, + output_matrix, + config, + new_sampling_freq, + true, + false); + + if (n_features > 0 && config->extra_low_freq) { + // disable filtering post decimation + matrix_t lf_features(1, output_matrix->rows * output_matrix->cols - n_features, + output_matrix->buffer + n_features); + + n_features += extract_spec_features( + &lf_signal, + &lf_features, + config, + new_sampling_freq / decimation, + true, + false); + } + return n_features == output_matrix->cols ? 
EIDSP_OK : EIDSP_MATRIX_SIZE_MISMATCH; + } + } +}; + +} // namespace spectral +} // namespace ei + + + +#endif // _EIDSP_SPECTRAL_FEATURE_H_ diff --git a/edgeimpulse/edge-impulse-sdk/dsp/spectral/filters.hpp b/edgeimpulse/edge-impulse-sdk/dsp/spectral/filters.hpp new file mode 100644 index 0000000..c400fcf --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/dsp/spectral/filters.hpp @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef _EIDSP_SPECTRAL_FILTERS_H_ +#define _EIDSP_SPECTRAL_FILTERS_H_ + +#include +#include "../numpy.hpp" + +#ifndef M_PI +#define M_PI 3.14159265358979323846264338327950288 +#endif // M_PI + +namespace ei { +namespace spectral { +namespace filters { + /** + * The Butterworth filter has maximally flat frequency response in the passband. + * @param filter_order Even filter order (between 2..8) + * @param sampling_freq Sample frequency of the signal + * @param cutoff_freq Cut-off frequency of the signal + * @param src Source array + * @param dest Destination array + * @param size Size of both source and destination arrays + */ + static void butterworth_lowpass( + int filter_order, + float sampling_freq, + float cutoff_freq, + const float *src, + float *dest, + size_t size) + { + int n_steps = filter_order / 2; + float a = tan(M_PI * cutoff_freq / sampling_freq); + float a2 = pow(a, 2); + float *A = (float*)ei_calloc(n_steps, sizeof(float)); + float *d1 = (float*)ei_calloc(n_steps, sizeof(float)); + float *d2 = (float*)ei_calloc(n_steps, sizeof(float)); + float *w0 = (float*)ei_calloc(n_steps, sizeof(float)); + float *w1 = (float*)ei_calloc(n_steps, sizeof(float)); + float *w2 = (float*)ei_calloc(n_steps, sizeof(float)); + + // Calculate the filter parameters + for(int ix = 0; ix < n_steps; ix++) { + float r = sin(M_PI * ((2.0 * ix) + 1.0) / (2.0 * filter_order)); + sampling_freq = a2 + (2.0 * a * r) + 1.0; + A[ix] = a2 / sampling_freq; + d1[ix] = 2.0 * (1 - a2) / sampling_freq; + d2[ix] = -(a2 - (2.0 * a * r) + 1.0) / sampling_freq; + } + + // Apply the filter + for (size_t sx = 0; sx < size; sx++) { + dest[sx] = src[sx]; + + for (int i = 0; i < n_steps; i++) { + w0[i] = d1[i] * w1[i] + d2[i] * w2[i] + dest[sx]; + dest[sx] = A[i] * (w0[i] + (2.0 * w1[i]) + w2[i]); + w2[i] = w1[i]; + w1[i] = w0[i]; + } + } + + ei_free(A); + ei_free(d1); + ei_free(d2); + ei_free(w0); + ei_free(w1); + ei_free(w2); + } + + /** + * The Butterworth filter has maximally flat frequency response in the passband. 
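+     * This mirrors `butterworth_lowpass` above: the same cascade of second-order
+     * sections is run, but each section uses numerator (w0 - 2*w1 + w2) with gain
+     * 1/s instead of (w0 + 2*w1 + w2) with gain a2/s, where s = a2 + 2*a*r + 1 and
+     * a = tan(pi * cutoff_freq / sampling_freq) pre-warps the cutoff for the
+     * bilinear transform.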
+ * @param filter_order Even filter order (between 2..8) + * @param sampling_freq Sample frequency of the signal + * @param cutoff_freq Cut-off frequency of the signal + * @param src Source array + * @param dest Destination array + * @param size Size of both source and destination arrays + */ + static void butterworth_highpass( + int filter_order, + float sampling_freq, + float cutoff_freq, + const float *src, + float *dest, + size_t size) + { + int n_steps = filter_order / 2; + float a = tan(M_PI * cutoff_freq / sampling_freq); + float a2 = pow(a, 2); + float *A = (float*)ei_calloc(n_steps, sizeof(float)); + float *d1 = (float*)ei_calloc(n_steps, sizeof(float)); + float *d2 = (float*)ei_calloc(n_steps, sizeof(float)); + float *w0 = (float*)ei_calloc(n_steps, sizeof(float)); + float *w1 = (float*)ei_calloc(n_steps, sizeof(float)); + float *w2 = (float*)ei_calloc(n_steps, sizeof(float)); + + // Calculate the filter parameters + for (int ix = 0; ix < n_steps; ix++) { + float r = sin(M_PI * ((2.0 * ix) + 1.0) / (2.0 * filter_order)); + sampling_freq = a2 + (2.0 * a * r) + 1.0; + A[ix] = 1.0f / sampling_freq; + d1[ix] = 2.0 * (1 - a2) / sampling_freq; + d2[ix] = -(a2 - (2.0 * a * r) + 1.0) / sampling_freq; + } + + // Apply the filter + for (size_t sx = 0; sx < size; sx++) { + dest[sx] = src[sx]; + + for (int i = 0; i < n_steps; i++) { + w0[i] = d1[i] * w1[i] + d2[i] * w2[i] + dest[sx]; + dest[sx] = A[i] * (w0[i] - (2.0 * w1[i]) + w2[i]); + w2[i] = w1[i]; + w1[i] = w0[i]; + } + } + + ei_free(A); + ei_free(d1); + ei_free(d2); + ei_free(w0); + ei_free(w1); + ei_free(w2); + } + +} // namespace filters +} // namespace spectral +} // namespace ei + +#endif // _EIDSP_SPECTRAL_FILTERS_H_ diff --git a/edgeimpulse/edge-impulse-sdk/dsp/spectral/fir_filter.hpp b/edgeimpulse/edge-impulse-sdk/dsp/spectral/fir_filter.hpp new file mode 100644 index 0000000..52c5874 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/dsp/spectral/fir_filter.hpp @@ -0,0 +1,212 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef __FIR_FILTER__H__ +#define __FIR_FILTER__H__ + +#include +#include +#include "filters.hpp" //for M_PI +#include + +/** + * @brief + * + * @tparam input_t Type of input array. Either matrix_i16_t, or matrix_i32_t + * @tparam acc_t Accumulator size that matches above. 64bit for i16 + */ +template +class fir_filter +{ +private: + /** + * @brief Set the taps lowpass object + * + * @param cutoff_normalized Should be in the range 0..0.5 (0.5 being the nyquist) + */ + void set_taps_lowpass(float cutoff_normalized, std::vector &f_taps) + { + //http://www.dspguide.com/ch16/2.htm + float sine_scale = 2 * M_PI * cutoff_normalized; + // offset is M/2...M is filter order -1. 
so truncation is desired + int offset = filter_size / 2; + for (int i = 0; i < filter_size / 2; i++) + { + f_taps[i] = sin(sine_scale * (i - offset)) / (i - offset); + } + f_taps[filter_size / 2] = sine_scale; + for (int i = filter_size / 2 + 1; i < filter_size; i++) + { + f_taps[i] = sin(sine_scale * (i - offset)) / (i - offset); + } + } + + void apply_hamming(std::vector &f_taps) + { + for (int i = 0; i < filter_size; i++) + { + f_taps[i] *= 0.54 - 0.46 * cos(2 * M_PI * i / (filter_size - 1)); + } + } + + void scale_to_unity_gain(std::vector &f_taps) + { + //find the sum of taps + float sum = 0; + for (auto tap : f_taps) + { + sum += tap; + } + //scale down + for (auto &tap : f_taps) + { + tap /= sum; + } + } + + void convert_lowpass_to_highpass(std::vector &f_taps) + { + for (size_t i = 0; i < f_taps.size(); i += 2) + { + f_taps[i] *= -1; + } + } + +public: + /** + * @brief Perform in place filtering on the input matrix + * @param sampling_frequency Sampling freqency of data + * @param filter_size Number of taps desired (note, filter order +1) + * @param lowpass_cutoff Lowpass cutoff freqency. If 0, will be a high pass filter + * @param highpass_cutoff Highpass cutoff. If 0, will just be a lowpass. If both lowpass and higpass, bandpass + * @param decimation_ratio To downsample, ratio of samples to get rid of. + * For example, 4 to go from sample rate of 40k to 10k. LOWPASS CUTOFF MUST MATCH THIS + * If you don't filter the high frequencies, they WILL alias into the passband + * So in the above example, you would want to cutoff at 5K (so you have some buffer) + */ + fir_filter( + float sampling_frequency, + uint8_t filter_size, + float lowpass_cutoff, + float highpass_cutoff = 0, + int decimation_ratio = 1) : taps(filter_size) , history(filter_size, 0) + { + this->filter_size = filter_size; + std::vector f_taps(filter_size, 0); + if( highpass_cutoff == 0 && lowpass_cutoff == 0 ) + { + ei_printf("You must choose either a lowpass or highpass cutoff"); + return; // return a filter that will return zeros always + } + if (highpass_cutoff == 0) + { + // use normalized frequency + set_taps_lowpass(lowpass_cutoff / sampling_frequency, f_taps); + } + if (lowpass_cutoff == 0) + { + //for highpass, we'll just design a lowpass filter, then invert its spectrum + set_taps_lowpass(highpass_cutoff / sampling_frequency, f_taps); + } + //todo bandpass + apply_hamming(f_taps); + //scale to unity gain in passband (this prevents overflow) + scale_to_unity_gain(f_taps); + // aka if highpass filter + if (lowpass_cutoff == 0) + { + //now invert the spectrum + convert_lowpass_to_highpass(f_taps); + } + // scale and write into fixed point taps + for (int i = 0; i < filter_size; i++) + { + taps[i] = f_taps[i] * 32767; + } + } + +/** + * @brief Apply the filter to the input data. 
You can do this blockwise, as the object preserves memory of old samples + * Call reset if there's a gap in the data + * + * @param src Source array + * @param dest Output array (can be the same as source for in place) + * @param size Number of samples to process + */ + void apply_filter( + const input_t *src, + input_t *dest, + size_t size) + { + for (size_t i = 0; i < size; i++) + { + history[write_index] = src[i]; + int read_index = write_index; + //minus one b/c of the sign bit + int shift = (sizeof(input_t) * 8) - 1; + //stuff a 1 into one less than we're going to shift to effectively round + //this is essentially resetting the accumulator back to zero otherwise + acc_t accumulator = 1 << (shift - 1); + for (auto tap : taps) + { + accumulator += static_cast(tap) * history[read_index]; + //wrap the read index + read_index = read_index == 0 ? filter_size - 1 : read_index - 1; + } + //wrap the write index + write_index++; + if (write_index == filter_size) + { + write_index = 0; + } + + accumulator >>= shift; + //saturate if overflow + if (accumulator > std::numeric_limits::max()) + { + dest[i] = std::numeric_limits::max(); + } + else if (accumulator < std::numeric_limits::min()) + { + dest[i] = std::numeric_limits::min(); + } + else + { + dest[i] = accumulator; + } + } + } + + /** + * @brief Reset the filter (when changing rows for instance, for a new signal) + * This simply clears the filter history + * + */ + void reset() + { + std::fill(history.begin(), history.end(), 0); + } + +private: + std::vector taps; + std::vector history; + int write_index = 0; + int filter_size; + + friend class AccelerometerQuantizedTestCase; + +}; +#endif //!__FIR_FILTER__H__ diff --git a/edgeimpulse/edge-impulse-sdk/dsp/spectral/processing.hpp b/edgeimpulse/edge-impulse-sdk/dsp/spectral/processing.hpp new file mode 100644 index 0000000..c70d516 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/dsp/spectral/processing.hpp @@ -0,0 +1,487 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef _EIDSP_SPECTRAL_PROCESSING_H_ +#define _EIDSP_SPECTRAL_PROCESSING_H_ + +#include "edge-impulse-sdk/dsp/ei_vector.h" +#include +#include "../numpy.hpp" +#include "filters.hpp" + +namespace ei { +namespace spectral { + +namespace processing { + /** + * Scaling on the signal. + * @param signal: The input signal. + * @param scaling (int): To scale by which factor (e.g. 10 here means multiply by 10) + */ + class scale { +public: + scale(ei_signal_t *signal, float scaling = 1.0f) + : _signal(signal), _scaling(scaling) + { + } + + /** + * Get scaled data from the underlying sensor buffer... + * This retrieves data from the signal then scales it. 
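+     * `out_buffer` must have room for `length` floats; requests reaching past
+     * `total_length` fail with EIDSP_OUT_OF_BOUNDS.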
+ * @param offset Offset in the audio signal + * @param length Length of the audio signal + */ + int get_data(size_t offset, size_t length, float *out_buffer) { + if (offset + length > _signal->total_length) { + EIDSP_ERR(EIDSP_OUT_OF_BOUNDS); + } + + int ret = _signal->get_data(offset, length, out_buffer); + if (ret != 0) { + EIDSP_ERR(ret); + } + + EI_DSP_MATRIX_B(temp, 1, length, out_buffer); + return numpy::scale(&temp, _scaling); + } + +private: + ei_signal_t *_signal; + float _scaling; + }; +} + +namespace processing { + typedef struct { + float freq; + float amplitude; + } freq_peak_t; + + typedef struct { + EIDSP_i32 freq; + EIDSP_i32 amplitude; + } freq_peak_i32_t; + + /** + * Scale a the signal. This modifies the signal in place! + * For memory consumption reasons you **probably** want the scaling class, + * which lazily loads the signal in. + * @param signal (array): The input signal. + * @param signal_size: The length of the signal. + * @param scale (float): The scaling factor (multiplies by this number). + * @returns 0 when successful + */ + __attribute__((unused)) static int scale(float *signal, size_t signal_size, float scale = 1) + { + EI_DSP_MATRIX_B(temp, 1, signal_size, signal); + return numpy::scale(&temp, scale); + } + + /** + * Filter data along one-dimension with an IIR or FIR filter using + * Butterworth digital and analog filter design. + * This modifies the matrix in-place (per row) + * @param matrix Input matrix + * @param sampling_freq Sampling frequency + * @param filter_cutoff + * @param filter_order + * @returns 0 when successful + */ + static int butterworth_lowpass_filter( + matrix_t *matrix, + float sampling_frequency, + float filter_cutoff, + uint8_t filter_order) + { + for (size_t row = 0; row < matrix->rows; row++) { + filters::butterworth_lowpass( + filter_order, + sampling_frequency, + filter_cutoff, + matrix->buffer + (row * matrix->cols), + matrix->buffer + (row * matrix->cols), + matrix->cols); + } + + return EIDSP_OK; + } + + /** + * Filter data along one-dimension with an IIR or FIR filter using + * Butterworth digital and analog filter design. + * This modifies the matrix in-place (per row) + * @param matrix Input matrix + * @param sampling_freq Sampling frequency + * @param filter_cutoff + * @param filter_order + * @returns 0 when successful + */ + static int butterworth_highpass_filter( + matrix_t *matrix, + float sampling_frequency, + float filter_cutoff, + uint8_t filter_order) + { + for (size_t row = 0; row < matrix->rows; row++) { + filters::butterworth_highpass( + filter_order, + sampling_frequency, + filter_cutoff, + matrix->buffer + (row * matrix->cols), + matrix->buffer + (row * matrix->cols), + matrix->cols); + } + + return EIDSP_OK; + } + + /** + * Find peaks in a FFT spectrum + * threshold is *normalized* threshold + * (I'm still not completely sure if this matches my Python code but it looks OK) + * @param input_matrix Matrix with FFT data of size 1xM + * @param output_matrix Output matrix with N rows for every peak you want to find. 
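+     * A sample counts as a peak when it exceeds both direct neighbours and the
+     * combined rise (x[i]-x[i-1]) + (x[i]-x[i+1]) is above the threshold; the
+     * peak's index into the input is written to the output matrix.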
+ * @param threshold Minimum threshold + * @param peaks_found Out parameter with the number of peaks found + * @returns 0 if OK + */ + static int find_peak_indexes( + matrix_t *input_matrix, + matrix_t *output_matrix, + float threshold, + uint16_t *peaks_found) + { + if (input_matrix->rows != 1) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + if (output_matrix->cols != 1) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + uint16_t out_ix = 0; + size_t in_size = input_matrix->cols; + float *in = input_matrix->buffer; + size_t out_size = output_matrix->rows; + float *out = output_matrix->buffer; + + // for normalized threshold calculation + float min = FLT_MAX, max = 0.0f; + for (size_t ix = 0; ix < in_size - 1; ix++) { + if (in[ix] < min) { + min = in[ix]; + } + if (in[ix] > max) { + max = in[ix]; + } + } + + + float prev = in[0]; + + // so.... + for (size_t ix = 1; ix < in_size - 1; ix++) { + // first make sure it's actually a peak... + if (in[ix] > prev && in[ix] > in[ix+1]) { + // then make sure the threshold is met (on both?) + float height = (in[ix] - prev) + (in[ix] - in[ix + 1]); + // printf("%d inx: %f height: %f threshold: %f\r\n", ix, in[ix], height, threshold); + if (height > threshold) { + out[out_ix] = ix; + out_ix++; + if (out_ix == out_size) break; + } + } + + prev = in[ix]; + } + + *peaks_found = out_ix; + + return EIDSP_OK; + } + + /** + * Find peaks in FFT + * @param fft_matrix Matrix of FFT numbers (1xN) + * @param output_matrix Matrix for the output (Mx2), one row per output you want and two colums per row + * @param sampling_freq How often we sample (in Hz) + * @param threshold Minimum threshold (default: 0.1) + * @returns + */ + static int find_fft_peaks( + matrix_t *fft_matrix, + matrix_t *output_matrix, + float sampling_freq, + float threshold, + uint16_t fft_length) + { + if (fft_matrix->rows != 1) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + if (output_matrix->cols != 2) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + if (output_matrix->rows == 0) { + return EIDSP_OK; + } + + int ret; + + int N = static_cast(fft_length); + float T = 1.0f / sampling_freq; + + EI_DSP_MATRIX(freq_space, 1, fft_matrix->cols); + ret = numpy::linspace(0.0f, 1.0f / (2.0f * T), floor(N / 2), freq_space.buffer); + if (ret != EIDSP_OK) { + EIDSP_ERR(ret); + } + + EI_DSP_MATRIX(peaks_matrix, output_matrix->rows * 10, 1); + + uint16_t peak_count; + ret = find_peak_indexes(fft_matrix, &peaks_matrix, 0.0f, &peak_count); + if (ret != EIDSP_OK) { + EIDSP_ERR(ret); + } + + // turn this into C++ vector and sort it based on amplitude + ei_vector peaks; + for (uint8_t ix = 0; ix < peak_count; ix++) { + freq_peak_t d; + + d.freq = freq_space.buffer[static_cast(peaks_matrix.buffer[ix])]; + d.amplitude = fft_matrix->buffer[static_cast(peaks_matrix.buffer[ix])]; + // printf("freq %f : %f amp: %f\r\n", peaks_matrix.buffer[ix], d.freq, d.amplitude); + if (d.amplitude < threshold) { + d.freq = 0.0f; + d.amplitude = 0.0f; + } + peaks.push_back(d); + } + sort(peaks.begin(), peaks.end(), + [](const freq_peak_t & a, const freq_peak_t & b) -> bool + { + return a.amplitude > b.amplitude; + }); + + // fill with zeros at the end (if needed) + for (size_t ix = peaks.size(); ix < output_matrix->rows; ix++) { + freq_peak_t d; + d.freq = 0; + d.amplitude = 0; + peaks.push_back(d); + } + + for (size_t row = 0; row < output_matrix->rows; row++) { + // col 0 is freq, col 1 is ampl + output_matrix->buffer[row * output_matrix->cols + 0] = peaks[row].freq; + output_matrix->buffer[row * output_matrix->cols + 1] = 
peaks[row].amplitude; + } + + return EIDSP_OK; + } + + + /** + * Calculate spectral power edges in a singal + * @param fft_matrix FFT matrix (1xM) + * @param input_matrix_cols Number of columns in the input matrix + * @param edges_matrix The power edges (Nx1) where N=is number of edges + * (e.g. [0.1, 0.5, 1.0, 2.0, 5.0]) + * @param output_matrix Output matrix of size (N-1 x 1) + * @param sampling_freq Sampling frequency + * @returns 0 if OK + */ + int spectral_power_edges( + matrix_t *fft_matrix, + matrix_t *freq_matrix, + matrix_t *edges_matrix, + matrix_t *output_matrix, + float sampling_freq + ) { + if (fft_matrix->rows != 1 || freq_matrix->rows != 1) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + if (edges_matrix->cols != 1) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + if (output_matrix->rows != edges_matrix->rows - 1 || output_matrix->cols != edges_matrix->cols) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + if (fft_matrix->cols != freq_matrix->cols) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + EI_DSP_MATRIX(buckets, 1, edges_matrix->rows - 1); + EI_DSP_MATRIX(bucket_count, 1, edges_matrix->rows - 1); + + for (uint16_t ix = 0; ix < freq_matrix->cols; ix++) { + float t = freq_matrix->buffer[ix]; + float v = fft_matrix->buffer[ix]; + + // does this fit between any edges? + for (uint16_t ex = 0; ex < edges_matrix->rows - 1; ex++) { + if (t >= edges_matrix->buffer[ex] && t < edges_matrix->buffer[ex + 1]) { + buckets.buffer[ex] += v; + bucket_count.buffer[ex]++; + break; + } + } + } + + // average out and push to vector + for (uint16_t ex = 0; ex < edges_matrix->rows - 1; ex++) { + if (bucket_count.buffer[ex] == 0.0f) { + output_matrix->buffer[ex] = 0.0f; + } + else { + output_matrix->buffer[ex] = buckets.buffer[ex] / bucket_count.buffer[ex]; + } + } + + return EIDSP_OK; + } + + + /** + * Estimate power spectral density using a periodogram using Welch's method. 
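+     * The row is detrended by subtracting its mean, transformed with a real FFT,
+     * scaled by 1 / (sampling_freq * nperseg), and every bin except the Nyquist
+     * bin is doubled to form the one-sided estimate.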
+ * @param input_matrix Of size 1xN + * @param out_fft_matrix Output matrix of size 1x(n_fft/2+1) with frequency data + * @param out_freq_matrix Output matrix of size 1x(n_fft/2+1) with frequency data + * @param sampling_freq The sampling frequency + * @param n_fft Number of FFT buckets + * @returns 0 if OK + */ + int periodogram(matrix_t *input_matrix, matrix_t *out_fft_matrix, matrix_t *out_freq_matrix, float sampling_freq, uint16_t n_fft) + { + if (input_matrix->rows != 1) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + if (out_fft_matrix->rows != 1 || out_fft_matrix->cols != static_cast(n_fft / 2 + 1)) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + if (out_freq_matrix->rows != 1 || out_freq_matrix->cols != static_cast(n_fft / 2 + 1)) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + if (input_matrix->buffer == NULL) { + EIDSP_ERR(EIDSP_OUT_OF_MEM); + } + + if (out_fft_matrix->buffer == NULL) { + EIDSP_ERR(EIDSP_OUT_OF_MEM); + } + + if (out_freq_matrix->buffer == NULL) { + EIDSP_ERR(EIDSP_OUT_OF_MEM); + } + + // map over the input buffer, so we can manipulate the number of columns + EI_DSP_MATRIX_B(welch_matrix, input_matrix->rows, input_matrix->cols, input_matrix->buffer); + + uint16_t nperseg = n_fft; + + if (n_fft > input_matrix->cols) { + nperseg = input_matrix->cols; + } + // make the column align to nperseg in this case + else if (n_fft < input_matrix->cols) { + welch_matrix.cols = n_fft; + } + + EI_DSP_MATRIX(triage_segments, 1, nperseg); + for (uint16_t ix = 0; ix < nperseg; ix++) { + triage_segments.buffer[ix] = 1.0f; + } + + float scale = 1.0f / (sampling_freq * nperseg); + + for (uint16_t ix = 0; ix < n_fft / 2 + 1; ix++) { + out_freq_matrix->buffer[ix] = static_cast(ix) * (1.0f / (n_fft * (1.0f / sampling_freq))); + } + + int ret; + + // now we need to detrend... which is done constant so just subtract the mean + EI_DSP_MATRIX(mean_matrix, 1, 1); + ret = numpy::mean(&welch_matrix, &mean_matrix); + if (ret != EIDSP_OK) { + EIDSP_ERR(ret); + } + + ret = numpy::subtract(&welch_matrix, &mean_matrix); + if (ret != EIDSP_OK) { + EIDSP_ERR(ret); + } + + fft_complex_t *fft_output = (fft_complex_t*)ei_dsp_calloc((n_fft / 2 + 1) * sizeof(fft_complex_t), 1); + ret = numpy::rfft(welch_matrix.buffer, welch_matrix.cols, fft_output, n_fft / 2 + 1, n_fft); + if (ret != EIDSP_OK) { + ei_dsp_free(fft_output, (n_fft / 2 + 1) * sizeof(fft_complex_t)); + EIDSP_ERR(ret); + } + + // conjugate and then multiply with itself and scale + for (uint16_t ix = 0; ix < n_fft / 2 + 1; ix++) { + fft_output[ix].r = (fft_output[ix].r * fft_output[ix].r) + + (abs(fft_output[ix].i * fft_output[ix].i)); + fft_output[ix].i = 0.0f; + + fft_output[ix].r *= scale; + + if (ix != n_fft / 2) { + fft_output[ix].r *= 2; + } + + // then multiply by itself... 
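+            // (the squaring and scaling already happened above; at this point
+            // fft_output[ix].r holds the scaled one-sided power for bin ix)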
+ out_fft_matrix->buffer[ix] = fft_output[ix].r; + } + + ei_dsp_free(fft_output, (n_fft / 2 + 1) * sizeof(fft_complex_t)); + + return EIDSP_OK; + } + + static int subtract_mean(matrix_t* input_matrix) { + // calculate the mean + EI_DSP_MATRIX(mean_matrix, input_matrix->rows, 1); + int ret = numpy::mean(input_matrix, &mean_matrix); + if (ret != EIDSP_OK) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + // scale by the mean + ret = numpy::subtract(input_matrix, &mean_matrix); + if (ret != EIDSP_OK) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + return EIDSP_OK; + } +} // namespace processing +} // namespace spectral +} // namespace ei + +#endif // _EIDSP_SPECTRAL_PROCESSING_H_ diff --git a/edgeimpulse/edge-impulse-sdk/dsp/spectral/signal.hpp b/edgeimpulse/edge-impulse-sdk/dsp/spectral/signal.hpp new file mode 100644 index 0000000..7452f76 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/dsp/spectral/signal.hpp @@ -0,0 +1,351 @@ +/* Edge Impulse inferencing library + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#pragma once + +#include "edge-impulse-sdk/dsp/ei_vector.h" +#include +#include + +namespace ei { + +/** + * @brief Class for signal processing. + * tries to mimic scipy.signal + * + * @todo: call CMSIS DSP functions if available + */ +class signal { +public: + using fvec = ei_vector; + + static void scale(fvec &x, float a) + { + for (size_t ix = 0; ix < x.size(); ix++) { + x[ix] *= a; + } + } + + /** + * @brief Decimate a signal using a IIR filter + * This is the counterpart of scipy.signal.decimate with zero-phase=false. This function + * is not recommended for larger decimation factors, as it will have stability issues. + * Use the SOS version instead. 
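+     * The filter delay line is initialised with `zi` scaled by the first input
+     * sample, the whole signal is run through `lfilter`, and every `factor`-th
+     * filtered sample is kept.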
+ * @param input Input signal + * @param output Output signal + * @param factor Decimation factor + * @param b Numerator coefficients + * @param a Denominator coefficients + * @param zi Initial conditions + */ + static void decimate_simple( + const fvec &input, + fvec &output, + size_t factor, + const fvec &b, + const fvec &a, + const fvec &zi) + { + fvec d = zi; + scale(d, input[0]); + + fvec filtered(input.size()); + lfilter(b, a, input, filtered, d); + + size_t output_size = input.size() / factor; + output.resize(output_size); + + for (size_t ix = 0; ix < output_size; ix++) { + output[ix] = filtered[ix * factor]; + } + } + + static size_t get_decimated_size(size_t input_size, size_t factor) + { + return (input_size + factor - 1) / factor; + } + + struct sosfilt { + const float *coeff; // 6 * num_sections coefficients + float* zi; + fvec zi_vec; // 2 * num_sections initial conditions + size_t num_sections; + + sosfilt(const float *coeff_, const float *zi_, size_t num_sections_) + : coeff(coeff_), + zi_vec(zi_, zi_ + (num_sections_ * 2)), + num_sections(num_sections_) + { + zi = zi_vec.data(); + } + + /** + * @brief IIR filters in second-order sections. + * This is the counterpart of scipy.signal.sosfilt . + * @param input Input signal + * @param output Output signal. Can be the same as input for in place + * @param x_size Minimum size of input and output signal + */ + void run(const float *input, const size_t size, float* output) + { + assert(num_sections > 0); + + iir2(input, output, size, coeff, coeff + 3, zi); + + for (size_t sect = 1; sect < num_sections; sect++) { + iir2( + output, + output, + size, + coeff + sect * 6, + coeff + sect * 6 + 3, + zi + sect * 2); + } + } + + void init(float x0) + { + for (size_t sect = 0; sect < num_sections; sect++) { + zi[sect * 2] *= x0; + zi[sect * 2 + 1] *= x0; + } + } + }; + + /** + * @brief Decimate a signal using a IIR filter with second-order sections + * This is the counterpart of scipy.signal.decimate with zero-phase=false. + * @param input Input signal + * @param output Output signal + * @param factor Decimation factor + * @param sos Second-order section + */ + static void decimate_simple( + const float *input, + const size_t input_size, + float *output, + const size_t output_size, + size_t factor, + sosfilt &sos) + { + sos.init(input[0]); + + fvec filtered(input_size); + sos.run(input, input_size, filtered.data()); + + size_t expected_size = get_decimated_size(input_size, factor); + assert(output_size >= expected_size); + + for (size_t ix = 0; ix < expected_size; ix++) { + output[ix] = filtered[ix * factor]; + } + } + + /** + * @brief Linear filter. + * This is the counterpart of scipy.signal.lfilter with zero-phase=false. This function + * is not recommended for high order filters or cutoff close to boundaries, as it will + * have stability issues. Use the sosfilt instead. + * @param input Input signal + * @param output Output signal + * @param b Numerator coefficients + * @param a Denominator coefficients + * @param zi Initial conditions + */ + static void lfilter(const fvec &b, const fvec &a, const fvec &x, fvec &y, fvec &d) + { + /* + a[0]*y[n] = b[0] * x[n] + d[0][n-1] + d[0][n] = b[1] * x[n] - a[1] * y[n] + d[1][n-1] + d[1][n] = b[2] * x[n] - a[2] * y[n] + d[2][n-1] + ... 
+ d[N-2][n] = b[N-1]*x[n] - a[N-1]*y[n] + d[N-1][n-1] + d[N-1][n] = b[N] * x[n] - a[N] * y[n] + */ + + assert(b.size() == a.size() && b.size() == d.size() + 1); + assert(d.size() > 0); + assert(y.size() >= x.size()); + assert(a[0] != 0.0f); + + const float one_over_a0 = 1.0f / a[0]; + for (size_t ix = 0; ix < x.size(); ix++) { + const float xx = x[ix]; + y[ix] = b[0] * xx + d[0]; + y[ix] *= one_over_a0; + size_t jx; + for (jx = 1; jx < b.size() - 1; jx++) { + d[jx - 1] = b[jx] * xx - a[jx] * y[ix] + d[jx]; + } + d[jx - 1] = b[jx] * xx - a[jx] * y[ix]; + } + } + + static void iir2(const float *x, float *y, size_t n, const float *b, const float *a, float *d) + { + /* + a[0]*y[n] = b[0] * x[n] + d[0][n-1] + d[0][n] = b[1] * x[n] - a[1] * y[n] + d[1][n-1] + d[1][n] = b[2] * x[n] - a[2] * y[n] + */ + const float one_over_a0 = 1.0f / a[0]; + for (size_t ix = 0; ix < n; ix++) { + const float xx = x[ix]; + y[ix] = b[0] * xx + d[0]; + y[ix] *= one_over_a0; + d[0] = b[1] * xx - a[1] * y[ix] + d[1]; + d[1] = b[2] * xx - a[2] * y[ix]; + } + } + + static int gcd(int a, int b) + { + if (b == 0) + return a; + return gcd(b, a % b); + } + + /** + * @brief Upsample, FIR and downsample. + * This is the counterpart of scipy.signal.upfirdn without the padding. + * @param y Input signal + * @param y Output signal + * @param h FIR coefficients + */ + static void upfirdn(const float * x, size_t x_size, fvec &y, int up, int down, const fvec &h) + { + assert(up > 0); + assert(down > 0); + assert(h.size() > 0); + +#if 0 // bug in optimized version + const int N = (h.size() - 1) / 2; + + for (size_t n = 0; n < y.size(); n++) { + float acc = 0.0f; + for (size_t k = 0; k < h.size(); k += up) { + const size_t x_ind = n * down + k - N; + if (x_ind >= 0 && x_ind < x.size()) { + acc += h[k] * x[x_ind]; + } + } + y[n] = acc; + } +#else + int nx = x_size; + int nh = h.size(); + + // Upsample the input signal by inserting zeros + fvec r(up * nx); + for (int i = 0; i < nx; i++) + { + r[i * up] = x[i]; + } + + // Filter the upsampled signal using the given filter coefficients + fvec z(nh + up * nx - 1); + for (int i = 0; i < up * nx; i++) + { + for (int j = 0; j < nh; j++) + { + if (i - j >= 0 && i - j < up * nx) + { + z[i] += r[i - j] * h[j]; + } + } + } + + // Downsample the filtered signal by skipping samples + int skip = (nh - 1) / 2; + for (size_t i = 0; i < y.size(); i++) + { + y[i] = z[i * down + skip]; + } +#endif + + } + + /** + * @brief Resample using a polyphase FIR. + * This is the counterpart of scipy.signal.resample_poly. + * @param input Input signal + * @param output Output signal, will be moved from an internal vector sized correctly. + * @param window FIR coefficients. e.g. signal.firwin(2 * half_len + 1, f_c, window=('kaiser', 5.0)) + */ + static void resample_poly(const float* input, size_t input_size, fvec &output, int up, int down, const fvec &window) + { + assert(up > 0); + assert(down > 0); + assert(window.size() > 0 && (window.size() % 2) == 1); + + int gcd_up_down = gcd(up, down); + up /= gcd_up_down; + down /= gcd_up_down; + + if (up == 1 && down == 1) { + // output = std::move(fvec(input, input + input_size)); + output = fvec(input, input + input_size); + return; + } + + int n_out = (input_size * up); + n_out = n_out / down + (n_out % down == 0 ? 
0 : 1); + + fvec h = window; + scale(h, float(up)); + + output.resize(n_out); + upfirdn(input, input_size, output, up, down, h); + } + + static void calc_decimation_ratios( + const char *filter_type, + float filter_cutoff, + float sample_rate, + std::vector &ratios) + { + if (strcmp(filter_type, "low") == 0) { + ratios = {1}; + return; + } + + static const std::vector supported = {1000, 100, 30, 10, 3}; + for (size_t i = 0; i < supported.size(); i++) { + const int r = supported[i]; + if (sample_rate * 0.5f / r > filter_cutoff) { + if (r == 3 || r == 10) { + ratios = {r}; + } else if (r == 30) { + ratios = {3, 10}; + } else if (r == 100) { + ratios = {10, 10}; + } else if (r == 1000) { + ratios = {10, 10, 10}; + } + return; + } + } + + } +}; + +} // namespace ei diff --git a/edgeimpulse/edge-impulse-sdk/dsp/spectral/spectral.hpp b/edgeimpulse/edge-impulse-sdk/dsp/spectral/spectral.hpp new file mode 100644 index 0000000..0c8b876 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/dsp/spectral/spectral.hpp @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef _EIDSP_SPECTRAL_SPECTRAL_H_ +#define _EIDSP_SPECTRAL_SPECTRAL_H_ + +#include "../config.hpp" +#include "processing.hpp" +#include "feature.hpp" + +#endif // _EIDSP_SPECTRAL_SPECTRAL_H_ diff --git a/edgeimpulse/edge-impulse-sdk/dsp/spectral/wavelet.hpp b/edgeimpulse/edge-impulse-sdk/dsp/spectral/wavelet.hpp new file mode 100644 index 0000000..ba19b29 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/dsp/spectral/wavelet.hpp @@ -0,0 +1,354 @@ +/* Edge Impulse inferencing library + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#pragma once + +#include "edge-impulse-sdk/dsp/ei_vector.h" + +#include "processing.hpp" +#include "wavelet_coeff.hpp" + +namespace ei { +namespace spectral { + +using fvec = ei_vector; + +inline float dot(const float *x, const float *y, size_t sz) +{ + float sum = 0.0f; + for (size_t i = 0; i < sz; i++) { + sum += x[i] * y[i]; + } + return sum; +} + +inline void histo(const fvec &x, size_t nbins, fvec &h, bool normalize = false) +{ + float min = *std::min_element(x.begin(), x.end()); + float max = *std::max_element(x.begin(), x.end()); + float step = (max - min) / nbins; + h.resize(nbins); + for (size_t i = 0; i < x.size(); i++) { + size_t bin = (x[i] - min) / step; + if (bin >= nbins) + bin = nbins - 1; + h[bin]++; + } + if (normalize) { + float s = numpy::sum(h.data(), h.size()); + for (size_t i = 0; i < nbins; i++) { + h[i] /= s; + } + } +} + +class wavelet { + + static constexpr size_t NUM_FEATHERS_PER_COMP = 14; + + template + static void get_filter(const std::array, 2> wav, fvec &h, fvec &g) + { + size_t n = wav[0].size(); + h.resize(n); + g.resize(n); + for (size_t i = 0; i < n; i++) { + h[i] = wav[0][n - i - 1]; + g[i] = wav[1][n - i - 1]; + } + } + + static void find_filter(const char *wav, fvec &h, fvec &g) + { + if (strcmp(wav, "bior1.3") == 0) get_filter<6>(bior1p3, h, g); + else if (strcmp(wav, "bior1.5") == 0) get_filter<10>(bior1p5, h, g); + else if (strcmp(wav, "bior2.2") == 0) get_filter<6>(bior2p2, h, g); + else if (strcmp(wav, "bior2.4") == 0) get_filter<10>(bior2p4, h, g); + else if (strcmp(wav, "bior2.6") == 0) get_filter<14>(bior2p6, h, g); + else if (strcmp(wav, "bior2.8") == 0) get_filter<18>(bior2p8, h, g); + else if (strcmp(wav, "bior3.1") == 0) get_filter<4>(bior3p1, h, g); + else if (strcmp(wav, "bior3.3") == 0) get_filter<8>(bior3p3, h, g); + else if (strcmp(wav, "bior3.5") == 0) get_filter<12>(bior3p5, h, g); + else if (strcmp(wav, "bior3.7") == 0) get_filter<16>(bior3p7, h, g); + else if (strcmp(wav, "bior3.9") == 0) get_filter<20>(bior3p9, h, g); + else if (strcmp(wav, "bior4.4") == 0) get_filter<10>(bior4p4, h, g); + else if (strcmp(wav, "bior5.5") == 0) get_filter<12>(bior5p5, h, g); + else if (strcmp(wav, "bior6.8") == 0) get_filter<18>(bior6p8, h, g); + else if (strcmp(wav, "coif1") == 0) get_filter<6>(coif1, h, g); + else if (strcmp(wav, "coif2") == 0) get_filter<12>(coif2, h, g); + else if (strcmp(wav, "coif3") == 0) get_filter<18>(coif3, h, g); + else if (strcmp(wav, "db2") == 0) get_filter<4>(db2, h, g); + else if (strcmp(wav, "db3") == 0) get_filter<6>(db3, h, g); + else if (strcmp(wav, "db4") == 0) get_filter<8>(db4, h, g); + else if (strcmp(wav, "db5") == 0) get_filter<10>(db5, h, g); + else if (strcmp(wav, "db6") == 0) get_filter<12>(db6, h, g); + else if (strcmp(wav, "db7") == 0) get_filter<14>(db7, h, g); + else if (strcmp(wav, "db8") == 0) get_filter<16>(db8, h, g); + else if (strcmp(wav, "db9") == 0) get_filter<18>(db9, h, g); + else if (strcmp(wav, "db10") == 0) get_filter<20>(db10, h, g); + else if (strcmp(wav, "haar") == 0) get_filter<2>(haar, h, g); + else if (strcmp(wav, "rbio1.3") == 0) get_filter<6>(rbio1p3, h, g); + else if (strcmp(wav, "rbio1.5") == 0) get_filter<10>(rbio1p5, h, g); + else if (strcmp(wav, "rbio2.2") == 0) get_filter<6>(rbio2p2, h, g); + else if (strcmp(wav, "rbio2.4") == 0) get_filter<10>(rbio2p4, h, g); + else if (strcmp(wav, "rbio2.6") == 0) get_filter<14>(rbio2p6, h, g); + else if (strcmp(wav, "rbio2.8") == 0) get_filter<18>(rbio2p8, h, g); + else if (strcmp(wav, "rbio3.1") == 0) get_filter<4>(rbio3p1, h, 
g); + else if (strcmp(wav, "rbio3.3") == 0) get_filter<8>(rbio3p3, h, g); + else if (strcmp(wav, "rbio3.5") == 0) get_filter<12>(rbio3p5, h, g); + else if (strcmp(wav, "rbio3.7") == 0) get_filter<16>(rbio3p7, h, g); + else if (strcmp(wav, "rbio3.9") == 0) get_filter<20>(rbio3p9, h, g); + else if (strcmp(wav, "rbio4.4") == 0) get_filter<10>(rbio4p4, h, g); + else if (strcmp(wav, "rbio5.5") == 0) get_filter<12>(rbio5p5, h, g); + else if (strcmp(wav, "rbio6.8") == 0) get_filter<18>(rbio6p8, h, g); + else if (strcmp(wav, "sym2") == 0) get_filter<4>(sym2, h, g); + else if (strcmp(wav, "sym3") == 0) get_filter<6>(sym3, h, g); + else if (strcmp(wav, "sym4") == 0) get_filter<8>(sym4, h, g); + else if (strcmp(wav, "sym5") == 0) get_filter<10>(sym5, h, g); + else if (strcmp(wav, "sym6") == 0) get_filter<12>(sym6, h, g); + else if (strcmp(wav, "sym7") == 0) get_filter<14>(sym7, h, g); + else if (strcmp(wav, "sym8") == 0) get_filter<16>(sym8, h, g); + else if (strcmp(wav, "sym9") == 0) get_filter<18>(sym9, h, g); + else if (strcmp(wav, "sym10") == 0) get_filter<20>(sym10, h, g); + else assert(0); // wavelet not in the list + } + + static void calculate_entropy(const fvec &y, fvec &features) + { + fvec h; + histo(y, 100, h, true); + // entropy = -sum(prob * log(prob) + float entropy = 0.0f; + for (size_t i = 0; i < h.size(); i++) { + if (h[i] > 0.0f) { + entropy -= h[i] * log(h[i]); + } + } + features.push_back(entropy); + } + + static float get_percentile_from_sorted(const fvec &sorted, float percentile) + { + // adding 0.5 is a trick to get rounding out of C flooring behavior during cast + size_t index = (size_t) ((percentile * (sorted.size()-1)) + 0.5); + return sorted[index]; + } + + static void calculate_statistics(const fvec &y, fvec &features, float mean) + { + fvec sorted = y; + std::sort(sorted.begin(), sorted.end()); + features.push_back(get_percentile_from_sorted(sorted,0.05)); + features.push_back(get_percentile_from_sorted(sorted,0.25)); + features.push_back(get_percentile_from_sorted(sorted,0.75)); + features.push_back(get_percentile_from_sorted(sorted,0.95)); + features.push_back(get_percentile_from_sorted(sorted,0.5)); + + matrix_t x(1, y.size(), const_cast(y.data())); + matrix_t out(1, 1); + + features.push_back(mean); + if (numpy::stdev(&x, &out) == EIDSP_OK) + features.push_back(out.get_row_ptr(0)[0]); + features.push_back(numpy::variance(const_cast(y.data()), y.size())); + if (numpy::rms(&x, &out) == EIDSP_OK) + features.push_back(out.get_row_ptr(0)[0]); + if (numpy::skew(&x, &out) == EIDSP_OK) + features.push_back(out.get_row_ptr(0)[0]); + if (numpy::kurtosis(&x, &out) == EIDSP_OK) + features.push_back(out.get_row_ptr(0)[0]); + } + + static void calculate_crossings(const fvec &y, fvec &features, float mean) + { + size_t zc = 0; + for (size_t i = 1; i < y.size(); i++) { + if (y[i] * y[i - 1] < 0) { + zc++; + } + } + features.push_back(zc / (float)y.size()); + + size_t mc = 0; + for (size_t i = 1; i < y.size(); i++) { + if ((y[i] - mean) * (y[i - 1] - mean) < 0) { + mc++; + } + } + features.push_back(mc / (float)y.size()); + } + + static void + dwt(const float *x, size_t nx, const float *h, const float *g, size_t nh, fvec &a, fvec &d) + { + assert(nh <= 20 && nh > 0 && nx > 0); + size_t nx_padded = nx + nh * 2 - 2; + fvec x_padded(nx_padded); + + // symmetric padding (default in PyWavelet) + for (size_t i = 0; i < nh - 2; i++) + x_padded[i] = x[nh - 3 - i]; + for (size_t i = 0; i < nx; i++) + x_padded[i + nh - 2] = x[i]; + for (size_t i = 0; i < nh; i++) + x_padded[i + nx + nh - 2] = 
x[nx - 1 - i]; + + size_t ny = (nx + nh - 1) / 2; + a.resize(ny); + d.resize(ny); + + // decimate and filter + const float *xx = x_padded.data(); + for (size_t i = 0; i < ny; i++) { + a[i] = dot(xx + 2 * i, h, nh); + d[i] = dot(xx + 2 * i, g, nh); + } + + numpy::underflow_handling(d.data(), d.size()); + numpy::underflow_handling(a.data(), a.size()); + } + + static void extract_features(fvec& y, fvec &features) + { + matrix_t x(1, y.size(), const_cast(y.data())); + matrix_t out(1, 1); + if (numpy::mean(&x, &out) != EIDSP_OK) + assert(0); + float mean = out.get_row_ptr(0)[0]; + + calculate_entropy(y, features); + calculate_crossings(y, features, mean); + calculate_statistics(y, features, mean); + } + + static void + wavedec_features(const float *x, int len, const char *wav, int level, fvec &features) + { + assert(level > 0 && level < 8); + + fvec h; + fvec g; + find_filter(wav, h, g); + + features.clear(); + fvec a; + fvec d; + dwt(x, len, h.data(), g.data(), h.size(), a, d); + extract_features(d, features); + + for (int l = 1; l < level; l++) { + dwt(a.data(), a.size(), h.data(), g.data(), h.size(), a, d); + extract_features(d, features); + } + + extract_features(a, features); + + for (int l = 0; l <= level / 2; l++) { // reverse order to match python results. + for (int i = 0; i < (int)NUM_FEATHERS_PER_COMP; i++) { + std::swap( + features[l * NUM_FEATHERS_PER_COMP + i], + features[(level - l) * NUM_FEATHERS_PER_COMP + i]); + } + } + } + + static int dwt_features(const float *x, int len, const char *wav, int level, fvec &features) + { + assert(level <= 7); + + assert(features.size() == 0); // make sure features is empty + features.reserve((level + 1) * NUM_FEATHERS_PER_COMP); + + wavedec_features(x, len, wav, level, features); + + return features.size(); + } + + static bool check_min_size(int len, int level) + { + int min_size = 32 * (1 << level); + return (len >= min_size); + } + +public: + static int extract_wavelet_features( + matrix_t *input_matrix, + matrix_t *output_matrix, + ei_dsp_config_spectral_analysis_t *config, + const float sampling_freq) + { + // transpose the matrix so we have one row per axis + numpy::transpose_in_place(input_matrix); + + // func tests for scale of 1 and does a no op in that case + EI_TRY(numpy::scale(input_matrix, config->scale_axes)); + + // apply filter, if enabled + // "zero" order filter allowed. 
will still remove unwanted fft bins later + if (strcmp(config->filter_type, "low") == 0) { + if (config->filter_order) { + EI_TRY(spectral::processing::butterworth_lowpass_filter( + input_matrix, + sampling_freq, + config->filter_cutoff, + config->filter_order)); + } + } + else if (strcmp(config->filter_type, "high") == 0) { + if (config->filter_order) { + EI_TRY(spectral::processing::butterworth_highpass_filter( + input_matrix, + sampling_freq, + config->filter_cutoff, + config->filter_order)); + } + } + + EI_TRY(processing::subtract_mean(input_matrix)); + + int out_idx = 0; + for (size_t row = 0; row < input_matrix->rows; row++) { + float *data_window = input_matrix->get_row_ptr(row); + size_t data_size = input_matrix->cols; + + if (!check_min_size(data_size, config->wavelet_level)) + EIDSP_ERR(EIDSP_BUFFER_SIZE_MISMATCH); + + fvec features; + size_t num_features = dwt_features( + data_window, + data_size, + config->wavelet, + config->wavelet_level, + features); + + assert(num_features == output_matrix->cols / input_matrix->rows); + for (size_t i = 0; i < num_features; i++) { + output_matrix->buffer[out_idx++] = features[i]; + } + } + return EIDSP_OK; + } +}; + +} +} diff --git a/edgeimpulse/edge-impulse-sdk/dsp/spectral/wavelet_coeff.hpp b/edgeimpulse/edge-impulse-sdk/dsp/spectral/wavelet_coeff.hpp new file mode 100644 index 0000000..63616fc --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/dsp/spectral/wavelet_coeff.hpp @@ -0,0 +1,282 @@ +/* Edge Impulse inferencing library + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#pragma once + +#include + +namespace ei{ +namespace spectral{ +//generated by autotune.export + +static const std::array, 2> bior1p3 = {{ + {{-0.08838834764831845, 0.08838834764831845, 0.7071067811865476, 0.7071067811865476, 0.08838834764831845, -0.08838834764831845}}, + {{-0.0, 0.0, -0.7071067811865476, 0.7071067811865476, -0.0, 0.0}} +}}; + +static const std::array, 2> bior1p5 = {{ + {{0.016572815184059706, -0.016572815184059706, -0.12153397801643785, 0.12153397801643785, 0.7071067811865476, 0.7071067811865476, 0.12153397801643785, -0.12153397801643785, -0.016572815184059706, 0.016572815184059706}}, + {{-0.0, 0.0, -0.0, 0.0, -0.7071067811865476, 0.7071067811865476, -0.0, 0.0, -0.0, 0.0}} +}}; + +static const std::array, 2> bior2p2 = {{ + {{0.0, -0.1767766952966369, 0.3535533905932738, 1.0606601717798212, 0.3535533905932738, -0.1767766952966369}}, + {{-0.0, 0.3535533905932738, -0.7071067811865476, 0.3535533905932738, -0.0, 0.0}} +}}; + +static const std::array, 2> bior2p4 = {{ + {{0.0, 0.03314563036811941, -0.06629126073623882, -0.1767766952966369, 0.4198446513295126, 0.9943689110435825, 0.4198446513295126, -0.1767766952966369, -0.06629126073623882, 0.03314563036811941}}, + {{-0.0, 0.0, -0.0, 0.3535533905932738, -0.7071067811865476, 0.3535533905932738, -0.0, 0.0, -0.0, 0.0}} +}}; + +static const std::array, 2> bior2p6 = {{ + {{0.0, -0.006905339660024878, 0.013810679320049757, 0.04695630968816917, -0.1077232986963881, -0.16987135563661201, 0.4474660099696121, 0.966747552403483, 0.4474660099696121, -0.16987135563661201, -0.1077232986963881, 0.04695630968816917, 0.013810679320049757, -0.006905339660024878}}, + {{-0.0, 0.0, -0.0, 0.0, -0.0, 0.3535533905932738, -0.7071067811865476, 0.3535533905932738, -0.0, 0.0, -0.0, 0.0, -0.0, 0.0}} +}}; + +static const std::array, 2> bior2p8 = {{ + {{0.0, 0.0015105430506304422, -0.0030210861012608843, -0.012947511862546647, 0.02891610982635418, 0.05299848189069094, -0.13491307360773605, -0.16382918343409023, 0.46257144047591653, 0.9516421218971786, 0.46257144047591653, -0.16382918343409023, -0.13491307360773605, 0.05299848189069094, 0.02891610982635418, -0.012947511862546647, -0.0030210861012608843, 0.0015105430506304422}}, + {{-0.0, 0.0, -0.0, 0.0, -0.0, 0.0, -0.0, 0.3535533905932738, -0.7071067811865476, 0.3535533905932738, -0.0, 0.0, -0.0, 0.0, -0.0, 0.0, -0.0, 0.0}} +}}; + +static const std::array, 2> bior3p1 = {{ + {{-0.3535533905932738, 1.0606601717798212, 1.0606601717798212, -0.3535533905932738}}, + {{-0.1767766952966369, 0.5303300858899106, -0.5303300858899106, 0.1767766952966369}} +}}; + +static const std::array, 2> bior3p3 = {{ + {{0.06629126073623882, -0.1988737822087165, -0.15467960838455727, 0.9943689110435825, 0.9943689110435825, -0.15467960838455727, -0.1988737822087165, 0.06629126073623882}}, + {{-0.0, 0.0, -0.1767766952966369, 0.5303300858899106, -0.5303300858899106, 0.1767766952966369, -0.0, 0.0}} +}}; + +static const std::array, 2> bior3p5 = {{ + {{-0.013810679320049757, 0.04143203796014927, 0.052480581416189075, -0.26792717880896527, -0.07181553246425873, 0.966747552403483, 0.966747552403483, -0.07181553246425873, -0.26792717880896527, 0.052480581416189075, 0.04143203796014927, -0.013810679320049757}}, + {{-0.0, 0.0, -0.0, 0.0, -0.1767766952966369, 0.5303300858899106, -0.5303300858899106, 0.1767766952966369, -0.0, 0.0, -0.0, 0.0}} +}}; + +static const std::array, 2> bior3p7 = {{ + {{0.0030210861012608843, -0.009063258303782653, -0.01683176542131064, 0.074663985074019, 0.03133297870736289, -0.301159125922835, 
-0.02649924094534547, 0.9516421218971786, 0.9516421218971786, -0.02649924094534547, -0.301159125922835, 0.03133297870736289, 0.074663985074019, -0.01683176542131064, -0.009063258303782653, 0.0030210861012608843}}, + {{-0.0, 0.0, -0.0, 0.0, -0.0, 0.0, -0.1767766952966369, 0.5303300858899106, -0.5303300858899106, 0.1767766952966369, -0.0, 0.0, -0.0, 0.0, -0.0, 0.0}} +}}; + +static const std::array, 2> bior3p9 = {{ + {{-0.0006797443727836989, 0.002039233118351097, 0.005060319219611981, -0.020618912641105536, -0.014112787930175844, 0.09913478249423216, 0.012300136269419315, -0.32019196836077857, 0.0020500227115698858, 0.9421257006782068, 0.9421257006782068, 0.0020500227115698858, -0.32019196836077857, 0.012300136269419315, 0.09913478249423216, -0.014112787930175844, -0.020618912641105536, 0.005060319219611981, 0.002039233118351097, -0.0006797443727836989}}, + {{-0.0, 0.0, -0.0, 0.0, -0.0, 0.0, -0.0, 0.0, -0.1767766952966369, 0.5303300858899106, -0.5303300858899106, 0.1767766952966369, -0.0, 0.0, -0.0, 0.0, -0.0, 0.0, -0.0, 0.0}} +}}; + +static const std::array, 2> bior4p4 = {{ + {{0.0, 0.03782845550726404, -0.023849465019556843, -0.11062440441843718, 0.37740285561283066, 0.8526986790088938, 0.37740285561283066, -0.11062440441843718, -0.023849465019556843, 0.03782845550726404}}, + {{-0.0, -0.06453888262869706, 0.04068941760916406, 0.41809227322161724, -0.7884856164055829, 0.41809227322161724, 0.04068941760916406, -0.06453888262869706, -0.0, 0.0}} +}}; + +static const std::array, 2> bior5p5 = {{ + {{0.0, 0.0, 0.03968708834740544, 0.007948108637240322, -0.05446378846823691, 0.34560528195603346, 0.7366601814282105, 0.34560528195603346, -0.05446378846823691, 0.007948108637240322, 0.03968708834740544, 0.0}}, + {{-0.013456709459118716, -0.002694966880111507, 0.13670658466432914, -0.09350469740093886, -0.47680326579848425, 0.8995061097486484, -0.47680326579848425, -0.09350469740093886, 0.13670658466432914, -0.002694966880111507, -0.013456709459118716, 0.0}} +}}; + +static const std::array, 2> bior6p8 = {{ + {{0.0, 0.0019088317364812906, -0.0019142861290887667, -0.016990639867602342, 0.01193456527972926, 0.04973290349094079, -0.07726317316720414, -0.09405920349573646, 0.4207962846098268, 0.8259229974584023, 0.4207962846098268, -0.09405920349573646, -0.07726317316720414, 0.04973290349094079, 0.01193456527972926, -0.016990639867602342, -0.0019142861290887667, 0.0019088317364812906}}, + {{-0.0, 0.0, -0.0, 0.014426282505624435, -0.014467504896790148, -0.07872200106262882, 0.04036797903033992, 0.41784910915027457, -0.7589077294536541, 0.41784910915027457, 0.04036797903033992, -0.07872200106262882, -0.014467504896790148, 0.014426282505624435, -0.0, 0.0, -0.0, 0.0}} +}}; + +static const std::array, 2> coif1 = {{ + {{-0.015655728135791993, -0.07273261951252645, 0.3848648468648578, 0.8525720202116004, 0.3378976624574818, -0.07273261951252645}}, + {{0.07273261951252645, 0.3378976624574818, -0.8525720202116004, 0.3848648468648578, 0.07273261951252645, -0.015655728135791993}} +}}; + +static const std::array, 2> coif2 = {{ + {{-0.000720549445520347, -0.0018232088709110323, 0.005611434819368834, 0.02368017194684777, -0.05943441864643109, -0.07648859907828076, 0.4170051844232391, 0.8127236354494135, 0.3861100668227629, -0.0673725547237256, -0.04146493678687178, 0.01638733646320364}}, + {{-0.01638733646320364, -0.04146493678687178, 0.0673725547237256, 0.3861100668227629, -0.8127236354494135, 0.4170051844232391, 0.07648859907828076, -0.05943441864643109, -0.02368017194684777, 0.005611434819368834, 
0.0018232088709110323, -0.000720549445520347}} +}}; + +static const std::array, 2> coif3 = {{ + {{-3.459977319727278e-05, -7.0983302506379e-05, 0.0004662169598204029, 0.0011175187708306303, -0.0025745176881367972, -0.009007976136730624, 0.015880544863669452, 0.03455502757329774, -0.08230192710629983, -0.07179982161915484, 0.42848347637737, 0.7937772226260872, 0.40517690240911824, -0.06112339000297255, -0.06577191128146936, 0.023452696142077168, 0.007782596425672746, -0.003793512864380802}}, + {{0.003793512864380802, 0.007782596425672746, -0.023452696142077168, -0.06577191128146936, 0.06112339000297255, 0.40517690240911824, -0.7937772226260872, 0.42848347637737, 0.07179982161915484, -0.08230192710629983, -0.03455502757329774, 0.015880544863669452, 0.009007976136730624, -0.0025745176881367972, -0.0011175187708306303, 0.0004662169598204029, 7.0983302506379e-05, -3.459977319727278e-05}} +}}; + +static const std::array, 2> db2 = {{ + {{-0.12940952255126037, 0.2241438680420134, 0.8365163037378079, 0.48296291314453416}}, + {{-0.48296291314453416, 0.8365163037378079, -0.2241438680420134, -0.12940952255126037}} +}}; + +static const std::array, 2> db3 = {{ + {{0.03522629188570953, -0.08544127388202666, -0.13501102001025458, 0.45987750211849154, 0.8068915093110925, 0.33267055295008263}}, + {{-0.33267055295008263, 0.8068915093110925, -0.45987750211849154, -0.13501102001025458, 0.08544127388202666, 0.03522629188570953}} +}}; + +static const std::array, 2> db4 = {{ + {{-0.010597401785069032, 0.0328830116668852, 0.030841381835560764, -0.18703481171909309, -0.027983769416859854, 0.6308807679298589, 0.7148465705529157, 0.2303778133088965}}, + {{-0.2303778133088965, 0.7148465705529157, -0.6308807679298589, -0.027983769416859854, 0.18703481171909309, 0.030841381835560764, -0.0328830116668852, -0.010597401785069032}} +}}; + +static const std::array, 2> db5 = {{ + {{0.0033357252854737712, -0.012580751999081999, -0.006241490212798274, 0.07757149384004572, -0.032244869584638375, -0.24229488706638203, 0.13842814590132074, 0.7243085284377729, 0.6038292697971896, 0.16010239797419293}}, + {{-0.16010239797419293, 0.6038292697971896, -0.7243085284377729, 0.13842814590132074, 0.24229488706638203, -0.032244869584638375, -0.07757149384004572, -0.006241490212798274, 0.012580751999081999, 0.0033357252854737712}} +}}; + +static const std::array, 2> db6 = {{ + {{-0.0010773010853084796, 0.004777257510945511, 0.0005538422011614961, -0.03158203931748603, 0.027522865530305727, 0.09750160558732304, -0.12976686756726194, -0.22626469396543983, 0.31525035170919763, 0.7511339080210954, 0.49462389039845306, 0.11154074335010947}}, + {{-0.11154074335010947, 0.49462389039845306, -0.7511339080210954, 0.31525035170919763, 0.22626469396543983, -0.12976686756726194, -0.09750160558732304, 0.027522865530305727, 0.03158203931748603, 0.0005538422011614961, -0.004777257510945511, -0.0010773010853084796}} +}}; + +static const std::array, 2> db7 = {{ + {{0.00035371379997452024, -0.0018016407040474908, 0.0004295779729213665, 0.01255099855609984, -0.01657454163066688, -0.03802993693501441, 0.08061260915108308, 0.07130921926683026, -0.22403618499387498, -0.14390600392856498, 0.4697822874051931, 0.7291320908462351, 0.3965393194819173, 0.07785205408500918}}, + {{-0.07785205408500918, 0.3965393194819173, -0.7291320908462351, 0.4697822874051931, 0.14390600392856498, -0.22403618499387498, -0.07130921926683026, 0.08061260915108308, 0.03802993693501441, -0.01657454163066688, -0.01255099855609984, 0.0004295779729213665, 0.0018016407040474908, 
0.00035371379997452024}} +}}; + +static const std::array, 2> db8 = {{ + {{-0.00011747678412476953, 0.0006754494064505693, -0.00039174037337694705, -0.004870352993451574, 0.008746094047405777, 0.013981027917398282, -0.044088253930794755, -0.017369301001807547, 0.12874742662047847, 0.0004724845739132828, -0.2840155429615469, -0.015829105256349306, 0.5853546836542067, 0.6756307362972898, 0.31287159091429995, 0.05441584224310401}}, + {{-0.05441584224310401, 0.31287159091429995, -0.6756307362972898, 0.5853546836542067, 0.015829105256349306, -0.2840155429615469, -0.0004724845739132828, 0.12874742662047847, 0.017369301001807547, -0.044088253930794755, -0.013981027917398282, 0.008746094047405777, 0.004870352993451574, -0.00039174037337694705, -0.0006754494064505693, -0.00011747678412476953}} +}}; + +static const std::array, 2> db9 = {{ + {{3.93473203162716e-05, -0.0002519631889427101, 0.00023038576352319597, 0.0018476468830562265, -0.00428150368246343, -0.004723204757751397, 0.022361662123679096, 0.00025094711483145197, -0.06763282906132997, 0.03072568147933338, 0.14854074933810638, -0.09684078322297646, -0.2932737832791749, 0.13319738582500756, 0.6572880780513005, 0.6048231236901112, 0.24383467461259034, 0.038077947363878345}}, + {{-0.038077947363878345, 0.24383467461259034, -0.6048231236901112, 0.6572880780513005, -0.13319738582500756, -0.2932737832791749, 0.09684078322297646, 0.14854074933810638, -0.03072568147933338, -0.06763282906132997, -0.00025094711483145197, 0.022361662123679096, 0.004723204757751397, -0.00428150368246343, -0.0018476468830562265, 0.00023038576352319597, 0.0002519631889427101, 3.93473203162716e-05}} +}}; + +static const std::array, 2> db10 = {{ + {{-1.3264202894521244e-05, 9.358867032006959e-05, -0.00011646685512928545, -0.0006858566949597116, 0.001992405295185056, 0.001395351747052901, -0.010733175483330575, 0.0036065535669561697, 0.033212674059341, -0.029457536821875813, -0.07139414716639708, 0.09305736460357235, 0.12736934033579325, -0.19594627437737705, -0.24984642432731538, 0.2811723436605775, 0.6884590394536035, 0.5272011889317256, 0.1881768000776915, 0.026670057900555554}}, + {{-0.026670057900555554, 0.1881768000776915, -0.5272011889317256, 0.6884590394536035, -0.2811723436605775, -0.24984642432731538, 0.19594627437737705, 0.12736934033579325, -0.09305736460357235, -0.07139414716639708, 0.029457536821875813, 0.033212674059341, -0.0036065535669561697, -0.010733175483330575, -0.001395351747052901, 0.001992405295185056, 0.0006858566949597116, -0.00011646685512928545, -9.358867032006959e-05, -1.3264202894521244e-05}} +}}; + +static const std::array, 2> haar = {{ + {{0.7071067811865476, 0.7071067811865476}}, + {{-0.7071067811865476, 0.7071067811865476}} +}}; + +static const std::array, 2> rbio1p3 = {{ + {{0.0, 0.0, 0.7071067811865476, 0.7071067811865476, 0.0, 0.0}}, + {{0.08838834764831845, 0.08838834764831845, -0.7071067811865476, 0.7071067811865476, -0.08838834764831845, -0.08838834764831845}} +}}; + +static const std::array, 2> rbio1p5 = {{ + {{0.0, 0.0, 0.0, 0.0, 0.7071067811865476, 0.7071067811865476, 0.0, 0.0, 0.0, 0.0}}, + {{-0.016572815184059706, -0.016572815184059706, 0.12153397801643785, 0.12153397801643785, -0.7071067811865476, 0.7071067811865476, -0.12153397801643785, -0.12153397801643785, 0.016572815184059706, 0.016572815184059706}} +}}; + +static const std::array, 2> rbio2p2 = {{ + {{0.0, 0.0, 0.3535533905932738, 0.7071067811865476, 0.3535533905932738, 0.0}}, + {{0.1767766952966369, 0.3535533905932738, -1.0606601717798212, 0.3535533905932738, 
0.1767766952966369, 0.0}} +}}; + +static const std::array, 2> rbio2p4 = {{ + {{0.0, 0.0, 0.0, 0.0, 0.3535533905932738, 0.7071067811865476, 0.3535533905932738, 0.0, 0.0, 0.0}}, + {{-0.03314563036811941, -0.06629126073623882, 0.1767766952966369, 0.4198446513295126, -0.9943689110435825, 0.4198446513295126, 0.1767766952966369, -0.06629126073623882, -0.03314563036811941, 0.0}} +}}; + +static const std::array, 2> rbio2p6 = {{ + {{0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3535533905932738, 0.7071067811865476, 0.3535533905932738, 0.0, 0.0, 0.0, 0.0, 0.0}}, + {{0.006905339660024878, 0.013810679320049757, -0.04695630968816917, -0.1077232986963881, 0.16987135563661201, 0.4474660099696121, -0.966747552403483, 0.4474660099696121, 0.16987135563661201, -0.1077232986963881, -0.04695630968816917, 0.013810679320049757, 0.006905339660024878, 0.0}} +}}; + +static const std::array, 2> rbio2p8 = {{ + {{0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3535533905932738, 0.7071067811865476, 0.3535533905932738, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}}, + {{-0.0015105430506304422, -0.0030210861012608843, 0.012947511862546647, 0.02891610982635418, -0.05299848189069094, -0.13491307360773605, 0.16382918343409023, 0.46257144047591653, -0.9516421218971786, 0.46257144047591653, 0.16382918343409023, -0.13491307360773605, -0.05299848189069094, 0.02891610982635418, 0.012947511862546647, -0.0030210861012608843, -0.0015105430506304422, 0.0}} +}}; + +static const std::array, 2> rbio3p1 = {{ + {{0.1767766952966369, 0.5303300858899106, 0.5303300858899106, 0.1767766952966369}}, + {{0.3535533905932738, 1.0606601717798212, -1.0606601717798212, -0.3535533905932738}} +}}; + +static const std::array, 2> rbio3p3 = {{ + {{0.0, 0.0, 0.1767766952966369, 0.5303300858899106, 0.5303300858899106, 0.1767766952966369, 0.0, 0.0}}, + {{-0.06629126073623882, -0.1988737822087165, 0.15467960838455727, 0.9943689110435825, -0.9943689110435825, -0.15467960838455727, 0.1988737822087165, 0.06629126073623882}} +}}; + +static const std::array, 2> rbio3p5 = {{ + {{0.0, 0.0, 0.0, 0.0, 0.1767766952966369, 0.5303300858899106, 0.5303300858899106, 0.1767766952966369, 0.0, 0.0, 0.0, 0.0}}, + {{0.013810679320049757, 0.04143203796014927, -0.052480581416189075, -0.26792717880896527, 0.07181553246425873, 0.966747552403483, -0.966747552403483, -0.07181553246425873, 0.26792717880896527, 0.052480581416189075, -0.04143203796014927, -0.013810679320049757}} +}}; + +static const std::array, 2> rbio3p7 = {{ + {{0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1767766952966369, 0.5303300858899106, 0.5303300858899106, 0.1767766952966369, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}}, + {{-0.0030210861012608843, -0.009063258303782653, 0.01683176542131064, 0.074663985074019, -0.03133297870736289, -0.301159125922835, 0.02649924094534547, 0.9516421218971786, -0.9516421218971786, -0.02649924094534547, 0.301159125922835, 0.03133297870736289, -0.074663985074019, -0.01683176542131064, 0.009063258303782653, 0.0030210861012608843}} +}}; + +static const std::array, 2> rbio3p9 = {{ + {{0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1767766952966369, 0.5303300858899106, 0.5303300858899106, 0.1767766952966369, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}}, + {{0.0006797443727836989, 0.002039233118351097, -0.005060319219611981, -0.020618912641105536, 0.014112787930175844, 0.09913478249423216, -0.012300136269419315, -0.32019196836077857, -0.0020500227115698858, 0.9421257006782068, -0.9421257006782068, 0.0020500227115698858, 0.32019196836077857, 0.012300136269419315, -0.09913478249423216, -0.014112787930175844, 0.020618912641105536, 0.005060319219611981, 
-0.002039233118351097, -0.0006797443727836989}} +}}; + +static const std::array, 2> rbio4p4 = {{ + {{0.0, 0.0, -0.06453888262869706, -0.04068941760916406, 0.41809227322161724, 0.7884856164055829, 0.41809227322161724, -0.04068941760916406, -0.06453888262869706, 0.0}}, + {{-0.03782845550726404, -0.023849465019556843, 0.11062440441843718, 0.37740285561283066, -0.8526986790088938, 0.37740285561283066, 0.11062440441843718, -0.023849465019556843, -0.03782845550726404, 0.0}} +}}; + +static const std::array, 2> rbio5p5 = {{ + {{0.0, 0.013456709459118716, -0.002694966880111507, -0.13670658466432914, -0.09350469740093886, 0.47680326579848425, 0.8995061097486484, 0.47680326579848425, -0.09350469740093886, -0.13670658466432914, -0.002694966880111507, 0.013456709459118716}}, + {{-0.0, 0.03968708834740544, -0.007948108637240322, -0.05446378846823691, -0.34560528195603346, 0.7366601814282105, -0.34560528195603346, -0.05446378846823691, -0.007948108637240322, 0.03968708834740544, -0.0, 0.0}} +}}; + +static const std::array, 2> rbio6p8 = {{ + {{0.0, 0.0, 0.0, 0.0, 0.014426282505624435, 0.014467504896790148, -0.07872200106262882, -0.04036797903033992, 0.41784910915027457, 0.7589077294536541, 0.41784910915027457, -0.04036797903033992, -0.07872200106262882, 0.014467504896790148, 0.014426282505624435, 0.0, 0.0, 0.0}}, + {{-0.0019088317364812906, -0.0019142861290887667, 0.016990639867602342, 0.01193456527972926, -0.04973290349094079, -0.07726317316720414, 0.09405920349573646, 0.4207962846098268, -0.8259229974584023, 0.4207962846098268, 0.09405920349573646, -0.07726317316720414, -0.04973290349094079, 0.01193456527972926, 0.016990639867602342, -0.0019142861290887667, -0.0019088317364812906, 0.0}} +}}; + +static const std::array, 2> sym2 = {{ + {{-0.12940952255092145, 0.22414386804185735, 0.836516303737469, 0.48296291314469025}}, + {{-0.48296291314469025, 0.836516303737469, -0.22414386804185735, -0.12940952255092145}} +}}; + +static const std::array, 2> sym3 = {{ + {{0.035226291882100656, -0.08544127388224149, -0.13501102001039084, 0.4598775021193313, 0.8068915093133388, 0.3326705529509569}}, + {{-0.3326705529509569, 0.8068915093133388, -0.4598775021193313, -0.13501102001039084, 0.08544127388224149, 0.035226291882100656}} +}}; + +static const std::array, 2> sym4 = {{ + {{-0.07576571478927333, -0.02963552764599851, 0.49761866763201545, 0.8037387518059161, 0.29785779560527736, -0.09921954357684722, -0.012603967262037833, 0.0322231006040427}}, + {{-0.0322231006040427, -0.012603967262037833, 0.09921954357684722, 0.29785779560527736, -0.8037387518059161, 0.49761866763201545, 0.02963552764599851, -0.07576571478927333}} +}}; + +static const std::array, 2> sym5 = {{ + {{0.027333068345077982, 0.029519490925774643, -0.039134249302383094, 0.1993975339773936, 0.7234076904024206, 0.6339789634582119, 0.01660210576452232, -0.17532808990845047, -0.021101834024758855, 0.019538882735286728}}, + {{-0.019538882735286728, -0.021101834024758855, 0.17532808990845047, 0.01660210576452232, -0.6339789634582119, 0.7234076904024206, -0.1993975339773936, -0.039134249302383094, -0.029519490925774643, 0.027333068345077982}} +}}; + +static const std::array, 2> sym6 = {{ + {{0.015404109327027373, 0.0034907120842174702, -0.11799011114819057, -0.048311742585633, 0.4910559419267466, 0.787641141030194, 0.3379294217276218, -0.07263752278646252, -0.021060292512300564, 0.04472490177066578, 0.0017677118642428036, -0.007800708325034148}}, + {{0.007800708325034148, 0.0017677118642428036, -0.04472490177066578, -0.021060292512300564, 0.07263752278646252, 
0.3379294217276218, -0.787641141030194, 0.4910559419267466, 0.048311742585633, -0.11799011114819057, -0.0034907120842174702, 0.015404109327027373}} +}}; + +static const std::array, 2> sym7 = {{ + {{0.002681814568257878, -0.0010473848886829163, -0.01263630340325193, 0.03051551316596357, 0.0678926935013727, -0.049552834937127255, 0.017441255086855827, 0.5361019170917628, 0.767764317003164, 0.2886296317515146, -0.14004724044296152, -0.10780823770381774, 0.004010244871533663, 0.010268176708511255}}, + {{-0.010268176708511255, 0.004010244871533663, 0.10780823770381774, -0.14004724044296152, -0.2886296317515146, 0.767764317003164, -0.5361019170917628, 0.017441255086855827, 0.049552834937127255, 0.0678926935013727, -0.03051551316596357, -0.01263630340325193, 0.0010473848886829163, 0.002681814568257878}} +}}; + +static const std::array, 2> sym8 = {{ + {{-0.0033824159510061256, -0.0005421323317911481, 0.03169508781149298, 0.007607487324917605, -0.1432942383508097, -0.061273359067658524, 0.4813596512583722, 0.7771857517005235, 0.3644418948353314, -0.05194583810770904, -0.027219029917056003, 0.049137179673607506, 0.003808752013890615, -0.01495225833704823, -0.0003029205147213668, 0.0018899503327594609}}, + {{-0.0018899503327594609, -0.0003029205147213668, 0.01495225833704823, 0.003808752013890615, -0.049137179673607506, -0.027219029917056003, 0.05194583810770904, 0.3644418948353314, -0.7771857517005235, 0.4813596512583722, 0.061273359067658524, -0.1432942383508097, -0.007607487324917605, 0.03169508781149298, 0.0005421323317911481, -0.0033824159510061256}} +}}; + +static const std::array, 2> sym9 = {{ + {{0.0014009155259146807, 0.0006197808889855868, -0.013271967781817119, -0.01152821020767923, 0.03022487885827568, 0.0005834627461258068, -0.05456895843083407, 0.238760914607303, 0.717897082764412, 0.6173384491409358, 0.035272488035271894, -0.19155083129728512, -0.018233770779395985, 0.06207778930288603, 0.008859267493400484, -0.010264064027633142, -0.0004731544986800831, 0.0010694900329086053}}, + {{-0.0010694900329086053, -0.0004731544986800831, 0.010264064027633142, 0.008859267493400484, -0.06207778930288603, -0.018233770779395985, 0.19155083129728512, 0.035272488035271894, -0.6173384491409358, 0.717897082764412, -0.238760914607303, -0.05456895843083407, -0.0005834627461258068, 0.03022487885827568, 0.01152821020767923, -0.013271967781817119, -0.0006197808889855868, 0.0014009155259146807}} +}}; + +static const std::array, 2> sym10 = {{ + {{0.0007701598091144901, 9.563267072289475e-05, -0.008641299277022422, -0.0014653825813050513, 0.0459272392310922, 0.011609893903711381, -0.15949427888491757, -0.07088053578324385, 0.47169066693843925, 0.7695100370211071, 0.38382676106708546, -0.03553674047381755, -0.0319900568824278, 0.04999497207737669, 0.005764912033581909, -0.02035493981231129, -0.0008043589320165449, 0.004593173585311828, 5.7036083618494284e-05, -0.0004593294210046588}}, + {{0.0004593294210046588, 5.7036083618494284e-05, -0.004593173585311828, -0.0008043589320165449, 0.02035493981231129, 0.005764912033581909, -0.04999497207737669, -0.0319900568824278, 0.03553674047381755, 0.38382676106708546, -0.7695100370211071, 0.47169066693843925, 0.07088053578324385, -0.15949427888491757, -0.011609893903711381, 0.0459272392310922, 0.0014653825813050513, -0.008641299277022422, -9.563267072289475e-05, 0.0007701598091144901}} +}}; + +} +} diff --git a/edgeimpulse/edge-impulse-sdk/dsp/speechpy/feature.hpp b/edgeimpulse/edge-impulse-sdk/dsp/speechpy/feature.hpp new file mode 100644 index 0000000..89765b2 --- 
/dev/null +++ b/edgeimpulse/edge-impulse-sdk/dsp/speechpy/feature.hpp @@ -0,0 +1,807 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef _EIDSP_SPEECHPY_FEATURE_H_ +#define _EIDSP_SPEECHPY_FEATURE_H_ + +#include +#include "../../porting/ei_classifier_porting.h" +#include "../ei_utils.h" +#include "functions.hpp" +#include "processing.hpp" +#include "../memory.hpp" +#include "../returntypes.hpp" +#include "../ei_vector.h" + +namespace ei { +namespace speechpy { + +class feature { +public: + /** + * Compute the Mel-filterbanks. Each filter will be stored in one rows. + * The columns correspond to fft bins. + * + * @param filterbanks Matrix of size num_filter * coefficients + * @param num_filter the number of filters in the filterbank + * @param coefficients (fftpoints//2 + 1) + * @param sampling_freq the samplerate of the signal we are working + * with. It affects mel spacing. + * @param low_freq lowest band edge of mel filters, default 0 Hz + * @param high_freq highest band edge of mel filters, default samplerate / 2 + * @param output_transposed If set to true this will transpose the matrix (memory efficient). + * This is more efficient than calling this function and then transposing + * as the latter requires the filterbank to be allocated twice (for a short while). + * @returns EIDSP_OK if OK + */ + static int filterbanks( +#if EIDSP_QUANTIZE_FILTERBANK + quantized_matrix_t *filterbanks, +#else + matrix_t *filterbanks, +#endif + uint16_t num_filter, int coefficients, uint32_t sampling_freq, + uint32_t low_freq, uint32_t high_freq, + bool output_transposed = false + ) + { + const size_t mels_mem_size = (num_filter + 2) * sizeof(float); + const size_t hertz_mem_size = (num_filter + 2) * sizeof(float); + const size_t freq_index_mem_size = (num_filter + 2) * sizeof(int); + + float *mels = (float*)ei_dsp_malloc(mels_mem_size); + if (!mels) { + EIDSP_ERR(EIDSP_OUT_OF_MEM); + } + + if (filterbanks->rows != num_filter || filterbanks->cols != static_cast(coefficients)) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + +#if EIDSP_QUANTIZE_FILTERBANK + memset(filterbanks->buffer, 0, filterbanks->rows * filterbanks->cols * sizeof(uint8_t)); +#else + memset(filterbanks->buffer, 0, filterbanks->rows * filterbanks->cols * sizeof(float)); +#endif + + // Computing the Mel filterbank + // converting the upper and lower frequencies to Mels. + // num_filter + 2 is because for num_filter filterbanks we need + // num_filter+2 point. + numpy::linspace( + functions::frequency_to_mel(static_cast(low_freq)), + functions::frequency_to_mel(static_cast(high_freq)), + num_filter + 2, + mels); + + // we should convert Mels back to Hertz because the start and end-points + // should be at the desired frequencies. 
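+ // Assuming the speechpy-style conversions implemented in functions.hpp,
+ //   mel(f) = 1127 * ln(1 + f / 700)   and   f(mel) = 700 * (exp(mel / 1127) - 1),
+ // e.g. low_freq = 300 Hz and high_freq = 8000 Hz land at roughly 402 and 2840 mel,
+ // and the num_filter + 2 points spaced linearly in mel are warped back to Hz below.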
+ float *hertz = (float*)ei_dsp_malloc(hertz_mem_size); + if (!hertz) { + ei_dsp_free(mels, mels_mem_size); + EIDSP_ERR(EIDSP_OUT_OF_MEM); + } + for (uint16_t ix = 0; ix < num_filter + 2; ix++) { + hertz[ix] = functions::mel_to_frequency(mels[ix]); + if (hertz[ix] < low_freq) { + hertz[ix] = low_freq; + } + if (hertz[ix] > high_freq) { + hertz[ix] = high_freq; + } + + // here is a really annoying bug in Speechpy which calculates the frequency index wrong for the last bucket + // the last 'hertz' value is not 8,000 (with sampling rate 16,000) but 7,999.999999 + // thus calculating the bucket to 64, not 65. + // we're adjusting this here a tiny bit to ensure we have the same result + if (ix == num_filter + 2 - 1) { + hertz[ix] -= 0.001; + } + } + ei_dsp_free(mels, mels_mem_size); + + // The frequency resolution required to put filters at the + // exact points calculated above should be extracted. + // So we should round those frequencies to the closest FFT bin. + int *freq_index = (int*)ei_dsp_malloc(freq_index_mem_size); + if (!freq_index) { + ei_dsp_free(hertz, hertz_mem_size); + EIDSP_ERR(EIDSP_OUT_OF_MEM); + } + for (uint16_t ix = 0; ix < num_filter + 2; ix++) { + freq_index[ix] = static_cast(floor((coefficients + 1) * hertz[ix] / sampling_freq)); + } + ei_dsp_free(hertz, hertz_mem_size); + + for (size_t i = 0; i < num_filter; i++) { + int left = freq_index[i]; + int middle = freq_index[i + 1]; + int right = freq_index[i + 2]; + + EI_DSP_MATRIX(z, 1, (right - left + 1)); + if (!z.buffer) { + ei_dsp_free(freq_index, freq_index_mem_size); + EIDSP_ERR(EIDSP_OUT_OF_MEM); + } + numpy::linspace(left, right, (right - left + 1), z.buffer); + functions::triangle(z.buffer, (right - left + 1), left, middle, right); + + // so... z now contains some values that we need to overwrite in the filterbank + for (int zx = 0; zx < (right - left + 1); zx++) { + size_t index = (i * filterbanks->cols) + (left + zx); + + if (output_transposed) { + index = ((left + zx) * filterbanks->rows) + i; + } + +#if EIDSP_QUANTIZE_FILTERBANK + filterbanks->buffer[index] = numpy::quantize_zero_one(z.buffer[zx]); +#else + filterbanks->buffer[index] = z.buffer[zx]; +#endif + } + } + + if (output_transposed) { + uint16_t r = filterbanks->rows; + filterbanks->rows = filterbanks->cols; + filterbanks->cols = r; + } + + ei_dsp_free(freq_index, freq_index_mem_size); + + return EIDSP_OK; + } + + /** + * @brief Get the fft bin index from hertz + * + * @param fft_size Size of fft + * @param hertz Desired hertz + * @param sampling_freq In Hz + * @return int the index of the bin closest to the hertz + */ + static int get_fft_bin_from_hertz(uint16_t fft_size, float hertz, uint32_t sampling_freq) + { + return static_cast(floor((fft_size + 1) * hertz / sampling_freq)); + } + + /** + * Compute Mel-filterbank energy features from an audio signal. + * @param out_features Use `calculate_mfe_buffer_size` to allocate the right matrix. + * @param out_energies A matrix in the form of Mx1 where M is the rows from `calculate_mfe_buffer_size` + * @param signal: audio signal structure with functions to retrieve data from a signal + * @param sampling_frequency (int): the sampling frequency of the signal + * we are working with. + * @param frame_length (float): the length of each frame in seconds. + * Default is 0.020s + * @param frame_stride (float): the step between successive frames in seconds. + * Default is 0.02s (means no overlap) + * @param num_filters (int): the number of filters in the filterbank, + * default 40. 
+ * @param fft_length (int): number of FFT points. Default is 512. + * @param low_frequency (int): lowest band edge of mel filters. + * In Hz, default is 0. + * @param high_frequency (int): highest band edge of mel filters. + * In Hz, default is samplerate/2 + * @EIDSP_OK if OK + */ + static int mfe(matrix_t *out_features, matrix_t *out_energies, + signal_t *signal, + uint32_t sampling_frequency, + float frame_length, float frame_stride, uint16_t num_filters, + uint16_t fft_length, uint32_t low_frequency, uint32_t high_frequency, + uint16_t version + ) + { + int ret = 0; + + if (high_frequency == 0) { + high_frequency = sampling_frequency / 2; + } + + if (version<4) { + if (low_frequency == 0) { + low_frequency = 300; + } + } + + stack_frames_info_t stack_frame_info = { 0 }; + stack_frame_info.signal = signal; + + ret = processing::stack_frames( + &stack_frame_info, + sampling_frequency, + frame_length, + frame_stride, + false, + version + ); + if (ret != 0) { + EIDSP_ERR(ret); + } + + if (stack_frame_info.frame_ixs.size() != out_features->rows) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + if (num_filters != out_features->cols) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + if (out_energies) { + if (stack_frame_info.frame_ixs.size() != out_energies->rows || out_energies->cols != 1) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + } + + for (uint32_t i = 0; i < out_features->rows * out_features->cols; i++) { + *(out_features->buffer + i) = 0; + } + + const size_t power_spectrum_frame_size = (fft_length / 2 + 1); + // Computing the Mel filterbank + // converting the upper and lower frequencies to Mels. + // num_filter + 2 is because for num_filter filterbanks we need + // num_filter+2 point. + float *mels; + const int MELS_SIZE = num_filters + 2; + mels = (float*)ei_calloc(MELS_SIZE, sizeof(float)); + EI_ERR_AND_RETURN_ON_NULL(mels, EIDSP_OUT_OF_MEM); + ei_unique_ptr_t __ptr__(mels,ei_free); + uint16_t* bins = reinterpret_cast(mels); // alias the mels array so we can reuse the space + + numpy::linspace( + functions::frequency_to_mel(static_cast(low_frequency)), + functions::frequency_to_mel(static_cast(high_frequency)), + num_filters + 2, + mels); + + uint16_t max_bin = version >= 4 ? fft_length : power_spectrum_frame_size; // preserve a bug in v<4 + // go to -1 size b/c special handling, see after + for (uint16_t ix = 0; ix < MELS_SIZE-1; ix++) { + mels[ix] = functions::mel_to_frequency(mels[ix]); + if (mels[ix] < low_frequency) { + mels[ix] = low_frequency; + } + if (mels[ix] > high_frequency) { + mels[ix] = high_frequency; + } + bins[ix] = get_fft_bin_from_hertz(max_bin, mels[ix], sampling_frequency); + } + + // here is a really annoying bug in Speechpy which calculates the frequency index wrong for the last bucket + // the last 'hertz' value is not 8,000 (with sampling rate 16,000) but 7,999.999999 + // thus calculating the bucket to 64, not 65. 
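+ // (the discrepancy is floating-point round-off from converting the band edge to the
+ // mel scale and back to hertz)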
+ // we're adjusting this here a tiny bit to ensure we have the same result + mels[MELS_SIZE-1] = functions::mel_to_frequency(mels[MELS_SIZE-1]); + if (mels[MELS_SIZE-1] > high_frequency) { + mels[MELS_SIZE-1] = high_frequency; + } + mels[MELS_SIZE-1] -= 0.001; + bins[MELS_SIZE-1] = get_fft_bin_from_hertz(max_bin, mels[MELS_SIZE-1], sampling_frequency); + + EI_DSP_MATRIX(power_spectrum_frame, 1, power_spectrum_frame_size); + if (!power_spectrum_frame.buffer) { + EIDSP_ERR(EIDSP_OUT_OF_MEM); + } + + // get signal data from the audio file + EI_DSP_MATRIX(signal_frame, 1, stack_frame_info.frame_length); + + for (size_t ix = 0; ix < stack_frame_info.frame_ixs.size(); ix++) { + // don't read outside of the audio buffer... we'll automatically zero pad then + size_t signal_offset = stack_frame_info.frame_ixs.at(ix); + size_t signal_length = stack_frame_info.frame_length; + if (signal_offset + signal_length > stack_frame_info.signal->total_length) { + signal_length = signal_length - + (stack_frame_info.signal->total_length - (signal_offset + signal_length)); + } + + ret = stack_frame_info.signal->get_data( + signal_offset, + signal_length, + signal_frame.buffer + ); + if (ret != 0) { + EIDSP_ERR(ret); + } + + ret = numpy::power_spectrum( + signal_frame.buffer, + stack_frame_info.frame_length, + power_spectrum_frame.buffer, + power_spectrum_frame_size, + fft_length + ); + + if (ret != 0) { + EIDSP_ERR(ret); + } + + float energy = numpy::sum(power_spectrum_frame.buffer, power_spectrum_frame_size); + if (energy == 0) { + energy = 1e-10; + } + + if (out_energies) { + out_energies->buffer[ix] = energy; + } + + auto row_ptr = out_features->get_row_ptr(ix); + for (size_t i = 0; i < num_filters; i++) { + size_t left = bins[i]; + size_t middle = bins[i+1]; + size_t right = bins[i+2]; + + assert(right < power_spectrum_frame_size); + // now we have weights and locations to move from fft to mel sgram + // both left and right become zero weights, so skip them + + // middle always has weight of 1.0 + // since we skip left and right, if left = middle we need to handle that + row_ptr[i] = power_spectrum_frame.buffer[middle]; + + for (size_t bin = left+1; bin < right; bin++) { + if (bin < middle) { + row_ptr[i] += + ((static_cast(bin) - left) / (middle - left)) * // weight * + power_spectrum_frame.buffer[bin]; + } + // intentionally skip middle, handled above + if (bin > middle) { + row_ptr[i] += + ((right - static_cast(bin)) / (right - middle)) * // weight * + power_spectrum_frame.buffer[bin]; + } + } + } + + if (ret != 0) { + EIDSP_ERR(ret); + } + } + + numpy::zero_handling(out_features); + + return EIDSP_OK; + } + + /** + * Compute Mel-filterbank energy features from an audio signal. + * @param out_features Use `calculate_mfe_buffer_size` to allocate the right matrix. + * @param out_energies A matrix in the form of Mx1 where M is the rows from `calculate_mfe_buffer_size` + * @param signal: audio signal structure with functions to retrieve data from a signal + * @param sampling_frequency (int): the sampling frequency of the signal + * we are working with. + * @param frame_length (float): the length of each frame in seconds. + * Default is 0.020s + * @param frame_stride (float): the step between successive frames in seconds. + * Default is 0.02s (means no overlap) + * @param num_filters (int): the number of filters in the filterbank, + * default 40. + * @param fft_length (int): number of FFT points. Default is 512. + * @param low_frequency (int): lowest band edge of mel filters. + * In Hz, default is 0. 
+ * @param high_frequency (int): highest band edge of mel filters. + * In Hz, default is samplerate/2 + * @EIDSP_OK if OK + */ + static int mfe_v3(matrix_t *out_features, matrix_t *out_energies, + signal_t *signal, + uint32_t sampling_frequency, + float frame_length, float frame_stride, uint16_t num_filters, + uint16_t fft_length, uint32_t low_frequency, uint32_t high_frequency, + uint16_t version + ) + { + int ret = 0; + + if (high_frequency == 0) { + high_frequency = sampling_frequency / 2; + } + + if (low_frequency == 0) { + low_frequency = 300; + } + + stack_frames_info_t stack_frame_info = { 0 }; + stack_frame_info.signal = signal; + + ret = processing::stack_frames( + &stack_frame_info, + sampling_frequency, + frame_length, + frame_stride, + false, + version + ); + if (ret != 0) { + EIDSP_ERR(ret); + } + + if (stack_frame_info.frame_ixs.size() != out_features->rows) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + if (num_filters != out_features->cols) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + if (out_energies) { + if (stack_frame_info.frame_ixs.size() != out_energies->rows || out_energies->cols != 1) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + } + + for (uint32_t i = 0; i < out_features->rows * out_features->cols; i++) { + *(out_features->buffer + i) = 0; + } + + uint16_t coefficients = fft_length / 2 + 1; + + // calculate the filterbanks first... preferably I would want to do the matrix multiplications + // whenever they happen, but OK... +#if EIDSP_QUANTIZE_FILTERBANK + EI_DSP_QUANTIZED_MATRIX(filterbanks, num_filters, coefficients, &numpy::dequantize_zero_one); +#else + EI_DSP_MATRIX(filterbanks, num_filters, coefficients); +#endif + if (!filterbanks.buffer) { + EIDSP_ERR(EIDSP_OUT_OF_MEM); + } + + ret = feature::filterbanks( + &filterbanks, num_filters, coefficients, sampling_frequency, low_frequency, high_frequency, true); + if (ret != 0) { + EIDSP_ERR(ret); + } + for (size_t ix = 0; ix < stack_frame_info.frame_ixs.size(); ix++) { + size_t power_spectrum_frame_size = (fft_length / 2 + 1); + + EI_DSP_MATRIX(power_spectrum_frame, 1, power_spectrum_frame_size); + if (!power_spectrum_frame.buffer) { + EIDSP_ERR(EIDSP_OUT_OF_MEM); + } + + // get signal data from the audio file + EI_DSP_MATRIX(signal_frame, 1, stack_frame_info.frame_length); + + // don't read outside of the audio buffer... 
we'll automatically zero pad then + size_t signal_offset = stack_frame_info.frame_ixs.at(ix); + size_t signal_length = stack_frame_info.frame_length; + if (signal_offset + signal_length > stack_frame_info.signal->total_length) { + signal_length = signal_length - + (stack_frame_info.signal->total_length - (signal_offset + signal_length)); + } + + ret = stack_frame_info.signal->get_data( + signal_offset, + signal_length, + signal_frame.buffer + ); + if (ret != 0) { + EIDSP_ERR(ret); + } + + ret = numpy::power_spectrum( + signal_frame.buffer, + stack_frame_info.frame_length, + power_spectrum_frame.buffer, + power_spectrum_frame_size, + fft_length + ); + + if (ret != 0) { + EIDSP_ERR(ret); + } + + float energy = numpy::sum(power_spectrum_frame.buffer, power_spectrum_frame_size); + if (energy == 0) { + energy = 1e-10; + } + + if (out_energies) { + out_energies->buffer[ix] = energy; + } + + // calculate the out_features directly here + ret = numpy::dot_by_row( + ix, + power_spectrum_frame.buffer, + power_spectrum_frame_size, + &filterbanks, + out_features + ); + + if (ret != 0) { + EIDSP_ERR(ret); + } + } + + numpy::zero_handling(out_features); + + return EIDSP_OK; + } + + /** + * Compute spectrogram from a sensor signal. + * @param out_features Use `calculate_mfe_buffer_size` to allocate the right matrix. + * @param signal: audio signal structure with functions to retrieve data from a signal + * @param sampling_frequency (int): the sampling frequency of the signal + * we are working with. + * @param frame_length (float): the length of each frame in seconds. + * Default is 0.020s + * @param frame_stride (float): the step between successive frames in seconds. + * Default is 0.02s (means no overlap) + * @param fft_length (int): number of FFT points. Default is 512. + * @EIDSP_OK if OK + */ + static int spectrogram(matrix_t *out_features, + signal_t *signal, float sampling_frequency, + float frame_length, float frame_stride, uint16_t fft_length, + uint16_t version + ) + { + int ret = 0; + + stack_frames_info_t stack_frame_info = { 0 }; + stack_frame_info.signal = signal; + + ret = processing::stack_frames( + &stack_frame_info, + sampling_frequency, + frame_length, + frame_stride, + false, + version + ); + if (ret != 0) { + EIDSP_ERR(ret); + } + + if (stack_frame_info.frame_ixs.size() != out_features->rows) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + uint16_t coefficients = fft_length / 2 + 1; + + if (coefficients != out_features->cols) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + for (uint32_t i = 0; i < out_features->rows * out_features->cols; i++) { + *(out_features->buffer + i) = 0; + } + + for (size_t ix = 0; ix < stack_frame_info.frame_ixs.size(); ix++) { + // get signal data from the audio file + EI_DSP_MATRIX(signal_frame, 1, stack_frame_info.frame_length); + + // don't read outside of the audio buffer... we'll automatically zero pad then + size_t signal_offset = stack_frame_info.frame_ixs.at(ix); + size_t signal_length = stack_frame_info.frame_length; + if (signal_offset + signal_length > stack_frame_info.signal->total_length) { + signal_length = signal_length - + (stack_frame_info.signal->total_length - (signal_offset + signal_length)); + } + + ret = stack_frame_info.signal->get_data( + signal_offset, + signal_length, + signal_frame.buffer + ); + if (ret != 0) { + EIDSP_ERR(ret); + } + + // normalize data (only when version is 3) + if (version == 3) { + // it might be that everything is already normalized here... 
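+ // the check below assumes any sample outside [-1, 1] means the frame still holds
+ // raw int16-range audio, and rescales it by 1/32768 so both cases end up on the
+ // same normalized scale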
+ bool all_between_min_1_and_1 = true; + for (size_t ix = 0; ix < signal_frame.rows * signal_frame.cols; ix++) { + if (signal_frame.buffer[ix] < -1.0f || signal_frame.buffer[ix] > 1.0f) { + all_between_min_1_and_1 = false; + break; + } + } + + if (!all_between_min_1_and_1) { + ret = numpy::scale(&signal_frame, 1.0f / 32768.0f); + if (ret != 0) { + EIDSP_ERR(ret); + } + } + } + + ret = numpy::power_spectrum( + signal_frame.buffer, + stack_frame_info.frame_length, + out_features->buffer + (ix * coefficients), + coefficients, + fft_length + ); + + if (ret != 0) { + EIDSP_ERR(ret); + } + } + + numpy::zero_handling(out_features); + + return EIDSP_OK; + } + + /** + * Calculate the buffer size for MFE + * @param signal_length: Length of the signal. + * @param sampling_frequency (int): The sampling frequency of the signal. + * @param frame_length (float): The length of the frame in second. + * @param frame_stride (float): The stride between frames. + * @param num_filters + */ + static matrix_size_t calculate_mfe_buffer_size( + size_t signal_length, + uint32_t sampling_frequency, + float frame_length, float frame_stride, uint16_t num_filters, + uint16_t version) + { + int32_t rows = processing::calculate_no_of_stack_frames( + signal_length, + sampling_frequency, + frame_length, + frame_stride, + false, + version); + int32_t cols = num_filters; + + matrix_size_t size_matrix; + size_matrix.rows = (uint32_t)rows; + size_matrix.cols = (uint32_t)cols; + return size_matrix; + } + + /** + * Compute MFCC features from an audio signal. + * @param out_features Use `calculate_mfcc_buffer_size` to allocate the right matrix. + * @param signal: audio signal structure from which to compute features. + * has functions to retrieve data from a signal lazily. + * @param sampling_frequency (int): the sampling frequency of the signal + * we are working with. + * @param frame_length (float): the length of each frame in seconds. + * Default is 0.020s + * @param frame_stride (float): the step between successive frames in seconds. + * Default is 0.01s (means no overlap) + * @param num_cepstral (int): Number of cepstral coefficients. + * @param num_filters (int): the number of filters in the filterbank, + * default 40. + * @param fft_length (int): number of FFT points. Default is 512. + * @param low_frequency (int): lowest band edge of mel filters. + * In Hz, default is 0. + * @param high_frequency (int): highest band edge of mel filters. + * In Hz, default is samplerate/2 + * @param dc_elimination Whether the first dc component should + * be eliminated or not. 
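+ *
+ * Illustrative usage (a sketch only; `my_signal` and the parameter values
+ * are placeholders, not defaults mandated by this header):
+ *
+ *   matrix_size_t out_size = calculate_mfcc_buffer_size(
+ *       my_signal.total_length, 16000, 0.02f, 0.01f, 13, 2);
+ *   EI_DSP_MATRIX(out, out_size.rows, out_size.cols);
+ *   int res = mfcc(&out, &my_signal, 16000, 0.02f, 0.01f,
+ *                  13, 32, 256, 0, 0, true, 2);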
+ * @returns 0 if OK + */ + static int mfcc(matrix_t *out_features, signal_t *signal, + uint32_t sampling_frequency, float frame_length, float frame_stride, + uint8_t num_cepstral, uint16_t num_filters, uint16_t fft_length, + uint32_t low_frequency, uint32_t high_frequency, bool dc_elimination, + uint16_t version) + { + if (out_features->cols != num_cepstral) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + matrix_size_t mfe_matrix_size = + calculate_mfe_buffer_size( + signal->total_length, + sampling_frequency, + frame_length, + frame_stride, + num_filters, + version); + + if (out_features->rows != mfe_matrix_size.rows) { + EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); + } + + int ret = EIDSP_OK; + + // allocate some memory for the MFE result + EI_DSP_MATRIX(features_matrix, mfe_matrix_size.rows, mfe_matrix_size.cols); + if (!features_matrix.buffer) { + EIDSP_ERR(EIDSP_OUT_OF_MEM); + } + + EI_DSP_MATRIX(energy_matrix, mfe_matrix_size.rows, 1); + if (!energy_matrix.buffer) { + EIDSP_ERR(EIDSP_OUT_OF_MEM); + } + + ret = mfe(&features_matrix, &energy_matrix, signal, + sampling_frequency, frame_length, frame_stride, num_filters, fft_length, + low_frequency, high_frequency, version); + if (ret != EIDSP_OK) { + EIDSP_ERR(ret); + } + + // ok... now we need to calculate the MFCC from this... + // first do log() over all features... + ret = numpy::log(&features_matrix); + if (ret != EIDSP_OK) { + EIDSP_ERR(ret); + } + + // now do DST type 2 + ret = numpy::dct2(&features_matrix, DCT_NORMALIZATION_ORTHO); + if (ret != EIDSP_OK) { + EIDSP_ERR(ret); + } + + // replace first cepstral coefficient with log of frame energy for DC elimination + if (dc_elimination) { + for (size_t row = 0; row < features_matrix.rows; row++) { + features_matrix.buffer[row * features_matrix.cols] = numpy::log(energy_matrix.buffer[row]); + } + } + + // copy to the output... + for (size_t row = 0; row < features_matrix.rows; row++) { + for(int i = 0; i < num_cepstral; i++) { + *(out_features->buffer + (num_cepstral * row) + i) = *(features_matrix.buffer + (features_matrix.cols * row) + i); + } + } + + return EIDSP_OK; + } + + /** + * Calculate the buffer size for MFCC + * @param signal_length: Length of the signal. + * @param sampling_frequency (int): The sampling frequency of the signal. + * @param frame_length (float): The length of the frame in second. + * @param frame_stride (float): The stride between frames. + * @param num_cepstral + */ + static matrix_size_t calculate_mfcc_buffer_size( + size_t signal_length, + uint32_t sampling_frequency, + float frame_length, float frame_stride, uint16_t num_cepstral, + uint16_t version) + { + int32_t rows = processing::calculate_no_of_stack_frames( + signal_length, + sampling_frequency, + frame_length, + frame_stride, + false, + version); + int32_t cols = num_cepstral; + + matrix_size_t size_matrix; + size_matrix.rows = (uint32_t)rows; + size_matrix.cols = (uint32_t)cols; + return size_matrix; + } +}; + +} // namespace speechpy +} // namespace ei + +#endif // _EIDSP_SPEECHPY_FEATURE_H_ diff --git a/edgeimpulse/edge-impulse-sdk/dsp/speechpy/functions.hpp b/edgeimpulse/edge-impulse-sdk/dsp/speechpy/functions.hpp new file mode 100644 index 0000000..733c4ac --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/dsp/speechpy/functions.hpp @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef _EIDSP_SPEECHPY_FUNCTIONS_H_ +#define _EIDSP_SPEECHPY_FUNCTIONS_H_ + +#include +#include "../numpy.hpp" +#include "../returntypes.hpp" + +namespace ei { +namespace speechpy { + +class functions { +public: + /** + * Converting from frequency to Mel scale + * + * @param f The frequency values(or a single frequency) in Hz. + * @returns The mel scale values(or a single mel). + */ + static float frequency_to_mel(float f) { +#if EI_PORTING_RENESASRA65 == 1 + return 1127.0 * log(1.0 + f / 700.0f); +#else + return 1127.0 * numpy::log((1.0 + f / 700.0f)); +#endif + } + + /** + * Converting from Mel scale to frequency. + * + * @param mel The mel scale values(or a single mel). + * @returns The frequency values(or a single frequency) in Hz. + */ + static float mel_to_frequency(float mel) { + return 700.0f * (exp(mel / 1127.0f) - 1.0f); + } + + + + + /** + * Triangle, linear scale from left up to middle, then down to right + * @param x Linspace output, will be overwritten! + * @param x_size Size of the linspace output + * @param left Starting index (assigned 0) + * @param middle Index where 1.0 will be placed + * @param right Ending index (assigned 0) + */ + static int triangle(float *x, size_t x_size, int left, int middle, int right) { + EI_DSP_MATRIX(out, 1, x_size); + + for (size_t ix = 0; ix < x_size; ix++) { + if (x[ix] > left && x[ix] <= middle) { + out.buffer[ix] = (x[ix] - left) / (middle - left); + } + + if (x[ix] < right && middle <= x[ix]) { + out.buffer[ix] = (right - x[ix]) / (right - middle); + } + } + + memcpy(x, out.buffer, x_size * sizeof(float)); + + return EIDSP_OK; + } +}; + +} // namespace speechpy +} // namespace ei + +#endif // _EIDSP_SPEECHPY_FUNCTIONS_H_ diff --git a/edgeimpulse/edge-impulse-sdk/dsp/speechpy/processing.hpp b/edgeimpulse/edge-impulse-sdk/dsp/speechpy/processing.hpp new file mode 100644 index 0000000..b7e87e5 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/dsp/speechpy/processing.hpp @@ -0,0 +1,544 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef _EIDSP_SPEECHPY_PROCESSING_H_ +#define _EIDSP_SPEECHPY_PROCESSING_H_ + +#include "../numpy.hpp" + +namespace ei { +namespace speechpy { + +// one stack frame returned by stack_frames +typedef struct ei_stack_frames_info { + signal_t *signal; + ei_vector frame_ixs; + int frame_length; +} stack_frames_info_t; + +namespace processing { + /** + * Lazy Preemphasising on the signal. + * @param signal: The input signal. 
+ * @param shift (int): The shift step. + * @param cof (float): The preemphasising coefficient. 0 equals to no filtering. + */ + class preemphasis { +public: + preemphasis(ei_signal_t *signal, int shift, float cof, bool rescale) + : _signal(signal), _shift(shift), _cof(cof), _rescale(rescale) + { + _prev_buffer = (float*)ei_dsp_calloc(shift * sizeof(float), 1); + _end_of_signal_buffer = (float*)ei_dsp_calloc(shift * sizeof(float), 1); + _next_offset_should_be = 0; + + if (shift < 0) { + _shift = signal->total_length + shift; + } + + if (!_prev_buffer || !_end_of_signal_buffer) return; + + // we need to get the shift bytes from the end of the buffer... + signal->get_data(signal->total_length - shift, shift, _end_of_signal_buffer); + } + + /** + * Get preemphasized data from the underlying audio buffer... + * This retrieves data from the signal then preemphasizes it. + * @param offset Offset in the audio signal + * @param length Length of the audio signal + */ + int get_data(size_t offset, size_t length, float *out_buffer) { + if (!_prev_buffer || !_end_of_signal_buffer) { + EIDSP_ERR(EIDSP_OUT_OF_MEM); + } + if (offset + length > _signal->total_length) { + EIDSP_ERR(EIDSP_OUT_OF_BOUNDS); + } + + int ret; + if (static_cast(offset) - _shift >= 0) { + ret = _signal->get_data(offset - _shift, _shift, _prev_buffer); + if (ret != 0) { + EIDSP_ERR(ret); + } + } + // else we'll use the end_of_signal_buffer; so no need to check + + ret = _signal->get_data(offset, length, out_buffer); + if (ret != 0) { + EIDSP_ERR(ret); + } + + // now we have the signal and we can preemphasize + for (size_t ix = 0; ix < length; ix++) { + float now = out_buffer[ix]; + + // under shift? read from end + if (offset + ix < static_cast(_shift)) { + out_buffer[ix] = now - (_cof * _end_of_signal_buffer[offset + ix]); + } + // otherwise read from history buffer + else { + out_buffer[ix] = now - (_cof * _prev_buffer[0]); + } + + // roll through and overwrite last element + if (_shift != 1) { + numpy::roll(_prev_buffer, _shift, -1); + } + _prev_buffer[_shift - 1] = now; + } + + _next_offset_should_be += length; + + // rescale from [-1 .. 1] ? + if (_rescale) { + matrix_t scale_matrix(length, 1, out_buffer); + ret = numpy::scale(&scale_matrix, 1.0f / 32768.0f); + if (ret != 0) { + EIDSP_ERR(ret); + } + } + + return EIDSP_OK; + } + + ~preemphasis() { + if (_prev_buffer) { + ei_dsp_free(_prev_buffer, _shift * sizeof(float)); + } + if (_end_of_signal_buffer) { + ei_dsp_free(_end_of_signal_buffer, _shift * sizeof(float)); + } + } + +private: + ei_signal_t *_signal; + int _shift; + float _cof; + float *_prev_buffer; + float *_end_of_signal_buffer; + size_t _next_offset_should_be; + bool _rescale; + }; +} + +namespace processing { + /** + * Preemphasising on the signal. This modifies the signal in place! + * For memory consumption reasons you **probably** want the preemphasis class, + * which lazily loads the signal in. + * @param signal (array): The input signal. + * @param shift (int): The shift step. + * @param cof (float): The preemphasising coefficient. 0 equals to no filtering. 
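+ *
+ * Illustrative usage (a sketch; `samples` is a placeholder buffer, not part
+ * of this header):
+ *
+ *   float samples[512];
+ *   // ... fill samples with audio data ...
+ *   processing::preemphasis(samples, 512, 1, 0.98f);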
+ * @returns 0 when successful + */ + __attribute__((unused)) static int preemphasis(float *signal, size_t signal_size, int shift = 1, float cof = 0.98f) + { + if (shift < 0) { + shift = signal_size + shift; + } + + // so we need to keep some history + float *prev_buffer = (float*)ei_dsp_calloc(shift * sizeof(float), 1); + + // signal - cof * xt::roll(signal, shift) + for (size_t ix = 0; ix < signal_size; ix++) { + float now = signal[ix]; + + // under shift? read from end + if (ix < static_cast(shift)) { + signal[ix] = now - (cof * signal[signal_size - shift + ix]); + } + // otherwise read from history buffer + else { + signal[ix] = now - (cof * prev_buffer[0]); + } + + // roll through and overwrite last element + numpy::roll(prev_buffer, shift, -1); + prev_buffer[shift - 1] = now; + } + + ei_dsp_free(prev_buffer, shift * sizeof(float)); + + return EIDSP_OK; + } + + /** + * frame_length is a float and can thus be off by a little bit, e.g. + * frame_length = 0.018f actually can yield 0.018000011f + * thus screwing up our frame calculations here... + */ + static float ceil_unless_very_close_to_floor(float v) { + if (v > floor(v) && v - floor(v) < 0.001f) { + v = (floor(v)); + } + else { + v = (ceil(v)); + } + return v; + } + + /** + * Calculate the length of a signal that will be sused for the settings provided. + * @param signal_size: The number of frames in the signal + * @param sampling_frequency (int): The sampling frequency of the signal. + * @param frame_length (float): The length of the frame in second. + * @param frame_stride (float): The stride between frames. + * @returns Number of frames required, or a negative number if an error occured + */ + static int calculate_signal_used( + size_t signal_size, + uint32_t sampling_frequency, + float frame_length, + float frame_stride, + bool zero_padding, + uint16_t version) + { + int frame_sample_length; + int length; + if (version == 1) { + frame_sample_length = static_cast(round(static_cast(sampling_frequency) * frame_length)); + frame_stride = round(static_cast(sampling_frequency) * frame_stride); + length = frame_sample_length; + } + else { + frame_sample_length = static_cast(ceil_unless_very_close_to_floor(static_cast(sampling_frequency) * frame_length)); + float frame_stride_arg = frame_stride; + frame_stride = ceil_unless_very_close_to_floor(static_cast(sampling_frequency) * frame_stride_arg); + length = (frame_sample_length - (int)frame_stride); + } + + volatile int numframes; + volatile int len_sig; + + if (zero_padding) { + // Calculation of number of frames + numframes = static_cast( + ceil(static_cast(signal_size - length) / frame_stride)); + + // Zero padding + len_sig = static_cast(static_cast(numframes) * frame_stride) + frame_sample_length; + } + else { + numframes = static_cast( + floor(static_cast(signal_size - length) / frame_stride)); + len_sig = static_cast( + (static_cast(numframes - 1) * frame_stride + frame_sample_length)); + } + + return len_sig; + } + + /** + * Frame a signal into overlapping frames. + * @param info This is both the base object and where we'll store our results. + * @param sampling_frequency (int): The sampling frequency of the signal. + * @param frame_length (float): The length of the frame in second. + * @param frame_stride (float): The stride between frames. + * @param zero_padding (bool): If the samples is not a multiple of + * frame_length(number of frames sample), zero padding will + * be done for generating last frame. 
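+ *
+ * Illustrative usage (a sketch; `my_signal` is assumed to be an initialized
+ * signal_t and is not defined in this header):
+ *
+ *   stack_frames_info_t info = { 0 };
+ *   info.signal = &my_signal;
+ *   int res = processing::stack_frames(&info, 16000.0f, 0.02f, 0.02f, false, 2);
+ *   // info.frame_ixs now holds the start index of every frame and
+ *   // info.frame_length the number of samples per frame.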
+ * @returns EIDSP_OK if OK + */ + static int stack_frames(stack_frames_info_t *info, + float sampling_frequency, + float frame_length, + float frame_stride, + bool zero_padding, + uint16_t version) + { + if (!info->signal || !info->signal->get_data || info->signal->total_length == 0) { + EIDSP_ERR(EIDSP_SIGNAL_SIZE_MISMATCH); + } + + size_t length_signal = info->signal->total_length; + int frame_sample_length; + int length; + if (version == 1) { + frame_sample_length = static_cast(round(static_cast(sampling_frequency) * frame_length)); + frame_stride = round(static_cast(sampling_frequency) * frame_stride); + length = frame_sample_length; + } + else { + frame_sample_length = static_cast(ceil_unless_very_close_to_floor(static_cast(sampling_frequency) * frame_length)); + float frame_stride_arg = frame_stride; + frame_stride = ceil_unless_very_close_to_floor(static_cast(sampling_frequency) * frame_stride_arg); + length = (frame_sample_length - (int)frame_stride); + } + + volatile int numframes; + volatile int len_sig; + + if (zero_padding) { + // Calculation of number of frames + numframes = static_cast( + ceil(static_cast(length_signal - length) / frame_stride)); + + // Zero padding + len_sig = static_cast(static_cast(numframes) * frame_stride) + frame_sample_length; + + info->signal->total_length = static_cast(len_sig); + } + else { + numframes = static_cast( + floor(static_cast(length_signal - length) / frame_stride)); + len_sig = static_cast( + (static_cast(numframes - 1) * frame_stride + frame_sample_length)); + + info->signal->total_length = static_cast(len_sig); + } + + info->frame_ixs.clear(); + + int frame_count = 0; + + for (size_t ix = 0; ix < static_cast(len_sig); ix += static_cast(frame_stride)) { + if (++frame_count > numframes) break; + + info->frame_ixs.push_back(ix); + } + + info->frame_length = frame_sample_length; + + return EIDSP_OK; + } + + /** + * Calculate the number of stack frames for the settings provided. + * This is needed to allocate the right buffer size for the output of f.e. the MFE + * blocks. + * @param signal_size: The number of frames in the signal + * @param sampling_frequency (int): The sampling frequency of the signal. + * @param frame_length (float): The length of the frame in second. + * @param frame_stride (float): The stride between frames. + * @param zero_padding (bool): If the samples is not a multiple of + * frame_length(number of frames sample), zero padding will + * be done for generating last frame. 
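+ *
+ * Worked example (illustrative values): one second of 16 kHz audio with
+ * 20 ms frames, a 20 ms stride, version 2 and no zero padding:
+ *
+ *   int32_t rows = processing::calculate_no_of_stack_frames(
+ *       16000, 16000, 0.02f, 0.02f, false, 2);   // -> 50 frames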
+ * @returns Number of frames required, or a negative number if an error occured + */ + static int32_t calculate_no_of_stack_frames( + size_t signal_size, + uint32_t sampling_frequency, + float frame_length, + float frame_stride, + bool zero_padding, + uint16_t version) + { + int frame_sample_length; + int length; + if (version == 1) { + frame_sample_length = static_cast(round(static_cast(sampling_frequency) * frame_length)); + frame_stride = round(static_cast(sampling_frequency) * frame_stride); + length = frame_sample_length; + } + else { + frame_sample_length = static_cast(ceil_unless_very_close_to_floor(static_cast(sampling_frequency) * frame_length)); + float frame_stride_arg = frame_stride; + frame_stride = ceil_unless_very_close_to_floor(static_cast(sampling_frequency) * frame_stride_arg); + length = (frame_sample_length - (int)frame_stride); + } + + volatile int numframes; + + if (zero_padding) { + // Calculation of number of frames + numframes = static_cast( + ceil(static_cast(signal_size - length) / frame_stride)); + } + else { + numframes = static_cast( + floor(static_cast(signal_size - length) / frame_stride)); + } + + return numframes; + } + + /** + * This function performs local cepstral mean and + * variance normalization on a sliding window. The code assumes that + * there is one observation per row. + * @param features_matrix input feature matrix, will be modified in place + * @param win_size The size of sliding window for local normalization. + * Default=301 which is around 3s if 100 Hz rate is + * considered(== 10ms frame stide) + * @param variance_normalization If the variance normilization should + * be performed or not. + * @param scale Scale output to 0..1 + * @returns 0 if OK + */ + static int cmvnw(matrix_t *features_matrix, uint16_t win_size = 301, bool variance_normalization = false, + bool scale = false) + { + if (win_size == 0) { + return EIDSP_OK; + } + + uint16_t pad_size = (win_size - 1) / 2; + + int ret; + float *features_buffer_ptr; + + // mean & variance normalization + EI_DSP_MATRIX(vec_pad, features_matrix->rows + (pad_size * 2), features_matrix->cols); + if (!vec_pad.buffer) { + EIDSP_ERR(EIDSP_OUT_OF_MEM); + } + + ret = numpy::pad_1d_symmetric(features_matrix, &vec_pad, pad_size, pad_size); + if (ret != EIDSP_OK) { + EIDSP_ERR(ret); + } + + EI_DSP_MATRIX(mean_matrix, vec_pad.cols, 1); + if (!mean_matrix.buffer) { + EIDSP_ERR(EIDSP_OUT_OF_MEM); + } + + EI_DSP_MATRIX(window_variance, vec_pad.cols, 1); + if (!window_variance.buffer) { + return EIDSP_OUT_OF_MEM; + } + + for (size_t ix = 0; ix < features_matrix->rows; ix++) { + // create a slice on the vec_pad + EI_DSP_MATRIX_B(window, win_size, vec_pad.cols, vec_pad.buffer + (ix * vec_pad.cols)); + if (!window.buffer) { + EIDSP_ERR(EIDSP_OUT_OF_MEM); + } + + ret = numpy::mean_axis0(&window, &mean_matrix); + if (ret != EIDSP_OK) { + EIDSP_ERR(ret); + } + + // subtract the mean for the features + for (size_t fm_col = 0; fm_col < features_matrix->cols; fm_col++) { + features_matrix->buffer[(ix * features_matrix->cols) + fm_col] = + features_matrix->buffer[(ix * features_matrix->cols) + fm_col] - mean_matrix.buffer[fm_col]; + } + } + + ret = numpy::pad_1d_symmetric(features_matrix, &vec_pad, pad_size, pad_size); + if (ret != EIDSP_OK) { + EIDSP_ERR(ret); + } + + for (size_t ix = 0; ix < features_matrix->rows; ix++) { + // create a slice on the vec_pad + EI_DSP_MATRIX_B(window, win_size, vec_pad.cols, vec_pad.buffer + (ix * vec_pad.cols)); + if (!window.buffer) { + EIDSP_ERR(EIDSP_OUT_OF_MEM); + } + + if 
(variance_normalization == true) { + ret = numpy::std_axis0(&window, &window_variance); + if (ret != EIDSP_OK) { + EIDSP_ERR(ret); + } + + features_buffer_ptr = &features_matrix->buffer[ix * vec_pad.cols]; + for (size_t col = 0; col < vec_pad.cols; col++) { + *(features_buffer_ptr) = (*(features_buffer_ptr)) / + (window_variance.buffer[col] + 1e-10); + features_buffer_ptr++; + } + } + } + + if (scale) { + ret = numpy::normalize(features_matrix); + if (ret != EIDSP_OK) { + EIDSP_ERR(ret); + } + } + + return EIDSP_OK; + } + + /** + * Perform normalization for MFE frames, this converts the signal to dB, + * then add a hard filter, and quantize / dequantize the output + * @param features_matrix input feature matrix, will be modified in place + */ + static int mfe_normalization(matrix_t *features_matrix, int noise_floor_db) { + const float noise = static_cast(noise_floor_db * -1); + const float noise_scale = 1.0f / (static_cast(noise_floor_db * -1) + 12.0f); + + for (size_t ix = 0; ix < features_matrix->rows * features_matrix->cols; ix++) { + float f = features_matrix->buffer[ix]; + if (f < 1e-30) { + f = 1e-30; + } + f = numpy::log10(f); + f *= 10.0f; // scale by 10 + f += noise; + f *= noise_scale; + // clip again + + /* Here is the python code we're duplicating: + # Quantize to 8 bits and dequantize back to float32 + mfe = np.uint8(np.around(mfe * 2**8)) + # clip to 2**8 + mfe = np.clip(mfe, 0, 255) + mfe = np.float32(mfe / 2**8) + */ + + f = roundf(f*256)/256; + + if (f < 0.0f) f = 0.0f; + else if (f > 1.0f) f = 1.0f; + features_matrix->buffer[ix] = f; + } + + return EIDSP_OK; + } + + /** + * Perform normalization for spectrogram frames, this converts the signal to dB, + * then add a hard filter + * @param features_matrix input feature matrix, will be modified in place + */ + static int spectrogram_normalization(matrix_t *features_matrix, int noise_floor_db, bool clip_at_one) { + const float noise = static_cast(noise_floor_db * -1); + const float noise_scale = 1.0f / (static_cast(noise_floor_db * -1) + 12.0f); + + for (size_t ix = 0; ix < features_matrix->rows * features_matrix->cols; ix++) { + float f = features_matrix->buffer[ix]; + if (f < 1e-30) { + f = 1e-30; + } + f = numpy::log10(f); + f *= 10.0f; // scale by 10 + f += noise; + f *= noise_scale; + // clip again + if (f < 0.0f) f = 0.0f; + else if (f > 1.0f && clip_at_one) f = 1.0f; + features_matrix->buffer[ix] = f; + } + + return EIDSP_OK; + } +}; + +} // namespace speechpy +} // namespace ei + +#endif // _EIDSP_SPEECHPY_PROCESSING_H_ diff --git a/edgeimpulse/edge-impulse-sdk/dsp/speechpy/speechpy.hpp b/edgeimpulse/edge-impulse-sdk/dsp/speechpy/speechpy.hpp new file mode 100644 index 0000000..c2ca9b6 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/dsp/speechpy/speechpy.hpp @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef _EIDSP_SPEECHPY_SPEECHPY_H_ +#define _EIDSP_SPEECHPY_SPEECHPY_H_ + +#include "../config.hpp" +#include "feature.hpp" +#include "functions.hpp" +#include "processing.hpp" + +#endif // _EIDSP_SPEECHPY_SPEECHPY_H_ diff --git a/edgeimpulse/edge-impulse-sdk/porting/.clang-format b/edgeimpulse/edge-impulse-sdk/porting/.clang-format new file mode 100644 index 0000000..20ffce8 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/.clang-format @@ -0,0 +1,2 @@ +"DisableFormat": true +"SortIncludes": false diff --git a/edgeimpulse/edge-impulse-sdk/porting/arduino/debug_log.cpp b/edgeimpulse/edge-impulse-sdk/porting/arduino/debug_log.cpp new file mode 100644 index 0000000..794710e --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/arduino/debug_log.cpp @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "../ei_classifier_porting.h" +#if EI_PORTING_ARDUINO == 1 + +#include "edge-impulse-sdk/tensorflow/lite/micro/debug_log.h" +#include +#include + +// On mbed platforms, we set up a serial port and write to it for debug logging. +#if defined(__cplusplus) && EI_C_LINKAGE == 1 +extern "C" +#endif // defined(__cplusplus) && EI_C_LINKAGE == 1 +void DebugLog(const char* s) { + ei_printf("%s", s); +} + +#endif // EI_PORTING_ARDUINO diff --git a/edgeimpulse/edge-impulse-sdk/porting/arduino/ei_classifier_porting.cpp b/edgeimpulse/edge-impulse-sdk/porting/arduino/ei_classifier_porting.cpp new file mode 100644 index 0000000..af64f97 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/arduino/ei_classifier_porting.cpp @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "../ei_classifier_porting.h" +#if EI_PORTING_ARDUINO == 1 + +#include +#include +#include + +#define EI_WEAK_FN __attribute__((weak)) + +EI_WEAK_FN EI_IMPULSE_ERROR ei_run_impulse_check_canceled() { + return EI_IMPULSE_OK; +} + +EI_WEAK_FN EI_IMPULSE_ERROR ei_sleep(int32_t time_ms) { + delay(time_ms); + return EI_IMPULSE_OK; +} + +uint64_t ei_read_timer_ms() { + return millis(); +} + +uint64_t ei_read_timer_us() { + return micros(); +} + +void ei_serial_set_baudrate(int baudrate) +{ + +} + +EI_WEAK_FN void ei_putchar(char c) +{ + Serial.write(c); +} + +EI_WEAK_FN char ei_getchar() +{ + char ch = 0; + if (Serial.available() > 0) { + ch = Serial.read(); + } + return ch; +} + +/** + * Printf function uses vsnprintf and output using Arduino Serial + */ +__attribute__((weak)) void ei_printf(const char *format, ...) { + static char print_buf[1024] = { 0 }; + + va_list args; + va_start(args, format); + int r = vsnprintf(print_buf, sizeof(print_buf), format, args); + va_end(args); + + if (r > 0) { + Serial.write(print_buf); + } +} + +__attribute__((weak)) void ei_printf_float(float f) { + Serial.print(f, 6); +} + +__attribute__((weak)) void *ei_malloc(size_t size) { + return malloc(size); +} + +__attribute__((weak)) void *ei_calloc(size_t nitems, size_t size) { + return calloc(nitems, size); +} + +__attribute__((weak)) void ei_free(void *ptr) { + free(ptr); +} + +#if defined(__cplusplus) && EI_C_LINKAGE == 1 +extern "C" +#endif +__attribute__((weak)) void DebugLog(const char* s) { + ei_printf("%s", s); +} + +#endif // EI_PORTING_ARDUINO == 1 diff --git a/edgeimpulse/edge-impulse-sdk/porting/brickml/debug_log.cpp b/edgeimpulse/edge-impulse-sdk/porting/brickml/debug_log.cpp new file mode 100644 index 0000000..d0da510 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/brickml/debug_log.cpp @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "../ei_classifier_porting.h" +#if EI_PORTING_BRICKML == 1 + +#include "edge-impulse-sdk/tensorflow/lite/micro/debug_log.h" +#include +#include + +// Redirect TFLite DebugLog to ei_printf +#if defined(__cplusplus) && EI_C_LINKAGE == 1 +extern "C" +#endif // defined(__cplusplus) && EI_C_LINKAGE == 1 +void DebugLog(const char* s) { + ei_printf("%s", s); +} + +#endif // EI_PORTING_RENESASRA65 == 1 diff --git a/edgeimpulse/edge-impulse-sdk/porting/brickml/ei_classifier_porting.cpp b/edgeimpulse/edge-impulse-sdk/porting/brickml/ei_classifier_porting.cpp new file mode 100644 index 0000000..09b7485 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/brickml/ei_classifier_porting.cpp @@ -0,0 +1,178 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +/* Includes */ +#include "../ei_classifier_porting.h" +#if (EI_PORTING_BRICKML == 1) + +#include +#include +#include +#include "unistd.h" +#include "trace_use.h" +#include "comms.h" +#include + +#include "FreeRTOS.h" +#include "task.h" + +static void *pvPortCalloc(size_t sNb, size_t sSize); + +__attribute__((weak)) EI_IMPULSE_ERROR ei_run_impulse_check_canceled() { + return EI_IMPULSE_OK; +} + +/** + * Cancelable sleep, can be triggered with signal from other thread + */ +__attribute__((weak)) EI_IMPULSE_ERROR ei_sleep(int32_t time_ms) { + + vTaskDelay(time_ms / portTICK_PERIOD_MS); + + return EI_IMPULSE_OK; +} + +uint64_t ei_read_timer_ms() { + + return xTaskGetTickCount(); +} + +uint64_t ei_read_timer_us() { + + return xTaskGetTickCount()*1000; +} + +__attribute__((weak)) void ei_printf(const char *format, ...) { + + char buffer[256] = {0}; + int length; + va_list myargs; + va_start(myargs, format); + length = vsnprintf(buffer, sizeof(buffer), format, myargs); + va_end(myargs); + + if (length > 0){ + comms_send((uint8_t*)buffer, length, 100); + } + +} + +__attribute__((weak)) void ei_printf_float(float f) { + float n = f; + + static double PRECISION = 0.00001; + static int MAX_NUMBER_STRING_SIZE = 32; + + char s[MAX_NUMBER_STRING_SIZE]; + + if (n == 0.0) { + strcpy(s, "0"); + } + else { + int digit, m; + char *c = s; + int neg = (n < 0); + if (neg) { + n = -n; + } + // calculate magnitude + m = log10(n); + if (neg) { + *(c++) = '-'; + } + if (m < 1.0) { + m = 0; + } + // convert the number + while (n > PRECISION || m >= 0) { + double weight = pow(10.0, m); + if (weight > 0 && !isinf(weight)) { + digit = floor(n / weight); + n -= (digit * weight); + *(c++) = '0' + digit; + } + if (m == 0 && n > 0) { + *(c++) = '.'; + } + m--; + } + *(c) = '\0'; + } + + ei_printf("%s", s); +} + +/** + * + * @param c + */ +void ei_putchar(char c) +{ + ei_printf("%c", c); +} + +__attribute__((weak)) void *ei_malloc(size_t size) { + return pvPortMalloc(size); +} + +__attribute__((weak)) void *ei_calloc(size_t nitems, size_t size) { + return pvPortCalloc(nitems, size); +} + +__attribute__((weak)) void ei_free(void *ptr) { + vPortFree(ptr); +} + +#if defined(__cplusplus) && EI_C_LINKAGE == 1 +extern "C" +#endif +__attribute__((weak)) void DebugLog(const char* s) { + ei_printf("%s", s); +} + +static void *pvPortCalloc(size_t sNb, size_t sSize) +{ + void *vPtr = NULL; + if (sSize > 0) { + vPtr = pvPortMalloc(sSize * sNb); // Call FreeRTOS or other standard API + if(vPtr) + memset(vPtr, 0, (sSize * sNb)); // Must required + } + return vPtr; +} + +void * operator new( size_t size ) +{ + return pvPortMalloc( size ); +} + +void * operator new[]( size_t size ) +{ + return pvPortMalloc(size); +} + +void operator delete( void * ptr ) +{ + vPortFree ( ptr ); +} + +void operator delete[]( void * ptr ) +{ + vPortFree ( ptr ); +} + +#endif // EI_PORTING_BRICKML == 1 diff --git a/edgeimpulse/edge-impulse-sdk/porting/ei_classifier_porting.h b/edgeimpulse/edge-impulse-sdk/porting/ei_classifier_porting.h new file mode 100644 index 0000000..cb82adf --- /dev/null 
+++ b/edgeimpulse/edge-impulse-sdk/porting/ei_classifier_porting.h @@ -0,0 +1,238 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef _EI_CLASSIFIER_PORTING_H_ +#define _EI_CLASSIFIER_PORTING_H_ + +#include +#include + +#if defined(__cplusplus) && EI_C_LINKAGE == 1 +extern "C" { +#endif // defined(__cplusplus) + +typedef enum { + EI_IMPULSE_OK = 0, + EI_IMPULSE_ERROR_SHAPES_DONT_MATCH = -1, + EI_IMPULSE_CANCELED = -2, + EI_IMPULSE_TFLITE_ERROR = -3, + EI_IMPULSE_DSP_ERROR = -5, + EI_IMPULSE_TFLITE_ARENA_ALLOC_FAILED = -6, + EI_IMPULSE_CUBEAI_ERROR = -7, + EI_IMPULSE_ALLOC_FAILED = -8, + EI_IMPULSE_ONLY_SUPPORTED_FOR_IMAGES = -9, + EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE = -10, + EI_IMPULSE_OUT_OF_MEMORY = -11, + EI_IMPULSE_INPUT_TENSOR_WAS_NULL = -13, + EI_IMPULSE_OUTPUT_TENSOR_WAS_NULL = -14, + EI_IMPULSE_SCORE_TENSOR_WAS_NULL = -15, + EI_IMPULSE_LABEL_TENSOR_WAS_NULL = -16, + EI_IMPULSE_TENSORRT_INIT_FAILED = -17, + EI_IMPULSE_DRPAI_INIT_FAILED = -18, + EI_IMPULSE_DRPAI_RUNTIME_FAILED = -19, + EI_IMPULSE_DEPRECATED_MODEL = -20, + EI_IMPULSE_LAST_LAYER_NOT_AVAILABLE = -21, + EI_IMPULSE_INFERENCE_ERROR = -22, + EI_IMPULSE_AKIDA_ERROR = -23, + EI_IMPULSE_INVALID_SIZE = -24, + EI_IMPULSE_ONNX_ERROR = -25, + EI_IMPULSE_MEMRYX_ERROR = -26, +} EI_IMPULSE_ERROR; + +/** + * Cancelable sleep, can be triggered with signal from other thread + */ +EI_IMPULSE_ERROR ei_sleep(int32_t time_ms); + +/** + * Check if the sampler thread was canceled, use this in conjunction with + * the same signaling mechanism as ei_sleep + */ +EI_IMPULSE_ERROR ei_run_impulse_check_canceled(); + +/** + * Read the millisecond timer + */ +uint64_t ei_read_timer_ms(); + +/** + * Read the microsecond timer + */ +uint64_t ei_read_timer_us(); + +/** + * Set Serial baudrate + */ +void ei_serial_set_baudrate(int baudrate); + +/** + * @brief Connect to putchar of target + * + * @param[in] c The chararater + */ +void ei_putchar(char c); + +/** + * @brief Connect to getchar of target + * @return character from serial +*/ +char ei_getchar(void); + +/** + * Print wrapper around printf() + * This is used internally to print debug information. + */ +__attribute__ ((format (printf, 1, 2))) +void ei_printf(const char *format, ...); + +/** + * Override this function if your target cannot properly print floating points + * If not overriden, this will be sent through `ei_printf()`. 
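+ *
+ * Illustrative override (a sketch only; `my_uart_write` is a placeholder for
+ * a target-specific output routine, not part of this SDK):
+ *
+ *   void ei_printf_float(float f) {
+ *       char buf[32];
+ *       snprintf(buf, sizeof(buf), "%.6f", f);
+ *       my_uart_write(buf, strlen(buf));
+ *   }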
+ */ +void ei_printf_float(float f); + +/** + * Wrapper around malloc + */ +void *ei_malloc(size_t size); + +/** + * Wrapper around calloc + */ +void *ei_calloc(size_t nitems, size_t size); + +/** + * Wrapper around free + */ +void ei_free(void *ptr); + +#if defined(__cplusplus) && EI_C_LINKAGE == 1 +} +#endif // defined(__cplusplus) && EI_C_LINKAGE == 1 + +// Load porting layer depending on target + +// First check if any of the general frameworks or operating systems are supported/enabled +#ifndef EI_PORTING_ZEPHYR +#if defined(__ZEPHYR__) +#define EI_PORTING_ZEPHYR 1 +#else +#define EI_PORTING_ZEPHYR 0 +#endif +#endif + +#ifndef EI_PORTING_ARDUINO +#ifdef ARDUINO +#define EI_PORTING_ARDUINO 1 +#else +#define EI_PORTING_ARDUINO 0 +#endif +#endif + +#ifndef EI_PORTING_MBED +#ifdef __MBED__ +#define EI_PORTING_MBED 1 +#else +#define EI_PORTING_MBED 0 +#endif +#endif + +// Then check for target spcific build systems + +#ifndef EI_PORTING_ESPRESSIF +#if ((defined(CONFIG_IDF_TARGET_ESP32) || defined(CONFIG_IDF_TARGET_ESP32S3)) && EI_PORTING_ZEPHYR == 0) +#include "esp_idf_version.h" +#if ESP_IDF_VERSION >= ESP_IDF_VERSION_VAL(5, 0, 0) +#define portTICK_RATE_MS portTICK_PERIOD_MS +#endif +#define EI_PORTING_ESPRESSIF 1 +#define EI_PORTING_ARDUINO 0 +#else +#define EI_PORTING_ESPRESSIF 0 +#endif +#endif + +#ifndef EI_PORTING_POSIX +#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) +#define EI_PORTING_POSIX 1 +#else +#define EI_PORTING_POSIX 0 +#endif +#endif + +#ifndef EI_PORTING_SILABS +#if defined(EFR32MG12P332F1024GL125) +#define EI_PORTING_SILABS 1 +#else +#define EI_PORTING_SILABS 0 +#endif +#endif + +#ifndef EI_PORTING_RASPBERRY +#ifdef PICO_BOARD +#define EI_PORTING_RASPBERRY 1 +#else +#define EI_PORTING_RASPBERRY 0 +#endif +#endif + + +#ifndef EI_PORTING_STM32_CUBEAI +#if defined(USE_HAL_DRIVER) && !defined(__MBED__) && EI_PORTING_ZEPHYR == 0 +#define EI_PORTING_STM32_CUBEAI 1 +#else +#define EI_PORTING_STM32_CUBEAI 0 +#endif +#endif + +#ifndef EI_PORTING_HIMAX +#ifdef CPU_ARC +#define EI_PORTING_HIMAX 1 +#else +#define EI_PORTING_HIMAX 0 +#endif +#endif + +#ifndef EI_PORTING_MINGW32 +#ifdef __MINGW32__ +#define EI_PORTING_MINGW32 1 +#else +#define EI_PORTING_MINGW32 0 +#endif +#endif +// End load porting layer depending on target + +// Additional configuration for specific architecture +#if defined(__CORTEX_M) + +#if (__CORTEX_M == 55U) +#define EI_MAX_OVERFLOW_BUFFER_COUNT 15 +#endif + +#if (__CORTEX_M == 85U) +#define EI_MAX_OVERFLOW_BUFFER_COUNT 50 +#endif + +#endif + +#if defined(CONFIG_IDF_TARGET_ESP32S3) +#define EI_MAX_OVERFLOW_BUFFER_COUNT 30 +#endif + +// End additional configuration + +#endif // _EI_CLASSIFIER_PORTING_H_ diff --git a/edgeimpulse/edge-impulse-sdk/porting/ei_logging.h b/edgeimpulse/edge-impulse-sdk/porting/ei_logging.h new file mode 100644 index 0000000..d15832e --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/ei_logging.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef _EI_LOGGING_H_ +#define _EI_LOGGING_H_ + +#include +#include + +#include "ei_classifier_porting.h" + +#define EI_LOG_LEVEL_NONE 0 /*!< No log output */ +#define EI_LOG_LEVEL_ERROR 1 /*!< Critical errors, software module can not recover on its own */ +#define EI_LOG_LEVEL_WARNING 2 /*!< Error conditions from which recovery measures have been taken */ +#define EI_LOG_LEVEL_INFO 3 /*!< Information messages which describe normal flow of events */ +#define EI_LOG_LEVEL_DEBUG 4 /*!< Extra information which is not necessary for normal use (values, pointers, sizes, etc). */ + +// if we do not want ANY logging, setting EI_LOG_LEVEL to EI_LOG_LEVEL_NONE +// will not generate any code according to +// https://stackoverflow.com/a/25021889 + +#define EI_LOGE(format, ...) (void)0 +#define EI_LOGW(format, ...) (void)0 +#define EI_LOGI(format, ...) (void)0 +#define EI_LOGD(format, ...) (void)0 + +#ifndef EI_LOG_LEVEL + #define EI_LOG_LEVEL EI_LOG_LEVEL_NONE +#endif + +#if defined(__cplusplus) && EI_C_LINKAGE == 1 +extern "C" +#endif // defined(__cplusplus) && EI_C_LINKAGE == 1 + +const char *debug_msgs[] = +{ + "NONE", // this one will never show + "ERR", + "WARNING", + "INFO", + "DEBUG" +}; + +#if EI_LOG_LEVEL >= EI_LOG_LEVEL_ERROR + #ifdef EI_LOGE + #undef EI_LOGE + #endif // EI_LOGE + #define EI_LOGE(format, ...) ei_printf("%s: ",debug_msgs[EI_LOG_LEVEL_ERROR]); ei_printf(format, ##__VA_ARGS__); +#endif + +#if EI_LOG_LEVEL >= EI_LOG_LEVEL_WARNING + #ifdef EI_LOGW + #undef EI_LOGW + #endif // EI_LOGW + #define EI_LOGW(format, ...) ei_printf("%s: ",debug_msgs[EI_LOG_LEVEL_WARNING]); ei_printf(format, ##__VA_ARGS__); +#endif + +#if EI_LOG_LEVEL >= EI_LOG_LEVEL_INFO + #ifdef EI_LOGI + #undef EI_LOGI + #endif // EI_LOGI + #define EI_LOGI(format, ...) ei_printf("%s: ",debug_msgs[EI_LOG_LEVEL_INFO]); ei_printf(format, ##__VA_ARGS__); +#endif + +#if EI_LOG_LEVEL >= EI_LOG_LEVEL_DEBUG + #ifdef EI_LOGD + #undef EI_LOGD + #endif // EI_LOGD + #define EI_LOGD(format, ...) 
ei_printf("%s: ",debug_msgs[EI_LOG_LEVEL_DEBUG]); ei_printf(format, ##__VA_ARGS__); +#endif + +#endif // _EI_LOGGING_H_ \ No newline at end of file diff --git a/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/CMakeLists.txt b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/CMakeLists.txt new file mode 100644 index 0000000..736eaf9 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/CMakeLists.txt @@ -0,0 +1,51 @@ +cmake_minimum_required(VERSION 3.5) + +set(c_srcs + "src/activation_functions/esp_nn_relu_ansi.c" + "src/basic_math/esp_nn_add_ansi.c" + "src/basic_math/esp_nn_mul_ansi.c" + "src/convolution/esp_nn_conv_ansi.c" + "src/convolution/esp_nn_conv_opt.c" + "src/convolution/esp_nn_depthwise_conv_ansi.c" + "src/convolution/esp_nn_depthwise_conv_opt.c" + "src/fully_connected/esp_nn_fully_connected_ansi.c" + "src/softmax/esp_nn_softmax_ansi.c" + "src/softmax/esp_nn_softmax_opt.c" + "src/pooling/esp_nn_avg_pool_ansi.c" + "src/pooling/esp_nn_max_pool_ansi.c") + +if(CONFIG_IDF_TARGET_ESP32S3) + set(s3_srcs + "src/common/esp_nn_common_functions_esp32s3.S" + "src/common/esp_nn_multiply_by_quantized_mult_esp32s3.S" + "src/common/esp_nn_multiply_by_quantized_mult_ver1_esp32s3.S" + "src/activation_functions/esp_nn_relu_s8_esp32s3.S" + "src/basic_math/esp_nn_add_s8_esp32s3.S" + "src/basic_math/esp_nn_mul_s8_esp32s3.S" + "src/convolution/esp_nn_conv_esp32s3.c" + "src/convolution/esp_nn_depthwise_conv_s8_esp32s3.c" + "src/convolution/esp_nn_conv_s16_mult8_esp32s3.S" + "src/convolution/esp_nn_conv_s8_mult8_1x1_esp32s3.S" + "src/convolution/esp_nn_conv_s16_mult4_1x1_esp32s3.S" + "src/convolution/esp_nn_conv_s8_filter_aligned_input_padded_esp32s3.S" + "src/convolution/esp_nn_depthwise_conv_s8_mult1_3x3_padded_esp32s3.S" + "src/convolution/esp_nn_depthwise_conv_s16_mult1_esp32s3.S" + "src/convolution/esp_nn_depthwise_conv_s16_mult1_3x3_esp32s3.S" + "src/convolution/esp_nn_depthwise_conv_s16_mult1_3x3_no_pad_esp32s3.S" + "src/convolution/esp_nn_depthwise_conv_s16_mult8_3x3_esp32s3.S" + "src/convolution/esp_nn_depthwise_conv_s16_mult4_esp32s3.S" + "src/convolution/esp_nn_depthwise_conv_s16_mult8_esp32s3.S" + "src/fully_connected/esp_nn_fully_connected_s8_esp32s3.S" + "src/pooling/esp_nn_max_pool_s8_esp32s3.S" + "src/pooling/esp_nn_avg_pool_s8_esp32s3.S") +endif() + +idf_component_register(SRCS "${c_srcs}" + "${s3_srcs}" + INCLUDE_DIRS "include" "src/common") + +if(CONFIG_IDF_TARGET_ESP32S3) + target_compile_options(${COMPONENT_LIB} PRIVATE -mlongcalls -fno-unroll-loops -O2 -Wno-unused-function) +else() + target_compile_options(${COMPONENT_LIB} PRIVATE -O2 -Wno-unused-function) +endif() diff --git a/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/CONTRIBUTING.md b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/CONTRIBUTING.md new file mode 100644 index 0000000..b541db7 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/CONTRIBUTING.md @@ -0,0 +1,38 @@ +# Contributing + +Contributions to ESP-NN project in the form of pull requests, bug reports, and feature requests are welcome! + +This document covers various topics related to contributions to the ESP-NN projects. Please read it if you plan to submit a PR! + +## CLA + +We require accepting the contributor's license agreement for all pull requests. When opening a pull request the first time you will be prompted to sign the CLA by the [CLA Assistant](https://cla-assistant.io/) service. 
+ +## Large-scale Changes + +If you'd like to propose a change to the existing APIs or a large-scale refactoring of the implementation, we recommend opening an issue first to discuss this. + +## Updating the Benchmarks Table + +The benchmarks table in [README.md](README.md) contains benchmarks for ESP32-S3. The benchmarks are collected by running the app in [test_app](test_app/) directory. Please update this table if you have changed the implementations of some of the functions or added the new ones. + +## Releasing a new version + +Maintainers should follow the steps below to release a new version of ESP-NN component. Assuming the new version is `vX.Y.Z`: + +1. Ensure you are on the latest `master` branch: + ```bash + git checkout master + git pull --ff-only origin master + ``` +1. Create the new tag: + ```bash + git tag -s -a -m "vX.Y.Z" vX.Y.Z + ``` +1. Push the tag and the branch to the internal repository: + ```bash + git push origin vX.Y.Z + ``` +1. CI will automatically push the tag to Github and will upload the new version to the IDF Component Registry. +1. Go to https://github.com/espressif/esp-nn/releases and create a release from the tag vX.Y.Z. +1. Write the release notes and publish the release. diff --git a/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/Kconfig.projbuild b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/Kconfig.projbuild new file mode 100644 index 0000000..a146305 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/Kconfig.projbuild @@ -0,0 +1,29 @@ +menu "ESP-NN" + +choice NN_OPTIMIZATIONS + bool "Optimization for nn functions" + default NN_OPTIMIZED + help + Use ANSI-C versions for verification and debug purpose. + Optimisations are automatically picked up for a chipset. + For ESP32-S3, assembly optimisations are selected. + For other platforms(viz., ESP32, ESP32-C3), generic optimisations are used. + +config NN_ANSI_C + bool "ANSI C" + help + ANSI C versions for verification and debug purposes. +config NN_OPTIMIZED + bool "Optimized versions" + help + Optimisations are automatically picked up for a chipset. + For ESP32-S3, assembly optimisations are selected. + For other platforms(viz., ESP32, ESP32-C3), generic optimisations are used. +endchoice + +config NN_OPTIMIZATIONS + int + default 0 if NN_ANSI_C + default 1 if NN_OPTIMIZED + +endmenu diff --git a/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/LICENSE b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/LICENSE new file mode 100644 index 0000000..d645695 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. 
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/README.md b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/README.md new file mode 100644 index 0000000..2efde15 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/README.md @@ -0,0 +1 @@ +Internal Edge Impulse fork of ESP-NN. 
Derived from https://github.com/edgeimpulse/esp-nn/commit/6b3ef8e226a05554a6d874f6456f5ca1771c01c2. diff --git a/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/idf_component.yml b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/idf_component.yml new file mode 100644 index 0000000..b90ac5e --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/idf_component.yml @@ -0,0 +1,11 @@ +description: Optimized NN (Neural Network) functions for Espressif chips +url: https://github.com/espressif/esp-nn +repository: https://github.com/espressif/esp-nn.git +issues: https://github.com/espressif/esp-nn/issues +dependencies: + idf: + version: ">=4.2" +files: + exclude: + - test_app + - tests diff --git a/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/include/esp_nn.h b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/include/esp_nn.h new file mode 100644 index 0000000..3d2ef30 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/include/esp_nn.h @@ -0,0 +1,46 @@ +// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#if defined(EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN) +// select apt optimisations +#ifdef CONFIG_IDF_TARGET_ESP32S3 +#define ARCH_ESP32_S3 1 +#endif +#ifdef CONFIG_IDF_TARGET_ESP32 +#define ARCH_ESP32 1 +#endif +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* reference kernels included by default */ +#include "esp_nn_ansi_headers.h" + +#if defined(EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN) +#if defined(ARCH_ESP32_S3) +#include "esp_nn_esp32s3.h" +#else // for other platforms use generic optimisations +#include "esp_nn_generic_opt.h" +#endif // #if defined(ARCH_ESP32_S3) +#else +#include "esp_nn_ansi_c.h" +#endif + +#ifdef __cplusplus +} +#endif diff --git a/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/include/esp_nn_ansi_c.h b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/include/esp_nn_ansi_c.h new file mode 100644 index 0000000..8279ebe --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/include/esp_nn_ansi_c.h @@ -0,0 +1,47 @@ +// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/** + * @file Header definitions to include for ANSI C versions. + * These are just typedefs to pick up ANSI versions. 
+ */ + +#pragma once + +#include "esp_nn_defs.h" +#include "esp_nn_ansi_headers.h" + +#define esp_nn_add_elementwise_s8 esp_nn_add_elementwise_s8_ansi +#define esp_nn_mul_elementwise_s8 esp_nn_mul_elementwise_s8_ansi + +#define esp_nn_depthwise_conv_s8 esp_nn_depthwise_conv_s8_ansi + +#define esp_nn_conv_s8 esp_nn_conv_s8_ansi + +#define esp_nn_get_conv_scratch_size esp_nn_get_conv_scratch_size_ansi +#define esp_nn_set_conv_scratch_buf esp_nn_set_conv_scratch_buf_ansi + +#define esp_nn_get_depthwise_conv_scratch_size esp_nn_get_depthwise_conv_scratch_size_ansi +#define esp_nn_set_depthwise_conv_scratch_buf esp_nn_set_depthwise_conv_scratch_buf_ansi + +#define esp_nn_relu6_s8 esp_nn_relu6_s8_ansi + +#define esp_nn_avg_pool_s8 esp_nn_avg_pool_s8_ansi +#define esp_nn_max_pool_s8 esp_nn_max_pool_s8_ansi + +#define esp_nn_fully_connected_s8 esp_nn_fully_connected_s8_ansi + +#define esp_nn_get_softmax_scratch_size esp_nn_get_softmax_scratch_size_ansi +#define esp_nn_set_softmax_scratch_buf esp_nn_set_softmax_scratch_buf_ansi +#define esp_nn_softmax_s8 esp_nn_softmax_s8_ansi diff --git a/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/include/esp_nn_ansi_headers.h b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/include/esp_nn_ansi_headers.h new file mode 100644 index 0000000..52ebb68 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/include/esp_nn_ansi_headers.h @@ -0,0 +1,309 @@ +// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
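Editor's note: the block of #define aliases above is how backend selection actually works. esp_nn.h decides at compile time which header to pull in (ESP32-S3 kernels, generic optimisations, or this plain ANSI C fallback), and each of those headers maps the generic esp_nn_* names onto a concrete kernel through a macro alias (they are macro aliases rather than typedefs). Application code therefore only ever calls the generic name. The sketch below is purely illustrative; it assumes the ESP-NN include directory is on the include path and that esp_nn.h has already picked a backend, so esp_nn_relu6_s8 resolves to esp_nn_relu6_s8_ansi here or to an optimised kernel elsewhere.

    #include <stdint.h>
    #include "esp_nn.h"

    /* Clamp an int8 activation buffer to [0, 6] in place. The generic name is a
     * macro alias; which implementation runs depends on the header esp_nn.h chose. */
    static void clamp_activations(int8_t *buf, uint16_t len)
    {
        esp_nn_relu6_s8(buf, len);
    }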
+ +#pragma once + +/** + * @file Header definitions to include for esp_nn reference functions + */ + +#include "esp_nn_defs.h" +/************************** Basic math functions ****************************/ + +/** + * @brief elementwise addition + * + * @note inputs type: int8_t, output: int8_t + * input offsets: although int32_t, they are contained in 8 bits [-128, 127] + * + * shift values are expected to be <= 0 + */ +void esp_nn_add_elementwise_s8_ansi(const int8_t *input1_data, + const int8_t *input2_data, + const int32_t input1_offset, + const int32_t input2_offset, + const int32_t input1_mult, + const int32_t input2_mult, + const int32_t input1_shift, + const int32_t input2_shift, + const int32_t left_shift, + int8_t *output, + const int32_t out_offset, + const int32_t out_mult, + const int32_t out_shift, + const int32_t activation_min, + const int32_t activation_max, + const int32_t size); +/** + * @brief elementwise multiplication + * + * @note inputs type: int8_t, output: int8_t + * input offsets: although int32_t, they are contained in 8 bits [-128, 127] + * + * output shift is expected to be <= 0 + */ +void esp_nn_mul_elementwise_s8_ansi(const int8_t *input1_data, + const int8_t *input2_data, + const int32_t input1_offset, + const int32_t input2_offset, + int8_t *output, + const int32_t out_offset, + const int32_t out_mult, + const int32_t out_shift, + const int32_t activation_min, + const int32_t activation_max, + const int32_t size); + + +/************************** Convolution functions *****************************/ + +/** + * @brief depthwise convolution per channel + * + * @note inputs type: int8_t, output: int8_t + * Version used in tflite is per channel. + * This version follows the same footsprints. + * Meaning, it has per out_channel shift and multiplier for + * requantization + * + * optimization notes: Though input_offset is int32 type, + * offset values are contained in 8 bits [-128, 127] + */ +void esp_nn_depthwise_conv_s8_ansi(const data_dims_t *input_dims, + const int8_t *input_data, + const data_dims_t *filter_dims, + const int8_t *filter_data, + const int32_t *bias, + const data_dims_t *output_dims, + int8_t *out_data, + const dw_conv_params_t *conv_params, + const quant_data_t *quant_data); + +/** + * @brief 2d-convolution channelwise + * + * @note operation: result += (input + offset) * filter + * + * inputs type: int8_t, output: int8_t + * input offsets: although int32_t, they are contained in 8 bits [-128, 127] + */ +void esp_nn_conv_s8_ansi(const data_dims_t *input_dims, + const int8_t *input_data, + const data_dims_t *filter_dims, + const int8_t *filter_data, + const int32_t *bias, + const data_dims_t *output_dims, + int8_t *out_data, + const conv_params_t *conv_params, + const quant_data_t *quant_data); + +int esp_nn_get_conv_scratch_size_ansi(const data_dims_t *input_dims, + const data_dims_t *filter_dims, + const data_dims_t *output_dims, + const conv_params_t *conv_params); +void esp_nn_set_conv_scratch_buf_ansi(const void *buf); + +int esp_nn_get_depthwise_conv_scratch_size_ansi(const data_dims_t *input_dims, + const data_dims_t *filter_dims, + const data_dims_t *output_dims, + const dw_conv_params_t *conv_params); +void esp_nn_set_depthwise_conv_scratch_buf_ansi(const void *buf); + +/************************** Activation functions *****************************/ + +/** + * @brief relu6 + * + * @note inout: int8_t + */ +void esp_nn_relu6_s8_ansi(int8_t *data, uint16_t size); + +/************************** Pooling functions 
*****************************/ + + +/** + * @brief max_pool + * + * @note inputs type: int8_t, output: int8_t + * input offsets: although int32_t, they are contained in 8 bits [-128, 127] + */ +void esp_nn_max_pool_s8_ansi(const int8_t *input, + const uint16_t input_wd, + const uint16_t input_ht, + int8_t *output, + const uint16_t output_wd, + const uint16_t output_ht, + const uint16_t stride_wd, + const uint16_t stride_ht, + const uint16_t filter_wd, + const uint16_t filter_ht, + const uint16_t pad_wd, + const uint16_t pad_ht, + const int32_t activation_min, + const int32_t activation_max, + const uint16_t channels); + +/** + * @brief avg_pool + * + * @note inputs type: int8_t, output: int8_t + * input offsets: although int32_t, they are contained in 8 bits [-128, 127] + */ +void esp_nn_avg_pool_s8_ansi(const int8_t *input, + const uint16_t input_wd, + const uint16_t input_ht, + int8_t *output, + const uint16_t output_wd, + const uint16_t output_ht, + const uint16_t stride_wd, + const uint16_t stride_ht, + const uint16_t filter_wd, + const uint16_t filter_ht, + const uint16_t pad_wd, + const uint16_t pad_ht, + const int32_t activation_min, + const int32_t activation_max, + const uint16_t channels); + + +/************************** Fully connected functions ***********************/ + +/** + * @brief fully connected + * + * @note inputs type: int8_t, output: int8_t + * input offsets: although int32_t, they are contained in 8 bits [-128, 127] + */ +void esp_nn_fully_connected_s8_ansi(const int8_t *input_data, + const int32_t input_offset, + const uint16_t row_len, + const int8_t *filter_data, + const int32_t filter_offset, + const int32_t *bias, + int8_t *out_data, + const uint16_t out_channels, + const int32_t out_offset, + const int32_t out_shift, + const int32_t out_mult, + const int32_t activation_min, + const int32_t activation_max); + +/** + * @brief Get scratch buffer size needed by softmax function + * + * @param width + * @param height + * @return size in bytes + * + * @note buffer must be 4 byte aligned + */ +int32_t esp_nn_get_softmax_scratch_size_ansi(const int32_t width, const int32_t height); + +/* ANSI C function to be hooked up when optimised version needed */ +int32_t esp_nn_get_softmax_scratch_size_opt(const int32_t width, const int32_t height); + +/** + * @brief Set scratch buffer to be used by softmax function + * + * @param buffer this can be NULL if one needs to unset it + * must be aligned to 4 bytes + */ +void esp_nn_set_softmax_scratch_buf_ansi(void *buffer); + +/** + * @brief reference softmax function + * + * @note inputs type: int8_t, output: int8_t + */ +void esp_nn_softmax_s8_ansi(const int8_t *input_data, + const int32_t height, + const int32_t width, + const int32_t mult, + const int32_t shift, + const int32_t diff_min, + int8_t *output_data); + + +//////////////////////////// Generic optimisations ///////////////////////////// + +/************************** Convolution functions *****************************/ + +/** + * @brief 2d-convolution channelwise optimized version + * + * @note operation: result += (input + offset) * filter + * + * inputs type: int8_t, output: int8_t + * input offsets: although int32_t, they are contained in 8 bits [-128, 127] + */ +void esp_nn_conv_s8_opt(const data_dims_t *input_dims, + const int8_t *input_data, + const data_dims_t *filter_dims, + const int8_t *filter_data, + const int32_t *bias, + const data_dims_t *output_dims, + int8_t *out_data, + const conv_params_t *conv_params, + const quant_data_t *quant_data); + +/** + * 
@brief depthwise convolution per channel optimized version + * + * @note inputs type: int8_t, output: int8_t + * Version used in tflite is per channel. + * This version follows the same footsprints. + * Meaning, it has per out_channel shift and multiplier for + * requantization + * + * optimization notes: Though input_offset is int32 type, + * offset values are contained in 8 bits [-128, 127] + */ +void esp_nn_depthwise_conv_s8_opt(const data_dims_t *input_dims, + const int8_t *input_data, + const data_dims_t *filter_dims, + const int8_t *filter_data, + const int32_t *bias, + const data_dims_t *output_dims, + int8_t *out_data, + const dw_conv_params_t *conv_params, + const quant_data_t *quant_data); + +int esp_nn_get_conv_scratch_size_opt(const data_dims_t *input_dims, + const data_dims_t *filter_dims, + const data_dims_t *output_dims, + const conv_params_t *conv_params); +void esp_nn_set_conv_scratch_buf_opt(const void *buf); + +int esp_nn_get_depthwise_conv_scratch_size_opt(const data_dims_t *input_dims, + const data_dims_t *filter_dims, + const data_dims_t *output_dims, + const dw_conv_params_t *conv_params); +void esp_nn_set_depthwise_conv_scratch_buf_opt(const void *buf); + +/* ANSI C function to be hooked up when optimised version needed */ +void esp_nn_set_softmax_scratch_buf_opt(void *buffer); + +/** + * @brief optimised version of softmax function + * + * @note the function uses extra buffer (4 * width bytes) + * hence, scratch buffers must be set before calling this. + */ +void esp_nn_softmax_s8_opt(const int8_t *input_data, + const int32_t height, + const int32_t width, + const int32_t mult, + const int32_t shift, + const int32_t diff_min, + int8_t *output_data); diff --git a/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/include/esp_nn_defs.h b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/include/esp_nn_defs.h new file mode 100644 index 0000000..756d8e6 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/include/esp_nn_defs.h @@ -0,0 +1,83 @@ +// Copyright 2022 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
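Editor's note: the softmax declarations above spell out a small protocol: ask the backend how much scratch memory it needs for the given width and height, register a 4-byte-aligned buffer with esp_nn_set_softmax_scratch_buf, and only then call esp_nn_softmax_s8 (the optimised variant uses roughly 4 * width bytes of scratch and must not be called without it). The sketch below is a hedged illustration of that flow using the generic macro names; the quantisation parameters (mult, shift, diff_min) are placeholders that would normally come from the model's quantisation data, and malloc stands in for whatever allocator the port provides.

    #include <stdint.h>
    #include <stdlib.h>
    #include "esp_nn.h"

    /* Run an int8 softmax over `height` rows of `width` values each. */
    int run_softmax_s8(const int8_t *in, int8_t *out,
                       int32_t height, int32_t width,
                       int32_t mult, int32_t shift, int32_t diff_min)
    {
        int32_t scratch_size = esp_nn_get_softmax_scratch_size(width, height);
        void *scratch = NULL;

        if (scratch_size > 0) {
            scratch = malloc(scratch_size);   /* must end up 4-byte aligned */
            if (scratch == NULL) {
                return -1;
            }
        }
        esp_nn_set_softmax_scratch_buf(scratch); /* NULL is allowed, to unset */
        esp_nn_softmax_s8(in, height, width, mult, shift, diff_min, out);
        free(scratch);
        return 0;
    }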
+ +#pragma once + +#include + +/** + * @brief structure to club data dims + * this structure can be used for input, output and filter + */ +typedef struct data_dims { + int32_t width; + int32_t height; + int32_t channels; + + int32_t extra; // can be used as batch or any other param +} data_dims_t; + +/** + * @brief 2d data structure (width, height) + * + */ +typedef struct data_2d { + int32_t width; + int32_t height; +} data_2d_t; + +/** + * @brief min/max activation + */ +typedef struct act_params { + int32_t min; + int32_t max; +} act_params_t; + +/** + * @brief per channel quant data + * + * @note number of shift and mult elements are equal to output channels + */ +typedef struct quant_data { + int32_t *shift; + int32_t *mult; +} quant_data_t; + +/** + * @brief params specific to convolution 2d + * + */ +typedef struct conv_params { + int32_t in_offset; + int32_t out_offset; + data_2d_t stride; + data_2d_t padding; + data_2d_t dilation; + act_params_t activation; +} conv_params_t; + +/** + * @brief params specific to depthwise convolution 2d + * + */ +typedef struct dw_conv_params { + int32_t in_offset; + int32_t out_offset; + int32_t ch_mult; // channel multiplier. (in_ch * ch_mult = out_ch) + data_2d_t stride; + data_2d_t padding; + data_2d_t dilation; + act_params_t activation; +} dw_conv_params_t; diff --git a/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/include/esp_nn_esp32s3.h b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/include/esp_nn_esp32s3.h new file mode 100644 index 0000000..0f52c94 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/include/esp_nn_esp32s3.h @@ -0,0 +1,231 @@ +// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
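Editor's note: the structs defined in esp_nn_defs.h above form the calling convention for the convolution kernels: tensor shapes go in data_dims_t, strides/padding/dilation and the activation clamp in conv_params_t, and the per-output-channel requantisation multipliers and shifts in quant_data_t. The sketch below only illustrates how a caller might fill them in before asking the selected backend how much scratch memory a convolution needs; every numeric value is a placeholder rather than something taken from a real model.

    #include <stdint.h>
    #include "esp_nn.h"

    /* Query the scratch size the selected conv kernel wants for these shapes. */
    int conv_scratch_bytes_example(void)
    {
        data_dims_t input_dims  = { .width = 32, .height = 32, .channels = 3, .extra = 1 };
        data_dims_t filter_dims = { .width = 3,  .height = 3,  .channels = 3, .extra = 0 };
        data_dims_t output_dims = { .width = 30, .height = 30, .channels = 8, .extra = 1 };

        conv_params_t conv_params = {
            .in_offset  = 5,                   /* kept within [-128, 127] as the kernel docs note */
            .out_offset = -5,
            .stride     = { .width = 1, .height = 1 },
            .padding    = { .width = 0, .height = 0 },
            .dilation   = { .width = 1, .height = 1 },
            .activation = { .min = -128, .max = 127 },
        };

        return esp_nn_get_conv_scratch_size(&input_dims, &filter_dims,
                                            &output_dims, &conv_params);
    }

A buffer of (at least) the returned size would then be registered with esp_nn_set_conv_scratch_buf before esp_nn_conv_s8 is invoked.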
+ +/** + * @file Header definitions to include for esp_nn optimized functions for + * the ESP32-S3 platform + */ + +#pragma once + +#include "esp_nn_defs.h" +#include "esp_nn_ansi_headers.h" + +/************************** Basic math functions *****************************/ + + +/** + * @brief elementwise addition + * + * @note inputs type: int8_t, output: int8_t + * input offsets: although int32_t, they are contained in 8 bits [-128, 127] + * + * shift values are expected to be <= 0 + */ +void esp_nn_add_elementwise_s8_esp32s3(const int8_t *input1_data, + const int8_t *input2_data, + const int32_t input1_offset, + const int32_t input2_offset, + const int32_t input1_mult, + const int32_t input2_mult, + const int32_t input1_shift, + const int32_t input2_shift, + const int32_t left_shift, + int8_t *output, + const int32_t out_offset, + const int32_t out_mult, + const int32_t out_shift, + const int32_t activation_min, + const int32_t activation_max, + const int32_t size); + +/** + * @brief elementwise multiplication + * + * @note inputs type: int8_t, output: int8_t + * input offsets: although int32_t, they are contained in 8 bits [-128, 127] + * + * output shift is expected to be <= 0 + */ +void esp_nn_mul_elementwise_s8_esp32s3(const int8_t *input1_data, + const int8_t *input2_data, + const int32_t input1_offset, + const int32_t input2_offset, + int8_t *output, + const int32_t out_offset, + const int32_t out_mult, + const int32_t out_shift, + const int32_t activation_min, + const int32_t activation_max, + const int32_t size); + + +/************************** Convolution functions *****************************/ + +/** + * @brief depthwise convolution per channel + * + * @note inputs type: int8_t, output: int8_t + * Version used in tflite is per channel. + * This version follows the same footsprints. 
+ * Meaning, it has per out_channel shift and multiplier for + * requantization + * + * optimization notes: Though input_offset is int32 type, + * offset values are contained in 8 bits [-128, 127] + */ +void esp_nn_depthwise_conv_s8_esp32s3(const data_dims_t *input_dims, + const int8_t *input_data, + const data_dims_t *filter_dims, + const int8_t *filter_data, + const int32_t *bias, + const data_dims_t *output_dims, + int8_t *output_data, + const dw_conv_params_t *conv_params, + const quant_data_t *quant_data); + +/** + * @brief 2d - convolution channelwise + * + * @note operation: result += (input + offset) * filter + * + * inputs type: int8_t, output: int8_t + * input offsets: although int32_t, they are contained in 8 bits [-128, 127] + */ +void esp_nn_conv_s8_esp32s3(const data_dims_t *input_dims, + const int8_t *input_data, + const data_dims_t *filter_dims, + const int8_t *filter_data, + const int32_t *bias, + const data_dims_t *output_dims, + int8_t *output_data, + const conv_params_t *conv_params, + const quant_data_t *quant_data); + +int esp_nn_get_conv_scratch_size_esp32s3(const data_dims_t *input_dims, + const data_dims_t *filter_dims, + const data_dims_t *output_dims, + const conv_params_t *conv_params); +void esp_nn_set_conv_scratch_buf_esp32s3(const void *buf); + +int esp_nn_get_depthwise_conv_scratch_size_esp32s3(const data_dims_t *input_dims, + const data_dims_t *filter_dims, + const data_dims_t *output_dims, + const dw_conv_params_t *conv_params); +void esp_nn_set_depthwise_conv_scratch_buf_esp32s3(const void *buf); + +/************************** Pooling functions *****************************/ + +/** + * @brief max_pool + * + * @note inputs type: int8_t, output: int8_t + * input offsets: although int32_t, they are contained in 8 bits [-128, 127] + */ +void esp_nn_max_pool_s8_esp32s3(const int8_t *input, + const uint16_t input_wd, + const uint16_t input_ht, + int8_t *output, + const uint16_t output_wd, + const uint16_t output_ht, + const uint16_t stride_wd, + const uint16_t stride_ht, + const uint16_t filter_wd, + const uint16_t filter_ht, + const uint16_t pad_wd, + const uint16_t pad_ht, + const int32_t activation_min, + const int32_t activation_max, + const uint16_t channels); + +/** + * @brief avg_pool + * + * @note inputs type: int8_t, output: int8_t + * input offsets: although int32_t, they are contained in 8 bits [-128, 127] + */ +void esp_nn_avg_pool_s8_esp32s3(const int8_t *input, + const uint16_t input_wd, + const uint16_t input_ht, + int8_t *output, + const uint16_t output_wd, + const uint16_t output_ht, + const uint16_t stride_wd, + const uint16_t stride_ht, + const uint16_t filter_wd, + const uint16_t filter_ht, + const uint16_t pad_wd, + const uint16_t pad_ht, + const int32_t activation_min, + const int32_t activation_max, + const uint16_t channels); + + +/************************** Fully connected functions *****************************/ + +/** + * @brief fully connected + * + * @note inputs type: int8_t, output: int8_t + * input offsets: although int32_t, they are contained in 8 bits [-128, 127] + * + * Current version works only on aligned input. + * row_len and channels should both be multiple of 8. 
+ */ +void esp_nn_fully_connected_s8_esp32s3(const int8_t *input_data, + const int32_t input_offset, + const uint16_t row_len, + const int8_t *filter_data, + const int32_t filter_offset, + const int32_t *bias, + int8_t *out_data, + const uint16_t out_channels, + const int32_t out_offset, + const int32_t out_shift, + const int32_t out_mult, + const int32_t activation_min, + const int32_t activation_max); + +/** + * @brief relu6 + * + * @note inout: int8_t + */ +void esp_nn_relu6_s8_esp32s3(int8_t *data, uint16_t size); + +/********************** function defines ***************************/ + +#define esp_nn_add_elementwise_s8 esp_nn_add_elementwise_s8_esp32s3 +#define esp_nn_mul_elementwise_s8 esp_nn_mul_elementwise_s8_esp32s3 + +#define esp_nn_depthwise_conv_s8 esp_nn_depthwise_conv_s8_esp32s3 + +#define esp_nn_get_conv_scratch_size esp_nn_get_conv_scratch_size_esp32s3 +#define esp_nn_set_conv_scratch_buf esp_nn_set_conv_scratch_buf_esp32s3 + +#define esp_nn_get_depthwise_conv_scratch_size esp_nn_get_depthwise_conv_scratch_size_esp32s3 +#define esp_nn_set_depthwise_conv_scratch_buf esp_nn_set_depthwise_conv_scratch_buf_esp32s3 + +#define esp_nn_conv_s8 esp_nn_conv_s8_esp32s3 + +#define esp_nn_relu6_s8 esp_nn_relu6_s8_esp32s3 + +#define esp_nn_avg_pool_s8 esp_nn_avg_pool_s8_esp32s3 +#define esp_nn_max_pool_s8 esp_nn_max_pool_s8_esp32s3 + +#define esp_nn_fully_connected_s8 esp_nn_fully_connected_s8_esp32s3 + +#define esp_nn_get_softmax_scratch_size esp_nn_get_softmax_scratch_size_opt +#define esp_nn_set_softmax_scratch_buf esp_nn_set_softmax_scratch_buf_opt +#define esp_nn_softmax_s8 esp_nn_softmax_s8_opt diff --git a/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/include/esp_nn_generic_opt.h b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/include/esp_nn_generic_opt.h new file mode 100644 index 0000000..136cba5 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/include/esp_nn_generic_opt.h @@ -0,0 +1,47 @@ +// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/** + * @file Header definitions to include for esp_nn generic optimisations + * For functions which not having optimisations, _ansi versions are picked. 
+ */ + +#pragma once + +#include "esp_nn_defs.h" +#include "esp_nn_ansi_headers.h" + +#define esp_nn_add_elementwise_s8 esp_nn_add_elementwise_s8_ansi +#define esp_nn_mul_elementwise_s8 esp_nn_mul_elementwise_s8_ansi + +#define esp_nn_depthwise_conv_s8 esp_nn_depthwise_conv_s8_opt + +#define esp_nn_conv_s8 esp_nn_conv_s8_opt + +#define esp_nn_get_conv_scratch_size esp_nn_get_conv_scratch_size_opt +#define esp_nn_set_conv_scratch_buf esp_nn_set_conv_scratch_buf_opt + +#define esp_nn_get_depthwise_conv_scratch_size esp_nn_get_depthwise_conv_scratch_size_opt +#define esp_nn_set_depthwise_conv_scratch_buf esp_nn_set_depthwise_conv_scratch_buf_opt + +#define esp_nn_relu6_s8 esp_nn_relu6_s8_ansi + +#define esp_nn_avg_pool_s8 esp_nn_avg_pool_s8_ansi +#define esp_nn_max_pool_s8 esp_nn_max_pool_s8_ansi + +#define esp_nn_fully_connected_s8 esp_nn_fully_connected_s8_ansi + +#define esp_nn_get_softmax_scratch_size esp_nn_get_softmax_scratch_size_opt +#define esp_nn_set_softmax_scratch_buf esp_nn_set_softmax_scratch_buf_opt +#define esp_nn_softmax_s8 esp_nn_softmax_s8_opt diff --git a/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/activation_functions/esp_nn_relu_ansi.c b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/activation_functions/esp_nn_relu_ansi.c new file mode 100644 index 0000000..2ac260f --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/activation_functions/esp_nn_relu_ansi.c @@ -0,0 +1,34 @@ +#include "edge-impulse-sdk/classifier/ei_classifier_config.h" +#if EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN +// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +#include + +void esp_nn_relu6_s8_ansi(int8_t *data, uint16_t size) +{ + int32_t i; + + for (i = 0; i < size; i++) { + int32_t ip = data[i]; + + ip = max(ip, 0); + data[i] = min(ip, 6); + } +} + +#endif // EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN diff --git a/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/activation_functions/esp_nn_relu_s8_esp32s3.S b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/activation_functions/esp_nn_relu_s8_esp32s3.S new file mode 100644 index 0000000..b020920 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/activation_functions/esp_nn_relu_s8_esp32s3.S @@ -0,0 +1,118 @@ +#include "edge-impulse-sdk/classifier/ei_classifier_config.h" +#if EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3 +// Copyright 2021-2022 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + + + .text + .align 4 + .literal_position + +# in place relu6 function. a2: data, a3: size + # Program Unit: esp_nn_relu6_s8_esp32s3 + .type esp_nn_relu6_s8_esp32s3, @function + .align 4 + .global esp_nn_relu6_s8_esp32s3 + +esp_nn_relu6_s8_esp32s3: + entry a1,48 # + mov.n a9,a2 # [0], data + mov.n a7,a3 # [1], size + + // process multiple of 16 + movi.n a4,6 # [4] + s8i a4,a1,0 # [5] six + addi a10,a3,-7 # [2] + ee.vldbc.8 q1,a1 # [6] id:72 six+0x0 + blti a3,16,.Lt_0_5634 # [7] + + srai a8,a3,4 # [0] + ee.zero.q q2 # [1] + loopgtz a8,.LBB37_esp_nn_relu6_s8_esp32s3 # [3] + + ee.vld.128.ip q0,a2,0 # [0*II+0] id:73 + ee.vmax.s8 q0,q0,q2 # [0*II+2] + ee.vmin.s8 q0,q0,q1 # [0*II+3] + ee.vst.128.ip q0,a2,16 # [0*II+4] id:74 +.LBB37_esp_nn_relu6_s8_esp32s3: # 0x34 + + slli a8,a8,4 # [0] + + // remaining multiple of 8 data + bge a8,a10,.Lt_0_3586 # [1] + +.Lt_0_3842: # 0x3a + sub a6,a7,a8 # [0] + srai a6,a6,3 # [1] + loopgtz a6,.LBB52_esp_nn_relu6_s8_esp32s3 # [2] + + ee.vld.l.64.ip q0,a2,0 # [0*II+0] id:75 + ee.vmax.s8 q0,q0,q2 # [0*II+2] + ee.vmin.s8 q0,q0,q1 # [0*II+3] + ee.vst.l.64.ip q0,a2,8 # [0*II+4] id:76 + +.LBB52_esp_nn_relu6_s8_esp32s3: # 0x4f + addx8 a8,a6,a8 # [0] + +.Lt_0_3586: # 0x52 + // process leftover + bge a8,a7,.Lt_0_6402 # [0] + +.Lt_0_4866: # 0x55 + movi.n a5,0 # [0] + sub a3,a7,a8 # [1] + add.n a2,a8,a9 # [2] + l8ui a6,a2,0 # [3] id:78 + addi.n a3,a3,-1 # [4] + sext a6,a6,7 + max a6,a5,a6 # [6] + min a6,a4,a6 # [7] + s8i a6,a2,0 # [8] id:79 + + loopgtz a3,.LBB67_esp_nn_relu6_s8_esp32s3 # [9] + + l8ui a3,a2,1 # [0*II+0] id:78 + addi.n a2,a2,1 # [1*II+1] + sext a3,a3,7 + max a3,a5,a3 # [0*II+3] + min a3,a4,a3 # [0*II+4] + s8i a3,a2,0 # [0*II+5] id:79 +.LBB67_esp_nn_relu6_s8_esp32s3: # 0x81 + +.Lt_0_6402: # 0x83 + retw.n # [0] + +.Lt_0_5634: # 0x85 + blti a10,1,.Lt_0_5890 # [0] + + movi.n a8,0 # [0] + ee.zero.q q2 # [1] + j .Lt_0_3842 # [2] + +.Lt_0_5890: # 0x90 + beqz.n a3,.Lt_0_6402 # [0] + + movi.n a8,0 # [0] + j .Lt_0_4866 # [1] + + .size esp_nn_relu6_s8_esp32s3, . - esp_nn_relu6_s8_esp32s3 + +#elif defined(WIO_TERMINAL) +// dummy code, added for old ARM toolchain +.syntax unified +.thumb +.cpu cortex-m0 + +.section .text +#endif // EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3 diff --git a/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/basic_math/esp_nn_add_ansi.c b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/basic_math/esp_nn_add_ansi.c new file mode 100644 index 0000000..b123d62 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/basic_math/esp_nn_add_ansi.c @@ -0,0 +1,101 @@ +#include "edge-impulse-sdk/classifier/ei_classifier_config.h" +#if EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN +// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include + +#include + +void esp_nn_add_elementwise_u8_ansi(const uint8_t *input1_data, + const uint8_t *input2_data, + const int32_t input1_offset, + const int32_t input2_offset, + const int32_t input1_mult, + const int32_t input2_mult, + const int32_t input1_shift, + const int32_t input2_shift, + const int32_t left_shift, + uint8_t *output, + const int32_t out_offset, + const int32_t out_mult, + const int32_t out_shift, + const int32_t activation_min, + const int32_t activation_max, + const int32_t size) +{ + for (int i = 0; i < size; i++) { + int32_t tmp1 = input1_data[i] + input1_offset; + int32_t tmp2 = input2_data[i] + input2_offset; + + tmp1 <<= left_shift; + tmp2 <<= left_shift; + + tmp1 = esp_nn_sat_round_doubling_high_mul(tmp1, input1_mult); + tmp2 = esp_nn_sat_round_doubling_high_mul(tmp2, input2_mult); + + tmp1 = esp_nn_div_by_power_of_two(tmp1, -input1_shift); + tmp2 = esp_nn_div_by_power_of_two(tmp2, -input2_shift); + + int32_t out = tmp1 + tmp2; + out = esp_nn_sat_round_doubling_high_mul(out, out_mult); + out = esp_nn_div_by_power_of_two(out, -out_shift); + out = out + out_offset; + + out = max(activation_min, min(out, activation_max)); + output[i] = (uint8_t) out; + } +} + +void esp_nn_add_elementwise_s8_ansi(const int8_t *input1_data, + const int8_t *input2_data, + const int32_t input1_offset, + const int32_t input2_offset, + const int32_t input1_mult, + const int32_t input2_mult, + const int32_t input1_shift, + const int32_t input2_shift, + const int32_t left_shift, + int8_t *output, + const int32_t out_offset, + const int32_t out_mult, + const int32_t out_shift, + const int32_t activation_min, + const int32_t activation_max, + const int32_t size) +{ + for (int i = 0; i < size; i++) { + int32_t tmp1 = input1_data[i] + input1_offset; + int32_t tmp2 = input2_data[i] + input2_offset; + + tmp1 <<= left_shift; + tmp2 <<= left_shift; + + tmp1 = esp_nn_sat_round_doubling_high_mul(tmp1, input1_mult); + tmp2 = esp_nn_sat_round_doubling_high_mul(tmp2, input2_mult); + + tmp1 = esp_nn_div_by_power_of_two(tmp1, -input1_shift); + tmp2 = esp_nn_div_by_power_of_two(tmp2, -input2_shift); + + int32_t out = tmp1 + tmp2; + out = esp_nn_sat_round_doubling_high_mul(out, out_mult); + out = esp_nn_div_by_power_of_two(out, -out_shift); + out = out + out_offset; + + out = max(activation_min, min(out, activation_max)); + output[i] = (int8_t) out; + } +} + +#endif // EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN diff --git a/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/basic_math/esp_nn_add_s8_esp32s3.S b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/basic_math/esp_nn_add_s8_esp32s3.S new file mode 100644 index 0000000..492254c --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/basic_math/esp_nn_add_s8_esp32s3.S @@ -0,0 +1,638 @@ +#include "edge-impulse-sdk/classifier/ei_classifier_config.h" +#if EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3 +// Copyright 2021-2023 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + + .text + .align 4 + .literal_position + .literal .nudge_val, 1073741824 + + # Program Unit: esp_nn_add_elementwise_s8_esp32s3 + .type esp_nn_add_elementwise_s8_esp32s3, @function + .align 4 + .global esp_nn_add_elementwise_s8_esp32s3 + +esp_nn_add_elementwise_s8_esp32s3: # 0x4 + # temp_neg_out_shift = 0 + # temp_neg_input2_shift = 4 + # temp_neg_input1_shift = 8 + # gra_spill_temp_2 = 12 + # gra_spill_temp_3 = 16 + # gra_spill_temp_4 = 20 + # gra_spill_temp_5 = 24 + # gra_spill_temp_6 = 28 + # gra_spill_temp_7 = 32 + # gra_spill_temp_8 = 36 + # gra_spill_temp_9 = 40 + # gra_spill_temp_10 = 44 + # gra_spill_temp_11 = 48 + # gra_spill_temp_12 = 52 + # gra_spill_temp_13 = 56 + + // a2 : *input1_data + // a3 : *input2_data + // a4 : input1_offset + // a5 : input2_offset + // a6 : input1_mult + // a7 : input2_mult + // On stack: + // 80: input1_shift + // 84: input2_shift + // 88: left_shift + // 92: *output + // 96: out_offset + // 100: out_mult, loaded in `a8` + // 104: out_shift + // 108: activation_min + // 112: activation_max + // 116: size + + entry a1,80 # + s32i.n a4,a1,48 # [10] gra_spill_temp_11, input1_offset + s32i.n a5,a1,52 # [0] gra_spill_temp_12, input2_offset + s32i.n a2,a1,32 # [5] gra_spill_temp_7, input1_data + s32i.n a3,a1,12 # [3] gra_spill_temp_2, input2_data + + l32i a12,a1,116 # [11] id:720 size+0x0 + mov.n a14,a2 # [6] + mov.n a10,a3 # [8] + blti a12,1,.exit # [1] // exit + + l32i a3,a1,80 # [0] id:721 input1_shift+0x0 + l32i a13,a1,84 # [1] id:722 input2_shift+0x0 + l32i a2,a1,104 # [8] id:723 out_shift+0x0 + l32i a8,a1,100 # [1] out_mult + + neg a3,a3 # [12] + neg a13,a13 # [7] + neg a2,a2 # [11] + + s32i.n a3,a1,8 # [12] temp_neg_input1_shift, -input1_shift + s32i.n a13,a1,4 # [7] temp_neg_input2_shift, -input2_shift + s32i.n a2,a1,0 # [16] temp_neg_out_shift, -out_shift + + movi.n a5,1 + addi a9,a3,-1 + ssl a9 + sll a15,a5 + s32i.n a15,a1,16 # gra_spill_temp_3, 1 << (exponent - 1) for input1 + + addi a9,a13,-1 + ssl a9 + sll a15,a5 + s32i.n a15,a1,20 # gra_spill_temp_4, 1 << (exponent - 1) for input2 + + addi a9,a2,-1 + ssl a9 + sll a15,a5 + s32i.n a15,a1,24 # gra_spill_temp_5, 1 << (exponent - 1) for out + + movi.n a2,0 + blti a12,12,.process_leftover # [23] + + // skip to leftover routine if inputs are unaligned + or a9,a14,a10 + extui a9,a9,0,4 + bnez a9,.process_leftover + + l32i a9,a1,92 # [17] id:1279 output+0x0 + + l32i a13,a1,116 # [20] + srai a13,a13,3 # [21] + s32i.n a13,a1,56 # [22] gra_spill_temp_13 + + movi.n a13,8 + s32i.n a13,a1,28 # gra_spill_temp_6, mult_of8 counter + + ee.zero.q q6 # [8] + +.vector_loop: // process 8 values in one go + l32i a15,a1,88 # [6] left_shift + ee.vld.l.64.ip q0,a14,8 # [9] id:729 + s32i.n a9,a1,44 # [10] gra_spill_temp_10, out_ptr + s32i.n a14,a1,40 # [20] gra_spill_temp_9 + wsr.sar a15 # [21] load left shift + + addi.n a15,a1,48 # [14] + ee.vldbc.16 q7,a15 # [21] id:1277 input1_offset + ee.vcmp.lt.s8 q5,q0,q6 # [29] + ee.vzip.8 q0,q5 # [31], 20 bits + ee.vadds.s16 q0,q0,q7 # [34], add offset + ee.vcmp.lt.s16 q2,q0,q6 # [36] + ee.vzip.16 q0,q2 # [39], 32 bits + ee.vsl.32 q0,q0 # [41] left_shift + ee.vsl.32 q2,q2 # [42] left_shift + + l32r a9,.nudge_val # [15], nudge + +// mulhi32 for q0 + ee.movi.32.a q0,a3,2 # [44] + ee.movi.32.a q0,a4,3 # [45] + ee.movi.32.a q0,a14,1 # [46] + ee.movi.32.a q0,a5,0 # [62] + + mulsh a13,a6,a3 # [51] + mull a3,a6,a3 # [53] + + mulsh a12,a6,a4 # [50] + mull a4,a6,a4 # [55] + + mulsh 
a15,a6,a14 # [48] + mull a14,a6,a14 # [49] + + ssai 31 # [47] + + add a3,a3,a9 + saltu a2,a3,a9 + add.n a13,a13,a2 + src a13,a13,a3 + + add a4,a4,a9 + saltu a2,a4,a9 + add.n a12,a12,a2 + src a12,a12,a4 + ee.movi.32.q q0,a13,2 # [62] + + add a14,a14,a9 + saltu a2,a14,a9 + add.n a15,a15,a2 + src a15,a15,a14 + ee.movi.32.q q0,a12,3 # [62] + + mulsh a13,a6,a5 # [51] + mull a5,a6,a5 # [53] + ee.movi.32.q q0,a15,1 # [62] + + add a5,a5,a9 + saltu a2,a5,a9 + add.n a13,a13,a2 + src a13,a13,a5 + ee.movi.32.q q0,a13,0 # [62] + + +// mulhi32 for q2 + ee.movi.32.a q2,a3,2 # [44] + ee.movi.32.a q2,a4,3 # [45] + ee.movi.32.a q2,a14,1 # [46] + ee.movi.32.a q2,a5,0 # [62] + + mulsh a13,a6,a3 # [51] + mull a3,a6,a3 # [53] + + mulsh a12,a6,a4 # [50] + mull a4,a6,a4 # [55] + + mulsh a15,a6,a14 # [48] + mull a14,a6,a14 # [49] + + ssai 31 # [47] + + add a3,a3,a9 + saltu a2,a3,a9 + add.n a13,a13,a2 + src a13,a13,a3 + + add a4,a4,a9 + saltu a2,a4,a9 + add.n a12,a12,a2 + src a12,a12,a4 + ee.movi.32.q q2,a13,2 # [62] + + add a14,a14,a9 + saltu a2,a14,a9 + add.n a15,a15,a2 + src a15,a15,a14 + ee.movi.32.q q2,a12,3 # [62] + + mulsh a13,a6,a5 # [51] + mull a5,a6,a5 # [53] + ee.movi.32.q q2,a15,1 # [62] + + l32i a3,a1,8 # [12] temp_neg_input1_shift, -input1_shift + add a5,a5,a9 + saltu a2,a5,a9 + add.n a13,a13,a2 + src a13,a13,a5 + ee.movi.32.q q2,a13,0 # [62] + + + blti a3,1, .skip_div_by2_in0 + + addi.n a13,a1,16 + ee.vcmp.lt.s32 q1,q0,q6 + ee.vcmp.lt.s32 q3,q2,q6 + ee.vldbc.32 q5,a13 // 1 << (exponent - 1) + wsr.sar a3 // load right_shift + ee.vadds.s32 q0,q0,q1 // subtract 1 `if (val < 0)` + ee.vadds.s32 q2,q2,q3 // subtract 1 `if (val < 0)` + ee.vadds.s32 q0,q0,q5 + ee.vadds.s32 q2,q2,q5 + ee.vsr.32 q0,q0 + ee.vsr.32 q2,q2 + +.skip_div_by2_in0: + + + ee.vld.l.64.ip q1,a10,8 # [11] id:1290 + addi.n a15,a1,52 # [12] + ee.vldbc.16 q7,a15 # [19] id:1278 input2_offset + l32i a15,a1,88 # [6] left_shift + s32i a10,a1,36 # [14] gra_spill_temp_8 + ee.vcmp.lt.s8 q3,q1,q6 # [271] + wsr.sar a15 # [21], load shift for left shift + ee.vzip.8 q1,q3 # [274], 20 bits + ee.vadds.s16 q1,q1,q7 # [281] + ee.vcmp.lt.s16 q3,q1,q6 # [282] + ee.vzip.16 q1,q3 # [283], 32 bits + ee.vsl.32 q1,q1 # [284] + ee.vsl.32 q3,q3 # [285] + + +// mulhi32 for q1 + ee.movi.32.a q1,a3,2 # [44] + ee.movi.32.a q1,a4,3 # [45] + ee.movi.32.a q1,a14,1 # [46] + ee.movi.32.a q1,a5,0 # [62] + + mulsh a13,a7,a3 # [51] + mull a3,a7,a3 # [53] + + mulsh a12,a7,a4 # [50] + mull a4,a7,a4 # [55] + + mulsh a15,a7,a14 # [48] + mull a14,a7,a14 # [49] + + ssai 31 # [47] + + add a3,a3,a9 + saltu a2,a3,a9 + add.n a13,a13,a2 + src a13,a13,a3 + + add a4,a4,a9 + saltu a2,a4,a9 + add.n a12,a12,a2 + src a12,a12,a4 + ee.movi.32.q q1,a13,2 # [62] + + add a14,a14,a9 + saltu a2,a14,a9 + add.n a15,a15,a2 + src a15,a15,a14 + ee.movi.32.q q1,a12,3 # [62] + + mulsh a13,a7,a5 # [51] + mull a5,a7,a5 # [53] + ee.movi.32.q q1,a15,1 # [62] + + add a5,a5,a9 + saltu a2,a5,a9 + add.n a13,a13,a2 + src a13,a13,a5 + ee.movi.32.q q1,a13,0 # [62] + + +// mulhi32 for q3 + ee.movi.32.a q3,a3,2 # [44] + ee.movi.32.a q3,a4,3 # [45] + ee.movi.32.a q3,a14,1 # [46] + ee.movi.32.a q3,a5,0 # [62] + + mulsh a13,a7,a3 # [51] + mull a3,a7,a3 # [53] + + mulsh a12,a7,a4 # [50] + mull a4,a7,a4 # [55] + + mulsh a15,a7,a14 # [48] + mull a14,a7,a14 # [49] + + ssai 31 # [47] + + add a3,a3,a9 + saltu a2,a3,a9 + add.n a13,a13,a2 + src a13,a13,a3 + + add a4,a4,a9 + saltu a2,a4,a9 + add.n a12,a12,a2 + src a12,a12,a4 + ee.movi.32.q q3,a13,2 # [62] + + add a14,a14,a9 + saltu a2,a14,a9 + add.n a15,a15,a2 + src a15,a15,a14 + 
ee.movi.32.q q3,a12,3 # [62] + + mulsh a13,a7,a5 # [51] + mull a5,a7,a5 # [53] + ee.movi.32.q q3,a15,1 # [62] + l32i a14,a1,4 # [7] temp_neg_input2_shift, -input2_shift + + add a5,a5,a9 + saltu a2,a5,a9 + add.n a13,a13,a2 + src a13,a13,a5 + ee.movi.32.q q3,a13,0 # [62] + + // multiplication results: q0-q2 & q1-q3 + + + blti a14,1, .skip_div_by2_in1 + + addi.n a5,a1,20 + ee.vcmp.lt.s32 q4,q1,q6 + ee.vcmp.lt.s32 q5,q3,q6 + ee.vldbc.32 q7,a5 // 1 << (exponent - 1) + wsr.sar a14 // load right_shift + ee.vadds.s32 q4,q4,q7 // subtract 1 `if (val < 0)` + ee.vadds.s32 q5,q5,q7 // subtract 1 `if (val < 0)` + ee.vadds.s32 q1,q1,q4 + ee.vadds.s32 q3,q3,q5 + ee.vsr.32 q1,q1 + ee.vsr.32 q3,q3 + +.skip_div_by2_in1: + + ee.vadds.s32 q0,q0,q1 + ee.vadds.s32 q1,q2,q3 + +// mulhi32 for q0 + ee.movi.32.a q0,a3,2 # [44] + ee.movi.32.a q0,a4,3 # [45] + ee.movi.32.a q0,a14,1 # [46] + ee.movi.32.a q0,a5,0 # [62] + + mulsh a13,a8,a3 # [51] + mull a3,a8,a3 # [53] + + mulsh a12,a8,a4 # [50] + mull a4,a8,a4 # [55] + + mulsh a15,a8,a14 # [48] + mull a14,a8,a14 # [49] + + ssai 31 # [47] + + add a3,a3,a9 + saltu a2,a3,a9 + add.n a13,a13,a2 + src a13,a13,a3 + + add a4,a4,a9 + saltu a2,a4,a9 + add.n a12,a12,a2 + src a12,a12,a4 + ee.movi.32.q q0,a13,2 # [62] + + add a14,a14,a9 + saltu a2,a14,a9 + add.n a15,a15,a2 + src a15,a15,a14 + ee.movi.32.q q0,a12,3 # [62] + + mulsh a13,a8,a5 # [51] + mull a5,a8,a5 # [53] + ee.movi.32.q q0,a15,1 # [62] + + add a5,a5,a9 + saltu a2,a5,a9 + add.n a13,a13,a2 + src a13,a13,a5 + ee.movi.32.q q0,a13,0 # [62] + + +// mulhi32 for q1 + ee.movi.32.a q1,a3,2 # [44] + ee.movi.32.a q1,a4,3 # [45] + ee.movi.32.a q1,a14,1 # [46] + ee.movi.32.a q1,a5,0 # [62] + + mulsh a13,a8,a3 # [51] + mull a3,a8,a3 # [53] + + mulsh a12,a8,a4 # [50] + mull a4,a8,a4 # [55] + + mulsh a15,a8,a14 # [48] + mull a14,a8,a14 # [49] + + ssai 31 # [47] + + add a3,a3,a9 + saltu a2,a3,a9 + add.n a13,a13,a2 + src a13,a13,a3 + + add a4,a4,a9 + saltu a2,a4,a9 + add.n a12,a12,a2 + src a12,a12,a4 + ee.movi.32.q q1,a13,2 # [62] + + add a14,a14,a9 + saltu a2,a14,a9 + add.n a15,a15,a2 + src a15,a15,a14 + ee.movi.32.q q1,a12,3 # [62] + + mulsh a13,a8,a5 # [51] + mull a5,a8,a5 # [53] + ee.movi.32.q q1,a15,1 # [62] + l32i a14,a1,0 # [738] temp_neg_out_shift, -out_shift + + add a5,a5,a9 + saltu a2,a5,a9 + add.n a13,a13,a2 + src a13,a13,a5 + ee.movi.32.q q1,a13,0 # [62] + + + //q0-q1 has output + + blti a14,1,.skip_div_by2_out + addi.n a5,a1,24 + ee.vcmp.lt.s32 q2,q0,q6 + ee.vcmp.lt.s32 q3,q1,q6 + ee.vldbc.32 q5,a5 // 1 << (exponent - 1) + wsr.sar a14 // load right shift + ee.vadds.s32 q0,q0,q2 // subtract 1 `if (val < 0)` + ee.vadds.s32 q1,q1,q3 // subtract 1 `if (val < 0)` + ee.vadds.s32 q0,q0,q5 + ee.vadds.s32 q1,q1,q5 + ee.vsr.32 q0,q0 + ee.vsr.32 q1,q1 + +.skip_div_by2_out: + +// add offset and apply activation + addi a15,a1,96 + ee.vldbc.32 q3,a15 # [809] id:802 out_offset + ee.vadds.s32 q0,q0,q3 # [811] + ee.vadds.s32 q1,q1,q3 # [812] + addi a13,a1,108 + addi a14,a1,112 + ee.vldbc.32 q3,a14 # [813] id:803 activation_max + ee.vmin.s32 q0,q0,q3 # [815] + ee.vmin.s32 q1,q1,q3 # [816] + ee.vldbc.32 q3,a13 # [817] id:804 activation_min + l32i a13,a1,4 # [818] temp_neg_input2_shift + ee.vmax.s32 q1,q1,q3 # [819] + ee.vmax.s32 q0,q0,q3 # [820] + +//pack the data and store + l32i.n a9,a1,44 # [784] gra_spill_temp_10 + ee.vunzip.16 q0,q1 # [821] + ee.vunzip.8 q0,q1 # [822] + l32i.n a13,a1,28 # gra_spill_temp_6, multiple of 12 index + ee.vst.l.64.ip q0,a9,8 # [823] id:805 + l32i a15,a1,116 # [1], size + l32i.n a14,a1,40 # [20] 
gra_spill_temp_9 + l32i.n a10,a1,36 # [14] gra_spill_temp_8 + addi a13,a13,8 + s32i.n a13,a1,28 # gra_spill_temp_6 + bge a15,a13,.vector_loop + + l32i.n a2,a1,56 # [0] gra_spill_temp_13 + +// check for leftover + l32i a10,a1,116 # [1] + slli a2,a2,3 # [2] + bge a2,a10,.exit # [3] // done, exit + +.process_leftover: + l32i.n a3,a1,48 # [1] gra_spill_temp_11 + l32i.n a12,a1,52 # [2] gra_spill_temp_12 + + l32i.n a10,a1,12 # [3] gra_spill_temp_2 + l32i.n a14,a1,32 # [8] gra_spill_temp_7 + add.n a10,a2,a10 # [5] + add.n a14,a2,a14 # [6] + l8ui a14,a14,0 # [7] id:809, input1 + l8ui a10,a10,0 # [12] id:1370, input2 + + sext a14,a14,7 # [9] + sext a10,a10,7 # [10] + add.n a10,a10,a12 # [11] // add offset2 + add.n a14,a14,a3 # [16] // add offset1 + l32i a12,a1,88 # [13] left_shift + + // sat_round_doubling_high_mul step for input1 and input2 + ssl a12 # [15] + sll a10,a10 # [20] + sll a14,a14 # [17] + + l32r a12,.nudge_val # [0], nudge + + // a13,a3 are free, a12: nudge, a6:mult1 + mulsh a13,a14,a6 + mull a9,a14,a6 + ssai 31 + + add a9,a9,a12 + saltu a3,a9,a12 + add.n a13,a13,a3 + src a14,a13,a9 //result in a14 + + mulsh a13,a10,a7 + mull a9,a10,a7 + ssai 31 + + add a9,a9,a12 + saltu a3,a9,a12 + add.n a13,a13,a3 + src a10,a13,a9 //result in a10 + +// divide_by_power_of2_step for input1 (a14), input2 (a10) +// free registers: a13, a12, a9, a3 + + l32i.n a12,a1,8 // -input1_shift + l32i.n a13,a1,4 // -input2_shift + + blti a12,1,.skip_div_by2_in0_remain + l32i.n a3,a1,16 // 1 << (exponent - 1) + extui a9,a14,31,1 + ssr a12 // load right_shift + sub a3,a3,a9 // 1 << (exponent - 1) - (val < 0) + add a14,a14,a3 + sra a14,a14 +.skip_div_by2_in0_remain: + + blti a13,1,.skip_div_by2_in1_remain + l32i.n a3,a1,20 // 1 << (exponent - 1) + extui a9,a10,31,1 + ssr a13 // load right_shift + sub a3,a3,a9 // 1 << (exponent - 1) - (val < 0) + add a10,a10,a3 + sra a10,a10 +.skip_div_by2_in1_remain: + +// process output + l32r a12,.nudge_val # [0], nudge + l32i a13,a1,0 // -out_shift + add.n a10,a10,a14 # [45] + +// multiply and pick high32 + mulsh a3,a10,a8 + mull a10,a10,a8 + ssai 31 # [0] + add a10,a10,a12 + saltu a9,a10,a12 + add a12,a3,a9 + src a12,a12,a10 + +// div by power of 2 for output + + l32i a9,a1,96 # [31] out_offset + blti a13,1,.skip_div_by2_out_remain + l32i.n a3,a1,24 // 1 << (exponent - 1) + extui a14,a12,31,1 + ssr a13 // load right_shift + sub a3,a3,a14 // 1 << (exponent - 1) - (val < 0) + add a12,a12,a3 + sra a12,a12 +.skip_div_by2_out_remain: + +// add offset + add.n a9,a9,a12 # [33] + +// apply activation + l32i a13,a1,112 # [34] activation_max + l32i a12,a1,108 # [35] activation_min + min a13,a13,a9 # [36] + l32i a9,a1,92 # [37] output + max a13,a13,a12 # [38] + add.n a9,a2,a9 # [39] + s8i a13,a9,0 # [40] id:1371 + l32i a12,a1,116 + addi.n a2,a2,1 # [41] + blt a2,a12,.process_leftover + +.exit: + retw.n # [0] + + .size esp_nn_add_elementwise_s8_esp32s3, . 
- esp_nn_add_elementwise_s8_esp32s3 + +#elif defined(WIO_TERMINAL) +// dummy code, added for old ARM toolchain +.syntax unified +.thumb +.cpu cortex-m0 + +.section .text +#endif // EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3 diff --git a/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/basic_math/esp_nn_mul_ansi.c b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/basic_math/esp_nn_mul_ansi.c new file mode 100644 index 0000000..477d5c6 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/basic_math/esp_nn_mul_ansi.c @@ -0,0 +1,46 @@ +#include "edge-impulse-sdk/classifier/ei_classifier_config.h" +#if EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN +// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include + +void esp_nn_mul_elementwise_s8_ansi(const int8_t *input1_data, + const int8_t *input2_data, + const int32_t input1_offset, + const int32_t input2_offset, + int8_t *output, + const int32_t out_offset, + const int32_t out_mult, + const int32_t out_shift, + const int32_t activation_min, + const int32_t activation_max, + const int32_t size) +{ + for (int i = 0; i < size; i++) { + int32_t tmp1 = input1_data[i] + input1_offset; + int32_t tmp2 = input2_data[i] + input2_offset; + + int32_t out = tmp1 * tmp2; + out = esp_nn_multiply_by_quantized_mult(out, out_mult, out_shift); + out = out + out_offset; + + out = max(activation_min, min(out, activation_max)); + output[i] = (int8_t) out; + } +} + +#endif // EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN diff --git a/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/basic_math/esp_nn_mul_s8_esp32s3.S b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/basic_math/esp_nn_mul_s8_esp32s3.S new file mode 100644 index 0000000..ca28573 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/basic_math/esp_nn_mul_s8_esp32s3.S @@ -0,0 +1,323 @@ +#include "edge-impulse-sdk/classifier/ei_classifier_config.h" +#if EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3 +// Copyright 2021-2023 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
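Editor's note: the ANSI C kernel above (esp_nn_mul_elementwise_s8_ansi) adds the per-input offsets, multiplies, requantises the 32-bit product with esp_nn_multiply_by_quantized_mult, then adds out_offset and clamps to the activation range. Below is a hedged usage sketch through the generic alias; the multiplier, shift, and offsets are illustrative stand-ins for values a converter such as TFLite would normally compute, with out_mult = 1 << 30 encoding a scale of roughly 0.5.

    #include <stdint.h>
    #include "esp_nn.h"

    void mul_example(void)
    {
        const int8_t a[4] = { 10, -20, 30, -40 };
        const int8_t b[4] = {  5,   5, -5,  -5 };
        int8_t out[4];

        esp_nn_mul_elementwise_s8(a, b,
                                  /* input1_offset */ 0,
                                  /* input2_offset */ 0,
                                  out,
                                  /* out_offset     */ 0,
                                  /* out_mult       */ 1 << 30,  /* roughly 0.5 in the Q31 scheme */
                                  /* out_shift      */ 0,        /* expected to be <= 0 */
                                  /* activation_min */ -128,
                                  /* activation_max */ 127,
                                  /* size           */ 4);
    }

With these placeholder values the requantisation roughly halves each raw product (10 * 5 = 50 comes out near 25) before the offset and clamp are applied.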
+ + .text + .align 4 + .literal_position + .literal .LC0_26_123, 1073741824 // `1 << 30` + + # Program Unit: esp_nn_mul_elementwise_s8_esp32s3 + .type esp_nn_mul_elementwise_s8_esp32s3, @function + .align 4 + .global esp_nn_mul_elementwise_s8_esp32s3 + +esp_nn_mul_elementwise_s8_esp32s3: # 0x4 + # to_add = 0 + # gra_spill_temp_0 = 4 + # gra_spill_temp_1 = 8 + # gra_spill_temp_2 = 12 + # gra_spill_temp_3 = 16 + # gra_spill_temp_4 = 20 + # gra_spill_temp_5 = 24 + # gra_spill_temp_6 = 28 + # gra_spill_temp_7 = 32 + # gra_spill_temp_8 = 36 + # gra_spill_temp_<> = 40 + # gra_spill_temp_<> = 44 + # gra_spill_temp_<> = 48 + # gra_spill_temp_13 = 64 + + // registers: + // a2: const int8_t *input1_data + // a3: const int8_t *input2_data + // a4: const int32_t input1_offset + // a5: const int32_t input2_offset + // a6: int8_t *output + // a7: const int32_t out_offset + + // on stack: + // 120: const int32_t out_mult + // 124: const int32_t out_shift + // 128: const int32_t activation_min + // 132: const int32_t activation_max + // 136: const int32_t size + + entry a1,120 # + s32i.n a4,a1,24 # [0] gra_spill_temp_5, input1_offset + s32i.n a5,a1,28 # [1] gra_spill_temp_12, input2_offset + + s32i.n a3,a1,4 # [5] gra_spill_temp_0, input2 + mov.n a10,a3 # [6] + l32i a3,a1,136 # [18] id:361 size+0x0 + mov.n a9,a6 # [2] // out_addr + blti a3,1,.exit # [0] // exit + + s32i.n a2,a1,16 # [9] gra_spill_temp_3, input1 + s32i a7,a1,40 # [4] id:358 out_offset+0x0 + movi.n a11,0 # [3] + mov.n a12,a2 # [10] + s32i a4,a1,44 # [13] id:356 input1_offset+0x0 + s32i a5,a1,48 # [14] id:357 input2_offset+0x0 + movi.n a2,1 # [15] + + l32i a15,a1,124 # [3] id:362 out_shift+0x0 + l32i a13,a1,120 # [4] id:363 out_mult+0x0 + s32i.n a6,a1,8 # [1] gra_spill_temp_1, out_addr + max a14,a15,a11 # [11] left_shift + sub a4,a14,a15 # right_shift + s32i.n a4,a1,20 # [9] gra_spill_temp_4 + + blti a3,8,.process_leftover # [20] + + // skip to leftover routine if inputs are unaligned + or a6,a12,a10 + extui a6,a6,0,4 + bnez a6,.process_leftover + + // `size > 8`, s3 optimisation path... 
+ ee.zero.q q1 # [0] + addi a4,a1,44 # [7] + addi a8,a1,48 # [8] + ee.vldbc.16 q0,a4 # [17] id:359 input1_offset + ee.vldbc.16 q7,a8 # [16] id:360 input2_offset + l32r a4,.LC0_26_123 # [12] + movi a8, 8 + st.qr q0,a1,64 # [19] gra_spill_temp_13 + s32i.n a8,a1,12 # [6] gra_spill_temp_2 + +.Lt_0_7682: # 0x60 + s32i a9,a1,36 # [1] gra_spill_temp_8, out_addr + ld.qr q4,a1,64 # [2] gra_spill_temp_13, input1_offset + ee.vld.l.64.ip q2,a12,8 # [4] id:367, input1_ptr + movi.n a7,16 # [3] + ee.vld.h.64.ip q2,a10,8 # [5] id:368, input2_ptr + wsr.sar a7 # [6] + ee.vcmp.lt.s8 q5,q2,q1 # [7] + ee.vzip.8 q2,q5 # [8] + ee.vadds.s16 q5,q5,q7 # [9] input2_offset + ee.vadds.s16 q4,q2,q4 # [10] input1_offset + ee.vmul.s16 q3,q4,q5 # [11] + wsr.sar a11 # [12] + ee.vmul.s16 q2,q4,q5 # [13] + + wsr.sar a14 # [14] left_shift + ee.vzip.16 q2,q3 # [15] + ee.vsl.32 q6,q2 # [16] left_shift + ssai 31 # [17] + + ee.movi.32.a q6,a3,2 # [18] + ee.movi.32.a q6,a8,3 # [26] + + mulsh a6,a13,a3 # [19] + mull a3,a13,a3 # [20] + mulsh a7,a13,a8 # [27] + add.n a3,a4,a3 # [22] + saltu a2,a3,a4 # [23] + add.n a2,a2,a6 # [24] + src a2,a2,a3 # [25] + + mull a6,a13,a8 # [28] + add.n a6,a4,a6 # [30] + saltu a9,a6,a4 # [31] + add.n a9,a9,a7 # [32] + src a9,a9,a6 # [33] + ee.movi.32.q q2,a2,2 # [53] + ee.movi.32.q q2,a9,3 # [54] + + ee.movi.32.a q6,a6,1 # [34] + mulsh a7,a13,a6 # [35] + mull a6,a13,a6 # [36] + add.n a6,a4,a6 # [38] + saltu a3,a6,a4 # [39] + add.n a3,a3,a7 # [16] + src a3,a3,a6 # [41] + ee.movi.32.a q6,a2,0 # [42] + mulsh a8,a13,a2 # [43] + mull a7,a13,a2 # [4] + add.n a7,a4,a7 # [46] + saltu a6,a7,a4 # [47] + add.n a6,a6,a8 # [24] + src a6,a6,a7 # [49] + ee.movi.32.q q2,a3,1 # [28] + ee.movi.32.q q2,a6,0 # [50] + + wsr.sar a14 # [10] + ee.vsl.32 q4,q3 # [11] + ee.movi.32.a q4,a2,2 # [13] + mulsh a3,a13,a2 # [14] + mull a2,a13,a2 # [15] + ssai 31 # [12] + add.n a2,a4,a2 # [17] + saltu a5,a2,a4 # [18] + add.n a5,a5,a3 # [19] + src a5,a5,a2 # [20] + ee.movi.32.a q4,a3,3 # [21] + mulsh a6,a13,a3 # [22] + mull a3,a13,a3 # [23] + add.n a3,a4,a3 # [25] + saltu a8,a3,a4 # [26] + add.n a8,a8,a6 # [27] + src a8,a8,a3 # [28] + ee.movi.32.q q0,a5,2 # [24] + ee.movi.32.q q0,a8,3 # [51] + + ee.movi.32.a q4,a7,1 # [29] + mulsh a6,a13,a7 # [30] + mull a3,a13,a7 # [31] + add.n a3,a4,a3 # [33] + saltu a2,a3,a4 # [34] + add.n a2,a2,a6 # [35] + src a2,a2,a3 # [36] + ee.movi.32.a q4,a6,0 # [37] + mulsh a7,a13,a6 # [38] + mull a6,a13,a6 # [39] + add.n a6,a4,a6 # [41] + saltu a3,a6,a4 # [42] + add.n a3,a3,a7 # [43] + src a3,a3,a6 # [4] + ee.movi.32.q q0,a2,1 # [47] + ee.movi.32.q q0,a3,0 # [46] + + l32i.n a5,a1,20 # [0] gra_spill_temp_4, right_shift + movi.n a7,1 # [51] + + blti a5,1,.skip_div_by_pow_of_2 +// divide by power of 2 + ee.vcmp.lt.s32 q5,q2,q1 # [56] + ee.vcmp.lt.s32 q6,q0,q1 # [28] + + addi.n a8,a5,-1 # [1] + ssl a8 # [2] + sll a7,a7 # [3] + s32i.n a7,a1,0 # [4] to_add + ee.vldbc.32 q4,a1 # [5] id:376 to_add + + wsr.sar a5 # [6] + ee.vadds.s32 q5,q4,q5 # [7] + ee.vadds.s32 q5,q2,q5 # [8] + ee.vsr.32 q2,q5 # [9] + + wsr.sar a5 # [5] + ee.vadds.s32 q5,q4,q6 # [9] + ee.vadds.s32 q5,q0,q5 # [11] + ee.vsr.32 q0,q5 # [12] +.skip_div_by_pow_of_2: + +// add offset, apply activation + addi a8,a1,132 # [54] + ee.vldbc.32 q4,a8 # [55] id:385 activation_max + addi a5,a1,40 # [8] + ee.vldbc.32 q6,a5 # [10] id:384 out_offset + addi a7,a1,128 # [4] + ee.vadds.s32 q0,q0,q6 # [13] // add out_offset + ee.vadds.s32 q2,q2,q6 # [14] // add out_offset + ee.vldbc.32 q6,a7 # [16] id:386 activation_min + ee.vmin.s32 q0,q0,q4 # [17] + ee.vmin.s32 
q2,q2,q4 # [15] + ee.vmax.s32 q0,q0,q6 # [18] + ee.vmax.s32 q2,q2,q6 # [19] + +// pack and store + ee.vunzip.16 q2,q0 # [20] + ee.vunzip.8 q2,q0 # [21] + l32i.n a7,a1,12 // count + l32i a9,a1,36 # [55] gra_spill_temp_8 + l32i.n a3,a1,136 # [1] , size + ee.vst.l.64.ip q2,a9,8 # [22] id:387 + addi a7,a7,8 + s32i.n a7,a1,12 // increment count + bge a3,a7,.Lt_0_7682 + + addi a11,a7,-8 + bge a11,a3,.exit # [3] // exit + +.process_leftover: + sub a8,a3,a11 # [1] + loopgtz a8,.LBB33_esp_nn_mul_elementwise_s8_esp32s3 # [9] + + ssl a14 # [0] left_shift + l32i.n a8,a1,24 # [1] gra_spill_temp_5, input1_offset + l32i.n a10,a1,4 # [2] gra_spill_temp_0, input2 + l32i.n a12,a1,16 # [3] gra_spill_temp_3, input1 + add.n a10,a11,a10 # [4], input2 + add.n a12,a11,a12 # [5], input1 + l8ui a12,a12,0 # [6] id:390 + l8ui a10,a10,0 # [7] id:391 + sext a12,a12,7 # [8] + add.n a12,a12,a8 # [9] + l32i.n a8,a1,28 # [10] gra_spill_temp_12, input2_offset + sext a10,a10,7 # [11] + add.n a10,a10,a8 # [12] + mull a10,a12,a10 # [13] // multiplication result + +// multiply by quantised mult + l32i.n a9,a1,20 # [0] gra_spill_temp_4, load right_shift + + sll a10,a10 # [15] // left shift + + mulsh a3,a10,a13 # [1] + mull a8,a10,a13 # [6] + ssai 31 # [0] + add.n a6,a8,a4 # [8] + saltu a8,a6,a8 # [9] + add.n a8,a8,a3 # [10] + src a3,a8,a6 # [19] // result + + blti a9, 1, .skip_div_by_pow_of_2_remains +// divide by power of 2 + // calculate to_add = `1 << (exponent - 1)` + addi a6,a9,-1 + ssl a6 # [23] + movi a7,1 + sll a7,a7 // to_add + + extui a8,a3,31,1 # [24], sign + add a3,a3,a8 // add sign + add a3,a3,a7 // add to_add + + ssr a9 # [20] load right_shift + sra a3,a3 // right shift +.skip_div_by_pow_of_2_remains: + + l32i.n a6,a1,40 # [32], out_offset + l32i.n a8,a1,132 # [35], act_max + l32i.n a7,a1,128 # [36], act_min + +// add offset and apply activation + add.n a3,a3,a6 # [34], offset added + min a8,a8,a3 # [37] + l32i.n a3,a1,8 # [38] gra_spill_temp_1, load base out_addr + max a8,a8,a7 # [39] + +// store + add.n a3,a11,a3 # [16], add index from `a11` + s8i a8,a3,0 # [41] id:392 // store + addi.n a11,a11,1 # [42] // inc index + +.LBB33_esp_nn_mul_elementwise_s8_esp32s3: # 0x2ed +.exit: + retw.n # [0] + + .size esp_nn_mul_elementwise_s8_esp32s3, . - esp_nn_mul_elementwise_s8_esp32s3 + +#elif defined(WIO_TERMINAL) +// dummy code, added for old ARM toolchain +.syntax unified +.thumb +.cpu cortex-m0 + +.section .text +#endif // EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3 diff --git a/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/common/common_functions.h b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/common/common_functions.h new file mode 100644 index 0000000..1158e9b --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/common/common_functions.h @@ -0,0 +1,255 @@ +// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
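+// The helpers in this header implement the TFLite-style requantization used by
+// every kernel in this port. A minimal usage sketch (informal, assuming a
+// 32-bit accumulator `acc` and per-tensor `out_mult`/`out_shift`):
+//
+//   int32_t out = esp_nn_multiply_by_quantized_mult(acc, out_mult, out_shift);
+//   out += out_offset;
+//   out = max(activation_min, min(out, activation_max));
+//   *out_data++ = (int8_t) out;
+//
+// `out_mult` acts as a Q31 fixed-point multiplier and `out_shift` as a signed
+// shift: a positive shift is applied before the multiply, a negative one as a
+// rounding divide by a power of two afterwards.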
+ +#pragma once + +#include +#include +#include + +/** + * c99 standard still doesn't strictly inline functions + * We need to use attribute as well to do this. + */ +#define __NN_FORCE_INLINE__ __attribute((always_inline)) static inline + +/* min/max macros */ +#ifndef max +#define max(a, b) ({ \ + __typeof__ (a) _a = (a); \ + __typeof__ (b) _b = (b); \ + _a > _b ? _a : _b; \ +}) + +#define min(a, b) ({ \ + __typeof__ (a) _a = (a); \ + __typeof__ (b) _b = (b); \ + _a < _b ? _a : _b; \ +}) +#endif + +__NN_FORCE_INLINE__ int32_t esp_nn_clz32(uint32_t in) +{ +#if CONFIG_IDF_TARGET_ARCH_XTENSA + __asm__ volatile("nsau %0, %0" : "+r" (in)); + return in; +#elif defined(__GNUC__) + return __builtin_clz(in); +#else + int32_t count = 32; + uint32_t x = in, y = in >> 16; + if (y != 0) { + count -= 16; + x = y; + } + y = x >> 8; + if (y != 0) { + count -= 8; + x = y; + } + y = x >> 4; + if (y != 0) { + count -= 4; + x = y; + } + y = x >> 2; + if (y != 0) { + count -= 2; + x = y; + } + y = x >> 1; + if (y != 0) { + return count - 2; + } + return count - x; +#endif +} + +/** + * Signed saturate a 32 bit value to 8 bits keeping output in 32 bit variable. + */ +__NN_FORCE_INLINE__ int32_t esp_nn_saturate8(int32_t in) +{ +#if CONFIG_IDF_TARGET_ARCH_XTENSA + __asm__ volatile("clamps %0, %0, 7" : "+a"(in)); + return in; +#else + return max(INT8_MIN, min(in, INT8_MAX)); +#endif +} + +__NN_FORCE_INLINE__ int32_t esp_nn_pick_sat_high32_of64(int64_t val64) +{ + int32_t sign = (int32_t) (val64 >> 63); + int32_t to_add = sign & ((1ul << 31) - 1); + return (int32_t) ((int64_t) (val64 + to_add) >> 31); +} + +__NN_FORCE_INLINE__ int32_t esp_nn_sat_round_doubling_high_mul(int32_t in0, int32_t in1) +{ + int32_t result; + int64_t in0_64 = (int64_t) in0; + bool overflow = (in0 == in1) && (in0 == (int32_t) INT32_MIN); + + /* Nudge value */ + int64_t nudge_val = 1 << 30; + if ((in0 < 0) ^ (in1 < 0)) { + nudge_val = 1 - nudge_val; + } + + /* Multiply and add nudge */ + int64_t mult = in0_64 * in1 + nudge_val; + + /* Round and pickup 32 bits */ + result = esp_nn_pick_sat_high32_of64(mult); + + return overflow ? INT32_MAX : result; +} + +/** + * fast version + * this will fail for values closer to INT32_MAX and INT32_MIN by `1 << (exponent - 1)`. + * We can afford to do this because we are at the very last stage of filter. + * Also it is pretty rare condition as our output is going to be 8 bit. + */ +__NN_FORCE_INLINE__ int32_t esp_nn_div_by_power_of_two_fast(int32_t val, int32_t exponent) +{ + int32_t to_add = (1 << (exponent - 1)) - (val < 0); + return (int32_t) ((val + to_add) >> exponent); +} + +__NN_FORCE_INLINE__ int32_t esp_nn_div_by_power_of_two(int32_t val, int32_t exponent) +{ + int32_t result; + + const int32_t mask = (1 << exponent) - 1; + const int32_t remainder = val & mask; + + result = val >> exponent; + int32_t threshold = (mask >> 1) + (result < 0); + + if (remainder > threshold) { + result += 1; + } + return result; +} + +__NN_FORCE_INLINE__ int32_t esp_nn_multiply_by_quantized_mult(int32_t x, int32_t mult, int32_t shift) +{ + int32_t left_shift = shift > 0 ? shift : 0; + int32_t right_shift = shift > 0 ? 
0 : -shift; + int32_t result = esp_nn_sat_round_doubling_high_mul(x * (1 << left_shift), mult); + return esp_nn_div_by_power_of_two(result, right_shift); +} + +__NN_FORCE_INLINE__ int32_t esp_nn_multiply_by_quantized_mult_fast(int32_t x, int32_t mult, int32_t shift) +{ + int32_t left_shift = max(shift, 0); + int32_t right_shift = left_shift - shift; + + int64_t nudge_val = 1 << 30; + int64_t in0_64 = (int64_t) (x << left_shift); + + /* Multiply and add nudge */ + int64_t mult_64 = in0_64 * mult + nudge_val; + int32_t result = (int32_t) (mult_64 >> 31); + if (right_shift) { + result = esp_nn_div_by_power_of_two_fast(result, right_shift); + } + return result; +} + +static void esp_nn_aligned_s8_pad_with_value(const int8_t *src, int8_t *dst, + const uint16_t input_wd, + const uint16_t input_ht, + const uint16_t channels, + const int32_t pad_val, + const uint16_t pad_wd, + const uint16_t pad_ht) +{ + /* memset with pad_val */ + memset(dst, pad_val, ((input_wd + 2 * pad_wd) * (input_ht + 2 * pad_ht)) * channels); + dst += (pad_wd + input_wd + pad_wd) * pad_ht * channels; + + for (int i = 0; i < input_ht; i++) { + dst += pad_wd * channels; + for (int j = 0; j < input_wd * channels; j++) { + *dst++ = *src++; + } + dst += pad_wd * channels; + } +} + +static void esp_nn_aligned_s8_pad_end_with_value(const int8_t *src, int8_t *dst, + const uint16_t input_wd, + const uint16_t input_ht, + const uint16_t channels, + const int32_t pad_val, + const uint16_t pad_wd, + const uint16_t pad_ht) +{ + for (int i = 0; i < input_ht; i++) { + for (int j = 0; j < input_wd * channels; j++) { + *dst++ = *src++; + } + if (pad_wd) { + memset(dst, pad_val, pad_wd * channels); + dst += pad_wd * channels; + } + } + /* pad end `pad_ht` lines at end */ + if (pad_ht) { + memset(dst, pad_val, (input_wd + pad_wd) * pad_ht * channels); + } +} + +/** + * @brief convert 8 bit input data to 16 bit + * + * @param src int8_t source data + * @param dst int16_t dst data + * @param size length of data + * @param offset offset to be added to src data. Range: [-128, 127] + */ +__NN_FORCE_INLINE__ void esp_nn_s8_to_s16_with_offset(const int8_t *src, int16_t *dst, + const int size, const int32_t offset) +{ + int i = 0; + for (; i < size; i += 2) { + dst[i + 0] = src[i + 0] + offset; + dst[i + 1] = src[i + 1] + offset; + } + if(i < size) { + dst[i] = src[i] + offset; + } +} + +/** + * @brief convert 8 bit input data to 16 bit + * + * @param src int8_t source data + * @param dst int16_t dst data + * @param size length of data + */ +__NN_FORCE_INLINE__ void esp_nn_s8_to_s16(const int8_t *src, int16_t *dst, const int size) +{ + int i = 0; + for (; i < size; i += 2) { + dst[i + 0] = src[i + 0]; + dst[i + 1] = src[i + 1]; + } + if(i < size) { + dst[i] = src[i]; + } +} diff --git a/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/common/esp_nn_common_functions_esp32s3.S b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/common/esp_nn_common_functions_esp32s3.S new file mode 100644 index 0000000..68d1086 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/common/esp_nn_common_functions_esp32s3.S @@ -0,0 +1,266 @@ +#include "edge-impulse-sdk/classifier/ei_classifier_config.h" +#if EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3 +// Copyright 2021-2022 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + .text + + # Program Unit: esp_nn_aligned_s8_to_s16_with_offset_esp32s3 + .type esp_nn_aligned_s8_to_s16_with_offset_esp32s3, @function + .align 4 + .global esp_nn_aligned_s8_to_s16_with_offset_esp32s3 + +esp_nn_aligned_s8_to_s16_with_offset_esp32s3: # 0x30d + + entry a1,48 # + mov.n a10,a2 # // src + mov.n a9,a3 # // dst + mov.n a8,a4 # // size + s32i.n a5,a1,12 # [3] // offset + addi.n a2,a1,12 # [4] + + blti a4,32,.Lt_2_6402 # [5] if (size < 32) goto unopt + + addi.n a6,a8,-1 # [0] + ee.zero.q q5 # [1] + ee.vldbc.16 q4,a2 # [2] id:136 offset + mov.n a3,a10 # [3] + mov.n a2,a9 # [4] + ee.vld.128.ip q0,a3,16 # [5] id:137 + ee.vld.128.ip q1,a3,16 # [6] id:138 + ee.vcmp.lt.s8 q2,q0,q5 # [7] + ee.vzip.8 q0,q2 # [8] + ee.vadds.s16 q0,q0,q4 # [9] + ee.vadds.s16.st.incp q0,a2,q0,q2,q4 # [10] id:139 + blti a4,64,.Lt_2_7170 # [11] + + addi a5,a4,-32 # [0] + srai a5,a5,5 # [1] + slli a4,a5,5 # [2] + loopgtz a5,.LBB37_esp_nn_aligned_s8_to_s16_with_offset_esp32s3 # [3] + + ee.vst.128.ip q0,a2,16 # [0*II+0] id:140 + ee.vcmp.lt.s8 q0,q1,q5 # [0*II+1] + ee.vzip.8 q1,q0 # [0*II+2] + ee.vadds.s16.ld.incp q2,a3,q3,q1,q4 # [0*II+3] id:141 + ee.vadds.s16.st.incp q3,a2,q0,q0,q4 # [0*II+4] id:142 + ee.vcmp.lt.s8 q3,q2,q5 # [0*II+5] + ee.vst.128.ip q0,a2,16 # [0*II+6] id:143 + ee.vzip.8 q2,q3 # [0*II+7] + ee.vadds.s16.ld.incp q1,a3,q0,q2,q4 # [0*II+8] id:144 + ee.vadds.s16.st.incp q0,a2,q0,q3,q4 # [0*II+9] id:145 + +.LBB37_esp_nn_aligned_s8_to_s16_with_offset_esp32s3: # 0x36d + addi a4,a4,32 # [0] + +.Lt_2_3842: # 0x370 + ee.vst.128.ip q0,a2,16 # [0] id:146 + ee.vcmp.lt.s8 q2,q1,q5 # [1] + ee.vzip.8 q1,q2 # [2] + ee.vadds.s16 q2,q2,q4 # [3] + ee.vadds.s16 q3,q1,q4 # [4] + ee.vst.128.ip q3,a2,16 # [5] id:147 + ee.vst.128.ip q2,a2,16 # [6] id:148 + bge a4,a6,.Lt_2_4866 # [7] + + l32i.n a5,a1,12 # [0] id:135 offset+0x0 + +.Lt_2_5122: # 0x38a + mov.n a11,a4 # [0] + add.n a2,a4,a10 # [1] + # 576 dst[i + 0] = src[i + 0] + offset; + l8ui a7,a2,0 # [2] id:149 + addx2 a6,a4,a9 # [3] + sext a7,a7,7 # [4] + add.n a7,a7,a5 # [5] + s16i a7,a6,0 # [6] id:150 + # 577 dst[i + 1] = src[i + 1] + offset; + l8ui a3,a2,1 # [7] id:151 + sub a7,a8,a4 # [8] + addi.n a2,a2,2 # [9] + srai a7,a7,1 # [10] + sext a3,a3,7 # [11] + add.n a3,a3,a5 # [12] + s16i a3,a6,2 # [13] id:152 + addi.n a3,a7,-1 # [14] + loopgtz a3,.LBB52_esp_nn_aligned_s8_to_s16_with_offset_esp32s3 # [15] + + l8ui a3,a2,0 # [0*II+0] id:149 + addi.n a6,a6,4 # [1*II+1] + sext a3,a3,7 # [0*II+2] + add.n a3,a3,a5 # [0*II+3] + s16i a3,a6,0 # [0*II+4] id:150 + l8ui a3,a2,1 # [0*II+5] id:151 + addi.n a2,a2,2 # [0*II+6] + sext a3,a3,7 # [0*II+7] + add.n a3,a3,a5 # [0*II+8] + s16i a3,a6,2 # [0*II+9] id:152 + +.LBB52_esp_nn_aligned_s8_to_s16_with_offset_esp32s3: # 0x3ce + addx2 a4,a7,a11 # [0] + +.Lt_2_4866: # 0x3d1 + bge a4,a8,.Lt_2_7682 # [0] + + # 580 dst[i] = src[i] + offset; + addx2 a11,a4,a9 # [0] + add.n a8,a4,a10 # [1] + l8ui a8,a8,0 # [2] id:153 + l32i.n a12,a1,12 # [3] id:135 offset+0x0 + sext a8,a8,7 # [4] + add.n a8,a8,a12 # [5] + s16i a8,a11,0 # [6] id:154 + retw.n # [7] + +.Lt_2_6402: # 0x3e8 + blti a4,2,.Lt_2_6658 # [0] + + movi.n a4,0 # [0] + 
j .Lt_2_5122 # [1] + +.Lt_2_7682: # 0x3f0 + retw.n # [0] + +.Lt_2_6658: # 0x3f2 + blti a4,1,.Lt_2_7682 # [0] + + l8ui a11,a10,0 # [0] id:153 + sext a11,a11,7 # [2] + add.n a11,a11,a5 # [3] + s16i a11,a3,0 # [4] id:154 + retw.n # [5] + +.Lt_2_7170: # 0x402 + movi.n a4,32 # [0] + j .Lt_2_3842 # [1] + + .size esp_nn_aligned_s8_to_s16_with_offset_esp32s3, . - esp_nn_aligned_s8_to_s16_with_offset_esp32s3 + + + .literal_position + + # Program Unit: esp_nn_s8_to_s16_esp32s3 + .type esp_nn_s8_to_s16_esp32s3, @function + .align 4 + .global esp_nn_s8_to_s16_esp32s3 + +esp_nn_s8_to_s16_esp32s3: # 0x40b + entry a1,32 # + mov.n a9,a2 // src + mov.n a8,a3 // dst + mov.n a7,a4 // size + blti a4,1,.Lt_3_4866 // size == 0 + blti a4,16,.Lt_3_4610 // if (size < 16) jump to unopt path + + // load align_len to sar_byte + extui a2,a2,0,4 # [0] + wur.sar_byte a2 # [1] + mov.n a2,a9 # [2] + + // preload + ee.vld.128.ip q0,a2,16 + ee.vld.128.ip q1,a2,16 + ee.zero.q q4 + # 672 + # 673 for (i = 16; i < size - 15; i += 16) { + blti a4,32,.Lt_3_5378 # [5] + addi a6,a4,-16 # [1] + srai a6,a6,4 # [2] + slli a4,a6,4 # [3] + loopgtz a6,.LBB35_esp_nn_s8_to_s16_esp32s3 # [4] + + ee.src.q.qup q2,q0,q1 # [0*II+0] + ee.vcmp.lt.s8 q3,q2,q4 # [0*II+1] // sign + ee.vld.128.ip q1,a2,16 # [0*II+2] // for next iteration + ee.vzip.8 q2,q3 # [0*II+3] + ee.vst.128.ip q2,a3,16 # [0*II+4] id:93 + ee.vst.128.ip q3,a3,16 # [0*II+5] id:94 + +.LBB35_esp_nn_s8_to_s16_esp32s3: # 0x449 + addi a4,a4,16 # [0] + +.Lt_3_2050: # 0x44c + ee.src.q.qup q5,q0,q1 # [0] + ee.vcmp.lt.s8 q3,q5,q4 # [1] + ee.vzip.8 q5,q3 # [2] + ee.vst.128.ip q5,a3,16 # [3] id:96 + ee.vst.128.ip q3,a3,16 # [4] id:97 + # 687 + # 688 skip_to_remains_s8_to_s16: + # 689 for (; i < size; i += 2) { + bge a4,a7,.Lt_3_4866 # [5] + +.Lt_3_3330: # 0x45e + mov.n a11,a4 # [0] + add.n a2,a4,a9 # [1] + # 690 dst[i + 0] = src[i + 0]; + l8ui a10,a2,0 # [2] id:98 + addx2 a5,a4,a8 # [3] + sext a10,a10,7 # [4] + s16i a10,a5,0 # [5] id:99 + # 691 dst[i + 1] = src[i + 1]; + l8ui a3,a2,1 # [6] id:100 + sub a10,a7,a4 # [7] + addi.n a2,a2,2 # [8] + addi.n a10,a10,1 # [9] + srai a10,a10,1 # [10] + sext a3,a3,7 # [11] + s16i a3,a5,2 # [12] id:101 + addi.n a3,a10,-1 # [13] + loopgtz a3,.LBB50_esp_nn_s8_to_s16_esp32s3 # [14] + + l8ui a3,a2,0 # [0*II+0] id:98 + addi.n a5,a5,4 # [1*II+1] + sext a3,a3,7 # [0*II+2] + s16i a3,a5,0 # [0*II+3] id:99 + l8ui a3,a2,1 # [0*II+4] id:100 + addi.n a2,a2,2 # [0*II+5] + sext a3,a3,7 # [0*II+6] + s16i a3,a5,2 # [0*II+7] id:101 + +.LBB50_esp_nn_s8_to_s16_esp32s3: # 0x49c + addx2 a4,a10,a11 # [0] + # 692 } + # 693 if(i < size) { + bge a4,a7,.Lt_3_4866 # [1] + + # 694 dst[i] = src[i]; + add.n a11,a4,a9 # [0] + l8ui a11,a11,0 # [1] id:102 + addx2 a12,a4,a8 # [2] + sext a11,a11,7 # [3] + s16i a11,a12,0 # [4] id:103 + retw.n # [5] + +.Lt_3_4610: # 0x4b2 + movi.n a4,0 # [0] + j .Lt_3_3330 # [1] + +.Lt_3_4866: # 0x4ba + retw.n # [0] + +.Lt_3_5378: # 0x4bc + movi.n a4,16 # [1] + j .Lt_3_2050 # [2] + + .size esp_nn_s8_to_s16_esp32s3, . 
- esp_nn_s8_to_s16_esp32s3 + +#elif defined(WIO_TERMINAL) +// dummy code, added for old ARM toolchain +.syntax unified +.thumb +.cpu cortex-m0 + +.section .text +#endif // EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3 diff --git a/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/common/esp_nn_multiply_by_quantized_mult_esp32s3.S b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/common/esp_nn_multiply_by_quantized_mult_esp32s3.S new file mode 100644 index 0000000..08ff1b8 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/common/esp_nn_multiply_by_quantized_mult_esp32s3.S @@ -0,0 +1,127 @@ +#include "edge-impulse-sdk/classifier/ei_classifier_config.h" +#if EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3 +// Copyright 2021-2022 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// the macro `use_nudge` enables adding rounding factor similar to tflite implementation +// this barely changes any accuracy +// keep this disabled for better performance + +#ifndef SKIP_NUDGE + # set SKIP_NUDGE flag for ~20% faster (but not bit-exact) quantisation + .set use_nudge, 1 +#endif + + .text + .literal_position + .literal .nudge_val, 1073741824 # 1 << 30 + + .type esp_nn_multiply_by_quantized_mult_asm_esp32s3, @function + .align 4 + .global esp_nn_multiply_by_quantized_mult_asm_esp32s3 + +esp_nn_multiply_by_quantized_mult_asm_esp32s3: # 0x4 + # to_add = 4 + + entry a1,32 + wsr.sar a3 + ee.zero.q q2 + + bltz a3, .skip_left_shift + ee.vsl.32 q0,q0 # [13] +.skip_left_shift: + + ssai 31 # [15] + +# move data to general purpose registers + ee.movi.32.a q0,a12,0 # [17] + ee.movi.32.a q0,a13,1 # [16] + ee.movi.32.a q0,a14,2 # [18] + ee.movi.32.a q0,a15,3 # [19] + +.ifdef use_nudge + l32r a6,.nudge_val +.endif + +# perform 64 bit mult + mulsh a4,a2,a12 # [22] + mulsh a11,a2,a13 # [23] + mulsh a10,a2,a14 # [21] + mulsh a8,a2,a15 # [20] + mull a12,a2,a12 # [24] + mull a13,a2,a13 # [25] + mull a14,a2,a14 # [26] + mull a15,a2,a15 # [27] + +# add nudge_val and discard low31 + +.ifdef use_nudge + add.n a14,a6,a14 # [41] + saltu a2,a14,a6 # [44] + add.n a10,a10,a2 # [45] + + add.n a13,a6,a13 # [47] + saltu a9,a13,a6 # [50] + add.n a11,a11,a9 # [51] +.endif + + src a10,a10,a14 # [88] + src a11,a11,a13 # [78] + ee.movi.32.q q0,a10,2 + ee.movi.32.q q0,a11,1 + +.ifdef use_nudge + add.n a15,a6,a15 # [36] + saltu a2,a15,a6 # [39] + add.n a8,a8,a2 # [40] + + add.n a12,a6,a12 # [54] + saltu a10,a12,a6 # [57] + add.n a4,a4,a10 # [58] +.endif + + src a8,a8,a15 # [95] + src a4,a4,a12 # [69] # discard lower 31 bits + ee.movi.32.q q0,a8,3 + ee.movi.32.q q0,a4,0 + + bgez a3, .skip_div_by_power_of_2 + + neg a5,a3 # [0] right_shift/exponent = -shift + ee.vcmp.lt.s32 q2,q0,q2 # [97] + addi.n a7,a5,-1 # [0] exponent - 1 + ssl a7 # [1] + movi.n a6,1 # [92] + sll a6,a6 # [2] + s32i.n a6,a1,4 # [3] to_add + addi.n a4,a1,4 # [94] to_add_addr + ee.vldbc.32 q1,a4 # [4] id:148 to_add + wsr.sar a5 + 
ee.vadds.s32 q1,q1,q2 + ee.vadds.s32 q0,q0,q1 + ee.vsr.32 q0,q0 + +.skip_div_by_power_of_2: + retw.n # [9] + + .size esp_nn_multiply_by_quantized_mult_asm_esp32s3, . - esp_nn_multiply_by_quantized_mult_asm_esp32s3 + +#elif defined(WIO_TERMINAL) +// dummy code, added for old ARM toolchain +.syntax unified +.thumb +.cpu cortex-m0 + +.section .text +#endif // EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3 diff --git a/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/common/esp_nn_multiply_by_quantized_mult_ver1_esp32s3.S b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/common/esp_nn_multiply_by_quantized_mult_ver1_esp32s3.S new file mode 100644 index 0000000..ed83816 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/common/esp_nn_multiply_by_quantized_mult_ver1_esp32s3.S @@ -0,0 +1,163 @@ +#include "edge-impulse-sdk/classifier/ei_classifier_config.h" +#if EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3 +// Copyright 2021-2022 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// quantisation version where we deal with different shifts and mults. + + .set use_nudge, 1 + + .text + .literal_position + .literal .LC3_19_48, 1073741824 + + # Program Unit: esp_nn_multiply_by_quantized_mult_ver1_esp32s3 + .type esp_nn_multiply_by_quantized_mult_ver1_esp32s3, @function + .align 4 + .global esp_nn_multiply_by_quantized_mult_ver1_esp32s3 + +esp_nn_multiply_by_quantized_mult_ver1_esp32s3: # 0x1ee + entry a1,32 # + ee.zero.q q3 # [0] + l32i.n a8,a3,0 # [5] id:200 // shift0 + l32i.n a7,a3,4 # [2] id:201 // shift1 + l32i.n a12,a2,0 # [3] id:204 // mult0 + l32i.n a15,a2,4 # [1] id:205 // mult1 + movi.n a10,0 # [7] + + max a6,a10,a8 # [1] // left_shift0 + max a5,a10,a7 # [7] // left_shift1 + sub a8,a6,a8 # [2] // right_shift0 + sub a7,a5,a7 # [8] // right_shift1 + + ee.movi.32.a q0,a9,0 # [4] + ee.movi.32.a q0,a11,1 # [11] + ssl a6 # [3] + sll a9,a9 # [4] + mulsh a4,a12,a9 # [6] + mull a12,a12,a9 # [9] + ssl a5 # [10] + sll a11,a11 # [12] + mulsh a14,a15,a11 # [14] + mull a15,a15,a11 # [16] + l32r a13,.LC3_19_48 # [23] + + ee.movi.32.q q0,a9,0 # [5] + ee.movi.32.q q0,a11,1 # [15] + + + l32i.n a6,a3,8 # [6] id:202 // shift2 + l32i.n a9,a2,8 # [19] id:206 // mult2 + max a5,a10,a6 # [0] // left_shift2 + sub a6,a5,a6 # [24] // right_shift2 + + + ee.movi.32.a q0,a11,2 # [17] + ssl a5 # [13] + sll a11,a11 # [18] + ee.movi.32.q q0,a11,2 # [20] + mulsh a5,a9,a11 # [21] + mull a9,a9,a11 # [22] + mov a11, a5 + +// add nudge to result0 & result1 + add.n a12,a13,a12 # [25] + saltu a5,a12,a13 # [26] + add.n a15,a13,a15 # [27] + add.n a5,a5,a4 # [28] + saltu a4,a15,a13 # [29] + add.n a4,a4,a14 # [30] + + + l32i.n a14,a3,12 # [31] id:203 // shift3 + add.n a9,a13,a9 # [32] // add nudge low2 + max a10,a10,a14 # [33] // left_shift3 + sub a14,a10,a14 # [34] // right_shift3 + ssl a10 # [35] + ee.movi.32.a q0,a10,3 # [36] + sll a10,a10 # [37] + +// select high32 from result0 and 
resul1 + ssai 31 # [39] + src a5,a5,a12 # [40] + src a4,a4,a15 # [41] + movi.n a12,1 # [42] + ee.movi.32.q q0,a5,0 # [43] + saltu a15,a9,a13 # [44] + add.n a15,a15,a11 # [45] + ee.movi.32.q q0,a4,1 # [46] + l32i.n a11,a2,12 # [47] id:207 // mult3 + src a15,a15,a9 # [48] + ee.movi.32.q q0,a15,2 # [49] + mull a9,a11,a10 # [50] + mulsh a11,a11,a10 # [51] + add.n a9,a13,a9 # [52] + saltu a13,a9,a13 # [53] + add.n a13,a13,a11 # [54] + src a13,a13,a9 # [55] + ee.movi.32.q q0,a13,3 # [57] + +// divide_by_power_of2_step + ssl a8 # [56] + sll a9,a12 # [58] + ssl a7 # [59] + addi.n a9,a9,-1 # [60] + ee.movi.32.q q2,a9,0 # [61] + sll a11,a12 # [62] + addi.n a11,a11,-1 # [63] + ssl a6 # [64] + sll a10,a12 # [65] + ee.movi.32.q q2,a11,1 # [66] + ssl a14 # [67] + addi.n a10,a10,-1 # [68] + ee.movi.32.q q2,a10,2 # [69] + sll a9,a12 # [70] + addi.n a9,a9,-1 # [71] + ee.movi.32.q q2,a9,3 # [74] + ee.andq q1,q0,q2 # [75] + + ssr a8 # [72] + sra a5,a5 # [73] + ssr a7 # [76] + sra a4,a4 # [78] + ssr a6 # [79] + sra a15,a15 # [81] + ssr a14 # [82] + sra a13,a13 # [84] + wsr.sar a12 # [85] + + ee.movi.32.q q7,a5,0 # [77] + ee.movi.32.q q7,a4,1 # [80] + ee.movi.32.q q7,a15,2 # [83] + ee.movi.32.q q7,a13,3 # [86] + + ee.vcmp.lt.s32 q3,q7,q3 # [87] + ee.vsr.32 q2,q2 # [88] + ee.vsubs.s32 q2,q2,q3 # [89] + ee.vcmp.gt.s32 q1,q1,q2 # [90] + ee.vsubs.s32 q0,q7,q1 # [91] + +// return + retw.n # [92] + + .size esp_nn_multiply_by_quantized_mult_ver1_esp32s3, . - esp_nn_multiply_by_quantized_mult_ver1_esp32s3 + +#elif defined(WIO_TERMINAL) +// dummy code, added for old ARM toolchain +.syntax unified +.thumb +.cpu cortex-m0 + +.section .text +#endif // EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3 diff --git a/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_conv_ansi.c b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_conv_ansi.c new file mode 100644 index 0000000..60b6b41 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_conv_ansi.c @@ -0,0 +1,183 @@ +#include "edge-impulse-sdk/classifier/ei_classifier_config.h" +#if EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN +// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
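+// Both reference kernels in this file follow the same per-output-element
+// pattern (an informal sketch of the loop nest below, using its own names):
+//
+//   int32_t acc = 0;
+//   for (ky, kx, c) over the filter window clipped to the input:
+//       acc += (input[base_y + ky][base_x + kx][c] + input_offset)
+//              * filter[out_ch_idx][ky][kx][c];
+//   if (bias) acc += bias[out_ch_idx];
+//   acc = esp_nn_multiply_by_quantized_mult(acc, out_mult, out_shift) + out_offset;
+//   out_data[...] = clamp(acc, activation_min, activation_max);
+//
+// The u8 variant additionally adds filter_offset to every filter value, and the
+// s8 variant takes per-channel out_mult/out_shift arrays from quant_data.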
+ +#include + +#include + +int esp_nn_get_conv_scratch_size_ansi(const data_dims_t *input_dims, + const data_dims_t *filter_dims, + const data_dims_t *output_dims, + const conv_params_t *conv_params) +{ + return 0; +} + +void esp_nn_set_conv_scratch_buf_ansi(const void *buf) +{ + +} + +/** + * Assumption 1: i/p channels == o/p channels + * Assumption 2: Pointers are valid + * Assumption 3: dialation width = 1 + */ +void esp_nn_conv_u8_ansi(const uint8_t *input_data, + const uint16_t input_wd, + const uint16_t input_ht, + const uint16_t in_channels, + const int32_t input_offset, + const uint16_t pad_wd, + const uint16_t pad_ht, + const uint16_t stride_wd, + const uint16_t stride_ht, + const uint8_t *filter_data, + const uint16_t filter_wd, + const uint16_t filter_ht, + const int32_t filter_offset, + const int32_t *bias, + uint8_t *out_data, + const uint16_t out_wd, + const uint16_t out_ht, + const uint16_t out_channels, + const int32_t out_offset, + const int32_t out_shift, + const int32_t out_mult, + const int32_t activation_min, + const int32_t activation_max) +{ + for (int out_y = 0; out_y < out_ht; out_y++) { //height loop + const int16_t base_y = (out_y * stride_ht) - pad_ht; + for (int out_x = 0; out_x < out_wd; out_x++) { //width_loop + const int16_t base_x = (out_x * stride_wd) - pad_wd; + for (int out_ch_idx = 0; out_ch_idx < out_channels; out_ch_idx++) {//channel_loop + int32_t result = 0; + + /* Select filter so as the point doesn't lie outside block */ + int filter_y_start = max(0, -base_y); + int filter_x_start = max(0, -base_x); + int filter_y_end = min(filter_ht, input_ht - base_y); + int filter_x_end = min(filter_wd, input_wd - base_x); + + for (int filter_y_idx = filter_y_start; filter_y_idx < filter_y_end; filter_y_idx++) { + const int32_t idx_y = base_y + filter_y_idx; + for (int filter_x_idx = filter_x_start; filter_x_idx < filter_x_end; filter_x_idx++) { + const int32_t idx_x = base_x + filter_x_idx; + for (int in_ch_idx = 0; in_ch_idx < in_channels; in_ch_idx++) { + int32_t input_index = (idx_y * input_wd + idx_x) * in_channels + in_ch_idx; + int32_t filter_index = ((out_ch_idx * filter_ht + filter_y_idx) + * filter_wd + filter_x_idx) * in_channels + + in_ch_idx; + int32_t input_val = input_data[input_index] + input_offset; + int32_t filter_val = filter_data[filter_index] + filter_offset; + result += input_val * filter_val; + } + } + } + if (bias) { + result += bias[out_ch_idx]; + } + result = esp_nn_multiply_by_quantized_mult(result, out_mult, out_shift); + result += out_offset; + result = max(result, activation_min); + result = min(result, activation_max); + + int out_index = (out_y * out_wd + out_x) * out_channels + out_ch_idx; + out_data[out_index] = (uint8_t) result; + } + } + } +} + +/** + * Assumption 1: i/p channels == o/p channels + * Assumption 2: Pointers are valid + * Assumption 3: dialation width = 1 + */ +void esp_nn_conv_s8_ansi(const data_dims_t *input_dims, + const int8_t *input_data, + const data_dims_t *filter_dims, + const int8_t *filter_data, + const int32_t *bias, + const data_dims_t *output_dims, + int8_t *out_data, + const conv_params_t *conv_params, + const quant_data_t *quant_data) +{ + const uint16_t input_wd = input_dims->width; + const uint16_t input_ht = input_dims->height; + const uint16_t in_channels = input_dims->channels; + const int32_t input_offset = conv_params->in_offset; + const int32_t out_offset = conv_params->out_offset; + const uint16_t pad_wd = conv_params->padding.width; + const uint16_t pad_ht = conv_params->padding.height; 
+ const uint16_t stride_wd = conv_params->stride.width; + const uint16_t stride_ht = conv_params->stride.height; + const uint16_t filter_wd = filter_dims->width; + const uint16_t filter_ht = filter_dims->height; + const uint16_t out_wd = output_dims->width; + const uint16_t out_ht = output_dims->height; + const uint16_t out_channels = output_dims->channels; + const int32_t *out_shift = quant_data->shift; + const int32_t *out_mult = quant_data->mult; + const int32_t activation_min = conv_params->activation.min; + const int32_t activation_max = conv_params->activation.max; + + int32_t out_ch_idx, out_y, out_x, in_ch_idx, filter_y_idx, filter_x_idx; + + for (out_y = 0; out_y < out_ht; out_y++) { + for (out_x = 0; out_x < out_wd; out_x++) { + for (out_ch_idx = 0; out_ch_idx < out_channels; out_ch_idx++) { + int32_t conv_out = 0; + + const int32_t base_y = stride_ht * out_y - pad_ht; + const int32_t base_x = stride_wd * out_x - pad_wd; + + const int32_t filter_y_start = max(0, -base_y); + const int32_t filter_x_start = max(0, -base_x); + + const int32_t filter_y_end = min(filter_ht, input_ht - base_y); + const int32_t filter_x_end = min(filter_wd, input_wd - base_x); + + for (filter_y_idx = filter_y_start; filter_y_idx < filter_y_end; filter_y_idx++) { + for (filter_x_idx = filter_x_start; filter_x_idx < filter_x_end; filter_x_idx++) { + const int32_t in_row = base_y + filter_y_idx; + const int32_t in_col = base_x + filter_x_idx; + int32_t input_base_offset = (in_row * input_wd + in_col) * in_channels; + int32_t filter_base_offset = out_ch_idx * in_channels * filter_ht * filter_wd + + (filter_y_idx * filter_wd + filter_x_idx) * in_channels; + for (in_ch_idx = 0; in_ch_idx < in_channels; in_ch_idx++) { + conv_out += + (input_data[input_base_offset + in_ch_idx] + input_offset) * + filter_data[filter_base_offset + in_ch_idx]; + } + } + } + if (bias) { + conv_out += bias[out_ch_idx]; + } + conv_out = esp_nn_multiply_by_quantized_mult(conv_out, out_mult[out_ch_idx], out_shift[out_ch_idx]); + conv_out += out_offset; + conv_out = max(conv_out, activation_min); + conv_out = min(conv_out, activation_max); + *out_data++ = (int8_t) conv_out; + } + } + } +} + +#endif // EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN diff --git a/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_conv_esp32s3.c b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_conv_esp32s3.c new file mode 100644 index 0000000..1ddf4ba --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_conv_esp32s3.c @@ -0,0 +1,273 @@ +#include "edge-impulse-sdk/classifier/ei_classifier_config.h" +#if EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN +/* + * SPDX-FileCopyrightText: 2020-2023 Espressif Systems (Shanghai) CO LTD + * + * SPDX-License-Identifier: Apache-2.0 + */ + +/** + * Optimizations strategies used: + * Below optimizations are capable of any size of input/filter: + * + * 1. For filter wdxht = 1x1 (Refer esp_nn_conv_s8_mult8_1x1_esp32s3 function) + * - For this specific version, the strategy we employ: + * > This particular filter has only the channel + * dimension and we have `out_ch` number of such filters. + * > We take 8 input lines at a time and transpose those. + * > Keep loading and multiplying filter values one by one, + * to produce 8 outputs in parallel + * + * 2. General version: (Refer esp_nn_conv_s8_filter_aligned_input_padded_esp32s3) + * - For all other cases: + * > Consider `filter_wd * in_ch` as a single row. 
These many values can + * be continuosly loaded from inputs as well. + * > multiply accumulate into a single filter output. + * > To speed things up further, we pre-calculate + * (filter * in_offset + bias term) earlier and add it at the end of filter + * + * About ((filter * in_offset + bias term)) accumulate term: + * > The conv operation before requantization is as follows: + * for i in filter_size: + * conv_out += (input + input_offset) * filter; + * conv_out += bias + * + * > where input_offset is constant term hence, we can see that + * this term can be precalculated as: + * for i in filter_size: + * acc_term += input_offset * filter[i]; + * acc_term += bias + * OR + * for i in filter_size: + * acc_term += filter[i]; // accumulate filter values + * acc_term = acc_term * input_offset + bias + * + * + * In both the above versions we align the filter if needed, pad the input with + * -input_offset if needed and extend the channels to make those multiple + * of 8/16 as per function needs + */ + +#include +#include + +#include + +static int16_t *scratch_buffer = NULL; + +extern void esp_nn_conv_s8_mult8_1x1_esp32s3( + const int8_t *input_data, + const uint16_t input_wd, + const uint16_t input_ht, + const uint16_t in_channels, + const int32_t input_offset, + const int8_t *filter_aligned, + const int32_t *bias, + int8_t *out_data, + const uint16_t out_wd, + const uint16_t out_ht, + const uint16_t out_channels, + const int32_t out_offset, + const int32_t *out_shift, + const int32_t *out_mult, + const int32_t activation_min, + const int32_t activation_max, + void *buffer /* scratch buffer */); + +extern void esp_nn_conv_s8_filter_aligned_input_padded_esp32s3( + const int8_t *input_data, + const uint16_t input_wd, + const uint16_t input_ht, + const uint16_t in_channels, + const int32_t input_offset, + const uint16_t stride_wd, + const uint16_t stride_ht, + const int8_t *filter_data, + const uint16_t filter_wd, + const uint16_t filter_ht, + const int32_t *bias, + int8_t *out_data, + const uint16_t out_wd, + const uint16_t out_ht, + const uint16_t out_channels, + const int32_t out_offset, + const int32_t *out_shift, + const int32_t *out_mult, + const int32_t activation_min, + const int32_t activation_max, + void *scratch_buffer); + +int esp_nn_get_conv_scratch_size_esp32s3(const data_dims_t *input_dims, + const data_dims_t *filter_dims, + const data_dims_t *output_dims, + const conv_params_t *conv_params) +{ + const uint16_t input_wd = input_dims->width; + const uint16_t input_ht = input_dims->height; + const uint16_t in_ch = input_dims->channels; + const uint16_t filter_wd = filter_dims->width; + const uint16_t filter_ht = filter_dims->height; + const uint16_t out_ch = output_dims->channels; + const uint16_t pad_wd = conv_params->padding.width; + const uint16_t pad_ht = conv_params->padding.height; + const uint16_t stride_wd = conv_params->stride.width; + const uint16_t stride_ht = conv_params->stride.height; + + int new_channels = (in_ch + 7) & ~7; + + int input_scratch = input_wd * input_ht * in_ch; + int filter_scratch = filter_wd * filter_ht * in_ch * out_ch; + + int align_buf_size = 32; /* extra buffer for alignment */ + if ((filter_wd == 1 && filter_ht == 1 && pad_wd == 0 && pad_ht == 0) && + (stride_wd == 1 && stride_ht == 1)) { + int transpose_buf_size = 2 * (8 * new_channels); /* to store intermediate data */ + if (input_wd * input_ht < 8) { + transpose_buf_size = 0; // not using this for leftover + } + if (in_ch % 8) { + input_scratch = input_wd * input_ht * new_channels; + } else { + 
input_scratch = 0; + } + filter_scratch = new_channels * out_ch; + return input_scratch + filter_scratch + transpose_buf_size + align_buf_size; + } else { + new_channels = (in_ch + 15) & ~15; + if (pad_wd == 0 && pad_ht == 0) { + input_scratch = 0; + } else { + input_scratch = (input_wd + 2 * pad_wd) * (input_ht + 2 * pad_ht) * in_ch; + } + filter_scratch = filter_wd * filter_ht * new_channels * out_ch; + int offset_acc_scratch = out_ch * 4; + return input_scratch + filter_scratch + align_buf_size + offset_acc_scratch; + } + return align_buf_size; +} + +void esp_nn_set_conv_scratch_buf_esp32s3(void *buf) +{ + scratch_buffer = (int16_t *) buf; +} + +void esp_nn_conv_s8_esp32s3(const data_dims_t *input_dims, + const int8_t *input, + const data_dims_t *filter_dims, + const int8_t *filter_data, + const int32_t *bias, + const data_dims_t *output_dims, + int8_t *out_data, + const conv_params_t *conv_params, + const quant_data_t *quant_data) +{ + if (scratch_buffer == NULL) { + printf("esp_nn_conv error! scratch_buffer not set!\n"); + return; + } + const uint16_t input_wd = input_dims->width; + const uint16_t input_ht = input_dims->height; + const uint16_t channels = input_dims->channels; + const int32_t input_offset = conv_params->in_offset; + const int32_t out_offset = conv_params->out_offset; + const uint16_t pad_wd = conv_params->padding.width; + const uint16_t pad_ht = conv_params->padding.height; + const uint16_t stride_wd = conv_params->stride.width; + const uint16_t stride_ht = conv_params->stride.height; + const uint16_t filter_wd = filter_dims->width; + const uint16_t filter_ht = filter_dims->height; + const uint16_t out_wd = output_dims->width; + const uint16_t out_ht = output_dims->height; + const uint16_t out_channels = output_dims->channels; + const int32_t *out_shift = quant_data->shift; + const int32_t *out_mult = quant_data->mult; + const int32_t activation_min = conv_params->activation.min; + const int32_t activation_max = conv_params->activation.max; + + int filter_size = filter_wd * filter_ht * channels * out_channels; + + if (filter_wd == 1 && filter_ht == 1 && pad_wd == 0 && pad_ht == 0 && + stride_wd == 1 && stride_ht == 1) { + + int8_t *input_aligned = (int8_t *) input; + int8_t *scratch_buf = (int8_t *) scratch_buffer; + int8_t *filter_aligned = (int8_t *) scratch_buffer; + int new_channels = channels; + if (channels % 8 == 0) { + if ((int) filter_data & 7) { // if the filter_data is not aligned to 8 bytes + int scratch_offset = (int) (filter_aligned + filter_size); + scratch_buf = (int8_t *) (scratch_offset + 16 - (scratch_offset & 15)); + memcpy(filter_aligned, filter_data, filter_size); // copy to aligned address + } else { + filter_aligned = (int8_t *) filter_data; + } + } else { + // pad extra channel to make it multiple of 8. 
Both input and filter + new_channels = (channels + 7) & ~7; + for (int out_ch_idx = 0; out_ch_idx < out_channels; out_ch_idx++) { + memcpy(filter_aligned, filter_data, channels); + memset(filter_aligned + channels, 0, new_channels - channels); + filter_aligned += new_channels; + filter_data += channels; + } + filter_aligned = (int8_t *) scratch_buffer; + int filter_data_size = new_channels * out_channels; + input_aligned = filter_aligned + filter_data_size; + for (int input_idx = 0; input_idx < input_ht * input_wd; input_idx++) { + memcpy(input_aligned, input, channels); + memset(input_aligned + channels, 0, new_channels - channels); + input_aligned += new_channels; + input += channels; + } + input_aligned = filter_aligned + filter_data_size; + scratch_buf = input_aligned + input_ht * input_wd * new_channels; + } + esp_nn_conv_s8_mult8_1x1_esp32s3( + input_aligned, input_wd, input_ht, new_channels, input_offset, + filter_aligned, bias, out_data, out_wd, out_ht, out_channels, out_offset, + out_shift, out_mult, activation_min, activation_max, scratch_buf); + } else { + // align the `filter width * channels` to 16 bytes. Do zero padding for the same + int32_t filter_row_size = filter_wd * channels; + int32_t filter_alignment_padding = 16 - (filter_row_size & 15); + int8_t *filter_data_aligned = (int8_t *) filter_data; + int8_t *input_padded = (int8_t *) input; + int8_t *scratch_data = (int8_t *) scratch_buffer; + int new_input_wd = input_wd, new_input_ht = input_ht; + if (filter_alignment_padding != 16) { + // pad filter_data + int32_t new_row_size = filter_wd * channels + filter_alignment_padding; + filter_data_aligned = scratch_data; + int8_t *row_ptr = filter_data_aligned; + for (int32_t ch_idx = 0; ch_idx < out_channels; ch_idx++) { + for (int32_t row_idx = 0; row_idx < filter_ht; row_idx++) { + memcpy(row_ptr, filter_data, filter_row_size); + memset(row_ptr + filter_row_size, 0, new_row_size - filter_row_size); + filter_data += filter_row_size; + row_ptr += new_row_size; + } + } + scratch_data += new_row_size * filter_ht * out_channels; + filter_row_size = new_row_size; + } else if ( (int) filter_data & 15) { + filter_data_aligned = scratch_data; + memcpy(filter_data_aligned, filter_data, filter_size); + scratch_data += filter_size; + } + if (pad_wd != 0 || pad_ht != 0) { // need padding + input_padded = (int8_t *) scratch_data; + esp_nn_aligned_s8_pad_with_value(input, input_padded, input_wd, input_ht, channels, + -input_offset, pad_wd, pad_ht); + new_input_wd = input_wd + 2 * pad_wd; + new_input_ht = input_ht + 2 * pad_ht; + scratch_data += new_input_wd * new_input_ht * channels; + } + esp_nn_conv_s8_filter_aligned_input_padded_esp32s3( + input_padded, new_input_wd, new_input_ht, channels, input_offset, + stride_wd, stride_ht, filter_data_aligned, filter_wd, filter_ht, + bias, out_data, out_wd, out_ht, out_channels, out_offset, + out_shift, out_mult, activation_min, activation_max, scratch_data); + } +} + +#endif // EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN diff --git a/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_conv_opt.c b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_conv_opt.c new file mode 100644 index 0000000..c1478ba --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_conv_opt.c @@ -0,0 +1,183 @@ +#include "edge-impulse-sdk/classifier/ei_classifier_config.h" +#if EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN +// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD +// +// Licensed 
under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include + +int esp_nn_get_conv_scratch_size_opt(const data_dims_t *input_dims, + const data_dims_t *filter_dims, + const data_dims_t *output_dims, + const conv_params_t *conv_params) +{ + return 0; +} + +void esp_nn_set_conv_scratch_buf_opt(const void *buf) +{ + +} + +__attribute__ ((noinline)) +static void esp_nn_conv_s8_1x1(const data_dims_t *input_dims, + const int8_t *input_data, + const int8_t *filter_data, + const int32_t *bias, + const data_dims_t *output_dims, + int8_t *out_data, + const conv_params_t *conv_params, + const quant_data_t *quant_data) +{ + const uint16_t input_wd = input_dims->width; + const uint16_t in_channels = input_dims->channels; + const int32_t input_offset = conv_params->in_offset; + const int32_t out_offset = conv_params->out_offset; + const uint16_t stride_wd = conv_params->stride.width; + const uint16_t stride_ht = conv_params->stride.height; + const uint16_t out_wd = output_dims->width; + const uint16_t out_ht = output_dims->height; + const uint16_t out_channels = output_dims->channels; + const int32_t activation_min = conv_params->activation.min; + const int32_t activation_max = conv_params->activation.max; + + for (int32_t in_row = 0; in_row < out_ht * stride_ht; in_row += stride_ht) { + for (int32_t in_col = 0; in_col < out_wd * stride_wd; in_col += stride_wd) { + const int32_t *out_mult = quant_data->mult; + const int32_t *out_shift = quant_data->shift; + const int8_t *filter_ptr = filter_data; + const int8_t *input_base_ptr = input_data + (in_row * input_wd + in_col) * in_channels; + int32_t out_ch_idx = 0; + for (; out_ch_idx < out_channels; out_ch_idx++) { + int32_t conv_out = 0; + + const int8_t *input_ptr = input_base_ptr; + + int32_t in_ch_idx = 0; + for (; in_ch_idx < in_channels - 3; in_ch_idx += 4) { + conv_out += (*input_ptr++ + input_offset) * *filter_ptr++; + conv_out += (*input_ptr++ + input_offset) * *filter_ptr++; + conv_out += (*input_ptr++ + input_offset) * *filter_ptr++; + conv_out += (*input_ptr++ + input_offset) * *filter_ptr++; + } + for (; in_ch_idx < in_channels; in_ch_idx ++) { + conv_out += (*input_ptr++ + input_offset) * *filter_ptr++; + } + if (bias) { + conv_out += bias[out_ch_idx]; + } + conv_out = esp_nn_multiply_by_quantized_mult_fast(conv_out, *out_mult++, *out_shift++); + conv_out += out_offset; + conv_out = max(conv_out, activation_min); + conv_out = min(conv_out, activation_max); + *out_data++ = (int8_t) conv_out; + } + } + } +} + +/** + * Assumption 1: i/p channels == o/p channels + * Assumption 2: Pointers are valid + * Assumption 3: dialation width = 1 + */ +void esp_nn_conv_s8_opt(const data_dims_t *input_dims, + const int8_t *input_data, + const data_dims_t *filter_dims, + const int8_t *filter_data, + const int32_t *bias, + const data_dims_t *output_dims, + int8_t *out_data, + const conv_params_t *conv_params, + const quant_data_t *quant_data) +{ + const uint16_t filter_wd = filter_dims->width; + const uint16_t filter_ht = filter_dims->height; + + if 
(filter_wd == 1 && filter_ht == 1) { + esp_nn_conv_s8_1x1(input_dims, input_data, filter_data, bias, + output_dims, out_data, conv_params, quant_data); + return; + } + + const uint16_t input_wd = input_dims->width; + const uint16_t input_ht = input_dims->height; + const uint16_t in_channels = input_dims->channels; + const int32_t input_offset = conv_params->in_offset; + const int32_t out_offset = conv_params->out_offset; + const uint16_t pad_wd = conv_params->padding.width; + const uint16_t pad_ht = conv_params->padding.height; + const uint16_t stride_wd = conv_params->stride.width; + const uint16_t stride_ht = conv_params->stride.height; + const uint16_t out_wd = output_dims->width; + const uint16_t out_ht = output_dims->height; + const uint16_t out_channels = output_dims->channels; + const int32_t activation_min = conv_params->activation.min; + const int32_t activation_max = conv_params->activation.max; + + int32_t out_ch_idx, out_y, out_x, filter_y_idx, filter_x_idx; + + for (out_y = 0; out_y < out_ht; out_y++) { + for (out_x = 0; out_x < out_wd; out_x++) { + const int32_t *out_shift = quant_data->shift; + const int32_t *out_mult = quant_data->mult; + for (out_ch_idx = 0; out_ch_idx < out_channels; out_ch_idx++) { + int32_t conv_out = 0; + + const int32_t base_y = stride_ht * out_y - pad_ht; + const int32_t base_x = stride_wd * out_x - pad_wd; + + const int32_t filter_y_start = max(0, -base_y); + const int32_t filter_x_start = max(0, -base_x); + + const int32_t filter_y_end = min(filter_ht, input_ht - base_y); + const int32_t filter_x_end = min(filter_wd, input_wd - base_x); + + for (filter_y_idx = filter_y_start; filter_y_idx < filter_y_end; filter_y_idx++) { + for (filter_x_idx = filter_x_start; filter_x_idx < filter_x_end; filter_x_idx++) { + const int32_t in_row = base_y + filter_y_idx; + const int32_t in_col = base_x + filter_x_idx; + + const int8_t *input_ptr = input_data + + (in_row * input_wd + in_col) * in_channels; + const int8_t *filter_ptr = filter_data + + out_ch_idx * in_channels * filter_ht * filter_wd + + (filter_y_idx * filter_wd + filter_x_idx) * in_channels; + int32_t in_ch_idx = 0; + for (; in_ch_idx < in_channels - 3; in_ch_idx += 4) { + conv_out += (*input_ptr++ + input_offset) * *filter_ptr++; + conv_out += (*input_ptr++ + input_offset) * *filter_ptr++; + conv_out += (*input_ptr++ + input_offset) * *filter_ptr++; + conv_out += (*input_ptr++ + input_offset) * *filter_ptr++; + } + for (; in_ch_idx < in_channels; in_ch_idx ++) { + conv_out += (*input_ptr++ + input_offset) * *filter_ptr++; + } + } + } + if (bias) { + conv_out += bias[out_ch_idx]; + } + conv_out = esp_nn_multiply_by_quantized_mult_fast(conv_out, *out_mult++, *out_shift++); + conv_out += out_offset; + conv_out = max(conv_out, activation_min); + conv_out = min(conv_out, activation_max); + *out_data++ = (int8_t) conv_out; + } + } + } +} + +#endif // EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN diff --git a/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_conv_s16_mult4_1x1_esp32s3.S b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_conv_s16_mult4_1x1_esp32s3.S new file mode 100644 index 0000000..50c00cc --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_conv_s16_mult4_1x1_esp32s3.S @@ -0,0 +1,358 @@ +#include "edge-impulse-sdk/classifier/ei_classifier_config.h" +#if EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3 +// Copyright 2021-2022 Espressif Systems (Shanghai) PTE LTD +// +// 
Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + .text + .literal_position + .literal .nudge_val, 1073741824 + + # Program Unit: esp_nn_conv_s16_mult4_1x1_esp32s3 + .type esp_nn_conv_s16_mult4_1x1_esp32s3, @function + .align 4 + .global esp_nn_conv_s16_mult4_1x1_esp32s3 +esp_nn_conv_s16_mult4_1x1_esp32s3: # 0xa62 + # scratch_buf = 0 + # to_add = 32 + # gra_spill_temp_139 = 36 + # gra_spill_temp_140 = 40 + # gra_spill_temp_141 = 44 + # gra_spill_temp_155 = 48 + # gra_spill_temp_156 = 52 + # gra_spill_temp_144 = 56 + # gra_spill_temp_145 = 60 + # gra_spill_temp_146 = 64 + # gra_spill_temp_147 = 68 + # gra_spill_temp_148 = 72 + # gra_spill_temp_149 = 76 + # gra_spill_temp_150 = 80 + # gra_spill_temp_151 = 84 + # gra_spill_temp_152 = 88 + # gra_spill_temp_153 = 92 + # lgra_spill_temp_165 = 96 + # lgra_spill_temp_166 = 100 + # lgra_spill_temp_167 = 104 + # lgra_spill_temp_168 = 108 + # gra_spill_temp_158 = 112 + # gra_spill_temp_159 = 116 + # gra_spill_temp_160 = 120 + + + // registers: + // a2: int16_t *input_data + // a3: uint16_t input_wd + // a4: uint16_t input_ht + // a5: uint16_t in_channels + // a6: int16_t *filter_data + // a7: int32_t *bias + + // on stack: + // 160: int8_t *out_data + // 164: uint16_t out_wd + // 168: uint16_t out_ht + // 172: uint16_t out_channels + // 176: int32_t out_offset + // 180: int32_t *out_shift + // 184: int32_t *out_mult + // 188: int32_t activation_min + // 192: int32_t activation_max + // 196: *buffer /* scratch buffer */ + + + entry a1,160 # + s32i.n a2,a1,40 # [0] gra_spill_temp_140 + s32i a6,a1,68 # [1] gra_spill_temp_147 + s32i a7,a1,116 # [2] gra_spill_temp_159 + + mul16u a3,a3,a4 # [3] + addi a10,a1,112 # [4] + addmi a11,a1,176 # [5] + addmi a8,a1,176 # [6] + addmi a9,a1,176 # [7] + addi.n a9,a9,12 # [8] + addi a8,a8,16 # [9] + ee.vldbc.32 q5,a11 # [10] id:188 out_offset + ee.vldbc.32 q7,a8 # [12] id:270 activation_max + ee.vldbc.32 q6,a9 # [13] id:269 activation_min + blti a3,4,.Lt_3_6402 # [14] + +.LBB3_esp_nn_conv_s16_mult4_1x1_esp32s3: # 0xa90 + l32i a13,a1,160 # [0] id:280 out_data+0x0 + srai a8,a5,2 # [1] + addi a10,a3,-3 # [2] + addi a9,a5,-3 # [3] + movi.n a12,0 # [4] + slli a11,a5,2 # [5] + slli a15,a5,1 # [6] + l16ui a14,a1,172 # [7] id:271 out_channels+0x0 + s32i.n a15,a1,36 # [9] gra_spill_temp_139 + s32i.n a11,a1,56 # [10] gra_spill_temp_144 + s32i a12,a1,84 # [11] gra_spill_temp_151 + s32i a9,a1,52 # [12] gra_spill_temp_156 + s32i.n a10,a1,60 # [13] gra_spill_temp_145 + s32i a8,a1,88 # [14] gra_spill_temp_152 + movi.n a10,0 # [15] + l32i a8,a1,196 # [16] id:281 buffer+0x0 + slli a11,a11,1 # [19] + l32i a15,a1,184 # [20] id:192 out_mult+0x0 + s32i a11,a1,64 # [22] gra_spill_temp_146 + s32i a8,a1,112 # [25] gra_spill_temp_158 + s32i a10,a1,92 # [26] gra_spill_temp_153 + movi.n a8,0 # [27] + s32i a10,a1,80 # [31] gra_spill_temp_150 + s32i a8,a1,76 # [32] gra_spill_temp_149 + slli a8,a14,1 # [34] + addx2 a9,a14,a14 # [35] + s32i a9,a1,72 # [36] gra_spill_temp_148 + s32i.n a8,a1,44 # [37] gra_spill_temp_141 + addx4 a14,a14,a15 # [38] + 
s32i a14,a1,48 # [39] gra_spill_temp_155 + j .Lt_3_6914 # [40] + +.Lt_3_8194: # 0xb00 +# Part of loop body line 305, head labeled .Lt_3_6914 + l32i.n a12,a1,60 # [0] gra_spill_temp_145 + l32i.n a9,a1,56 # [1] gra_spill_temp_144 + l32i a8,a1,76 # [2] gra_spill_temp_149 + l32i a15,a1,64 # [3] gra_spill_temp_146 + l32i a11,a1,72 # [4] gra_spill_temp_148 + l32i a14,a1,84 # [5] gra_spill_temp_151 + add.n a13,a13,a11 # [6] + l32i a11,a1,80 # [7] gra_spill_temp_150 + add.n a14,a14,a15 # [8] + add.n a8,a8,a9 # [9] + s32i a8,a1,76 # [10] gra_spill_temp_149 + s32i a14,a1,84 # [11] gra_spill_temp_151 + addi.n a11,a11,4 # [12] + s32i a11,a1,80 # [13] gra_spill_temp_150 + bge a11,a12,.Lt_3_6402 # [14] + +.Lt_3_6914: # 0xb27 + l32i a12,a1,52 # [0] gra_spill_temp_156 + l32i a4,a1,112 # [1] gra_spill_temp_158 + blti a12,1,.Lt_3_7170 # [2] + +.LBB6_esp_nn_conv_s16_mult4_1x1_esp32s3: # 0xb30 + l32i a3,a1,88 # [0] gra_spill_temp_152 + l32i.n a5,a1,40 # [1] gra_spill_temp_140 + l32i a2,a1,84 # [3] gra_spill_temp_151 + add.n a2,a2,a5 # [7] + l32i.n a5,a1,36 # [9] gra_spill_temp_139 + + // load and transose 4 lines of input 4xchannels, + loopgtz a3,.transpose_loop_end + mov.n a3,a2 # [0*II+0] + ee.vld.l.64.xp q0,a3,a5 # [0*II+2] id:282 + ee.vld.l.64.xp q1,a3,a5 # [0*II+3] id:283 + ee.vld.l.64.xp q2,a3,a5 # [0*II+4] id:284 + ee.vld.l.64.xp q3,a3,a5 # [0*II+5] id:285 + ee.vzip.16 q0,q1 # [0*II+6] + ee.vzip.16 q2,q3 # [0*II+7] + ee.vzip.32 q0,q2 # [0*II+8] + ee.vst.128.ip q0,a4,16 # [0*II+9] id:286 + ee.vst.128.ip q2,a4,16 # [0*II+10] id:287 + addi.n a2,a2,8 # [0*II+1] +.transpose_loop_end: + +.Lt_3_7170: # 0xb7c + l32i a2,a1,68 # [0] gra_spill_temp_147 + l32i a9,a1,116 # [1] gra_spill_temp_159 + l16ui a8,a1,172 # [2] out_channels + s32i a9,a1,120 # [3] gra_spill_temp_160 + beqz.n a8,.Lt_3_8194 # [4] + + l32i a9,a1,180 # [0] out_shift + l32i a11,a1,184 # [1] out_mult + l32i a15,a1,72 # [2] gra_spill_temp_148 + l32i.n a14,a1,44 # [3] gra_spill_temp_141 + add.n a15,a15,a13 # [4] + add.n a14,a14,a13 # [5] + j .Lt_3_8706 # [6] + +.Lt_3_10754: # 0xb9a + + movi.n a3,0 # [0] + +.Lt_3_10498: # 0xb9c + +// esp_nn_multiply_by_quantized_mult_esp32s3 + ee.zero.q q0 # [0] + l32i a5,a1,92 # [1] gra_spill_temp_153 + s32i a2,a1,96 # [2] lgra_spill_temp_165 + s32i a11,a1,104 # [3] lgra_spill_temp_167 + s32i a13,a1,108 # [4] lgra_spill_temp_168 + s32i a9,a1,100 # [5] lgra_spill_temp_166 + + movi.n a13,0 # [6] + max a12,a12,a13 # [7] + wsr.sar a12 # [8] + ee.vsl.32 q1,q1 # [9] + ssai 31 # [10] + ee.movi.32.a q1,a7,0 # [11] + ee.movi.32.a q1,a8,1 # [12] + ee.movi.32.a q1,a6,3 # [13] + ee.movi.32.a q1,a9,2 # [14] + mulsh a12,a4,a9 # [15] + mulsh a11,a4,a6 # [16] + mulsh a2,a4,a8 # [17] + mulsh a13,a7,a4 # [18] + mull a8,a4,a8 # [19] + mull a7,a7,a4 # [20] + mull a6,a4,a6 # [24] + + add.n a11,a5,a11 # [21] + add.n a12,a5,a12 # [22] + add.n a2,a5,a2 # [23] + add.n a5,a5,a13 # [25] + + l32r a13,.nudge_val + mull a9,a4,a9 # [27] + + add.n a6,a13,a6 # [28] + add.n a9,a13,a9 # [29] + add.n a10,a13,a7 # [30] + add.n a8,a13,a8 # [32] + + saltu a7,a10,a13 # [33] + add.n a7,a7,a5 # [34] + saltu a5,a8,a13 # [35] + add.n a5,a5,a2 # [36] + src a5,a5,a8 # [37] + saltu a2,a9,a13 # [38] + add.n a2,a2,a12 # [40] + saltu a13,a6,a13 # [41] + addi.n a12,a3,-1 # [42] + src a2,a2,a9 # [43] + ee.movi.32.q q3,a5,1 # [51] + ee.movi.32.q q3,a2,2 # [54] + + add.n a13,a13,a11 # [44] + addi a9,a1,32 # [45] to_add + movi.n a11,1 # [46] + src a7,a7,a10 # [47] + src a13,a13,a6 # [48] + ee.movi.32.q q3,a7,0 # [50] + ee.movi.32.q q3,a13,3 # [57] + + addi a8,a1,112 # 
[49] + + l32i a7,a1,48 # [52] gra_spill_temp_155 + l16ui a5,a1,172 # [53] out_channels + ssl a12 # [55] + sll a11,a11 # [56] + wsr.sar a3 # [58] + ee.vcmp.lt.s32 q0,q3,q0 # [59] + l32i a13,a1,108 # [60] lgra_spill_temp_168 + s32i.n a11,a1,32 # [61] to_add + ee.vldbc.32 q1,a9 # [62] id:317 to_add + add.n a5,a5,a13 # [63] + l32i a9,a1,100 # [64] lgra_spill_temp_166 + ee.vadds.s32 q1,q1,q0 # [65] + addi.n a9,a9,4 # [66] + ee.vadds.s32 q1,q3,q1 # [67] + ee.vsr.32 q1,q1 # [69] + +# add offset, apply activation and store + ee.vadds.s32 q1,q1,q5 # [70] + ee.vmin.s32 q1,q1,q7 # [72] + ee.vmax.s32 q1,q1,q6 # [73] + ee.vst.128.ip q1,a1,0 # [74] id:320 + l8ui a6,a1,0 # [75] scratch_buf + s8i a6,a13,0 # [76] + addi.n a13,a13,1 # [77] + l8ui a2,a1,4 # [78] scratch_buf+4 + s8i a2,a5,0 # [79] + l8ui a12,a1,8 # [80] scratch_buf+8 + l32i a2,a1,96 # [81] lgra_spill_temp_165 + s8i a12,a14,0 # [82] + addi.n a14,a14,1 # [83] + l8ui a11,a1,12 # [84] scratch_buf+12 + s8i a11,a15,0 # [85] + l32i a11,a1,104 # [86] lgra_spill_temp_167 + addi.n a15,a15,1 # [87] + addi.n a11,a11,4 # [88] + sub a7,a11,a7 # [89] + beqz a7,.Lt_3_8194 # [90] + +.Lt_3_8706: # 0xc97 + ee.zero.qacc # [0] + l32i a8,a1,52 # [1] gra_spill_temp_156 + l32i a3,a1,112 # [2] gra_spill_temp_158 + blti a8,1,.Lt_3_8962 # [3] + + l32i a4,a1,88 # [0] gra_spill_temp_152 + loopgtz a4,.LBB53_esp_nn_conv_s16_mult4_1x1_esp32s3 # [2] + + ee.vld.l.64.ip q0,a2,8 # [0*II+0] id:289 + ee.vld.l.64.ip q1,a3,8 # [0*II+1] id:290 + ee.vld.l.64.ip q2,a3,8 # [0*II+2] id:291 + ee.vsmulas.s16.qacc q1,q0,0 # [0*II+3] + ee.vld.l.64.ip q3,a3,8 # [0*II+4] id:292 + ee.vsmulas.s16.qacc q2,q0,1 # [0*II+5] + ee.vld.l.64.ip q4,a3,8 # [0*II+6] id:293 + ee.vsmulas.s16.qacc q3,q0,2 # [0*II+7] + ee.vsmulas.s16.qacc q4,q0,3 # [0*II+8] + +.LBB53_esp_nn_conv_s16_mult4_1x1_esp32s3: # 0xcc4 + +.Lt_3_8962: # 0xcc4 + +// extract data: + mov a10,a1 + ee.st.qacc_l.l.128.ip a10,16 # [0] id:298 + ee.st.qacc_l.h.32.ip a10,-16 # [1] id:299 + l8ui a12,a1,16 # [2] scratch_buf+16 + l8ui a8,a1,6 # [3] scratch_buf+6 + s8i a8,a1,3 # [4] scratch_buf+3 + s8i a12,a1,7 # [5] scratch_buf+7 + l8ui a8,a1,15 # [6] scratch_buf+15 + l8ui a12,a1,5 # [7] scratch_buf+5 + s8i a12,a1,2 # [8] scratch_buf+2 + s8i a8,a1,6 # [9] scratch_buf+6 + l16ui a12,a1,10 # [10] scratch_buf+10 + movi.n a8,16 # [11] + ee.srcmb.s16.qacc q2,a8,0 # [12] + s16i a12,a1,4 # [13] scratch_buf+4 + ee.vld.l.64.ip q1,a10,0 # [14] id:309 + l32i a12,a1,116 # [15] gra_spill_temp_159, bias + ee.vzip.16 q1,q2 # [16] + + beqz.n a12,.Lt_3_9986 # [17] // skip bias + // add bias: + l32i a8,a1,120 # [0] gra_spill_temp_160 + ee.vldbc.32.ip q0,a8,4 # [2] id:311 + s32i a8,a1,120 # [3] gra_spill_temp_160 + ee.vadds.s32 q1,q1,q0 # [4] +.Lt_3_9986: # 0xd04 + + l32i.n a12,a9,0 # [0] id:313 + l32i.n a4,a11,0 # [1] id:312 + bgei a12,1,.Lt_3_10754 # [2] + + neg a3,a12 # [0] + j .Lt_3_10498 # [1] + +.Lt_3_6402: # 0xd11 + retw.n # [0] + + .size esp_nn_conv_s16_mult4_1x1_esp32s3, . 
- esp_nn_conv_s16_mult4_1x1_esp32s3 + +#elif defined(WIO_TERMINAL) +// dummy code, added for old ARM toolchain +.syntax unified +.thumb +.cpu cortex-m0 + +.section .text +#endif // EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3 diff --git a/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_conv_s16_mult8_esp32s3.S b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_conv_s16_mult8_esp32s3.S new file mode 100644 index 0000000..4c49f80 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_conv_s16_mult8_esp32s3.S @@ -0,0 +1,489 @@ +#include "edge-impulse-sdk/classifier/ei_classifier_config.h" +#if EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3 +// Copyright 2021-2022 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + .text + .literal_position + .literal .LC10_28_153, -2147483648 + .literal .LC11_28_154, -1073741823 + .literal .LC12_28_155, 2147483647 + .literal .LC13_28_156, 1073741824 + + # Program Unit: esp_nn_conv_s16_mult8_esp32s3 + .type esp_nn_conv_s16_mult8_esp32s3, @function + .align 4 + .global esp_nn_conv_s16_mult8_esp32s3 +esp_nn_conv_s16_mult8_esp32s3: # 0x6e2 + # qacc_scratch = 0 + # gra_spill_temp_96 = 48 + # gra_spill_temp_97 = 52 + # gra_spill_temp_98 = 56 + # gra_spill_temp_99 = 60 + # gra_spill_temp_100 = 64 + # gra_spill_temp_101 = 68 + # gra_spill_temp_102 = 72 + # gra_spill_temp_103 = 76 + # gra_spill_temp_104 = 80 + # gra_spill_temp_105 = 84 + # gra_spill_temp_106 = 88 + # gra_spill_temp_107 = 92 + # gra_spill_temp_108 = 96 + # gra_spill_temp_109 = 100 + # gra_spill_temp_110 = 104 + # gra_spill_temp_111 = 108 + # gra_spill_temp_112 = 112 + # gra_spill_temp_113 = 116 + # gra_spill_temp_114 = 120 + # gra_spill_temp_115 = 124 + # gra_spill_temp_116 = 128 + # gra_spill_temp_117 = 132 + # gra_spill_temp_118 = 136 + # gra_spill_temp_119 = 140 + # gra_spill_temp_120 = 144 + # gra_spill_temp_121 = 148 + # gra_spill_temp_122 = 152 + # gra_spill_temp_123 = 156 + # gra_spill_temp_124 = 160 + # gra_spill_temp_125 = 164 + # gra_spill_temp_126 = 168 + # gra_spill_temp_127 = 172 + # gra_spill_temp_128 = 176 + # gra_spill_temp_129 = 180 + # gra_spill_temp_130 = 184 + # gra_spill_temp_131 = 188 + # gra_spill_temp_132 = 192 + # gra_spill_temp_133 = 196 + # gra_spill_temp_134 = 200 + # gra_spill_temp_135 = 204 + # gra_spill_temp_136 = 208 + # gra_spill_temp_137 = 212 + + // registers: + // a2: const int16_t *input_data + // a3: const uint16_t input_wd + // a4: const uint16_t input_ht + // a5: const uint16_t in_channels + // a6: const uint16_t pad_wd + // a7: const uint16_t pad_ht + + // on stack: + // const uint16_t stride_wd + // const uint16_t stride_ht + // const int16_t *filter_data + // const uint16_t filter_wd + // const uint16_t filter_ht + // const int32_t *bias + // int8_t *out_data + // const uint16_t out_wd + // const uint16_t out_ht + // const uint16_t out_channels + // const 
int32_t out_offset + // const int32_t *out_shift + // const int32_t *out_mult + // const int32_t activation_min + // const int32_t activation_max + + + entry a1,256 # + s32i a2,a1,176 # [0] gra_spill_temp_128 + s32i a3,a1,192 # [1] gra_spill_temp_132 + s32i.n a6,a1,60 # [2] gra_spill_temp_99 + l16ui a8,a1,288 # [3] id:282 out_ht+0x0 + s32i a8,a1,68 # [4] gra_spill_temp_101 + beqz.n a8,.Lt_2_11778 # [5] + + s32i a7,a1,76 # [0] gra_spill_temp_103 + s32i a1,a1,156 # [1] gra_spill_temp_123 + l16ui a8,a1,272 # [2] id:285 filter_ht+0x0 + neg a11,a7 # [3] + movi.n a12,0 # [4] + neg a14,a6 # [5] + l16ui a15,a1,268 # [6] id:286 filter_wd+0x0 + l16ui a9,a1,292 # [7] id:283 out_channels+0x0 + l32i a10,a1,304 # [8] id:284 out_mult+0x0 + s32i a10,a1,88 # [9] gra_spill_temp_106 + s32i a9,a1,96 # [10] gra_spill_temp_108 + s32i a15,a1,196 # [11] gra_spill_temp_133 + s32i.n a14,a1,48 # [12] gra_spill_temp_96 + s32i a12,a1,72 # [13] gra_spill_temp_102 + s32i a11,a1,80 # [14] gra_spill_temp_104 + s32i.n a8,a1,52 # [15] gra_spill_temp_97 + sub a13,a3,a14 # [16] + mul16u a8,a5,a8 # [17] + s32i.n a13,a1,56 # [18] gra_spill_temp_98 + sub a11,a4,a11 # [19] + l32i a12,a1,276 # [20] id:292 bias+0x0 + s32i a12,a1,152 # [21] gra_spill_temp_122 + s32i a11,a1,84 # [22] gra_spill_temp_105 + l32i a14,a1,308 # [23] id:290 activation_min+0x0 + l32i a13,a1,312 # [24] id:291 activation_max+0x0 + s32i a13,a1,144 # [25] gra_spill_temp_120 + mull a15,a15,a8 # [26] + addx4 a9,a9,a10 # [27] + s32i a14,a1,140 # [28] gra_spill_temp_119 + l32i a11,a1,300 # [29] id:293 out_shift+0x0 + s32i a11,a1,92 # [30] gra_spill_temp_107 + slli a14,a5,1 # [31] + s32i a9,a1,124 # [32] gra_spill_temp_115 + s32i a15,a1,128 # [33] gra_spill_temp_116 + l32i a8,a1,280 # [34] id:288 out_data+0x0 + movi.n a10,0 # [35] + s32i a10,a1,160 # [36] gra_spill_temp_124 + s32i a8,a1,132 # [37] gra_spill_temp_117 + l32i a15,a1,296 # [38] id:289 out_offset+0x0 + l32i a9,a1,264 # [39] id:287 filter_data+0x0 + s32i a9,a1,180 # [40] gra_spill_temp_129 + s32i a15,a1,136 # [41] gra_spill_temp_118 + l16ui a8,a1,284 # [42] id:296 out_wd+0x0 + l16ui a10,a1,256 # [43] id:294 stride_wd+0x0 + s32i a10,a1,100 # [44] gra_spill_temp_109 + s32i a8,a1,104 # [45] gra_spill_temp_110 + addi.n a15,a5,-1 # [46] + l16ui a9,a1,260 # [47] id:295 stride_ht+0x0 + s32i a9,a1,64 # [48] gra_spill_temp_100 + srai a15,a15,3 # [49] + j .Lt_2_12290 # [50] + +.Lt_2_12546: # 0x788 + l32i a8,a1,68 # [0] gra_spill_temp_101 + l32i a12,a1,80 # [1] gra_spill_temp_104 + l32i a11,a1,84 # [2] gra_spill_temp_105 + l32i a10,a1,64 # [3] gra_spill_temp_100 + l32i a13,a1,72 # [4] gra_spill_temp_102 + l32i a9,a1,76 # [5] gra_spill_temp_103 + addi.n a13,a13,1 # [6] + s32i a13,a1,72 # [7] gra_spill_temp_102 + sub a9,a9,a10 # [8] + sub a11,a11,a10 # [9] + add.n a12,a12,a10 # [10] + s32i a12,a1,80 # [11] gra_spill_temp_104 + s32i a11,a1,84 # [12] gra_spill_temp_105 + s32i a9,a1,76 # [13] gra_spill_temp_103 + sub a13,a13,a8 # [14] + beqz a13,.Lt_2_11778 # [15] + +.Lt_2_12290: # 0x7b6 // width loop + l32i a13,a1,104 # [0] gra_spill_temp_110 + beqz.n a13,.Lt_2_12546 # [2] + + l32i a8,a1,192 # [0] gra_spill_temp_132 + l32i a9,a1,80 # [1] gra_spill_temp_104 + movi.n a11,0 # [2] + l32i a10,a1,76 # [3] gra_spill_temp_103 + l32i.n a12,a1,60 # [4] gra_spill_temp_99 + l32i.n a13,a1,56 # [5] gra_spill_temp_98 + s32i a13,a1,116 # [6] gra_spill_temp_113 + s32i a12,a1,112 # [7] gra_spill_temp_112 + max a10,a10,a11 # [8] + s32i a10,a1,148 # [9] gra_spill_temp_121 + add.n a9,a9,a10 # [10] + l32i.n a11,a1,48 # [11] gra_spill_temp_96 
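+ // Note (hedged interpretation): the max-with-zero just above and the min written
+ // to gra_spill_temp_135 a few instructions below appear to clip the filter window
+ // against the padded input, i.e. filter_y_start = max(0, -base_y) and
+ // filter_y_end = min(filter_ht, input_ht - base_y), matching the ANSI C reference
+ // kernels elsewhere in this patch.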
+ s32i a11,a1,184 # [12] gra_spill_temp_130 + mull a8,a8,a9 # [13] + l32i a10,a1,84 # [14] gra_spill_temp_105 + s32i a8,a1,120 # [15] gra_spill_temp_114 + l32i.n a9,a1,52 # [16] gra_spill_temp_97 + movi.n a8,0 # [17] + s32i a8,a1,108 # [18] gra_spill_temp_111 + min a9,a9,a10 # [19] + s32i a9,a1,204 # [20] gra_spill_temp_135 + j .Lt_2_13058 # [21] + +.Lt_2_13314: # 0x7f6 +# Part of loop body line 186, head labeled .Lt_2_13058 + l32i a13,a1,104 # [0] gra_spill_temp_110 + l32i a11,a1,112 # [1] gra_spill_temp_112 + l32i a10,a1,184 # [2] gra_spill_temp_130 + l32i a9,a1,100 # [3] gra_spill_temp_109 + l32i a12,a1,108 # [4] gra_spill_temp_111 + l32i a8,a1,116 # [5] gra_spill_temp_113 + addi.n a12,a12,1 # [6] + s32i a12,a1,108 # [7] gra_spill_temp_111 + sub a8,a8,a9 # [8] + add.n a10,a10,a9 # [9] + sub a11,a11,a9 # [10] + s32i a11,a1,112 # [11] gra_spill_temp_112 + s32i a10,a1,184 # [12] gra_spill_temp_130 + s32i a8,a1,116 # [13] gra_spill_temp_113 + beq a12,a13,.Lt_2_12546 # [14] + +.Lt_2_13058: # 0x821 // channel loop + l32i a12,a1,96 # [0] gra_spill_temp_108 + beqz.n a12,.Lt_2_13314 # [2] + + movi.n a11,0 # [0] + l32i a10,a1,112 # [1] gra_spill_temp_112 + l32i a13,a1,92 # [2] gra_spill_temp_107 + l32i a8,a1,152 # [3] gra_spill_temp_122 + movi.n a9,0 # [4] + l32i a12,a1,88 # [5] gra_spill_temp_106 + s32i a12,a1,168 # [6] gra_spill_temp_126 + s32i a9,a1,188 # [7] gra_spill_temp_131 + s32i a8,a1,164 # [8] gra_spill_temp_125 + s32i a13,a1,172 # [9] gra_spill_temp_127 + l32i a8,a1,116 # [10] gra_spill_temp_113 + l32i a13,a1,196 # [11] gra_spill_temp_133 + max a10,a10,a11 # [12] + s32i a10,a1,208 # [13] gra_spill_temp_136 + min a13,a13,a8 # [14] + s32i a13,a1,200 # [15] gra_spill_temp_134 + j .Lt_2_13826 # [16] + +.Lt_2_14082: # 0x857 + +// extract data + l32i a4,a1,156 # [0] gra_spill_temp_123 + ee.st.qacc_l.l.128.ip a4,16 # [2] id:303 + ee.st.qacc_l.h.32.ip a4,0 # [3] id:304 + l8ui a9,a1,15 # [4] qacc_scratch+15 + l16ui a8,a1,10 # [5] qacc_scratch+10 + l8ui a12,a1,16 # [6] qacc_scratch+16 + l8ui a11,a1,6 # [7] qacc_scratch+6 + l8ui a10,a1,5 # [8] qacc_scratch+5 + s8i a10,a1,2 # [9] qacc_scratch+2 + s8i a11,a1,3 # [10] qacc_scratch+3 + s8i a12,a1,7 # [11] qacc_scratch+7 + s16i a8,a1,4 # [12] qacc_scratch+4 + s8i a9,a1,6 # [13] qacc_scratch+6 + + ee.st.qacc_h.l.128.ip a4,16 # [14] id:314 + ee.st.qacc_h.h.32.ip a4,-32 # [15] id:315 + l8ui a13,a1,32 # [16] qacc_scratch+32 + l8ui a9,a1,21 # [17] qacc_scratch+21 + l8ui a12,a1,31 # [18] qacc_scratch+31 + l16ui a11,a1,26 # [19] qacc_scratch+26 + l8ui a10,a1,22 # [20] qacc_scratch+22 + l16ui a8,a1,16 # [21] qacc_scratch+16 + s16i a8,a1,8 # [22] qacc_scratch+8 + s8i a10,a1,11 # [23] qacc_scratch+11 + s16i a11,a1,12 # [24] qacc_scratch+12 + s8i a12,a1,14 # [25] qacc_scratch+14 + s8i a9,a1,10 # [26] qacc_scratch+10 + s8i a13,a1,15 # [27] qacc_scratch+15 + + l32i a9,a1,152 # [28] gra_spill_temp_122, bias + movi.n a13,16 # [29] + ee.srcmb.s16.qacc q1,a13,0 # [30] + ee.vld.128.ip q0,a4,0 # [31] id:327 + s32i a4,a1,156 # [32] gra_spill_temp_123 + ee.vzip.16 q0,q1 # [33] + ee.vadds.s32 q0,q0,q1 # [34] + ee.movi.32.a q0,a12,3 # [35] + ee.movi.32.a q0,a11,2 # [36] + ee.movi.32.a q0,a10,0 # [37] + add.n a11,a11,a12 # [38] + ee.movi.32.a q0,a12,1 # [39] + add.n a10,a10,a12 # [40] + add.n a10,a10,a11 # [41] + + beqz.n a9,.Lt_2_17154 # [42] // skip bias + + l32i a13,a1,164 # [0] gra_spill_temp_125 + l32i.n a13,a13,0 # [2] id:329 + add.n a10,a10,a13 # [4] +.Lt_2_17154: # 0x8d7 + + # 259 conv_out = esp_nn_multiply_by_quantized_mult(conv_out, out_mult[out_ch_idx], 
out_shift[out_ch_idx]); + l32i a11,a1,172 # [0] gra_spill_temp_127 + l32i a4,a1,168 # [1] gra_spill_temp_126 + l32i.n a11,a11,0 # [2] id:331 + l32i.n a4,a4,0 # [3] id:330 + + blti a11,1,.LBB26_esp_nn_conv_s16_mult8_esp32s3 # [4] + movi.n a13,0 # [0] + j .Lt_2_17666 # [1] +.LBB26_esp_nn_conv_s16_mult8_esp32s3: # 0xa4e + neg a13,a11 # [0] +.Lt_2_17666: # 0x8e6 + + movi.n a12,0 # [0] + max a12,a11,a12 # [1] + movi.n a11,0 # [2] + ssl a12 # [3] + sll a10,a10 # [4] + bne a10,a4,.Lt_2_20994 # [5] + + l32r a9,.LC10_28_153 # [0] + movi.n a8,1 # [1] + sub a9,a10,a9 # [2] + moveqz a11,a8,a9 # [3] + +.Lt_2_20994: # 0x901 + extui a8,a4,31,1 # [0] + extui a12,a10,31,1 # [1] + xor a12,a12,a8 # [2] + extui a12,a12,0,8 # [3] + + beqz.n a12,.Lt_2_18434 # [4] + movi.n a12,-1 # [0] + l32r a9,.LC11_28_154 # [1] + j .Lt_2_18178 # [2] + +.Lt_2_18434: # 0xa54 + movi.n a12,0 # [0] + l32r a9,.LC13_28_156 # [1] +.Lt_2_18178: # 0x914 + + ssai 31 # [0] + l32r a8,.LC12_28_155 # [1] + mulsh a6,a4,a10 # [2] + mull a4,a4,a10 # [3] + add.n a6,a6,a12 # [4] + add.n a7,a4,a9 # [5] + saltu a4,a7,a4 # [6] + add.n a4,a4,a6 # [7] + srai a6,a4,31 # [8] + and a6,a6,a8 # [9] + add.n a7,a6,a7 # [10] + srai a3,a6,31 # [11] + add.n a3,a3,a4 # [12] + saltu a6,a7,a6 # [13] + add.n a6,a6,a3 # [14] + src a6,a6,a7 # [15] + extui a3,a11,0,8 # [16] + movi.n a7,1 # [17] + ssr a13 # [18] + movnez a6,a8,a3 # [19] + sra a8,a6 # [20] + + addi.n a3,a8,1 # [21] + ssl a13 # [22] + sll a7,a7 # [23] + extui a4,a8,31,1 # [24] + addi.n a7,a7,-1 # [25] + and a6,a6,a7 # [26] + srai a7,a7,1 # [27] + add.n a4,a4,a7 # [28] + l32i a7,a1,164 # [29] gra_spill_temp_125 + salt a4,a4,a6 # [30] + movnez a8,a3,a4 # [31] + l32i a6,a1,172 # [32] gra_spill_temp_127 + l32i a4,a1,132 # [33] gra_spill_temp_117 + l32i a3,a1,160 # [34] gra_spill_temp_124 + addi.n a7,a7,4 # [35] + s32i a7,a1,164 # [36] gra_spill_temp_125 + addi.n a6,a6,4 # [37] + s32i a6,a1,172 # [38] gra_spill_temp_127 + l32i a7,a1,136 # [39] gra_spill_temp_118 + l32i a6,a1,140 # [40] gra_spill_temp_119 + add.n a4,a3,a4 # [41] + add.n a7,a7,a8 # [42] + addi.n a3,a3,1 # [43] + l32i a8,a1,128 # [44] gra_spill_temp_116 + max a6,a6,a7 # [45] + s32i a3,a1,160 # [46] gra_spill_temp_124 + l32i a7,a1,188 # [47] gra_spill_temp_131 + l32i a3,a1,144 # [48] gra_spill_temp_120 + add.n a7,a7,a8 # [49] + min a3,a3,a6 # [50] + s8i a3,a4,0 # [51] id:332 + s32i a7,a1,188 # [52] gra_spill_temp_131 + l32i a4,a1,168 # [53] gra_spill_temp_126 + l32i a6,a1,124 # [54] gra_spill_temp_115 + addi.n a4,a4,4 # [55] + s32i a4,a1,168 # [56] gra_spill_temp_126 + sub a4,a4,a6 # [57] + beqz a4,.Lt_2_13314 # [58] + +.Lt_2_13826: # 0x9b4 + ee.zero.qacc # [0] + l32i a9,a1,204 # [1] gra_spill_temp_135 + l32i a8,a1,148 # [2] gra_spill_temp_121 + s32i a8,a1,212 # [3] gra_spill_temp_137 + bge a8,a9,.Lt_2_14082 # [4] + +.LBB12_esp_nn_conv_s16_mult8_esp32s3: # 0x9c3 +# Part of loop body line 187, head labeled .Lt_2_13826 + l32i a8,a1,196 # [0] gra_spill_temp_133 + l32i a7,a1,212 # [1] gra_spill_temp_137 + l32i a13,a1,200 # [2] gra_spill_temp_134 + mull a7,a7,a8 # [3] + l32i a6,a1,120 # [4] gra_spill_temp_114 + add.n a13,a7,a13 # [5] + j .Lt_2_14594 # [6] + +.Lt_2_14850: # 0x9d7 +# Part of loop body line 201, head labeled .Lt_2_14594 + l32i a9,a1,204 # [0] gra_spill_temp_135 + l32i a10,a1,212 # [1] gra_spill_temp_137 + l32i a12,a1,192 # [2] gra_spill_temp_132 + l32i a11,a1,196 # [3] gra_spill_temp_133 + add.n a6,a6,a12 # [4] + add.n a7,a7,a11 # [5] + add.n a13,a13,a11 # [6] + addi.n a10,a10,1 # [7] + s32i a10,a1,212 # [8] gra_spill_temp_137 + sub 
a9,a9,a10 # [9] + beqz a9,.Lt_2_14082 # [10] + +.Lt_2_14594: # 0x9f4 + l32i a9,a1,200 # [0] gra_spill_temp_134 + l32i a8,a1,208 # [1] gra_spill_temp_136 + bge a8,a9,.Lt_2_14850 # [3] + + l32i a11,a1,176 # [0] gra_spill_temp_128 + l32i a10,a1,184 # [1] gra_spill_temp_130 + add.n a12,a7,a8 # [2] + add.n a10,a10,a8 # [3] + add.n a10,a6,a10 # [4] + mull a10,a5,a10 # [5] + mull a8,a12,a5 # [6] + addx2 a10,a10,a11 # [7] + l32i a11,a1,188 # [8] gra_spill_temp_131 + add.n a11,a11,a8 # [10] + l32i a8,a1,180 # [11] gra_spill_temp_129 + mov.n a2,a10 # [12] + addx2 a11,a11,a8 # [13] + movi.n a8,8 # [14] + mov.n a3,a11 # [15] + j .Lt_2_15362 # [16] + +.LBB18_esp_nn_conv_s16_mult8_esp32s3: # 0xa26 + loopgtz a15,.LBB54_esp_nn_conv_s16_mult8_esp32s3 # [0] + + ee.vmulas.s16.qacc.ld.ip q0,a2,16,q0,q1 # [0*II+0] id:300 + ee.vld.128.ip q1,a3,16 # [0*II+1] id:301 +.LBB54_esp_nn_conv_s16_mult8_esp32s3: # 0xa30 + +.Lt_2_15618: # 0xa30 + ee.vmulas.s16.qacc q0,q1 # [0] + movi.n a8,8 # [1] + add.n a10,a10,a14 # [2] + add.n a11,a11,a14 # [3] + mov.n a3,a11 # [4] + mov.n a2,a10 # [5] + beq a12,a13,.Lt_2_14850 # [6] + +.Lt_2_15362: # 0xa40 + ee.vld.128.ip q1,a3,16 # [0] id:299 + ee.vld.128.ip q0,a2,16 # [1] id:298 + addi.n a12,a12,1 # [2] + bltu a8,a5,.LBB18_esp_nn_conv_s16_mult8_esp32s3 # [3] + + j .Lt_2_15618 # [0] + +.Lt_2_11778: # 0xa5c + retw.n # [0] + + .size esp_nn_conv_s16_mult8_esp32s3, . - esp_nn_conv_s16_mult8_esp32s3 + +#elif defined(WIO_TERMINAL) +// dummy code, added for old ARM toolchain +.syntax unified +.thumb +.cpu cortex-m0 + +.section .text +#endif // EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3 diff --git a/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_conv_s8_filter_aligned_input_padded_esp32s3.S b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_conv_s8_filter_aligned_input_padded_esp32s3.S new file mode 100644 index 0000000..5545b27 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_conv_s8_filter_aligned_input_padded_esp32s3.S @@ -0,0 +1,271 @@ +#include "edge-impulse-sdk/classifier/ei_classifier_config.h" +#if EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3 +// +// SPDX-FileCopyrightText: 2023 Espressif Systems (Shanghai) CO LTD +// +// SPDX-License-Identifier: Apache-2.0 +// + + +// +// Contraints used by this function are: +// 1. pad_wd and pad_ht is 0. For versions needing padding we do this +// explicitly +// 2. All the filter rows are aligned to 16 bytes boundary. To make sure +// this is indeed the case, for filter rows (filter_wd * channels) not +// multiple of 16, we add zeros to fill it till 16 bondary. +// +// The optimized kernel assumes this and skips filter row with following +// size: ((filter_wd * input_ch) + 15) & ~15. 
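+//
+// A hedged C-level sketch of the row stride this kernel relies on (illustrative
+// only; the names mirror the comment above rather than a public API):
+//
+//     /* each filter row is zero-padded by the caller up to a 16-byte boundary */
+//     const int filter_row_stride = ((filter_wd * input_ch) + 15) & ~15;
+//
+// The same rounding is done below with: addi.n a4, a4, 15 / srli a2, a4, 4 / slli a12, a2, 4.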
+ + .text + +.literal_position + .literal .LC1, 1073741824 + + # Program Unit: esp_nn_conv_s8_filter_aligned_input_padded_esp32s3 + .type esp_nn_conv_s8_filter_aligned_input_padded_esp32s3, @function + .align 4 + .global esp_nn_conv_s8_filter_aligned_input_padded_esp32s3 + // registers: + // a2: const int16_t *input_data + // a3: const uint16_t input_wd + // a4: const uint16_t input_ht + // a5: const uint16_t in_ch + // a6: const uint16_t input_offset + // a7: const uint16_t stride_wd + + // on stack: + // const uint16_t stride_ht : 80 + // const int8_t *filter_data : 84 + // const uint16_t filter_wd : 88 + // const uint16_t filter_ht : 92 + // const int32_t *bias : 96 + // int8_t *out_data : 100 + // const uint16_t out_wd : 104 + // const uint16_t out_ht : 108 + // const uint16_t out_channels : 112 + // const int32_t out_offset : 116 + // const int32_t *out_shift : 120 + // const int32_t *out_mult : 124 + // const int32_t activation_min: 128 + // const int32_t activation_max: 132 + // void *scratch_buffer: 136 + +esp_nn_conv_s8_filter_aligned_input_padded_esp32s3: + entry sp, 80 + s32i.n a2, sp, 40 # input_data + mov a11, a6 # input_offset + l16ui a2, sp, 88 # filter_wd + l32i a8, sp, 100 # out_data + l16ui a6, sp, 80 # stride_ht + mov.n a15, a5 + + mull a4, a2, a15 # filter_row_sz + s32i.n a8, sp, 24 # out_data_ptr + movi.n a9, 0 + s32i.n a9, sp, 36 # out_y + + addi.n a4, a4, 15 # to round the size up + srli a2, a4, 4 # (filter_row_sz) >> 4 + slli a12, a2, 4 # ((filter_row_sz) >> 4) << 4 + + mull a4, a6, a3 # stride_ht * input_wd + mull a5, a3, a15 # input_wd * in_ch + l32i.n a10, sp, 112 # out_ch + + mull a9, a7, a15 # stride_wd * in_ch + mull a4, a4, a15 # (stride_ht * input_wd) * in_ch + + slli a3, a10, 2 # out_ch * 4 + + s32i.n a3, sp, 32 # out_ch * 4 + s32i.n a5, sp, 12 # input_wd * in_ch + s32i.n a9, sp, 52 # stride_wd * in_ch + s32i a4, sp, 56 # (stride_ht * input_wd) * in_ch + + l32i.n a3, sp, 92 # filter_ht + l32i a13, sp, 136 # scratch_buf + l32i a5, sp, 84 # filter_data + mull a4, a12, a3 # (filter_wd * filter_ht * in_ch) + srai a4, a4, 1 + addx4 a10, a10, a13 # scratch_buf + 4 * out_ch + l32i a3, sp, 96 + // accumulate filter values per channel into scratch buffer +.L_acc_out_channel_loop: + movi.n a9, 0 // acc + loop a4, .L_acc_filter_size_loop + l8ui a14, a5, 0 + l8ui a7, a5, 1 + addi.n a5, a5, 2 + sext a14, a14, 7 + sext a7, a7, 7 + add a9, a9, a14 + add a9, a9, a7 + .L_acc_filter_size_loop: + + // multiply by offset, add bias and store the acc value per channel + mull a9, a9, a11 + beqz.n a3, .L_skip_bias + l32i a8, a3, 0 + addi a3, a3, 4 // this will remain 0 if bias not present + add a9, a9, a8 +.L_skip_bias: + s32i a9, a13, 0 + addi.n a13, a13, 4 + blt a13, a10, .L_acc_out_channel_loop + + movi.n a4, 0 # 0 + +.L_height_loop: + l32i.n a8, sp, 40 # in_row_ptr + movi.n a9, 0 + l32i.n a10, sp, 104 # out_wd + s32i.n a8, sp, 28 # input_ptr + s32i.n a9, sp, 44 # out_x + +.L_width_loop: + movi.n a9, 0 + l32i a5, sp, 84 # filter_data + s32i.n a9, sp, 20 + l32i a3, sp, 136 # scratch_buf + +.L_out_ch_loop: + movi.n a6, 0 + l32i.n a9, sp, 28 # input_ptr + mov.n a10, a6 + +.L_filter_ht_loop: + add.n a8, a5, a12 + mov.n a13, a9 + + ee.zero.accx + ee.ld.128.usar.ip q0, a13, 16 + ee.vld.128.ip q4, a13, 16 + ee.vld.128.ip q1, a5, 16 + + sub a15, a8, a5 // row_len - 16 + extui a14, a15, 4, 1 // if multiple of 16 and not 32 + srai a15, a15, 5 // multiples of 32 + ee.src.q.qup q2, q0, q4 + beqz a15, .L_vector_32_loop_end + + loop a15, .L_vector_32_loop_end + + ee.vld.128.ip q4, a13, 16 + 
ee.vmulas.s8.accx.ld.ip.qup q3, a5, 16, q2, q1, q0, q4 + ee.vld.128.ip q2, a13, 16 + ee.vmulas.s8.accx.ld.ip.qup q1, a5, 16, q0, q3, q4, q2 + ee.orq q0, q2, q2 + ee.orq q2, q4, q4 + +.L_vector_32_loop_end: + beqz a14, .L_vector_loop_end + ee.vmulas.s8.accx.ld.ip q4, a13, 16, q2, q1 + ee.src.q.ld.ip q1, a5, 16, q0, q4 + ee.orq q2, q0, q0 + +.L_vector_loop_end: + ee.vmulas.s8.accx q2, q1 + addi a13, a13, -16 // since we incremented by 16 too much + movi a15, 0 + ee.srs.accx a14, a15, 0 + + mov.n a5, a8 + add.n a6, a6, a14 +.L7: + l32i.n a8, sp, 12 # input_wd * in_ch + l32i.n a2, sp, 92 # filter_ht + addi.n a10, a10, 1 # filter_y_idx + add.n a9, a9, a8 + blt a10, a2, .L_filter_ht_loop +.L9: + l32i a7, a3, 0 # load input_offset acc + addi a3, a3, 4 # increment offset acc ptr + l32i.n a8, sp, 20 + add.n a6, a6, a7 # add input_offset accumulation + +.L_multiply_by_quant_mult: + l32i a10, sp, 120 + l32i a9, sp, 124 + add.n a2, a10, a8 + l32i.n a2, a2, 0 + add.n a7, a9, a8 + l32i.n a7, a7, 0 + max a8, a2, a4 + ssl a8 + sll a6, a6 + mull a9, a6, a7 + l32r a10, .LC1 + sub a2, a8, a2 + add.n a8, a9, a10 + mulsh a6, a6, a7 + movi.n a7, 1 + bltu a8, a9, .L13 + movi.n a7, 0 + +.L13: + add.n a6, a7, a6 + slli a6, a6, 1 + extui a8, a8, 31, 1 + or a6, a6, a8 + beqz.n a2, .L_skip_div_by_pow_of_2 + addi.n a7, a2, -1 + movi.n a9, 1 + extui a8, a6, 31, 1 + ssl a7 + sll a7, a9 + sub a7, a7, a8 + add.n a6, a7, a6 + ssr a2 + sra a6, a6 +.L_skip_div_by_pow_of_2: + l32i a10, sp, 116 + l32i a8, sp, 128 + add.n a2, a10, a6 + l32i a9, sp, 132 + l32i.n a10, sp, 24 # out_data_ptr + max a2, a2, a8 + min a2, a2, a9 + s8i a2, a10, 0 + l32i.n a2, sp, 20 + addi.n a10, a10, 1 + addi.n a2, a2, 4 + l32i.n a6, sp, 32 + s32i.n a2, sp, 20 + s32i.n a10, sp, 24 # out_data_ptr + bne a6, a2, .L_out_ch_loop + +.L4: + l32i.n a5, sp, 44 # out_x + l32i.n a6, sp, 28 # input_ptr (was stored by height loop) + l32i.n a8, sp, 52 # stride_wd * in_ch + addi.n a5, a5, 1 + add.n a6, a6, a8 # input_ptr + stride_wd * in_ch + l32i.n a9, sp, 104 # out_wd + s32i.n a5, sp, 44 # out_x + s32i.n a6, sp, 28 # input_ptr + bne a9, a5, .L_width_loop + + l32i.n a10, sp, 36 # out_y + l32i.n a2, sp, 40 # in_row_ptr + l32i a5, sp, 56 # (stride_ht * input_wd) * in_ch + l32i.n a6, sp, 108 # out_ht + addi.n a10, a10, 1 + add.n a2, a2, a5 # in_row_ptr + s32i.n a10, sp, 36 # out_y + s32i.n a2, sp, 40 # in_row_ptr + blt a10, a6, .L_height_loop + // end outer (height) loop + retw.n + + .size esp_nn_conv_s8_filter_aligned_input_padded_esp32s3, .-esp_nn_conv_s8_filter_aligned_input_padded_esp32s3 + +#elif defined(WIO_TERMINAL) +// dummy code, added for old ARM toolchain +.syntax unified +.thumb +.cpu cortex-m0 + +.section .text +#endif // EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3 diff --git a/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_conv_s8_mult8_1x1_esp32s3.S b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_conv_s8_mult8_1x1_esp32s3.S new file mode 100644 index 0000000..111fd08 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_conv_s8_mult8_1x1_esp32s3.S @@ -0,0 +1,497 @@ +#include "edge-impulse-sdk/classifier/ei_classifier_config.h" +#if EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3 +// Copyright 2021-2022 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + .text + .literal_position + .literal .nudge_val, 1073741824 + + # Program Unit: esp_nn_conv_s8_mult8_1x1_esp32s3 + .type esp_nn_conv_s8_mult8_1x1_esp32s3, @function + .align 4 + .global esp_nn_conv_s8_mult8_1x1_esp32s3 + +esp_nn_conv_s8_mult8_1x1_esp32s3: # 0xdbc + # scratch_buf = 0 // to store qacc regs need 36 bytes + # gra_spill_temp_164 = 36, channel itr, (in_channels - 1) >> 3 + # gra_spill_temp_165 = 40, i_out + # gra_spill_temp_166 = 44, in_channels + # gra_spill_temp_167 = 48, in_channels/8 - 1 + # gra_spill_temp_168 = 52, in_channels-7 + # gra_spill_temp_169 = 56, input + # gra_spill_temp_170 = 60, filter_data + # gra_spill_temp_171 = 64, input_offset + # gra_spill_temp_172 = 68, input_ptr + # gra_spill_temp_173 = 72, bias + # gra_spill_temp_174 = 76, in_channels*8 + # gra_spill_temp_175 = 80, size-7 + # gra_spill_temp_176 = 84, size + + // registers: + // a2: int8_t *input_data + // a3: uint16_t input_wd + // a4: uint16_t input_ht + // a5: uint16_t in_channels + // a6: int32_t input_offset + // a7: int16_t *filter_data + + // on stack: + // int32_t *bias // 160 + // int8_t *out_data // 164 + // uint16_t out_wd // 168 + // uint16_t out_ht // 172 + // uint16_t out_channels // 176 + // int32_t out_offset // 180 + // int32_t *out_shift // 184 + // int32_t *out_mult // 188 + // int32_t activation_min // 192 + // int32_t activation_max // 196 + // void *buffer // tmp buf // 200 + + entry a1,160 # + s32i a5,a1,44 # [0] gra_spill_temp_166, in_channels + s32i a6,a1,64 # [2] id:619 input_offset+0x0 + s32i a7,a1,60 # [1] gra_spill_temp_170, filter_data + mul16u a8,a3,a4 # [3] size = input_wd * input_ht; + s32i a2,a1,56 # [0] gra_spill_temp_169, input + l32i a4,a1,164 # [1] id:624 out_data+0x0 + mov.n a3,a1 # [52] scratch_buf + + s32i a8,a1,84 # [4] gra_spill_temp_176, size + blti a8,8,.prepare_leftover # [5] // process remaining lines one by one + addi a9,a8,-7 # [32] + s32i a9,a1,80 # [33] gra_spill_temp_175, size-7 + + s32i a2,a1,68 # [2] gra_spill_temp_172 , input_ptr + srai a15,a5,3 # [7] `in_ch/8` loop_cnt + movi.n a11,0 # [10] + s32i a11,a1,40 # [11] gra_spill_temp_165 + addi a15,a15,-1 # [17] `in_ch/8` loop_cnt - 1 + s32i a15,a1,48 # [18] gra_spill_temp_167 + slli a9,a5,3 # [19] in_channels*8 + s32i a9,a1,76 # [20] gra_spill_temp_174 + addi a15,a5,-7 # [31] + s32i a15,a1,52 # [34] gra_spill_temp_168 + +.outer_loop: // for (; i_out < size - 7; i_out += 8) { + + l32i a10,a1,200 # [1] gra_spill_temp_165, buffer + l32i.n a11,a1,44 # [1] gra_spill_temp_166, input_channels + l32i.n a8,a1,68 # [2] gra_spill_temp_172, input_ptr + srai a9,a11,3 # [7] `in_ch/8` loop_cnt for transpose loop + + ee.zero.q q7 # [0] + addi a12,a1,64 # [6] + ee.vldbc.16 q5,a12 # [0*II+16] id:638 input_offset + + // load and transose 8 lines of input 8xchannels, + // add input offset and store 16 bit data to tmp buffer + loopgtz a9,.transpose_loop_end # [10] + mov.n a9,a8 + ee.vld.l.64.xp q0,a9,a11 + ee.vld.l.64.xp q1,a9,a11 + ee.vld.h.64.xp q0,a9,a11 + ee.vld.h.64.xp q1,a9,a11 + ee.vld.l.64.xp q2,a9,a11 + ee.vzip.8 q0,q1 + ee.vld.l.64.xp q3,a9,a11 + ee.vld.h.64.xp q2,a9,a11 + ee.vld.h.64.ip 
q3,a9,0 + ee.vzip.16 q0,q1 + ee.vzip.8 q2,q3 + ee.vzip.16 q2,q3 + ee.vzip.32 q0,q2 + ee.vcmp.lt.s8 q4,q2,q7 + ee.vzip.8 q2,q4 + ee.vcmp.lt.s8 q6,q0,q7 + ee.vzip.8 q0,q6 + ee.vadds.s16 q0,q0,q5 + ee.vadds.s16.st.incp q0,a10,q6,q6,q5 + ee.vadds.s16.st.incp q6,a10,q2,q2,q5 + ee.vadds.s16.st.incp q2,a10,q4,q4,q5 + ee.vst.128.ip q4,a10,16 + ee.vzip.32 q1,q3 + ee.vcmp.lt.s8 q4,q3,q7 + ee.vzip.8 q3,q4 + ee.vcmp.lt.s8 q6,q1,q7 + ee.vzip.8 q1,q6 + ee.vadds.s16 q1,q1,q5 + ee.vadds.s16.st.incp q1,a10,q6,q6,q5 + ee.vadds.s16.st.incp q6,a10,q3,q3,q5 + ee.vadds.s16.st.incp q3,a10,q4,q4,q5 + ee.vst.128.ip q4,a10,16 + addi.n a8,a8,8 +.transpose_loop_end: # 0xeeb + + # 468 uint32_t bias_ptr = (uint32_t) bias; + # 469 uint32_t filter_ptr = (uint32_t) (filter_data); + # 470 const int32_t *out_mult_ptr = out_mult; + # 471 const int32_t *out_shift_ptr = out_shift; + l32i a6,a1,184 # [0] out_shift + l32i a2,a1,188 # [1] out_mult + l32i a5,a1,60 # [2] gra_spill_temp_170, filter + l32i a9,a1,160 # [3] gra_spill_temp_170, bias + # 472 for (int32_t out_ch_idx = 0; out_ch_idx < out_channels; out_ch_idx++) { + l16ui a8,a1,176 # [5] id:620 out_channels+0x0 + s32i a9,a1,72 # [5] gra_spill_temp_173 + blti a8,1,.outer_ch_loop_end + + movi.n a7,0 + +.out_ch_loop: # 0xf3e + l32i a8,a1,200 # [4] gra_spill_temp_165, buffer_ptr + ee.zero.qacc # [3] + ee.zero.q q5 # + l32i a10,a1,52 # [1] gra_spill_temp_168, in_channels-7 + l32i a9,a1,48 # [1] gra_spill_temp_167, in_channels/8 - 1 + ee.vld.l.64.ip q7,a5,8 # load filter 8 values + ee.vld.128.ip q0,a8,16 + ee.vld.128.ip q1,a8,16 + ee.vcmp.lt.s8 q6,q7,q5 + ee.vzip.8 q7,q6 + + ee.vsmulas.s16.qacc.ld.incp q2,a8,q0,q7,0 + ee.vsmulas.s16.qacc.ld.incp q3,a8,q1,q7,1 + ee.vsmulas.s16.qacc.ld.incp q0,a8,q2,q7,2 + ee.vsmulas.s16.qacc.ld.incp q1,a8,q3,q7,3 + ee.vsmulas.s16.qacc.ld.incp q2,a8,q0,q7,4 + ee.vsmulas.s16.qacc.ld.incp q3,a8,q1,q7,5 + blti a10,8,.inner_loop_end # [16] + + loopgtz a9,.inner_loop_end # [3] + + ee.vsmulas.s16.qacc.ld.incp q0,a8,q2,q7,6 # [0*II+0] id:657 + ee.vsmulas.s16.qacc.ld.incp q1,a8,q3,q7,7 # [0*II+1] id:658 + ee.vld.l.64.ip q7,a5,8 # [0*II+2] id:659, filter + ee.vcmp.lt.s8 q6,q7,q5 + ee.vzip.8 q7,q6 + ee.vsmulas.s16.qacc.ld.incp q2,a8,q0,q7,0 # [0*II+4] id:660 + ee.vsmulas.s16.qacc.ld.incp q3,a8,q1,q7,1 # [0*II+5] id:661 + ee.vsmulas.s16.qacc.ld.incp q0,a8,q2,q7,2 # [0*II+6] id:662 + ee.vsmulas.s16.qacc.ld.incp q1,a8,q3,q7,3 # [0*II+7] id:663 + ee.vsmulas.s16.qacc.ld.incp q2,a8,q0,q7,4 # [0*II+8] id:664 + ee.vsmulas.s16.qacc.ld.incp q3,a8,q1,q7,5 # [0*II+9] id:665 +.inner_loop_end: # 0xfaf + + ee.vsmulas.s16.qacc q2,q7,6 # [2] + ee.vsmulas.s16.qacc q3,q7,7 # [3] + + # store qacc registers and re-arrange data for low 16 bits + + ee.st.qacc_l.l.128.ip a3,16 # [5] id:668 + ee.st.qacc_l.h.32.ip a3,-16 # [6] id:669 + l32i.n a10, a1, 0 + l32i.n a11, a1, 5 + l32i.n a12, a1, 10 + l32i.n a13, a1, 15 + ee.movi.32.q q0, a10, 0 + ee.movi.32.q q0, a11, 1 + ee.movi.32.q q0, a12, 2 + ee.movi.32.q q0, a13, 3 + + ee.st.qacc_h.l.128.ip a3,16 # [5] id:668 + ee.st.qacc_h.h.32.ip a3,-16 # [6] id:669 + l32i.n a10, a1, 0 + l32i.n a11, a1, 5 + l32i.n a12, a1, 10 + l32i.n a13, a1, 15 + ee.movi.32.q q4, a10, 0 + ee.movi.32.q q4, a11, 1 + ee.movi.32.q q4, a12, 2 + ee.movi.32.q q4, a13, 3 + + l32i a9,a1,160 # [17] gra_spill_temp_170, bias + l32i a10,a1,72 # [0] gra_spill_temp_173, bias_ptr + + # add bias + beqz.n a9,.no_bias + ee.vldbc.32.ip q6,a10,4 + s32i a10,a1,72 # [3] gra_spill_temp_173, bias_ptr + ee.vadds.s32 q0,q0,q6 # [4] + ee.vadds.s32 q4,q4,q6 # [5] +.no_bias: # 0x102e + + 
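+ // The two call8 invocations below requantize the low and high accumulator halves.
+ // In C terms this is roughly (hedged sketch, not the exact asm calling contract):
+ //   acc = esp_nn_multiply_by_quantized_mult(acc, out_mult[out_ch_idx], out_shift[out_ch_idx]);
+ // with q0/q4 each carrying four 32-bit lanes and a10/a11 holding the current
+ // per-channel mult/shift values loaded just below.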
l32i.n a11,a6,0 # [1] id:696 + l32i.n a10,a2,0 # [3] id:695 + .global esp_nn_multiply_by_quantized_mult_asm_esp32s3 + call8 esp_nn_multiply_by_quantized_mult_asm_esp32s3 # [4] esp_nn_multiply_by_quantized_mult_asm_esp32s3 + + l32i.n a10,a2,0 # [0] id:697, mult + l32i.n a11,a6,0 # [2] id:698, shift + mv.qr q5,q0 + mv.qr q0,q4 + call8 esp_nn_multiply_by_quantized_mult_asm_esp32s3 # [5] esp_nn_multiply_by_quantized_mult_asm_esp32s3 + + addi.n a6,a6,4 # out_shift_ptr++ + addi.n a2,a2,4 # out_mult_ptr++ + addi a9,a1,180 # [7] + addi a10,a1,192 # [5] + addi a8,a1,196 # [6] + +# load broadcast, activation and out_offset + ee.vldbc.32 q4,a9 # [14] id:699 out_offset + ee.vldbc.32 q2,a10 # [11] id:700 activation_min + ee.vldbc.32 q3,a8 # [12] id:701 activation_max + +# add offset + ee.vadds.s32 q1,q0,q4 # [17] + ee.vadds.s32 q0,q5,q4 # [22] + + # activation + ee.vmin.s32 q1,q1,q3 # [19] + ee.vmax.s32 q1,q1,q2 # [21] + ee.vmin.s32 q0,q0,q3 # [23] + ee.vmax.s32 q0,q0,q2 # [24] + + l16ui a9,a1,176 # [33] out_channels + +# unzip and store + ee.vunzip.16 q0,q1 # [25] + ee.vst.128.ip q0,a3,0 # [26] id:702, scratch_buf + + # a4 = out_data, out_channels = a1+176 + + l8ui a14,a1,0 # [27] + l8ui a11,a1,2 # [30] scratch_buf+2 + add a10,a4,a9 + s8i a14,a4,0 # [28], out_data + s8i a11,a10,0 # [31], out_data + out_channels + + l8ui a14,a1,4 # [32] scratch_buf+4 + l8ui a11,a1,6 # [37] scratch_buf+6 + add a12,a10,a9 + add a10,a12,a9 + s8i a14,a12,0 # [28] + s8i a11,a10,0 # [31] + + l8ui a14,a1,8 # [41] scratch_buf+8 + l8ui a11,a1,10 # [47] scratch_buf+10 + add a12,a10,a9 + add a10,a12,a9 + s8i a14,a12,0 # [28] + s8i a11,a10,0 # [31] + + l8ui a14,a1,12 # [51] scratch_buf+12 + l8ui a11,a1,14 # [55] scratch_buf+14 + add a12,a10,a9 + add a10,a12,a9 + s8i a14,a12,0 # [28] + s8i a11,a10,0 # [31] + + addi.n a4,a4,1 # [29] out_data++; + addi.n a7,a7,1 + bne a7,a9,.out_ch_loop + +.outer_ch_loop_end: + + subx8 a11,a9,a9 # (7 * out_channels); + l32i a10,a1,76 # [1] gra_spill_temp_174, in_channels * 8 + l32i a15,a1,40 # [4] gra_spill_temp_165 + l32i a9,a1,68 # [2] gra_spill_temp_172 + l32i a8,a1,80 # [0] gra_spill_temp_175, size-7 + add.n a4,a4,a11 # [5] out_data += (7 * out_channels); + addi.n a15,a15,8 + s32i a15,a1,40 # [7] gra_spill_temp_165 + add.n a9,a9,a10 # [8] + s32i a9,a1,68 # [9] gra_spill_temp_172 + blt a15,a8,.outer_loop # [10] + + # check if leftover + l32i a15,a1,40 + l32i a13,a1,84 # [1] gra_spill_temp_176, size + l32i a8,a1,44 # [0] gra_spill_temp_166, in_channels + bge a15, a13, .return_function # no leftover + +// This block below processes one input channel line at a time. 
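+// A hedged C equivalent of this leftover path (illustrative sketch only; the
+// variable names are taken from the comments above, not from a public API, and
+// esp_nn_multiply_by_quantized_mult() stands in for the inlined requantization
+// sequence further down):
+//
+//   for (; i_out < size; i_out++) {
+//       for (int out_ch = 0; out_ch < out_channels; out_ch++) {
+//           int32_t acc = 0;
+//           for (int c = 0; c < in_channels; c++) {
+//               acc += (input_ptr[c] + input_offset) * filter_data[out_ch * in_channels + c];
+//           }
+//           if (bias) {
+//               acc += bias[out_ch];
+//           }
+//           acc = esp_nn_multiply_by_quantized_mult(acc, out_mult[out_ch], out_shift[out_ch]);
+//           acc += out_offset;
+//           acc = max(acc, activation_min);
+//           acc = min(acc, activation_max);
+//           *out_data++ = (int8_t) acc;
+//       }
+//       input_ptr += in_channels;
+//   }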
+.process_leftover: + l32i a15,a1,40 # [1] gra_spill_temp_165, i_out + l32i a14,a1,56 # [2] gra_spill_temp_169, input + mull a15,a15,a8 # [3] in_channels * i_out + addi.n a8,a8,-1 # [4] in_channels - 1 + add.n a14,a14,a15 # [5] input_ptr = in_channels * i_out + input + srai a8,a8,3 # [6] iterations, (in_channels - 1) >> 3 + s32i a8,a1,36 # [7] gra_spill_temp_164, iterations + s32i a14,a1,68 # [8] gra_spill_temp_172, in_channels * i_out + input + addi a12,a1,64 + ee.vldbc.16 q4,a12 # [8] id:716 input_offset + +.leftover_outer_loop: + + l32i a15,a1,184 # [0] out_shift + l32i a2,a1,188 # [1] out_mult + l32i a8,a1,60 # [3] gra_spill_temp_170, filter_data + l32i a5,a1,160 # [0] gra_spill_temp_170, bias + movi.n a11,0 # [2] + +.leftover_out_ch_loop: + + ee.zero.qacc # [0] + ee.zero.q q3 # [1] + l32i.n a9,a1,68 # [4] gra_spill_temp_172, input_ptr + l32i a10,a1,36 # [1] gra_spill_temp_164, iterations, (in_channels - 1) >> 3 + ee.vld.l.64.ip q0,a9,8 # [7] id:717, input + ee.vld.l.64.ip q1,a8,8 # [7] filter + ee.vcmp.lt.s8 q6,q0,q3 + ee.vcmp.lt.s8 q7,q1,q3 + ee.vzip.8 q0,q6 + ee.vzip.8 q1,q7 + ee.vadds.s16 q0,q0,q4 # [11] id:718, add offset + + loopgtz a10,.leftover_inner_loop_end # [3] + + ee.vmulas.s16.qacc q0,q1 # mula(q0,q1) + ee.vld.l.64.ip q0,a9,8 # load 8 input values + ee.vld.l.64.ip q1,a8,8 # [7] load filter + ee.vcmp.lt.s8 q2,q0,q3 # sign + ee.vcmp.lt.s8 q7,q1,q3 + ee.vzip.8 q0,q2 # 16 bit input + ee.vzip.8 q1,q7 # 16 bit filter + ee.vadds.s16 q0,q0,q4 # add offset +.leftover_inner_loop_end: # 0x1262 + +# re-arrange data from qacc in 32 bit q registers + ee.vmulas.s16.qacc q0,q1 # [3] + ee.st.qacc_l.l.128.ip a3,16 # [5] id:722 + ee.st.qacc_l.h.32.ip a3,0 # [6] id:723 + l8ui a10,a1,5 # [11] scratch_buf+5 + l8ui a12,a1,6 # [10] scratch_buf+6 + l16ui a14,a1,10 # [8] scratch_buf+10 + l8ui a9,a1,15 # [7] scratch_buf+15 + l8ui a13,a1,16 # [9] scratch_buf+16 + s8i a10,a1,2 # [12] scratch_buf+2 + s8i a12,a1,3 # [13] scratch_buf+3 + s16i a14,a1,4 # [15] scratch_buf+4 + s8i a9,a1,6 # [16] scratch_buf+6 + s8i a13,a1,7 # [14] scratch_buf+7 + + ee.st.qacc_h.l.128.ip a3,16 # [17] id:724 + ee.st.qacc_h.h.32.ip a3,-32 # [18] id:725 + l16ui a13,a1,16 # [30] scratch_buf+16 + l8ui a14,a1,21 # [23] scratch_buf+21 + l8ui a9,a1,22 # [22] scratch_buf+22 + l16ui a10,a1,26 # [21] scratch_buf+26 + s16i a13,a1,8 # [31] scratch_buf+8 + l8ui a12,a1,31 # [20] scratch_buf+31 + l8ui a13,a1,32 # [19] scratch_buf+32 + s8i a14,a1,10 # [24] scratch_buf+10 + s8i a9,a1,11 # [25] scratch_buf+11 + s16i a10,a1,12 # [26] scratch_buf+12 + s8i a12,a1,14 # [27] scratch_buf+14 + s8i a13,a1,15 # [28] scratch_buf+15 + movi.n a12,16 + +# get data now + ee.vld.128.ip q0,a3,0 + ee.srcmb.s16.qacc q1,a12,0 + ee.vzip.16 q0,q1 + + ee.vadds.s32 q0,q0,q1 + ee.movi.32.a q0,a10,3 + ee.movi.32.a q0,a9,2 + ee.movi.32.a q0,a14,0 + add a9,a9,a10 + ee.movi.32.a q0,a10,1 + add a14,a14,a10 + add a14,a14,a9 + +# a14 contains conv_out + l32i a9,a1,160 # [43] gra_spill_temp_170, bias ptr + l32i.n a6,a15,0 # [44] id:730, shift + beqz.n a9,.leftover_multiply_by_quant_mult # [45] + +# load and add bias + l32i.n a9,a5,0 + add.n a14,a14,a9 + +.leftover_multiply_by_quant_mult: # 0x12e7 + l32i.n a9,a2,0 # [0] id:729, mult + movi.n a10,0 # [1] + max a10,a6,a10 # [2] left_shift + ssl a10 # [3] + sll a14,a14 # [4] (value << left_shift) + + sub a7,a10,a6 # right_shift + + l32r a13,.nudge_val + mulsh a12,a9,a14 + mull a14,a9,a14 + ssai 31 + + addi.n a2,a2,4 # [0] mult + addi.n a15,a15,4 # [1] shift + addi.n a5,a5,4 # [2] bias + addi.n a11,a11,1 # [3] + + add 
a13,a14,a13 # low part + saltu a14,a13,a14 + add a9,a12,a14 # high part + src a12,a9,a13 + + blti a7,1,.leftover_skip_div_by2 + + addi.n a14,a7,-1 + ssl a14 + movi.n a10,1 + sll a10,a10 # 1 << (exponent - 1) + extui a14,a12,31,1 + ssr a7 + sub a10,a10,a14 # 1 << (exponent - 1) - (val < 0) + add a12,a12,a10 # val += to_add + sra a12,a12 + +.leftover_skip_div_by2: + l32i a10,a1,180 # [26] id:733 out_offset+0x0 + l32i a9,a1,192 # [29] id:732 activation_min+0x0 + l16ui a13,a1,176 # [5] id:620 out_channels+0x0 + l32i a14,a1,196 # [31] id:731 activation_max+0x0 + +// add offset, apply activation and store + add.n a10,a10,a12 + max a9,a9,a10 + min a14,a14,a9 + s8i a14,a4,0 + addi.n a4,a4,1 + + bne a11,a13,.leftover_out_ch_loop + + l32i a15,a1,44 # [0] gra_spill_temp_166, in_channels + l32i a14,a1,68 # [1] gra_spill_temp_172, input_ptr + l32i a13,a1,40 # [2] gra_spill_temp_165, i_out + l32i a12,a1,84 # [3] gra_spill_temp_176, size + addi.n a13,a13,1 # [4] + s32i a13,a1,40 # [5] gra_spill_temp_165, i_out + add a14,a14,a15 # [7] input_ptr += in_channels + s32i a14,a1,68 # [8] gra_spill_temp_172, input_ptr + blt a13,a12,.leftover_outer_loop + +.return_function: + retw.n # [9] + +.prepare_leftover: + l32i a8,a1,44 # [0] gra_spill_temp_166, in_channels + movi.n a15,0 + s32i a15,a1,40 # [7] gra_spill_temp_165, i_out + j .process_leftover + + .size esp_nn_conv_s8_mult8_1x1_esp32s3, . - esp_nn_conv_s8_mult8_1x1_esp32s3 + +#elif defined(WIO_TERMINAL) +// dummy code, added for old ARM toolchain +.syntax unified +.thumb +.cpu cortex-m0 + +.section .text +#endif // EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3 diff --git a/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_depthwise_conv_ansi.c b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_depthwise_conv_ansi.c new file mode 100644 index 0000000..10c31b9 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_depthwise_conv_ansi.c @@ -0,0 +1,104 @@ +#include "edge-impulse-sdk/classifier/ei_classifier_config.h" +#if EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN +// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
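+
+// The reference kernel below relies on esp_nn_multiply_by_quantized_mult() for the
+// TFLite-style per-channel requantization. A hedged, approximate sketch of that
+// fixed-point step (it omits the negative-value rounding adjustments the real
+// helper and the assembly kernels in this patch perform):
+//
+//     int left_shift  = shift > 0 ? shift : 0;
+//     int right_shift = shift > 0 ? 0 : -shift;
+//     int64_t prod    = (int64_t)(value << left_shift) * mult;     /* Q31 multiplier */
+//     int32_t res     = (int32_t)((prod + (1 << 30)) >> 31);       /* doubling high-mul with nudge */
+//     if (right_shift) {
+//         res = (res + (1 << (right_shift - 1))) >> right_shift;   /* rounding right shift */
+//     }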
+ +#include +#include + +int esp_nn_get_depthwise_conv_scratch_size_ansi(const data_dims_t *input_dims, + const data_dims_t *filter_dims, + const data_dims_t *output_dims, + const dw_conv_params_t *conv_params) +{ + return 0; +} + +void esp_nn_set_depthwise_conv_scratch_buf_ansi(const void *buf) +{ + +} + +void esp_nn_depthwise_conv_s8_ansi(const data_dims_t *input_dims, + const int8_t *input_data, + const data_dims_t *filter_dims, + const int8_t *filter_data, + const int32_t *bias, + const data_dims_t *output_dims, + int8_t *out_data, + const dw_conv_params_t *conv_params, + const quant_data_t *quant_data) +{ + const uint16_t input_wd = input_dims->width; + const uint16_t input_ht = input_dims->height; + const uint16_t channels = input_dims->channels; + const int32_t input_offset = conv_params->in_offset; + const int32_t out_offset = conv_params->out_offset; + const uint16_t pad_wd = conv_params->padding.width; + const uint16_t pad_ht = conv_params->padding.height; + const uint16_t stride_wd = conv_params->stride.width; + const uint16_t stride_ht = conv_params->stride.height; + const uint16_t filter_wd = filter_dims->width; + const uint16_t filter_ht = filter_dims->height; + const uint16_t out_wd = output_dims->width; + const uint16_t out_ht = output_dims->height; + const int32_t *out_shift = quant_data->shift; + const int32_t *out_mult = quant_data->mult; + const int32_t activation_min = conv_params->activation.min; + const int32_t activation_max = conv_params->activation.max; + const uint16_t ch_mult = conv_params->ch_mult; + + int out_idx = 0; + for (int out_y = 0; out_y < out_ht; out_y++) { //height loop + const int16_t base_y = (out_y * stride_ht) - pad_ht; + for (int out_x = 0; out_x < out_wd; out_x++) { //width_loop + const int16_t base_x = (out_x * stride_wd) - pad_wd; + for (int ch_idx = 0; ch_idx < channels; ch_idx++) {//channel_loop + for (int ch_mult_idx = 0; ch_mult_idx < ch_mult; ch_mult_idx++) { + int32_t result = 0; + const int out_ch_idx = ch_mult_idx + ch_idx * ch_mult; + + /* Select filter so as the point doesn't lie outside block */ + int filter_y_start = max(0, -base_y); + int filter_x_start = max(0, -base_x); + int filter_y_end = min(filter_ht, input_ht - base_y); + int filter_x_end = min(filter_wd, input_wd - base_x); + + for (int filter_y_idx = filter_y_start; filter_y_idx < filter_y_end; filter_y_idx++) { + const int32_t idx_y = base_y + filter_y_idx; + for (int filter_x_idx = filter_x_start; filter_x_idx < filter_x_end; filter_x_idx++) { + const int32_t idx_x = base_x + filter_x_idx; + int32_t input_index = (idx_y * input_wd + idx_x) * channels + ch_idx; + int32_t filter_index = (filter_y_idx * filter_wd + filter_x_idx) * (channels * ch_mult) + out_ch_idx; + int32_t input_val = input_data[input_index] + input_offset; + int32_t filter_val = filter_data[filter_index]; + result += input_val * filter_val; + } + } + if (bias) { + result += bias[out_ch_idx]; + } + result = esp_nn_multiply_by_quantized_mult(result, out_mult[out_ch_idx], out_shift[out_ch_idx]); + result += out_offset; + result = max(result, activation_min); + result = min(result, activation_max); + + out_data[out_idx++] = result; + } + } + } + } +} + +#endif // EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN diff --git a/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_depthwise_conv_opt.c b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_depthwise_conv_opt.c new file mode 100644 index 0000000..e0cc29d --- /dev/null +++ 
b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_depthwise_conv_opt.c @@ -0,0 +1,295 @@ +#include "edge-impulse-sdk/classifier/ei_classifier_config.h" +#if EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN +// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +int esp_nn_get_depthwise_conv_scratch_size_opt(const data_dims_t *input_dims, + const data_dims_t *filter_dims, + const data_dims_t *output_dims, + const dw_conv_params_t *conv_params) +{ + return 0; +} + +void esp_nn_set_depthwise_conv_scratch_buf_opt(const void *buf) +{ + +} + +/* common channel multiplier == 1 case */ +__attribute__ ((noinline)) +static void esp_nn_depthwise_conv_s8_ch_mult_1(const data_dims_t *input_dims, + const int8_t *input_data, + const data_dims_t *filter_dims, + const int8_t *filter_data, + const int32_t *bias, + const data_dims_t *output_dims, + int8_t *out_data, + const dw_conv_params_t *conv_params, + const quant_data_t *quant_data) +{ + const uint16_t input_wd = input_dims->width; + const uint16_t input_ht = input_dims->height; + const uint16_t channels = input_dims->channels; + const int32_t input_offset = conv_params->in_offset; + const int32_t out_offset = conv_params->out_offset; + const uint16_t pad_wd = conv_params->padding.width; + const uint16_t pad_ht = conv_params->padding.height; + const uint16_t stride_wd = conv_params->stride.width; + const uint16_t stride_ht = conv_params->stride.height; + const uint16_t filter_wd = filter_dims->width; + const uint16_t filter_ht = filter_dims->height; + const uint16_t out_wd = output_dims->width; + const uint16_t out_ht = output_dims->height; + const int32_t activation_min = conv_params->activation.min; + const int32_t activation_max = conv_params->activation.max; + + int out_idx = 0; + for (int out_y = 0; out_y < out_ht; out_y++) { //height loop + const int16_t base_y = (out_y * stride_ht) - pad_ht; + for (int out_x = 0; out_x < out_wd; out_x++) { //width_loop + const int16_t base_x = (out_x * stride_wd) - pad_wd; + + const int32_t *out_shift = quant_data->shift; + const int32_t *out_mult = quant_data->mult; + + /* Select filter so as the point doesn't lie outside block */ + int filter_y_start = max(0, -base_y); + int filter_x_start = max(0, -base_x); + int filter_y_end = min(filter_ht, input_ht - base_y); + int filter_x_end = min(filter_wd, input_wd - base_x); + + int ch_idx = 0; + for (; ch_idx < channels - 3; ch_idx += 4) {//channel_loop + int32_t result0 = 0; + int32_t result1 = 0; + int32_t result2 = 0; + int32_t result3 = 0; + + for (int filter_y_idx = filter_y_start; filter_y_idx < filter_y_end; filter_y_idx++) { + const int32_t idx_y = base_y + filter_y_idx; + for (int filter_x_idx = filter_x_start; filter_x_idx < filter_x_end; filter_x_idx++) { + const int32_t idx_x = base_x + filter_x_idx; + int32_t input_index = (idx_y * input_wd + idx_x) * channels + ch_idx; + int32_t filter_index = (filter_y_idx * filter_wd + filter_x_idx) * (channels) + 
ch_idx; + int32_t input_val0 = input_data[input_index + 0] + input_offset; + int32_t input_val1 = input_data[input_index + 1] + input_offset; + int32_t input_val2 = input_data[input_index + 2] + input_offset; + int32_t input_val3 = input_data[input_index + 3] + input_offset; + int32_t filter_val0 = filter_data[filter_index + 0]; + int32_t filter_val1 = filter_data[filter_index + 1]; + int32_t filter_val2 = filter_data[filter_index + 2]; + int32_t filter_val3 = filter_data[filter_index + 3]; + result0 += input_val0 * filter_val0; + result1 += input_val1 * filter_val1; + result2 += input_val2 * filter_val2; + result3 += input_val3 * filter_val3; + } + } + if (bias) { + result0 += bias[ch_idx + 0]; + result1 += bias[ch_idx + 1]; + result2 += bias[ch_idx + 2]; + result3 += bias[ch_idx + 3]; + } + result0 = esp_nn_multiply_by_quantized_mult_fast(result0, *out_mult++, *out_shift++); + result1 = esp_nn_multiply_by_quantized_mult_fast(result1, *out_mult++, *out_shift++); + result2 = esp_nn_multiply_by_quantized_mult_fast(result2, *out_mult++, *out_shift++); + result3 = esp_nn_multiply_by_quantized_mult_fast(result3, *out_mult++, *out_shift++); + + result0 += out_offset; + result1 += out_offset; + result2 += out_offset; + result3 += out_offset; + + result0 = max(result0, activation_min); + result1 = max(result1, activation_min); + result2 = max(result2, activation_min); + result3 = max(result3, activation_min); + + result0 = min(result0, activation_max); + result1 = min(result1, activation_max); + result2 = min(result2, activation_max); + result3 = min(result3, activation_max); + + out_data[out_idx++] = result0; + out_data[out_idx++] = result1; + out_data[out_idx++] = result2; + out_data[out_idx++] = result3; + } + for (; ch_idx < channels; ch_idx++) {//channel_loop + int32_t result = 0; + + for (int filter_y_idx = filter_y_start; filter_y_idx < filter_y_end; filter_y_idx++) { + const int32_t idx_y = base_y + filter_y_idx; + for (int filter_x_idx = filter_x_start; filter_x_idx < filter_x_end; filter_x_idx++) { + const int32_t idx_x = base_x + filter_x_idx; + int32_t input_index = (idx_y * input_wd + idx_x) * channels + ch_idx; + int32_t filter_index = (filter_y_idx * filter_wd + filter_x_idx) * (channels) + ch_idx; + int32_t input_val = input_data[input_index] + input_offset; + int32_t filter_val = filter_data[filter_index]; + result += input_val * filter_val; + } + } + if (bias) { + result += bias[ch_idx]; + } + result = esp_nn_multiply_by_quantized_mult_fast(result, *out_mult++, *out_shift++); + result += out_offset; + result = max(result, activation_min); + result = min(result, activation_max); + + out_data[out_idx++] = result; + } + } + } +} + +void esp_nn_depthwise_conv_s8_opt(const data_dims_t *input_dims, + const int8_t *input_data, + const data_dims_t *filter_dims, + const int8_t *filter_data, + const int32_t *bias, + const data_dims_t *output_dims, + int8_t *out_data, + const dw_conv_params_t *conv_params, + const quant_data_t *quant_data) +{ + const uint16_t ch_mult = conv_params->ch_mult; + if (ch_mult == 1) { + esp_nn_depthwise_conv_s8_ch_mult_1(input_dims, input_data, filter_dims, filter_data, + bias, output_dims, out_data, conv_params, quant_data); + return; + } + const uint16_t input_wd = input_dims->width; + const uint16_t input_ht = input_dims->height; + const uint16_t channels = input_dims->channels; + const int32_t input_offset = conv_params->in_offset; + const int32_t out_offset = conv_params->out_offset; + const uint16_t pad_wd = conv_params->padding.width; + const uint16_t 
pad_ht = conv_params->padding.height; + const uint16_t stride_wd = conv_params->stride.width; + const uint16_t stride_ht = conv_params->stride.height; + const uint16_t filter_wd = filter_dims->width; + const uint16_t filter_ht = filter_dims->height; + const uint16_t out_wd = output_dims->width; + const uint16_t out_ht = output_dims->height; + const int32_t activation_min = conv_params->activation.min; + const int32_t activation_max = conv_params->activation.max; + + int out_idx = 0; + for (int out_y = 0; out_y < out_ht; out_y++) { //height loop + const int16_t base_y = (out_y * stride_ht) - pad_ht; + for (int out_x = 0; out_x < out_wd; out_x++) { //width_loop + const int16_t base_x = (out_x * stride_wd) - pad_wd; + + const int32_t *out_shift = quant_data->shift; + const int32_t *out_mult = quant_data->mult; + + /* Select filter so as the point doesn't lie outside block */ + int filter_y_start = max(0, -base_y); + int filter_x_start = max(0, -base_x); + int filter_y_end = min(filter_ht, input_ht - base_y); + int filter_x_end = min(filter_wd, input_wd - base_x); + + for (int ch_idx = 0; ch_idx < channels; ch_idx++) {//channel_loop + int ch_mult_idx = 0; + for (; ch_mult_idx < ch_mult - 3; ch_mult_idx += 4) { + int32_t result0 = 0; + int32_t result1 = 0; + int32_t result2 = 0; + int32_t result3 = 0; + const int out_ch_idx = ch_idx * ch_mult + ch_mult_idx; + + for (int filter_y_idx = filter_y_start; filter_y_idx < filter_y_end; filter_y_idx++) { + const int32_t idx_y = base_y + filter_y_idx; + for (int filter_x_idx = filter_x_start; filter_x_idx < filter_x_end; filter_x_idx++) { + const int32_t idx_x = base_x + filter_x_idx; + int32_t input_index = (idx_y * input_wd + idx_x) * channels + ch_idx; + int32_t filter_index = (filter_y_idx * filter_wd + filter_x_idx) * (channels * ch_mult) + out_ch_idx; + int32_t input_val = input_data[input_index] + input_offset; + int32_t filter_val0 = filter_data[filter_index + 0]; + int32_t filter_val1 = filter_data[filter_index + 1]; + int32_t filter_val2 = filter_data[filter_index + 2]; + int32_t filter_val3 = filter_data[filter_index + 3]; + result0 += input_val * filter_val0; + result1 += input_val * filter_val1; + result2 += input_val * filter_val2; + result3 += input_val * filter_val3; + } + } + if (bias) { + result0 += bias[out_ch_idx + 0]; + result1 += bias[out_ch_idx + 1]; + result2 += bias[out_ch_idx + 2]; + result3 += bias[out_ch_idx + 3]; + } + result0 = esp_nn_multiply_by_quantized_mult_fast(result0, *out_mult++, *out_shift++); + result1 = esp_nn_multiply_by_quantized_mult_fast(result1, *out_mult++, *out_shift++); + result2 = esp_nn_multiply_by_quantized_mult_fast(result2, *out_mult++, *out_shift++); + result3 = esp_nn_multiply_by_quantized_mult_fast(result3, *out_mult++, *out_shift++); + + result0 += out_offset; + result1 += out_offset; + result2 += out_offset; + result3 += out_offset; + + result0 = max(result0, activation_min); + result1 = max(result1, activation_min); + result2 = max(result2, activation_min); + result3 = max(result3, activation_min); + result0 = min(result0, activation_max); + result1 = min(result1, activation_max); + result2 = min(result2, activation_max); + result3 = min(result3, activation_max); + + out_data[out_idx++] = result0; + out_data[out_idx++] = result1; + out_data[out_idx++] = result2; + out_data[out_idx++] = result3; + } + for (; ch_mult_idx < ch_mult; ch_mult_idx++) { + int32_t result = 0; + const int out_ch_idx = ch_idx * ch_mult + ch_mult_idx; + + for (int filter_y_idx = filter_y_start; filter_y_idx < 
filter_y_end; filter_y_idx++) { + const int32_t idx_y = base_y + filter_y_idx; + for (int filter_x_idx = filter_x_start; filter_x_idx < filter_x_end; filter_x_idx++) { + const int32_t idx_x = base_x + filter_x_idx; + int32_t input_index = (idx_y * input_wd + idx_x) * channels + ch_idx; + int32_t filter_index = (filter_y_idx * filter_wd + filter_x_idx) * (channels * ch_mult) + out_ch_idx; + int32_t input_val = input_data[input_index] + input_offset; + int32_t filter_val = filter_data[filter_index]; + result += input_val * filter_val; + } + } + if (bias) { + result += bias[out_ch_idx]; + } + result = esp_nn_multiply_by_quantized_mult_fast(result, *out_mult++, *out_shift++); + result += out_offset; + result = max(result, activation_min); + result = min(result, activation_max); + + out_data[out_idx++] = result; + } + } + } + } +} + +#endif // EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN diff --git a/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_depthwise_conv_s16_mult1_3x3_esp32s3.S b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_depthwise_conv_s16_mult1_3x3_esp32s3.S new file mode 100644 index 0000000..2042573 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_depthwise_conv_s16_mult1_3x3_esp32s3.S @@ -0,0 +1,403 @@ +#include "edge-impulse-sdk/classifier/ei_classifier_config.h" +#if EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3 +// Copyright 2021-2022 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ + .text + .literal_position + + # Program Unit: esp_nn_depthwise_conv_s16_mult1_3x3_esp32s3 + .type esp_nn_depthwise_conv_s16_mult1_3x3_esp32s3, @function + .align 4 + .global esp_nn_depthwise_conv_s16_mult1_3x3_esp32s3 + +esp_nn_depthwise_conv_s16_mult1_3x3_esp32s3: # 0x776 + # qacc_scratch = 0 + # gra_spill_temp_35 = 48 + # gra_spill_temp_36 = 52 + # gra_spill_temp_37 = 56 + # gra_spill_temp_38 = 60 + # gra_spill_temp_39 = 64 + # gra_spill_temp_40 = 68 + # gra_spill_temp_41 = 72 + # gra_spill_temp_42 = 76 + # gra_spill_temp_43 = 80 + # gra_spill_temp_44 = 84 + # gra_spill_temp_45 = 88 + # gra_spill_temp_46 = 92 + # gra_spill_temp_47 = 96 + # gra_spill_temp_48 = 100 + # gra_spill_temp_49 = 104 + # gra_spill_temp_50 = 108 + # gra_spill_temp_51 = 112 + # gra_spill_temp_52 = 116 + # gra_spill_temp_53 = 120 + # gra_spill_temp_54 = 124 + # gra_spill_temp_55 = 128 + # gra_spill_temp_56 = 132 + # gra_spill_temp_57 = 136 + # gra_spill_temp_58 = 140 + # gra_spill_temp_59 = 144 + # gra_spill_temp_60 = 148 + # gra_spill_temp_61 = 152 + # gra_spill_temp_62 = 156 + # gra_spill_temp_63 = 160 + # gra_spill_temp_64 = 164 + # gra_spill_temp_65 = 168 + # gra_spill_temp_66 = 176 + # gra_spill_temp_67 = 192 + # gra_spill_temp_68 = 208 + # gra_spill_temp_69 = 224 + # gra_spill_temp_70 = 240 + + // registers: + // a2: const int16_t *input_data + // a3: const uint16_t input_wd + // a4: const uint16_t input_ht + // a5: const uint16_t channels + // a6: const uint16_t pad_wd + // a7: const uint16_t pad_ht + + // on stack + // const uint16_t stride_wd + // const uint16_t stride_ht + // const int16_t *filter_data + // const int32_t *bias + // int8_t *out_data + // const uint16_t out_wd + // const uint16_t out_ht + // const int32_t out_offset + // const int32_t *out_shift + // const int32_t *out_mult + // const int32_t activation_min + // const int32_t activation_max + + entry a1,288 # + s32i a2,a1,104 # [0] gra_spill_temp_49 + s32i a3,a1,112 # [1] gra_spill_temp_51 + s32i a5,a1,116 # [2] gra_spill_temp_52 + s32i.n a6,a1,56 # [3] gra_spill_temp_37 + addi a14,a1,112 # [4] + addmi a11,a1,256 # [5] + addmi a13,a1,256 # [6] + addmi a15,a1,256 # [7] + l32i a9,a1,304 # [8] id:251 out_data+0x0 + l16ui a8,a1,312 # [9] id:252 out_ht+0x0 + s32i a8,a1,64 # [10] gra_spill_temp_39 + s32i a9,a1,156 # [11] gra_spill_temp_62 + addi a15,a15,60 # [12] + addi a13,a13,72 # [13] + addi a11,a11,76 # [14] + ee.vldbc.32 q0,a11 # [15] id:250 activation_max + ee.vldbc.32 q1,a13 # [16] id:249 activation_min + ee.vldbc.32 q2,a15 # [17] id:248 out_offset + st.qr q2,a14,80 # [18] gra_spill_temp_67-112 + st.qr q1,a14,96 # [19] gra_spill_temp_68-112 + st.qr q0,a14,112 # [20] gra_spill_temp_69-112 + beqz.n a8,.Lt_5_7426 # [21] + +.LBB3_esp_nn_depthwise_conv_s16_mult1_3x3: # 0x7b9 + s32i a1,a1,160 # [0] gra_spill_temp_63 + s32i a7,a1,72 # [1] gra_spill_temp_41 + mul16u a6,a3,a5 # [2] + l32i a14,a1,296 # [3] id:254 filter_data+0x0 + l32i a15,a1,300 # [4] id:253 bias+0x0 + l16ui a9,a1,308 # [5] id:259 out_wd+0x0 + l16ui a13,a1,288 # [6] id:255 stride_wd+0x0 + neg a8,a7 # [7] + l16ui a10,a1,292 # [8] id:258 stride_ht+0x0 + l32i a11,a1,324 # [9] id:257 out_mult+0x0 + l32i a12,a1,320 # [10] id:256 out_shift+0x0 + s32i a12,a1,84 # [11] gra_spill_temp_44 + s32i a11,a1,88 # [12] gra_spill_temp_45 + s32i.n a10,a1,60 # [13] gra_spill_temp_38 + s32i a8,a1,124 # [14] gra_spill_temp_54 + s32i a13,a1,80 # [15] gra_spill_temp_43 + s32i a9,a1,92 # [16] gra_spill_temp_46 + s32i a15,a1,140 # [17] gra_spill_temp_58 + s32i a14,a1,108 # [18] gra_spill_temp_50 + slli 
a6,a6,1 # [19] + movi.n a14,16 # [20] + extui a15,a15,0,4 # [21] + addi a9,a5,-7 # [22] + movi.n a13,0 # [23] + sub a8,a4,a8 # [24] + addx2 a7,a5,a5 # [25] + slli a7,a7,1 # [26] + slli a4,a5,1 # [27] + s32i a13,a1,68 # [28] gra_spill_temp_40 + s32i a9,a1,144 # [29] gra_spill_temp_59 + s32i a15,a1,132 # [30] gra_spill_temp_56 + l32i.n a9,a1,56 # [31] gra_spill_temp_37 + s32i a8,a1,76 # [32] gra_spill_temp_42 + neg a9,a9 # [33] + s32i.n a9,a1,48 # [34] gra_spill_temp_35 + sub a8,a3,a9 # [35] + s32i.n a8,a1,52 # [36] gra_spill_temp_36 + +.Lt_5_7938: # 0x822 + l32i a10,a1,92 # [0] gra_spill_temp_46 + beqz.n a10,.Lt_5_8194 # [2] + +.LBB6_esp_nn_depthwise_conv_s16_mult1_3x3: # 0x827 + l32i.n a5,a1,52 # [0] gra_spill_temp_36 + l32i a11,a1,76 # [1] gra_spill_temp_42 + movi.n a13,0 # [2] + l32i a12,a1,72 # [3] gra_spill_temp_41 + movi.n a15,0 # [4] + l32i.n a8,a1,48 # [5] gra_spill_temp_35 + l32i.n a9,a1,56 # [6] gra_spill_temp_37 + s32i a9,a1,100 # [7] gra_spill_temp_48 + s32i a8,a1,128 # [8] gra_spill_temp_55 + s32i a15,a1,96 # [9] gra_spill_temp_47 + max a12,a12,a13 # [10] + s32i a12,a1,152 # [11] gra_spill_temp_61 + movi.n a13,3 # [12] + min a11,a11,a13 # [13] + s32i a11,a1,136 # [14] gra_spill_temp_57 + sub a11,a11,a12 # [15] + s32i a11,a1,120 # [16] gra_spill_temp_53 + +.Lt_5_8706: # 0x854 + l32i a2,a1,84 # [0] gra_spill_temp_44 + l32i a10,a1,144 # [1] gra_spill_temp_59 + l32i a11,a1,140 # [2] gra_spill_temp_58 + l32i a12,a1,88 # [3] gra_spill_temp_45 + s32i a12,a1,168 # [4] gra_spill_temp_65 + s32i a11,a1,148 # [5] gra_spill_temp_60 + blti a10,1,.Lt_5_8962 # [6] + + movi.n a8,0 # [0] + movi.n a13,0 # [1] + l32i a3,a1,100 # [2] gra_spill_temp_48 + s32i a13,a1,164 # [3] gra_spill_temp_64 + max a3,a3,a8 # [4] + +.Lt_5_9474: # 0x876 + l32i a10,a1,136 # [0] gra_spill_temp_57 + l32i a9,a1,152 # [1] gra_spill_temp_61 + ee.zero.qacc # [2] + bge a9,a10,.Lt_5_9730 # [3] + +.LBB12_esp_nn_depthwise_conv_s16_mult1_3x3: # 0x882 + l32i a12,a1,128 # [0] gra_spill_temp_55 + l32i a15,a1,112 # [1] gra_spill_temp_51 + l32i a10,a1,116 # [2] gra_spill_temp_52 + l32i a13,a1,124 # [3] gra_spill_temp_54 + mull a11,a9,a10 # [4] + add.n a13,a13,a9 # [5] + mull a13,a13,a15 # [6] + addx2 a11,a11,a11 # [7] + l32i a9,a1,164 # [8] gra_spill_temp_64 + add.n a12,a12,a13 # [9] + mull a10,a10,a12 # [10] + add.n a11,a9,a11 # [11] + l32i a12,a1,108 # [12] gra_spill_temp_50 + add.n a9,a9,a10 # [13] + l32i a10,a1,104 # [14] gra_spill_temp_49 + addx2 a11,a11,a12 # [15] + l32i a12,a1,120 # [16] gra_spill_temp_53 + addx2 a9,a9,a10 # [17] + loopgtz a12,.LBB32_esp_nn_depthwise_conv_s16_mult1_3x3 # [18] + + mov.n a13,a9 # [0] + mov.n a12,a11 # [1] + mov.n a9,a11 # [2] + mov.n a11,a13 # [3] + + beqz.n a3,.Lt_5_10498 # [4] if (filter_x_start) + + add.n a11,a4,a13 # [0] + add.n a9,a4,a12 # [1] +.Lt_5_10498: # 0x8c5 + + ee.vld.128.xp q0,a11,a4 # [0] id:261 + ee.vld.128.xp q1,a9,a4 # [1] id:262 + + bnez.n a3,.Lt_5_11010 # [2] if (filter_x_start) + + ee.vmulas.s16.qacc q0,q1 # [0] + ee.vld.128.xp q0,a11,a4 # [1] id:264 + ee.vld.128.xp q1,a9,a4 # [2] id:265 +.Lt_5_11010: # 0x8d6 + + ee.vmulas.s16.qacc q0,q1 # [0] + ee.vld.128.xp q0,a11,a4 # [1] id:267 + ee.vld.128.xp q1,a9,a4 # [2] id:268 + add.n a9,a6,a13 # [3] + + blti a5,3,.Lt_5_11522 # [4] if (filter_x_end) + ee.vmulas.s16.qacc q0,q1 # [0] +.Lt_5_11522: # 0x8e7 + + add.n a11,a7,a12 # [0] + +.LBB32_esp_nn_depthwise_conv_s16_mult1_3x3: # 0x8eb + +.Lt_5_9730: # 0x8eb + // extract data + l32i a9,a1,160 # [0] gra_spill_temp_63 + ee.st.qacc_l.l.128.ip a9,16 # [2] id:270 + ee.st.qacc_l.h.32.ip 
a9,0 # [3] id:271 + l8ui a11,a1,15 # [4] qacc_scratch+15 + l16ui a10,a1,10 # [5] qacc_scratch+10 + l8ui a15,a1,16 # [6] qacc_scratch+16 + l8ui a13,a1,6 # [7] qacc_scratch+6 + l8ui a12,a1,5 # [8] qacc_scratch+5 + s8i a12,a1,2 # [9] qacc_scratch+2 + s8i a13,a1,3 # [10] qacc_scratch+3 + s8i a15,a1,7 # [11] qacc_scratch+7 + s16i a10,a1,4 # [12] qacc_scratch+4 + s8i a11,a1,6 # [13] qacc_scratch+6 + + ee.st.qacc_h.l.128.ip a9,16 # [14] id:281 + ee.st.qacc_h.h.32.ip a9,-32 # [15] id:282 + ee.srcmb.s16.qacc q1,a14,0 # [16] + l8ui a15,a1,31 # [17] qacc_scratch+31 + l8ui a8,a1,32 # [18] qacc_scratch+32 + l16ui a13,a1,26 # [19] qacc_scratch+26 + l8ui a12,a1,22 # [20] qacc_scratch+22 + l8ui a11,a1,21 # [21] qacc_scratch+21 + l16ui a10,a1,16 # [22] qacc_scratch+16 + s16i a10,a1,8 # [23] qacc_scratch+8 + s8i a11,a1,10 # [24] qacc_scratch+10 + s8i a12,a1,11 # [25] qacc_scratch+11 + s16i a13,a1,12 # [26] qacc_scratch+12 + s8i a8,a1,15 # [27] qacc_scratch+15 + s8i a15,a1,14 # [28] qacc_scratch+14 + + + l32i a8,a1,140 # [29] gra_spill_temp_58 , bias + ee.vld.128.ip q0,a9,0 # [30] id:294 + s32i a9,a1,160 # [31] gra_spill_temp_63 + ee.vzip.16 q0,q1 # [32] + beqz.n a8,.Lt_5_12290 # [33] // skip bias + + addi a8,a1,112 # [0] + l32i a10,a1,132 # [1] gra_spill_temp_56 + l32i a9,a1,148 # [2] gra_spill_temp_60 + wur.sar_byte a10 # [3] + ee.vld.128.ip q4,a9,16 # [4] id:297 + ee.vld.128.ip q7,a9,16 # [5] id:298 + ee.vld.128.ip q5,a9,0 # [6] id:299 + s32i a9,a1,148 # [7] gra_spill_temp_60 + ee.src.q.qup q6,q4,q7 # [8] + ee.vadds.s32 q0,q0,q6 # [9] + ee.src.q.qup q3,q4,q5 # [10] + ee.vadds.s32 q1,q1,q3 # [11] + st.qr q1,a8,64 # [12] gra_spill_temp_66-112 + +.Lt_5_12290: # 0x974 + addi a11,a1,112 # [0] + + # 287 q0 = esp_nn_multiply_by_quantized_mult_ver1_esp32s3(q0, out_mult_ptr, out_shift_ptr); + l32i a10,a1,168 # [1] gra_spill_temp_65 + st.qr q1,a11,64 # [2] gra_spill_temp_66-112 + mov.n a11,a2 # [3] + call8 esp_nn_multiply_by_quantized_mult_ver1_esp32s3 # [4] esp_nn_multiply_by_quantized_mult_ver1_esp32s3 + + # 288 out_mult_ptr += 4; + # 289 out_shift_ptr += 4; + # 290 + # 291 q1 = esp_nn_multiply_by_quantized_mult_ver1_esp32s3(q1, out_mult_ptr, out_shift_ptr); + l32i a10,a1,168 # [0] gra_spill_temp_65 + addmi a12,a1,256 # [1] + addi a11,a1,112 # [2] + st.qr q0,a12,-16 # [3] gra_spill_temp_70-256 + ld.qr q0,a11,64 # [4] gra_spill_temp_66-112 + addi a10,a10,16 # [5] + addi a11,a2,16 # [6] + call8 esp_nn_multiply_by_quantized_mult_ver1_esp32s3 # [7] esp_nn_multiply_by_quantized_mult_ver1_esp32s3 + +.LBB25_esp_nn_depthwise_conv_s16_mult1_3x3: # 0x99a +# Part of loop body line 216, head labeled .Lt_5_9474 + movi.n a14,16 # [0] + # 292 out_mult_ptr += 4; + # 293 out_shift_ptr += 4; + addi a2,a2,32 # [1] + l32i a15,a1,144 # [2] gra_spill_temp_59 + l32i a9,a1,156 # [3] gra_spill_temp_62 + l32i a8,a1,168 # [4] gra_spill_temp_65 + addmi a12,a1,256 # [5] + addi a13,a1,112 # [6] + ld.qr q3,a13,112 # [7] gra_spill_temp_69-112 + ld.qr q1,a13,80 # [8] gra_spill_temp_67-112 + ld.qr q2,a12,-16 # [9] gra_spill_temp_70-256 + addi a8,a8,32 # [10] + s32i a8,a1,168 # [11] gra_spill_temp_65 + ee.vadds.s32 q2,q2,q1 # [12] + ee.vadds.s32 q1,q0,q1 # [13] + ee.vmin.s32 q0,q2,q3 # [14] + ee.vmin.s32 q1,q1,q3 # [15] + ld.qr q2,a13,96 # [16] gra_spill_temp_68-112 + l32i a13,a1,164 # [17] gra_spill_temp_64 + ee.vmax.s32 q1,q1,q2 # [18] + ee.vmax.s32 q0,q0,q2 # [19] + addi.n a13,a13,8 # [20] + s32i a13,a1,164 # [21] gra_spill_temp_64 + ee.vunzip.16 q0,q1 # [22] + ee.vunzip.8 q0,q1 # [23] + ee.vst.l.64.ip q0,a9,8 # [24] id:302 + s32i a9,a1,156 # 
[25] gra_spill_temp_62 + blt a13,a15,.Lt_5_9474 # [26] + +.Lt_5_8962: # 0x9e9 +# Part of loop body line 203, head labeled .Lt_5_8706 + l32i a8,a1,92 # [0] gra_spill_temp_46 + l32i a11,a1,100 # [1] gra_spill_temp_48 + l32i a10,a1,128 # [2] gra_spill_temp_55 + l32i a9,a1,80 # [3] gra_spill_temp_43 + l32i a15,a1,96 # [4] gra_spill_temp_47 + sub a5,a5,a9 # [5] + addi.n a15,a15,1 # [6] + s32i a15,a1,96 # [7] gra_spill_temp_47 + add.n a10,a10,a9 # [8] + sub a11,a11,a9 # [9] + s32i a11,a1,100 # [10] gra_spill_temp_48 + s32i a10,a1,128 # [11] gra_spill_temp_55 + sub a15,a15,a8 # [12] + bnez a15,.Lt_5_8706 # [13] + +.Lt_5_8194: # 0xa11 +# Part of loop body line 201, head labeled .Lt_5_7938 + l32i a13,a1,64 # [0] gra_spill_temp_39 + l32i a10,a1,72 # [1] gra_spill_temp_41 + l32i a9,a1,124 # [2] gra_spill_temp_54 + l32i.n a8,a1,60 # [3] gra_spill_temp_38 + l32i a12,a1,68 # [4] gra_spill_temp_40 + l32i a15,a1,76 # [5] gra_spill_temp_42 + addi.n a12,a12,1 # [6] + s32i a12,a1,68 # [7] gra_spill_temp_40 + sub a15,a15,a8 # [8] + add.n a9,a9,a8 # [9] + sub a10,a10,a8 # [10] + s32i a10,a1,72 # [11] gra_spill_temp_41 + s32i a9,a1,124 # [12] gra_spill_temp_54 + s32i a15,a1,76 # [13] gra_spill_temp_42 + sub a12,a12,a13 # [14] + bnez a12,.Lt_5_7938 # [15] + +.Lt_5_7426: # 0xa3e + retw.n # [0] + + .size esp_nn_depthwise_conv_s16_mult1_3x3_esp32s3, . - esp_nn_depthwise_conv_s16_mult1_3x3_esp32s3 + +#elif defined(WIO_TERMINAL) +// dummy code, added for old ARM toolchain +.syntax unified +.thumb +.cpu cortex-m0 + +.section .text +#endif // EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3 diff --git a/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_depthwise_conv_s16_mult1_3x3_no_pad_esp32s3.S b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_depthwise_conv_s16_mult1_3x3_no_pad_esp32s3.S new file mode 100644 index 0000000..06f9307 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_depthwise_conv_s16_mult1_3x3_no_pad_esp32s3.S @@ -0,0 +1,367 @@ +#include "edge-impulse-sdk/classifier/ei_classifier_config.h" +#if EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3 +// Copyright 2021-2022 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ + .text + .literal_position + + # Program Unit: esp_nn_depthwise_conv_s16_mult1_3x3_no_pad_esp32s3 + .type esp_nn_depthwise_conv_s16_mult1_3x3_no_pad_esp32s3, @function + .align 4 + .global esp_nn_depthwise_conv_s16_mult1_3x3_no_pad_esp32s3 + +esp_nn_depthwise_conv_s16_mult1_3x3_no_pad_esp32s3: # 0xa42 + # qacc_scratch = 0 + # gra_spill_temp_71 = 48 + # gra_spill_temp_72 = 52 + # gra_spill_temp_73 = 56 + # gra_spill_temp_74 = 60 + # gra_spill_temp_75 = 64 + # gra_spill_temp_76 = 68 + # gra_spill_temp_77 = 72 + # gra_spill_temp_78 = 76 + # gra_spill_temp_79 = 80 + # gra_spill_temp_80 = 84 + # gra_spill_temp_81 = 88 + # gra_spill_temp_82 = 92 + # gra_spill_temp_83 = 96 + # gra_spill_temp_84 = 100 + # gra_spill_temp_85 = 104 + # gra_spill_temp_86 = 108 + # gra_spill_temp_87 = 112 + # gra_spill_temp_88 = 116 + # gra_spill_temp_89 = 120 + # gra_spill_temp_90 = 124 + # gra_spill_temp_91 = 128 + # gra_spill_temp_92 = 132 + # gra_spill_temp_93 = 136 + # gra_spill_temp_94 = 140 + # gra_spill_temp_95 = 144 + # gra_spill_temp_96 = 160 + # gra_spill_temp_97 = 176 + # gra_spill_temp_98 = 192 + # gra_spill_temp_99 = 208 + # gra_spill_temp_100 = 224 + # gra_spill_temp_101 = 240 + # gra_spill_temp_102 = 244 + # gra_spill_temp_103 = 248 + + // registers: + // a2: const int16_t *input_data + // a3: const uint16_t input_wd + // a4: const uint16_t input_ht + // a5: const uint16_t channels + // a6: const uint16_t stride_wd + // a7: const uint16_t stride_ht + + // on stack: + // const int16_t *filter_data + // const int32_t *bias + // int8_t *out_data + // const uint16_t out_wd + // const uint16_t out_ht + // const int32_t out_offset + // const int32_t *out_shift + // const int32_t *out_mult + // const int32_t activation_min + // const int32_t activation_max + + entry a1,288 # + s32i a2,a1,120 # [0] gra_spill_temp_89 + s32i.n a3,a1,48 # [1] gra_spill_temp_71 + s32i a5,a1,76 # [2] gra_spill_temp_78 + s32i a6,a1,84 # [3] gra_spill_temp_80 + s32i.n a7,a1,60 # [4] gra_spill_temp_74 + l32i a12,a1,296 # [5] id:241 out_data+0x0 + addi a14,a1,112 # [6] + addmi a10,a1,256 # [7] + addmi a13,a1,256 # [8] + addmi a15,a1,256 # [9] + + // height loop + l16ui a8,a1,304 # [10] id:242 out_ht+0x0 + s32i.n a8,a1,56 # [11] gra_spill_temp_73 + addi a15,a15,52 # [12] + addi a13,a13,64 # [13] + addi a10,a10,68 # [14] + ee.vldbc.32 q0,a10 # [15] id:240 activation_max + ee.vldbc.32 q1,a13 # [16] id:239 activation_min + ee.vldbc.32 q2,a15 # [17] id:238 out_offset + st.qr q2,a14,64 # [18] gra_spill_temp_97-112 + st.qr q1,a14,80 # [19] gra_spill_temp_98-112 + st.qr q0,a14,96 # [20] gra_spill_temp_99-112 + beqz.n a8,.Lt_6_6914 # [21] + +.LBB3_esp_nn_depthwise_conv_s16_mult1_3x3_no_pad: # 0xa83 + s32i a1,a1,144 # [0] gra_spill_temp_95 + mul16u a7,a3,a5 # [1] + s32i a4,a1,72 # [2] gra_spill_temp_77 + addi a9,a5,-7 # [3] + l16ui a11,a1,300 # [4] id:247 out_wd+0x0 + l32i a10,a1,292 # [5] id:243 bias+0x0 + l32i a15,a1,288 # [6] id:244 filter_data+0x0 + l32i a13,a1,316 # [7] id:246 out_mult+0x0 + l32i a14,a1,312 # [8] id:245 out_shift+0x0 + s32i a14,a1,88 # [9] gra_spill_temp_81 + s32i a13,a1,92 # [10] gra_spill_temp_82 + s32i a15,a1,124 # [11] gra_spill_temp_90 + s32i a10,a1,116 # [12] gra_spill_temp_88 + s32i a11,a1,96 # [13] gra_spill_temp_83 + s32i a9,a1,136 # [14] gra_spill_temp_93 + addx2 a4,a5,a5 # [15] + slli a4,a4,1 # [16] + slli a7,a7,1 # [17] + l32i.n a9,a1,60 # [18] gra_spill_temp_74 + movi.n a11,0 # [19] + extui a10,a10,0,4 # [20] + movi.n a15,0 # [21] + slli a5,a5,1 # [22] + s32i a15,a1,68 # [23] gra_spill_temp_76 + s32i 
a10,a1,112 # [24] gra_spill_temp_87 + s32i a11,a1,64 # [25] gra_spill_temp_75 + mul16u a8,a3,a9 # [26] + movi.n a11,0 # [27] + s32i a11,a1,80 # [28] gra_spill_temp_79 + s32i.n a8,a1,52 # [29] gra_spill_temp_72 + +.Lt_6_7426: # 0xad8 // width_loop + l32i a8,a1,96 # [0] gra_spill_temp_83 + beqz.n a8,.Lt_6_7682 # [2] + + movi.n a11,3 # [0] + l32i a10,a1,72 # [1] gra_spill_temp_77 + movi.n a9,0 # [2] + movi.n a13,0 # [3] + l32i.n a14,a1,48 # [4] gra_spill_temp_71 + s32i a14,a1,108 # [5] gra_spill_temp_86 + s32i a13,a1,104 # [6] gra_spill_temp_85 + s32i a9,a1,100 # [7] gra_spill_temp_84 + min a10,a10,a11 # [8] + s32i a10,a1,128 # [9] gra_spill_temp_91 + +.Lt_6_8194: # 0xaf7 + l32i a2,a1,88 # [0] gra_spill_temp_81 + l32i a6,a1,92 # [1] gra_spill_temp_82 + l32i a8,a1,116 # [2] gra_spill_temp_88 + +// channel loop + l32i a15,a1,136 # [3] gra_spill_temp_93 + s32i a8,a1,140 # [4] gra_spill_temp_94 + blti a15,1,.Lt_6_8450 # [5] + + movi.n a11,0 # [0] + movi.n a10,0 # [1] + l32i a9,a1,76 # [2] gra_spill_temp_78 + l32i a14,a1,80 # [3] gra_spill_temp_79 + movi.n a8,3 # [4] + l32i a3,a1,108 # [5] gra_spill_temp_86 + l32i a13,a1,104 # [6] gra_spill_temp_85 + min a3,a3,a8 # [7] + add.n a13,a13,a14 # [8] + mull a9,a9,a13 # [9] + s32i a9,a1,132 # [10] gra_spill_temp_92 + +.Lt_6_8962: # 0xb26 + ee.zero.qacc # [0] + l32i a9,a1,132 # [1] gra_spill_temp_92 + l32i a13,a1,120 # [2] gra_spill_temp_89 + add.n a9,a9,a10 # [3] + addx2 a9,a9,a13 # [4] + l32i a13,a1,124 # [5] gra_spill_temp_90 + l32i a14,a1,128 # [6] gra_spill_temp_91 + add.n a13,a11,a13 # [7] + loopgtz a14,.LBB30_esp_nn_depthwise_conv_s16_mult1_3x3_no_pad # [8] + +.Lt_6_9730: # 0xb3f +# Loop body line 360, nesting depth: 4, estimated iterations: 100 + mov.n a14,a13 # [0] + mov.n a15,a9 # [1] + ee.vld.128.xp q0,a15,a5 # [2] id:249 + ee.vld.128.xp q1,a14,a5 # [3] id:250 + add.n a9,a9,a7 # [4] + beqi a3,2,.LBB15_esp_nn_depthwise_conv_s16_mult1_3x3_no_pad # [5] + +.Lt_6_9986: # 0xb4e + beqi a3,3,.LBB17_esp_nn_depthwise_conv_s16_mult1_3x3_no_pad # [0] + +.Lt_6_10498: # 0xb51 + add.n a13,a13,a4 # [0] + ee.vmulas.s16.qacc q0,q1 # [1] + +.LBB30_esp_nn_depthwise_conv_s16_mult1_3x3_no_pad: # 0xb58 + + // extract data + l32i a15,a1,144 # [0] gra_spill_temp_95 + ee.st.qacc_l.l.128.ip a15,16 # [2] id:258 + ee.st.qacc_l.h.32.ip a15,0 # [3] id:259 + l8ui a14,a1,15 # [4] qacc_scratch+15 + l8ui a13,a1,16 # [5] qacc_scratch+16 + l8ui a8,a1,5 # [6] qacc_scratch+5 + l8ui a9,a1,6 # [7] qacc_scratch+6 + s8i a9,a1,3 # [8] qacc_scratch+3 + s8i a8,a1,2 # [9] qacc_scratch+2 + s8i a13,a1,7 # [10] qacc_scratch+7 + s8i a14,a1,6 # [11] qacc_scratch+6 + l16ui a13,a1,10 # [12] qacc_scratch+10 + s16i a13,a1,4 # [13] qacc_scratch+4 + ee.st.qacc_h.l.128.ip a15,16 # [14] id:269 + ee.st.qacc_h.h.32.ip a15,-32 # [15] id:270 + l8ui a9,a1,32 # [16] qacc_scratch+32 + l8ui a13,a1,22 # [17] qacc_scratch+22 + l8ui a8,a1,31 # [18] qacc_scratch+31 + l16ui a14,a1,26 # [19] qacc_scratch+26 + s16i a14,a1,12 # [20] qacc_scratch+12 + s8i a8,a1,14 # [21] qacc_scratch+14 + s8i a13,a1,11 # [22] qacc_scratch+11 + s8i a9,a1,15 # [23] qacc_scratch+15 + + l32i a13,a1,116 # [24] gra_spill_temp_88 + l8ui a9,a1,21 # [25] qacc_scratch+21 + l16ui a8,a1,16 # [26] qacc_scratch+16 + movi.n a14,16 # [27] + ee.srcmb.s16.qacc q1,a14,0 # [28] + s16i a8,a1,8 # [29] qacc_scratch+8 + s8i a9,a1,10 # [30] qacc_scratch+10 + ee.vld.128.ip q0,a15,0 # [31] id:282 + s32i a15,a1,144 # [32] gra_spill_temp_95 + ee.vzip.16 q0,q1 # [33] + + bnez.n a13,.LBB20_esp_nn_depthwise_conv_s16_mult1_3x3_no_pad # [34] + + s32i a12,a1,240 # [0] 
gra_spill_temp_101 + s32i a11,a1,244 # [1] gra_spill_temp_102 + s32i a10,a1,248 # [2] gra_spill_temp_103 + addi a14,a1,112 # [3] + st.qr q1,a14,48 # [4] gra_spill_temp_96-112 + j .Lt_6_11266 # [5] + +.LBB15_esp_nn_depthwise_conv_s16_mult1_3x3_no_pad: # 0xbce +# Part of loop body line 360, head labeled .Lt_6_9730 + ee.vmulas.s16.qacc.ld.xp q0,a15,a5,q0,q1 # [0] id:251 + ee.vld.128.xp q1,a14,a5 # [1] id:252 + bnei a3,3,.Lt_6_10498 # [2] + +.LBB17_esp_nn_depthwise_conv_s16_mult1_3x3_no_pad: # 0xbd8 + ee.vmulas.s16.qacc.ld.xp q3,a15,a5,q0,q1 # [0] id:253 + ee.vld.128.xp q4,a14,a5 # [1] id:254 + ee.vld.128.xp q1,a14,a5 # [2] id:256 + ee.vmulas.s16.qacc.ld.xp q0,a15,a5,q3,q4 # [3] id:255 + j .Lt_6_10498 # [4] + +.LBB20_esp_nn_depthwise_conv_s16_mult1_3x3_no_pad: # 0xbe9 +# Part of loop body line 358, head labeled .Lt_6_8962 + s32i a12,a1,240 # [0] gra_spill_temp_101 + s32i a11,a1,244 # [1] gra_spill_temp_102 + s32i a10,a1,248 # [2] gra_spill_temp_103 + addi a15,a1,112 # [3] + l32i a9,a1,112 # [4] gra_spill_temp_87 + l32i a8,a1,140 # [5] gra_spill_temp_94 + wur.sar_byte a9 # [6] + ee.vld.128.ip q6,a8,16 # [7] id:285 + ee.vld.128.ip q3,a8,16 # [8] id:286 + ee.vld.128.ip q7,a8,0 # [9] id:287 + s32i a8,a1,140 # [10] gra_spill_temp_94 + ee.src.q.qup q2,q6,q3 # [11] + ee.vadds.s32 q0,q0,q2 # [12] + ee.src.q.qup q5,q6,q7 # [13] + ee.vadds.s32 q1,q1,q5 # [14] + st.qr q1,a15,48 # [15] gra_spill_temp_96-112 + +.Lt_6_11266: # 0xc19 + # 423 q0 = esp_nn_multiply_by_quantized_mult_ver1_esp32s3(q0, out_mult_ptr, out_shift_ptr); + mov.n a10,a6 # [0] + mov.n a11,a2 # [1] + call8 esp_nn_multiply_by_quantized_mult_ver1_esp32s3 # [2] esp_nn_multiply_by_quantized_mult_ver1_esp32s3 + + addi a11,a1,112 # [0] + addi a10,a6,16 # [1] + st.qr q0,a11,112 # [2] gra_spill_temp_100-112 + ld.qr q0,a11,48 # [3] gra_spill_temp_96-112 + addi a11,a2,16 # [4] + call8 esp_nn_multiply_by_quantized_mult_ver1_esp32s3 # [5] esp_nn_multiply_by_quantized_mult_ver1_esp32s3 + + addi a6,a6,32 # [0] + addi a2,a2,32 # [1] + + l32i a13,a1,136 # [2] gra_spill_temp_93 + l32i a12,a1,240 # [3] gra_spill_temp_101 + l32i a10,a1,248 # [4] gra_spill_temp_103 + l32i a11,a1,244 # [5] gra_spill_temp_102 + addi a9,a1,112 # [6] + ld.qr q6,a9,80 # [7] gra_spill_temp_98-112 + ld.qr q7,a9,96 # [8] gra_spill_temp_99-112 + ld.qr q5,a9,64 # [9] gra_spill_temp_97-112 + ld.qr q4,a9,112 # [10] gra_spill_temp_100-112 + addi a11,a11,16 # [11] + addi.n a10,a10,8 # [12] + ee.vadds.s32 q4,q4,q5 # [13] + ee.vadds.s32 q5,q0,q5 # [14] + ee.vmin.s32 q4,q4,q7 # [15] + ee.vmax.s32 q4,q4,q6 # [16] + ee.vmin.s32 q5,q5,q7 # [17] + ee.vmax.s32 q5,q5,q6 # [18] + ee.vunzip.16 q4,q5 # [19] + ee.vunzip.8 q4,q5 # [20] + ee.vst.l.64.ip q4,a12,8 # [21] id:290 + blt a10,a13,.Lt_6_8962 # [22] + +.Lt_6_8450: # 0xc76 +# Part of loop body line 348, head labeled .Lt_6_8194 + l32i a11,a1,96 # [0] gra_spill_temp_83 + l32i a15,a1,104 # [1] gra_spill_temp_85 + l32i a14,a1,84 # [2] gra_spill_temp_80 + l32i a10,a1,100 # [3] gra_spill_temp_84 + l32i a13,a1,108 # [4] gra_spill_temp_86 + addi.n a10,a10,1 # [5] + s32i a10,a1,100 # [6] gra_spill_temp_84 + sub a13,a13,a14 # [7] + add.n a15,a15,a14 # [8] + s32i a15,a1,104 # [9] gra_spill_temp_85 + s32i a13,a1,108 # [10] gra_spill_temp_86 + sub a10,a10,a11 # [11] + bnez a10,.Lt_6_8194 # [12] + +.Lt_6_7682: # 0xc9b + l32i.n a9,a1,56 # [0] gra_spill_temp_73 + l32i a15,a1,64 # [1] gra_spill_temp_75 + l32i.n a14,a1,52 # [2] gra_spill_temp_72 + l32i a13,a1,80 # [3] gra_spill_temp_79 + l32i.n a11,a1,60 # [4] gra_spill_temp_74 + l32i a8,a1,68 # [5] 
gra_spill_temp_76 + l32i a10,a1,72 # [6] gra_spill_temp_77 + addi.n a8,a8,1 # [7] + s32i a8,a1,68 # [8] gra_spill_temp_76 + sub a10,a10,a11 # [9] + add.n a13,a13,a14 # [10] + add.n a15,a15,a11 # [11] + s32i a15,a1,64 # [12] gra_spill_temp_75 + s32i a13,a1,80 # [13] gra_spill_temp_79 + s32i a10,a1,72 # [14] gra_spill_temp_77 + sub a8,a8,a9 # [15] + bnez a8,.Lt_6_7426 # [16] + +.Lt_6_6914: # 0xcc8 + retw.n # [0] + + .size esp_nn_depthwise_conv_s16_mult1_3x3_no_pad_esp32s3, . - esp_nn_depthwise_conv_s16_mult1_3x3_no_pad_esp32s3 + +#elif defined(WIO_TERMINAL) +// dummy code, added for old ARM toolchain +.syntax unified +.thumb +.cpu cortex-m0 + +.section .text +#endif // EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3 diff --git a/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_depthwise_conv_s16_mult1_esp32s3.S b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_depthwise_conv_s16_mult1_esp32s3.S new file mode 100644 index 0000000..8568df5 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_depthwise_conv_s16_mult1_esp32s3.S @@ -0,0 +1,345 @@ +#include "edge-impulse-sdk/classifier/ei_classifier_config.h" +#if EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3 +// Copyright 2021-2022 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ + .text + .literal_position + + # Program Unit: esp_nn_depthwise_conv_s16_mult1_esp32s3 + .type esp_nn_depthwise_conv_s16_mult1_esp32s3, @function + .align 4 + .global esp_nn_depthwise_conv_s16_mult1_esp32s3 + +esp_nn_depthwise_conv_s16_mult1_esp32s3: # 0x4c8 + # scratch_buf = 0 + # gra_spill_temp_2 = 48 + # gra_spill_temp_22 = 52 + # gra_spill_temp_4 = 56 + # gra_spill_temp_23 = 60 + # gra_spill_temp_24 = 64 + # gra_spill_temp_7 = 68 + # gra_spill_temp_26 = 72 + # gra_spill_temp_27 = 76 + # gra_spill_temp_28 = 80 + # gra_spill_temp_29 = 84 + # gra_spill_temp_12 = 88 + # gra_spill_temp_13 = 92 + # gra_spill_temp_14 = 96 + # gra_spill_temp_15 = 100 + # gra_spill_temp_21 = 104 + # gra_spill_temp_17 = 108 + # gra_spill_temp_18 = 112 + # gra_spill_temp_20 = 116 + # gra_spill_temp_30 = 0 + # gra_spill_temp_34 = 16 + + // in registers: + // a2: *input_data + // a3: input_wd + // a4: input_ht + // a5: channels + // a6: pad_wd + // a7: pad_ht + + // on stack: + // stride_wd + // stride_ht + // *filter_data + // filter_wd + // filter_ht + // *bias + // *out_data + // out_wd + // out_ht + // out_offset + // *out_shift + // *out_mult + // activation_min + // activation_max + + entry a1,160 # + l32i a9,a1,184 # [7] id:237 out_data+0x0 + l16ui a8,a1,192 # [8] id:238 out_ht+0x0 + s32i a2,a1,52 # [0] gra_spill_temp_22 + s32i.n a4,a1,56 # [1] gra_spill_temp_4 + s32i a5,a1,60 # [2] gra_spill_temp_23 + s32i a9,a1,112 # [10] gra_spill_temp_18 + beqz.n a8,.Lt_4_7170 # [20] + +.LBB3_esp_nn_depthwise_conv_s16_mult1: # 0x508 + l16ui a4,a1,172 # [0] id:240 filter_wd+0x0 + neg a13,a7 # [2] + neg a12,a6 # [3] + sext a12,a12,15 # [16] + sext a13,a13,15 # [17] + s32i a13,a1,92 # [18] gra_spill_temp_13 + s32i.n a12,a1,48 # [19] gra_spill_temp_2 + movi.n a8,0 # [20] + slli a9,a5,1 # [21] + addi a10,a5,-7 # [22] + s32i a10,a1,100 # [23] gra_spill_temp_15 + s32i a9,a1,64 # [24] gra_spill_temp_24 + s32i a8,a1,68 # [25] gra_spill_temp_7 + j .Lt_4_7682 # [30] + +.Lt_4_7938: # 0x561 + l32i a15,a1,192 # [0] out_ht + l32i.n a9,a1,164 # [1] stride_ht + l32i a14,a1,68 # [2] gra_spill_temp_7 + l32i a8,a1,92 # [3] gra_spill_temp_13 + addi.n a14,a14,1 # [4] + s32i a14,a1,68 # [5] gra_spill_temp_7 + add.n a9,a8,a9 # [6] + sub a14,a14,a15 # [7] + sext a8,a9,15 # [8] + s32i a8,a1,92 # [9] gra_spill_temp_13 + beqz a14,.Lt_4_7170 # [10] + +.Lt_4_7682: # 0x57f +# Loop body line 59, nesting depth: 1, estimated iterations: 100 + # 60 const int16_t base_y = (out_y * stride_ht) - pad_ht; + # 61 for (int out_x = 0; out_x < out_wd; out_x++) { //width_loop + l32i a10,a1,188 # [0] out_width + beqz.n a10,.Lt_4_7938 # [2] + +.LBB6_esp_nn_depthwise_conv_s16_mult1: # 0x584 +# Part of loop body line 59, head labeled .Lt_4_7682 + movi.n a14,0 # [0] + l32i.n a7,a1,176 # [1] filter_ht + l32i a13,a1,92 # [2] gra_spill_temp_13 + l32i.n a8,a1,56 # [3] gra_spill_temp_4 + movi.n a11,0 # [4] + l32i.n a12,a1,48 # [5] gra_spill_temp_2 + s32i a12,a1,84 # [6] gra_spill_temp_29 + s32i a11,a1,88 # [7] gra_spill_temp_12 + sub a8,a8,a13 # [8] + min a7,a7,a8 # [9] + neg a13,a13 # [10] + max a13,a13,a14 # [11] + s32i a13,a1,96 # [12] gra_spill_temp_14 + j .Lt_4_8450 # [13] + +.Lt_4_8706: # 0x5a9 +# Part of loop body line 61, head labeled .Lt_4_8450 + l32i a10,a1,188 # [0] out_width + l32i a12,a1,160 # [1] stride_wd + l32i a9,a1,88 # [2] gra_spill_temp_12 + l32i a11,a1,84 # [3] gra_spill_temp_29 + addi.n a9,a9,1 # [4] + s32i a9,a1,88 # [5] gra_spill_temp_12 + add.n a12,a11,a12 # [6] + sext a11,a12,15 # [7] + s32i a11,a1,84 # [8] gra_spill_temp_29 + beq 
a9,a10,.Lt_4_7938 # [9] + +.Lt_4_8450: # 0x5c5 +# Loop body line 61, nesting depth: 2, estimated iterations: 100 + # 69 uint32_t bias_ptr = (uint32_t) bias; + # 70 const int32_t *out_mult_ptr = out_mult; + # 71 const int32_t *out_shift_ptr = out_shift; + # 72 + # 73 for (int ch_idx = 0; ch_idx < channels - 7; ch_idx += 8) {//channel_loop + l32i a13,a1,100 # [0] gra_spill_temp_15 + l32i a14,a1,180 # [1] bias + l32i a15,a1,204 # [2] out_mult + l32i a8,a1,200 # [3] out_shift + s32i a8,a1,104 # [4] gra_spill_temp_21 + s32i a15,a1,116 # [5] gra_spill_temp_20 + s32i a14,a1,108 # [6] gra_spill_temp_17 + blti a13,1,.Lt_4_8706 # [7] + +.LBB9_esp_nn_depthwise_conv_s16_mult1: # 0x5dd +# Part of loop body line 61, head labeled .Lt_4_8450 + movi.n a2,0 # [0] + l32i a5,a1,84 # [1] gra_spill_temp_29 + movi.n a8,0 # [2] + neg a6,a5 # [3] + max a6,a6,a8 # [4] + sub a5,a3,a5 # [5] + min a5,a4,a5 # [6] + sub a9,a5,a6 # [7] + s32i a9,a1,72 # [8] gra_spill_temp_26 + j .Lt_4_9218 # [9] + +.Lt_4_9474: # 0x5f9 + +// extract data + mov a11,a1 + ee.st.qacc_l.l.128.ip a11,16 # [2] id:252 + ee.st.qacc_l.h.32.ip a11,0 # [3] id:253 + l8ui a12,a1,15 # [4] scratch_buf+15 + l16ui a10,a1,10 # [5] scratch_buf+10 + l8ui a13,a1,5 # [6] scratch_buf+5 + l8ui a14,a1,6 # [7] scratch_buf+6 + l8ui a15,a1,16 # [8] scratch_buf+16 + s8i a13,a1,2 # [11] scratch_buf+2 + s8i a14,a1,3 # [10] scratch_buf+3 + s8i a15,a1,7 # [9] scratch_buf+7 + s16i a10,a1,4 # [12] scratch_buf+4 + s8i a12,a1,6 # [13] scratch_buf+6 + + movi.n a10,16 # [14] + ee.st.qacc_h.l.128.ip a11,16 # [15] id:263 + ee.st.qacc_h.h.32.ip a11,-32 # [16] id:264 + ee.srcmb.s16.qacc q1,a10,0 # [17] + l8ui a8,a1,31 # [18] scratch_buf+31 + l8ui a9,a1,32 # [19] scratch_buf+32 + l16ui a12,a1,16 # [20] scratch_buf+16 + l8ui a13,a1,21 # [21] scratch_buf+21 + l8ui a14,a1,22 # [22] scratch_buf+22 + l16ui a15,a1,26 # [23] scratch_buf+26 + s8i a13,a1,10 # [26] scratch_buf+10 + s8i a14,a1,11 # [25] scratch_buf+11 + s16i a15,a1,12 # [24] scratch_buf+12 + s16i a12,a1,8 # [27] scratch_buf+8 + s8i a9,a1,15 # [28] scratch_buf+15 + s8i a8,a1,14 # [29] scratch_buf+14 + + l32i a9,a1,180 # [30] bias + ee.vld.128.ip q0,a11,0 # [31] id:164 + ee.vzip.16 q0,q1 # [33] + beqz.n a9,.Lt_4_11522 # [34] // skip bias + +// add bias + l32i a9,a1,108 # [0] gra_spill_temp_17 + addi a8,a1,112 # [1] + extui a10,a9,0,4 # [2] + wur.sar_byte a10 # [3] + ee.vld.128.ip q4,a9,16 # [4] id:279 + ee.vld.128.ip q7,a9,16 # [5] id:168 + ee.vld.128.ip q5,a9,0 # [6] id:281 + s32i a9,a1,108 # [7] gra_spill_temp_17 + ee.src.q q4,q4,q7 # [8] + ee.src.q q7,q7,q5 # [10] + ee.vadds.s32 q0,q0,q4 # [9] + ee.vadds.s32 q1,q1,q7 # [11] + st.qr q1,a1,0 # [12] gra_spill_temp_30-112 + +.Lt_4_11522: # 0x684 + +// apply quantisation: esp_nn_multiply_by_quantized_mult_ver1_esp32s3(q0, out_mult_ptr, out_shift_ptr); + + l32i a10,a1,116 # [1] gra_spill_temp_20 + l32i a11,a1,104 # [3] gra_spill_temp_21 + st.qr q1,a1,0 # [2] gra_spill_temp_30-112 + call8 esp_nn_multiply_by_quantized_mult_ver1_esp32s3 # [4] esp_nn_multiply_by_quantized_mult_ver1_esp32s3 + + l32i a10,a1,116 # [2] gra_spill_temp_20 + l32i a11,a1,104 # [0] gra_spill_temp_21 + st.qr q0,a1,16 # [3] gra_spill_temp_34-112 + ld.qr q0,a1,0 # [4] gra_spill_temp_30-112 + addi a10,a10,16 # [5] // out_mult_ptr += 4 + addi a11,a11,16 # [6] // out_shift_ptr += 4 + call8 esp_nn_multiply_by_quantized_mult_ver1_esp32s3 # [7] esp_nn_multiply_by_quantized_mult_ver1_esp32s3 + +// add offset, apply activation and store + l32i a13,a1,100 # [0] gra_spill_temp_15 + addi.n a2,a2,8 # [1] + l32i a8,a1,112 # 
[2] gra_spill_temp_18 + l32i a15,a1,116 # [3] gra_spill_temp_20 + l32i a14,a1,104 # [4] gra_spill_temp_21 + + addi a12,a1,212 + ee.vldbc.32 q3,a12 # [14] id:236 activation_max + addi a12,a1,196 + ee.vldbc.32 q1,a12 # [16] id:234 out_offset + addi a12,a1,208 + + ld.qr q2,a1,16 # [8] gra_spill_temp_34-112 + + addi a14,a14,32 # [9] + addi a15,a15,32 # [10] + s32i a15,a1,116 # [11] gra_spill_temp_20 + ee.vadds.s32 q2,q2,q1 # [12] + s32i a14,a1,104 # [13] gra_spill_temp_21 + ee.vadds.s32 q1,q0,q1 # [14] + ee.vmin.s32 q0,q2,q3 # [15] + ee.vldbc.32 q2,a12 # [16] id:234 out_offset + ee.vmin.s32 q1,q1,q3 # [17] + ee.vmax.s32 q1,q1,q2 # [18] + ee.vmax.s32 q0,q0,q2 # [19] + ee.vunzip.16 q0,q1 # [20] + ee.vunzip.8 q0,q1 # [21] + ee.vst.l.64.ip q0,a8,8 # [22] id:172 + s32i a8,a1,112 # [23] gra_spill_temp_18 + bge a2,a13,.Lt_4_8706 # [24] + +.Lt_4_9218: # 0x6f5 + ee.zero.qacc # [0] + l32i a13,a1,96 # [1] gra_spill_temp_14 + s32i a13,a1,80 # [2] gra_spill_temp_28 + bge a13,a7,.Lt_4_9474 # [3] + +.LBB12_esp_nn_depthwise_conv_s16_mult1: # 0x701 // channel_loop + mull a15,a13,a4 # [0] + l32i a14,a1,92 # [1] gra_spill_temp_13 + add.n a8,a15,a5 # [2] + add.n a14,a14,a13 # [3] + mull a14,a3,a14 # [4] + s32i a8,a1,76 # [5] gra_spill_temp_27 + bge a6,a5,.Lt_4_10242 # [6] + +.LBB15_esp_nn_depthwise_conv_s16_mult1: # 0x714 + l32i a12,a1,64 # [0] gra_spill_temp_24 + l32i a9,a1,168 # [1] filter_data + l32i a10,a1,60 # [2] gra_spill_temp_23 + l32i a11,a1,84 # [3] gra_spill_temp_29 + add.n a8,a15,a6 # [4] + add.n a11,a11,a6 # [5] + mull a8,a8,a10 # [6] + add.n a11,a14,a11 # [7] + mull a10,a10,a11 # [8] + add.n a8,a2,a8 # [9] + l32i a11,a1,52 # [10] gra_spill_temp_22 + addx2 a8,a8,a9 # [11] + add.n a10,a2,a10 # [12] + l32i a9,a1,72 # [13] gra_spill_temp_26 + addx2 a10,a10,a11 # [14] + loopgtz a9,.LBB41_esp_nn_depthwise_conv_s16_mult1 # [15] +// innermost loop + ee.vld.128.xp q0,a10,a12 # [0*II+3] id:249 + ee.vld.128.xp q1,a8,a12 # [0*II+4] id:250 + ee.vmulas.s16.qacc q0,q1 # [0*II+6] +.LBB41_esp_nn_depthwise_conv_s16_mult1: # 0x750 + +.Lt_4_10242: # 0x750 + add.n a14,a14,a3 # [0] + add.n a15,a15,a4 # [1] + l32i a9,a1,80 # [2] gra_spill_temp_28 + l32i a10,a1,76 # [3] gra_spill_temp_27 + addi.n a9,a9,1 # [4] + add.n a10,a10,a4 # [5] + s32i a10,a1,76 # [6] gra_spill_temp_27 + s32i a9,a1,80 # [7] gra_spill_temp_28 + sub a9,a7,a9 # [8] + beqz a9,.Lt_4_9474 # [9] + + blt a6,a5,.LBB15_esp_nn_depthwise_conv_s16_mult1 # [0] + + j .Lt_4_10242 # [0] + +.Lt_4_7170: # 0x770 + retw.n # [0] + + .size esp_nn_depthwise_conv_s16_mult1_esp32s3, . 
- esp_nn_depthwise_conv_s16_mult1_esp32s3 + +#elif defined(WIO_TERMINAL) +// dummy code, added for old ARM toolchain +.syntax unified +.thumb +.cpu cortex-m0 + +.section .text +#endif // EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3 diff --git a/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_depthwise_conv_s16_mult4_esp32s3.S b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_depthwise_conv_s16_mult4_esp32s3.S new file mode 100644 index 0000000..792d137 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_depthwise_conv_s16_mult4_esp32s3.S @@ -0,0 +1,416 @@ +#include "edge-impulse-sdk/classifier/ei_classifier_config.h" +#if EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3 +// Copyright 2021-2022 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + .text + .literal_position + + # Program Unit: esp_nn_depthwise_conv_s16_mult4_esp32s3 + .type esp_nn_depthwise_conv_s16_mult4_esp32s3, @function + .align 4 + .global esp_nn_depthwise_conv_s16_mult4_esp32s3 + +esp_nn_depthwise_conv_s16_mult4_esp32s3: # 0x17c8 + # qacc_scratch = 0 + # gra_spill_temp_220 = 32 + # gra_spill_temp_221 = 36 + # gra_spill_temp_222 = 40 + # gra_spill_temp_223 = 44 + # gra_spill_temp_224 = 48 + # gra_spill_temp_225 = 52 + # gra_spill_temp_226 = 56 + # gra_spill_temp_227 = 60 + # gra_spill_temp_228 = 64 + # gra_spill_temp_229 = 68 + # gra_spill_temp_230 = 72 + # gra_spill_temp_231 = 76 + # gra_spill_temp_232 = 80 + # gra_spill_temp_233 = 84 + # gra_spill_temp_234 = 88 + # gra_spill_temp_235 = 92 + # gra_spill_temp_236 = 96 + # gra_spill_temp_237 = 100 + # gra_spill_temp_238 = 104 + # gra_spill_temp_239 = 108 + # gra_spill_temp_240 = 112 + # gra_spill_temp_241 = 116 + # gra_spill_temp_242 = 120 + # gra_spill_temp_243 = 124 + # gra_spill_temp_244 = 128 + # gra_spill_temp_245 = 132 + # gra_spill_temp_246 = 136 + # gra_spill_temp_247 = 140 + # gra_spill_temp_248 = 144 + # gra_spill_temp_249 = 148 + # gra_spill_temp_250 = 152 + # gra_spill_temp_251 = 156 + # gra_spill_temp_252 = 160 + # gra_spill_temp_253 = 164 + # gra_spill_temp_254 = 168 + # gra_spill_temp_255 = 172 + # gra_spill_temp_256 = 176 + # gra_spill_temp_257 = 192 + # gra_spill_temp_258 = 208 + # gra_spill_temp_259 = 224 + # gra_spill_temp_260 = 240 + + // registers: + // a2: const int16_t *input_data + // a3: const uint16_t input_wd + // a4: const uint16_t input_ht + // a5: const uint16_t channels + // a6: const uint16_t pad_wd + // a7: const uint16_t pad_ht + + // on stack: + // const uint16_t stride_wd + // const uint16_t stride_ht + // const uint16_t ch_mult + // const int16_t *filter_data + // const uint16_t filter_wd + // const uint16_t filter_ht + // const int32_t *bias + // int8_t *out_data + // const uint16_t out_wd + // const uint16_t out_ht + // const int32_t out_offset + // const int32_t *out_shift + // const int32_t *out_mult + // const int32_t 
activation_min + // const int32_t activation_max + + + entry a1,288 # + s32i a2,a1,136 # [0] gra_spill_temp_246 + s32i.n a4,a1,40 # [1] gra_spill_temp_222 + s32i a5,a1,164 # [2] gra_spill_temp_253 + addi a12,a1,112 # [3] + addmi a10,a1,256 # [4] + addmi a11,a1,256 # [5] + addmi a13,a1,256 # [6] + l16ui a8,a1,324 # [7] id:216 out_ht+0x0 + s32i.n a8,a1,48 # [8] gra_spill_temp_224 + addi a13,a13,72 # [9] + addi a11,a11,88 # [10] + addi a10,a10,84 # [11] + ee.vldbc.32 q0,a10 # [12] id:215 activation_min + ee.vldbc.32 q1,a11 # [13] id:214 activation_max + ee.vldbc.32 q2,a13 # [14] id:213 out_offset + st.qr q2,a12,80 # [15] gra_spill_temp_257-112 + st.qr q1,a12,96 # [16] gra_spill_temp_258-112 + st.qr q0,a12,112 # [17] gra_spill_temp_259-112 + beqz.n a8,.Lt_10_8450 # [18] + + s32i a1,a1,112 # [0] gra_spill_temp_240 + neg a15,a6 # [1] + neg a4,a7 # [2] + addmi a8,a1,256 # [3] + movi.n a9,0 # [4] + movi.n a11,0 # [5] + slli a14,a5,1 # [6] + l16ui a13,a1,296 # [7] id:217 ch_mult+0x0 + l16ui a10,a1,308 # [8] id:227 filter_ht+0x0 + s32i.n a10,a1,36 # [9] gra_spill_temp_221 + s32i a13,a1,76 # [10] gra_spill_temp_231 + s32i a14,a1,148 # [11] gra_spill_temp_249 + s32i.n a11,a1,52 # [12] gra_spill_temp_225 + s32i a9,a1,116 # [13] gra_spill_temp_241 + st.qr q4,a8,-16 # [14] gra_spill_temp_260-256 + sext a4,a4,15 # [15] + sext a15,a15,15 # [16] + s32i.n a15,a1,32 # [17] gra_spill_temp_220 + mul16u a12,a5,a13 # [18] + s32i a4,a1,92 # [19] gra_spill_temp_235 + l16ui a8,a1,320 # [20] id:229 out_wd+0x0 + l16ui a9,a1,292 # [21] id:228 stride_ht+0x0 + l32i a11,a1,336 # [22] id:226 out_mult+0x0 + s32i a11,a1,64 # [23] gra_spill_temp_228 + s32i.n a9,a1,44 # [24] gra_spill_temp_223 + s32i a8,a1,68 # [25] gra_spill_temp_229 + l32i a4,a1,300 # [26] id:218 filter_data+0x0 + s32i a12,a1,140 # [27] gra_spill_temp_247 + l32i a15,a1,316 # [28] id:219 out_data+0x0 + s32i a15,a1,96 # [29] gra_spill_temp_236 + slli a12,a12,1 # [30] + s32i a4,a1,152 # [31] gra_spill_temp_250 + addi a14,a13,-3 # [32] + l16ui a4,a1,304 # [33] id:223 filter_wd+0x0 + s32i a14,a1,108 # [34] gra_spill_temp_239 + s32i a12,a1,144 # [35] gra_spill_temp_248 + slli a13,a13,2 # [36] + s32i a13,a1,80 # [37] gra_spill_temp_232 + l32i a12,a1,332 # [38] id:225 out_shift+0x0 + l32i a14,a1,312 # [39] id:222 bias+0x0 + s32i a14,a1,104 # [40] gra_spill_temp_238 + s32i.n a12,a1,60 # [41] gra_spill_temp_227 + l16ui a13,a1,288 # [42] id:224 stride_wd+0x0 + s32i.n a13,a1,56 # [43] gra_spill_temp_226 + j .Lt_10_8962 # [44] + +.Lt_10_9218: # 0x1880 + l32i.n a9,a1,48 # [0] gra_spill_temp_224 + l32i.n a11,a1,44 # [1] gra_spill_temp_223 + l32i.n a8,a1,52 # [2] gra_spill_temp_225 + l32i a10,a1,92 # [3] gra_spill_temp_235 + addi.n a8,a8,1 # [4] + s32i.n a8,a1,52 # [5] gra_spill_temp_225 + add.n a11,a10,a11 # [6] + sub a8,a8,a9 # [7] + sext a10,a11,15 # [8] + s32i a10,a1,92 # [9] gra_spill_temp_235 + beqz a8,.Lt_10_8450 # [10] + +.Lt_10_8962: # 0x189b +# Loop body line 1223, nesting depth: 1, estimated iterations: 100 + #1224 const int16_t base_y = (out_y * stride_ht) - pad_ht; + #1225 for (int out_x = 0; out_x < out_wd; out_x++) { //width_loop + l32i a12,a1,68 # [0] gra_spill_temp_229 + beqz.n a12,.Lt_10_9218 # [2] + +.LBB6_esp_nn_depthwise_conv_s16_mult4: # 0x18a0 + l32i.n a7,a1,36 # [0] gra_spill_temp_221 + movi.n a11,0 # [1] + l32i.n a8,a1,40 # [2] gra_spill_temp_222 + l32i a9,a1,92 # [3] gra_spill_temp_235 + movi.n a13,0 # [4] + l32i.n a14,a1,32 # [5] gra_spill_temp_220 + s32i a14,a1,160 # [6] gra_spill_temp_252 + s32i a13,a1,72 # [7] gra_spill_temp_230 + neg a10,a9 # 
[8] + sub a8,a8,a9 # [9] + max a10,a10,a11 # [10] + s32i a10,a1,100 # [11] gra_spill_temp_237 + min a7,a7,a8 # [12] + j .Lt_10_9730 # [13] + +.Lt_10_9986: # 0x18c5 + l32i a13,a1,68 # [0] gra_spill_temp_229 + l32i.n a15,a1,56 # [1] gra_spill_temp_226 + l32i a12,a1,72 # [2] gra_spill_temp_230 + l32i a14,a1,160 # [3] gra_spill_temp_252 + addi.n a12,a12,1 # [4] + s32i a12,a1,72 # [5] gra_spill_temp_230 + add.n a15,a14,a15 # [6] + sext a14,a15,15 # [7] + s32i a14,a1,160 # [8] gra_spill_temp_252 + beq a12,a13,.Lt_10_9218 # [9] + +.Lt_10_9730: # 0x18e0 + l32i a8,a1,164 # [0] gra_spill_temp_253 + l32i a9,a1,64 # [1] gra_spill_temp_228 + l32i.n a10,a1,60 # [2] gra_spill_temp_227 + s32i a10,a1,132 # [3] gra_spill_temp_245 + s32i a9,a1,128 # [4] gra_spill_temp_244 + beqz.n a8,.Lt_10_9986 # [5] + + movi.n a8,0 # [0] + l32i a5,a1,160 # [1] gra_spill_temp_252 + movi.n a12,0 # [2] + movi.n a13,0 # [3] + movi.n a14,0 # [4] + s32i a14,a1,84 # [5] gra_spill_temp_233 + s32i a13,a1,88 # [6] gra_spill_temp_234 + s32i a12,a1,176 # [7] gra_spill_temp_256 + neg a6,a5 # [8] + max a6,a6,a8 # [9] + sub a5,a3,a5 # [10] + min a5,a4,a5 # [11] + sub a11,a5,a6 # [12] + s32i a11,a1,156 # [13] gra_spill_temp_251 + j .Lt_10_10498 # [14] + +.Lt_10_10754: # 0x1919 + l32i a10,a1,164 # [0] gra_spill_temp_253 + l32i a14,a1,76 # [1] gra_spill_temp_231 + l32i a13,a1,84 # [2] gra_spill_temp_233 + l32i a12,a1,80 # [3] gra_spill_temp_232 + l32i a9,a1,176 # [4] gra_spill_temp_256 + l32i a11,a1,88 # [5] gra_spill_temp_234 + addi.n a9,a9,1 # [6] + s32i a9,a1,176 # [7] gra_spill_temp_256 + add.n a11,a11,a12 # [8] + add.n a13,a13,a14 # [9] + s32i a13,a1,84 # [10] gra_spill_temp_233 + s32i a11,a1,88 # [11] gra_spill_temp_234 + beq a9,a10,.Lt_10_9986 # [12] + +.Lt_10_10498: # 0x193d + l32i a15,a1,108 # [0] gra_spill_temp_239 + blti a15,1,.Lt_10_10754 # [2] + + l32i a2,a1,84 # [0] gra_spill_temp_233 + l32i a10,a1,104 # [1] gra_spill_temp_238 + l32i a9,a1,88 # [2] gra_spill_temp_234 + movi.n a8,0 # [3] + s32i a8,a1,120 # [4] gra_spill_temp_242 + add.n a9,a9,a10 # [5] + s32i a9,a1,124 # [6] gra_spill_temp_243 + j .Lt_10_11266 # [7] + +.Lt_10_11522: # 0x1959 + addmi a12,a1,256 # [0] + l32i a14,a1,112 # [1] gra_spill_temp_240 + movi.n a13,16 # [2] + ee.st.qacc_l.l.128.ip a14,16 # [3] id:234 + ee.st.qacc_l.h.32.ip a14,-16 # [4] id:235 + ee.srcmb.s16.qacc q5,a13,0 # [5] + l16ui a15,a1,10 # [6] qacc_scratch+10 + l8ui a8,a1,15 # [7] qacc_scratch+15 + l8ui a9,a1,5 # [8] qacc_scratch+5 + l8ui a11,a1,16 # [9] qacc_scratch+16 + l8ui a10,a1,6 # [10] qacc_scratch+6 + s8i a10,a1,3 # [11] qacc_scratch+3 + s8i a11,a1,7 # [12] qacc_scratch+7 + s8i a9,a1,2 # [13] qacc_scratch+2 + + l32i a11,a1,104 # [14] gra_spill_temp_238 + s8i a8,a1,6 # [15] qacc_scratch+6 + s16i a15,a1,4 # [16] qacc_scratch+4 + ee.vld.l.64.ip q0,a14,0 # [17] id:245 + s32i a14,a1,112 # [18] gra_spill_temp_240 + ee.vzip.16 q0,q5 # [19] + st.qr q5,a12,-16 # [20] gra_spill_temp_260-256 + + beqz.n a11,.Lt_10_13570 # [21] // skip_bias + + // add bias + l32i a13,a1,124 # [0] gra_spill_temp_243 + extui a12,a13,0,4 # [2] + ee.vld.128.ip q7,a13,16 # [3] id:248 + ee.vld.128.ip q1,a13,0 # [4] id:249 + wur.sar_byte a12 # [5] + ee.src.q.qup q6,q7,q1 # [6] + ee.vadds.s32 q0,q0,q6 # [7] + +.Lt_10_13570: # 0x19ae + #1287 q0 = esp_nn_multiply_by_quantized_mult_ver1_esp32s3(q0, out_mult_ptr, out_shift_ptr); + l32i a10,a1,128 # [0] gra_spill_temp_244 + l32i a11,a1,132 # [1] gra_spill_temp_245 + call8 esp_nn_multiply_by_quantized_mult_ver1_esp32s3 # [2] esp_nn_multiply_by_quantized_mult_ver1_esp32s3 + + addi.n 
a2,a2,4 # [0] + l32i a13,a1,96 # [1] gra_spill_temp_236 + l32i a11,a1,128 # [2] gra_spill_temp_244 + l32i a10,a1,132 # [3] gra_spill_temp_245 + addi a8,a1,112 # [4] + ld.qr q1,a8,96 # [5] gra_spill_temp_258-112 + ld.qr q2,a8,80 # [6] gra_spill_temp_257-112 + addi a10,a10,16 # [7] + addi a11,a11,16 # [8] + s32i a11,a1,128 # [9] gra_spill_temp_244 + ee.vadds.s32 q0,q0,q2 # [10] + s32i a10,a1,132 # [11] gra_spill_temp_245 + ee.vmin.s32 q0,q0,q1 # [12] + ld.qr q1,a8,112 # [13] gra_spill_temp_259-112 + l32i a8,a1,116 # [14] gra_spill_temp_241 + ee.vmax.s32 q0,q0,q1 # [15] + ee.movi.32.a q0,a14,2 # [16] + ee.movi.32.a q0,a15,1 # [17] + ee.movi.32.a q0,a9,0 # [18] + add.n a13,a8,a13 # [19] + ee.movi.32.a q0,a12,3 # [20] + addi.n a8,a8,4 # [21] + s8i a12,a13,3 # [22] id:254 + s32i a8,a1,116 # [23] gra_spill_temp_241 + s8i a9,a13,0 # [24] id:251 + s8i a15,a13,1 # [25] id:252 + s8i a14,a13,2 # [26] id:253 + l32i a15,a1,108 # [27] gra_spill_temp_239 + l32i a14,a1,120 # [28] gra_spill_temp_242 + l32i a9,a1,124 # [29] gra_spill_temp_243 + addi.n a14,a14,4 # [30] + addi a9,a9,16 # [31] + s32i a9,a1,124 # [32] gra_spill_temp_243 + s32i a14,a1,120 # [33] gra_spill_temp_242 + bge a14,a15,.Lt_10_10754 # [34] + +.Lt_10_11266: # 0x1a1c +# Loop body line 1230, nesting depth: 4, estimated iterations: 100 + ee.zero.qacc # [0] + l32i a9,a1,100 # [1] gra_spill_temp_237 + s32i a9,a1,172 # [2] gra_spill_temp_255 + bge a9,a7,.Lt_10_11522 # [3] + + mull a15,a9,a4 # [0] + l32i a14,a1,92 # [1] gra_spill_temp_235 + add.n a11,a15,a5 # [2] + add.n a14,a14,a9 # [3] + mull a14,a3,a14 # [4] + s32i a11,a1,168 # [5] gra_spill_temp_254 + bge a6,a5,.Lt_10_12290 # [6] + +.LBB18_esp_nn_depthwise_conv_s16_mult4: # 0x1a3b + l32i a10,a1,176 # [0] gra_spill_temp_256 + l32i a11,a1,164 # [1] gra_spill_temp_253 + l32i a12,a1,160 # [2] gra_spill_temp_252 + add.n a9,a15,a6 # [3] + l32i a8,a1,140 # [4] gra_spill_temp_247 + addmi a13,a1,256 # [5] + ld.qr q1,a13,-16 # [6] gra_spill_temp_260-256 + mull a8,a8,a9 # [7] + add.n a12,a12,a6 # [8] + l32i a9,a1,152 # [9] gra_spill_temp_250 + add.n a12,a14,a12 # [10] + mull a11,a11,a12 # [11] + add.n a8,a2,a8 # [12] + l32i a12,a1,148 # [13] gra_spill_temp_249 + addx2 a8,a8,a9 # [14] + add.n a10,a10,a11 # [15] + l32i a11,a1,136 # [16] gra_spill_temp_246 + l32i a9,a1,156 # [17] gra_spill_temp_251 + addx2 a10,a10,a11 # [18] + l32i a11,a1,144 # [19] gra_spill_temp_248 + loopgtz a9,.LBB45_esp_nn_depthwise_conv_s16_mult4 # [20] + + mov.n a9,a8 # [0*II+0] + ee.vldbc.16 q0,a10 # [0*II+1] id:232 + add.n a10,a10,a12 # [0*II+2] + ee.vld.l.64.ip q1,a9,0 # [0*II+3] id:231 + add.n a8,a8,a11 # [0*II+4] + ee.vmulas.s16.qacc q0,q1 # [0*II+5] +.LBB45_esp_nn_depthwise_conv_s16_mult4: # 0x1a84 + + addmi a10,a1,256 # [0] + st.qr q1,a10,-16 # [1] gra_spill_temp_260-256 + +.Lt_10_12290: # 0x1a8a + add.n a14,a14,a3 # [0] + add.n a15,a15,a4 # [1] + l32i a11,a1,172 # [2] gra_spill_temp_255 + l32i a12,a1,168 # [3] gra_spill_temp_254 + addi.n a11,a11,1 # [4] + add.n a12,a12,a4 # [5] + s32i a12,a1,168 # [6] gra_spill_temp_254 + s32i a11,a1,172 # [7] gra_spill_temp_255 + sub a11,a7,a11 # [8] + beqz a11,.Lt_10_11522 # [9] + + blt a6,a5,.LBB18_esp_nn_depthwise_conv_s16_mult4 # [0] + + j .Lt_10_12290 # [0] + +.Lt_10_8450: # 0x1aaa + retw.n # [0] + + .size esp_nn_depthwise_conv_s16_mult4_esp32s3, . 
- esp_nn_depthwise_conv_s16_mult4_esp32s3 + +#elif defined(WIO_TERMINAL) +// dummy code, added for old ARM toolchain +.syntax unified +.thumb +.cpu cortex-m0 + +.section .text +#endif // EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3 diff --git a/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_depthwise_conv_s16_mult8_3x3_esp32s3.S b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_depthwise_conv_s16_mult8_3x3_esp32s3.S new file mode 100644 index 0000000..b894713 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_depthwise_conv_s16_mult8_3x3_esp32s3.S @@ -0,0 +1,458 @@ +#include "edge-impulse-sdk/classifier/ei_classifier_config.h" +#if EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3 +// Copyright 2021-2022 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + .text + .literal_position + + # Program Unit: esp_nn_depthwise_conv_s16_mult8_3x3_esp32s3 + .type esp_nn_depthwise_conv_s16_mult8_3x3_esp32s3, @function + .align 4 + .global esp_nn_depthwise_conv_s16_mult8_3x3_esp32s3 + +esp_nn_depthwise_conv_s16_mult8_3x3_esp32s3: # 0x11b3 + # qacc_scratch = 0 + # gra_spill_temp_142 = 48 + # gra_spill_temp_143 = 52 + # gra_spill_temp_144 = 56 + # gra_spill_temp_145 = 60 + # gra_spill_temp_146 = 64 + # gra_spill_temp_147 = 68 + # gra_spill_temp_148 = 72 + # gra_spill_temp_149 = 76 + # gra_spill_temp_150 = 80 + # gra_spill_temp_151 = 84 + # gra_spill_temp_152 = 88 + # gra_spill_temp_153 = 92 + # gra_spill_temp_154 = 96 + # gra_spill_temp_155 = 100 + # gra_spill_temp_156 = 104 + # gra_spill_temp_157 = 108 + # gra_spill_temp_158 = 112 + # gra_spill_temp_159 = 116 + # gra_spill_temp_160 = 120 + # gra_spill_temp_161 = 124 + # gra_spill_temp_162 = 128 + # gra_spill_temp_163 = 132 + # gra_spill_temp_164 = 136 + # gra_spill_temp_165 = 140 + # gra_spill_temp_166 = 144 + # gra_spill_temp_167 = 148 + # gra_spill_temp_168 = 152 + # gra_spill_temp_169 = 156 + # gra_spill_temp_170 = 160 + # gra_spill_temp_171 = 164 + # gra_spill_temp_172 = 168 + # gra_spill_temp_173 = 172 + # gra_spill_temp_174 = 176 + # gra_spill_temp_175 = 180 + # gra_spill_temp_176 = 184 + # gra_spill_temp_177 = 188 + # gra_spill_temp_178 = 192 + # gra_spill_temp_179 = 208 + # gra_spill_temp_180 = 224 + # gra_spill_temp_181 = 240 + # gra_spill_temp_182 = 256 + + // registers: + // a2: const int16_t *input_data + // a3: const uint16_t input_wd + // a4: const uint16_t input_ht + // a5: const uint16_t channels + // a6: const uint16_t pad_wd + // a7: const uint16_t pad_ht + + // const uint16_t stride_wd + // const uint16_t stride_ht + // const uint16_t ch_mult + // const int16_t *filter_data + // const int32_t *bias + // int8_t *out_data + // const uint16_t out_wd + // const uint16_t out_ht + // const int32_t out_offset + // const int32_t *out_shift + // const int32_t *out_mult + // const int32_t activation_min + // const int32_t activation_max + + 
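+
+ // Overview (informal): 3x3 depthwise variant for 16-bit activations that
+ // processes the channel-multiplier outputs 8 at a time per QACC pass
+ // (ch_mult - 7 bounds the unrolled loop below). It takes the same quantization
+ // parameters as the other variants (bias, out_shift/out_mult, out_offset and
+ // the activation_min/activation_max clamp) and writes int8 output.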
entry a1,304 # + s32i a2,a1,116 # [0] gra_spill_temp_159 + s32i a3,a1,120 # [1] gra_spill_temp_160 + s32i a5,a1,144 # [2] gra_spill_temp_166 + s32i.n a6,a1,60 # [3] gra_spill_temp_145 + + addmi a9,a1,256 # [4] + addi a12,a1,112 # [5] + addmi a10,a1,256 # [6] + addmi a11,a1,256 # [7] + addmi a13,a1,256 # [8] + + // height loop + l16ui a8,a1,332 # [9] id:261 out_ht+0x0 + l32i a14,a1,324 # [10] id:257 out_data+0x0 + s32i a14,a1,176 # [11] gra_spill_temp_174 + s32i a8,a1,68 # [12] gra_spill_temp_147 + addi a13,a13,80 # [13] + addi a11,a11,96 # [14] + addi a10,a10,92 # [15] + ee.vldbc.32 q0,a10 # [16] id:260 activation_min + ee.vldbc.32 q1,a11 # [17] id:259 activation_max + ee.vldbc.32 q2,a13 # [18] id:258 out_offset + st.qr q2,a12,96 # [19] gra_spill_temp_179-112 + st.qr q1,a12,112 # [20] gra_spill_temp_180-112 + st.qr q0,a9,-16 # [21] gra_spill_temp_181-256 + beqz.n a8,.Lt_8_8194 # [22] + +.LBB3_esp_nn_depthwise_conv_s16_mult8_3x3: # 0x11f9 + s32i a1,a1,180 # [0] gra_spill_temp_175 + mul16u a6,a3,a5 # [1] + s32i a7,a1,76 # [2] gra_spill_temp_149 + l32i a9,a1,316 # [3] id:264 filter_data+0x0 + l32i a15,a1,320 # [4] id:262 bias+0x0 + l16ui a10,a1,312 # [5] id:263 ch_mult+0x0 + slli a11,a5,1 # [6] + l16ui a12,a1,308 # [7] id:268 stride_ht+0x0 + l32i a13,a1,344 # [8] id:267 out_mult+0x0 + l32i a14,a1,340 # [9] id:266 out_shift+0x0 + s32i a14,a1,88 # [10] gra_spill_temp_152 + s32i a13,a1,92 # [11] gra_spill_temp_153 + s32i a12,a1,64 # [12] gra_spill_temp_146 + s32i a11,a1,124 # [13] gra_spill_temp_161 + s32i a10,a1,108 # [14] gra_spill_temp_157 + s32i a15,a1,160 # [15] gra_spill_temp_170 + s32i a9,a1,128 # [16] gra_spill_temp_162 + neg a7,a7 # [17] + slli a6,a6,1 # [18] + s32i a7,a1,136 # [19] gra_spill_temp_164 + movi.n a9,0 # [20] + extui a15,a15,0,4 # [21] + s32i a15,a1,152 # [22] gra_spill_temp_168 + s32i a9,a1,72 # [23] gra_spill_temp_148 + sub a7,a4,a7 # [24] + l32i.n a9,a1,60 # [25] gra_spill_temp_145 + s32i a7,a1,80 # [26] gra_spill_temp_150 + l16ui a4,a1,328 # [27] id:269 out_wd+0x0 + s32i a4,a1,96 # [28] gra_spill_temp_154 + l16ui a7,a1,304 # [29] id:265 stride_wd+0x0 + s32i a7,a1,84 # [30] gra_spill_temp_151 + mul16u a4,a5,a10 # [31] + neg a9,a9 # [32] + s32i.n a9,a1,52 # [33] gra_spill_temp_143 + sub a8,a3,a9 # [34] + addi a10,a10,-7 # [35] + s32i a10,a1,164 # [36] gra_spill_temp_171 + s32i.n a8,a1,56 # [37] gra_spill_temp_144 + addx2 a7,a4,a4 # [38] + slli a7,a7,1 # [39] + j .Lt_8_8706 # [40] + +.Lt_8_8962: # 0x1270 +# Part of loop body line 933, head labeled .Lt_8_8706 + l32i a10,a1,68 # [0] gra_spill_temp_147 + l32i a14,a1,76 # [1] gra_spill_temp_149 + l32i a13,a1,136 # [2] gra_spill_temp_164 + l32i a12,a1,64 # [3] gra_spill_temp_146 + l32i a9,a1,72 # [4] gra_spill_temp_148 + l32i a11,a1,80 # [5] gra_spill_temp_150 + addi.n a9,a9,1 # [6] + s32i a9,a1,72 # [7] gra_spill_temp_148 + sub a11,a11,a12 # [8] + add.n a13,a13,a12 # [9] + sub a14,a14,a12 # [10] + s32i a14,a1,76 # [11] gra_spill_temp_149 + s32i a13,a1,136 # [12] gra_spill_temp_164 + s32i a11,a1,80 # [13] gra_spill_temp_150 + sub a9,a9,a10 # [14] + beqz a9,.Lt_8_8194 # [15] + +.Lt_8_8706: # 0x129e +# Loop body line 933, nesting depth: 1, estimated iterations: 100 + # 934 const int32_t base_y = (out_y * stride_ht) - pad_ht; + # 935 for (int out_x = 0; out_x < out_wd; out_x++) { //width_loop + l32i a15,a1,96 # [0] gra_spill_temp_154 + beqz.n a15,.Lt_8_8962 # [2] + +.LBB6_esp_nn_depthwise_conv_s16_mult8_3x3: # 0x12a3 +# Part of loop body line 933, head labeled .Lt_8_8706 + l32i.n a3,a1,56 # [0] gra_spill_temp_144 + l32i a8,a1,80 # 
[1] gra_spill_temp_150 + movi.n a10,0 # [2] + l32i a9,a1,76 # [3] gra_spill_temp_149 + movi.n a11,0 # [4] + l32i.n a12,a1,52 # [5] gra_spill_temp_143 + l32i.n a13,a1,60 # [6] gra_spill_temp_145 + s32i a13,a1,104 # [7] gra_spill_temp_156 + s32i a12,a1,140 # [8] gra_spill_temp_165 + s32i a11,a1,100 # [9] gra_spill_temp_155 + max a9,a9,a10 # [10] + movi.n a10,3 # [11] + s32i a9,a1,172 # [12] gra_spill_temp_173 + min a8,a8,a10 # [13] + s32i a8,a1,156 # [14] gra_spill_temp_169 + sub a8,a8,a9 # [15] + s32i a8,a1,132 # [16] gra_spill_temp_163 + j .Lt_8_9474 # [17] + +.Lt_8_9730: # 0x12d3 +# Part of loop body line 935, head labeled .Lt_8_9474 + l32i a15,a1,96 # [0] gra_spill_temp_154 + l32i a10,a1,104 # [1] gra_spill_temp_156 + l32i a9,a1,140 # [2] gra_spill_temp_165 + l32i a8,a1,84 # [3] gra_spill_temp_151 + l32i a14,a1,100 # [4] gra_spill_temp_155 + sub a3,a3,a8 # [5] + addi.n a14,a14,1 # [6] + s32i a14,a1,100 # [7] gra_spill_temp_155 + add.n a9,a9,a8 # [8] + sub a10,a10,a8 # [9] + s32i a10,a1,104 # [10] gra_spill_temp_156 + s32i a9,a1,140 # [11] gra_spill_temp_165 + beq a14,a15,.Lt_8_8962 # [12] + +.Lt_8_9474: # 0x12f8 + # 936 const int32_t base_x = (out_x * stride_wd) - pad_wd; + # 937 const int32_t *out_mult_ptr = out_mult; + # 938 const int32_t *out_shift_ptr = out_shift; + l32i a2,a1,88 # [0] gra_spill_temp_152 + l32i a10,a1,92 # [1] gra_spill_temp_153 + # 939 uint32_t bias_ptr = (uint32_t) (bias); + l32i a12,a1,160 # [2] gra_spill_temp_170 + # 940 + # 941 for (int ch_idx = 0; ch_idx < channels; ch_idx++) {//channel_loop + l32i a11,a1,144 # [3] gra_spill_temp_166 + s32i a12,a1,168 # [4] gra_spill_temp_172 + beqz.n a11,.Lt_8_9730 # [5] + +.LBB9_esp_nn_depthwise_conv_s16_mult8_3x3: # 0x1309 +# Part of loop body line 935, head labeled .Lt_8_9474 + movi.n a8,0 # [0] + l32i a5,a1,104 # [1] gra_spill_temp_156 + movi.n a13,0 # [2] + movi.n a9,0 # [3] + s32i a9,a1,112 # [4] gra_spill_temp_158 + s32i a13,a1,148 # [5] gra_spill_temp_167 + max a5,a5,a8 # [6] + j .Lt_8_10242 # [7] + +.Lt_8_10498: # 0x131e +# Part of loop body line 941, head labeled .Lt_8_10242 + l32i a12,a1,144 # [0] gra_spill_temp_166 + l32i a14,a1,108 # [1] gra_spill_temp_157 + l32i a11,a1,148 # [2] gra_spill_temp_167 + l32i a13,a1,112 # [3] gra_spill_temp_158 + addi.n a11,a11,1 # [4] + s32i a11,a1,148 # [5] gra_spill_temp_167 + add.n a13,a13,a14 # [6] + s32i a13,a1,112 # [7] gra_spill_temp_158 + beq a11,a12,.Lt_8_9730 # [8] + +.Lt_8_10242: # 0x1337 + # 942 for (int ch_mult_idx = 0; ch_mult_idx < ch_mult - 7; ch_mult_idx += 8) { + l32i a15,a1,164 # [0] gra_spill_temp_171 + blti a15,1,.Lt_8_10498 # [2] + + movi.n a8,0 # [0] + l32i a9,a1,112 # [1] gra_spill_temp_158 + s32i a9,a1,188 # [2] gra_spill_temp_177 + s32i a8,a1,184 # [3] gra_spill_temp_176 + j .Lt_8_11010 # [4] + +.LBB23_esp_nn_depthwise_conv_s16_mult8_3x3: # 0x134b + s32i.n a10,a1,48 # [0] gra_spill_temp_142 + addi a11,a1,112 # [1] + l32i a13,a1,152 # [2] gra_spill_temp_168 + l32i a12,a1,168 # [3] gra_spill_temp_172 + wur.sar_byte a13 # [4] + ee.vld.128.ip q4,a12,16 # [5] id:307 + ee.vld.128.ip q7,a12,16 # [6] id:308 + ee.vld.128.ip q5,a12,0 # [7] id:309 + s32i a12,a1,168 # [8] gra_spill_temp_172 + ee.src.q.qup q6,q4,q7 # [9] + ee.vadds.s32 q0,q0,q6 # [10] + ee.src.q.qup q3,q4,q5 # [11] + ee.vadds.s32 q1,q1,q3 # [12] + st.qr q1,a11,80 # [13] gra_spill_temp_178-112 + +.Lt_8_13314: # 0x1374 + #1025 q0 = esp_nn_multiply_by_quantized_mult_ver1_esp32s3(q0, out_mult_ptr, out_shift_ptr); + l32i.n a10,a1,48 # [0] gra_spill_temp_142 + mov.n a11,a2 # [1] + call8 
esp_nn_multiply_by_quantized_mult_ver1_esp32s3 + + #1026 out_mult_ptr += 4; + #1027 out_shift_ptr += 4; + #1028 + #1029 q1 = esp_nn_multiply_by_quantized_mult_ver1_esp32s3(q1, out_mult_ptr, out_shift_ptr); + l32i.n a10,a1,48 # [0] gra_spill_temp_142 + addmi a12,a1,256 # [1] + addi a11,a1,112 # [2] + st.qr q0,a12,0 # [3] gra_spill_temp_182-256 + ld.qr q0,a11,80 # [4] gra_spill_temp_178-112 + addi a10,a10,16 # [5] + addi a11,a2,16 # [6] + call8 esp_nn_multiply_by_quantized_mult_ver1_esp32s3 + +# Part of loop body line 942, head labeled .Lt_8_11010 + #1030 out_mult_ptr += 4; + #1031 out_shift_ptr += 4; + addi a2,a2,32 # [0] + l32i a14,a1,164 # [1] gra_spill_temp_171 + + l32i a8,a1,176 # [2] gra_spill_temp_174 + l32i a15,a1,188 # [3] gra_spill_temp_177 + l32i a13,a1,184 # [4] gra_spill_temp_176 + l32i.n a10,a1,48 # [5] gra_spill_temp_142 + addmi a11,a1,256 # [6] + addi a12,a1,112 # [7] + ld.qr q3,a12,112 # [8] gra_spill_temp_180-112 + ld.qr q1,a12,96 # [9] gra_spill_temp_179-112 + ld.qr q2,a11,0 # [10] gra_spill_temp_182-256 + addi a10,a10,32 # [11] + addi.n a13,a13,8 # [12] + addi.n a15,a15,8 # [13] + s32i a15,a1,188 # [14] gra_spill_temp_177 + ee.vadds.s32 q2,q2,q1 # [15] + s32i a13,a1,184 # [16] gra_spill_temp_176 + ee.vadds.s32 q1,q0,q1 # [17] + ee.vmin.s32 q0,q2,q3 # [18] + ld.qr q2,a11,-16 # [19] gra_spill_temp_181-256 + ee.vmin.s32 q1,q1,q3 # [20] + ee.vmax.s32 q1,q1,q2 # [21] + ee.vmax.s32 q0,q0,q2 # [22] + ee.vunzip.16 q0,q1 # [23] + ee.vunzip.8 q0,q1 # [24] + ee.vst.l.64.ip q0,a8,8 # [25] id:312 + s32i a8,a1,176 # [26] gra_spill_temp_174 + bge a13,a14,.Lt_8_10498 # [27] + +.Lt_8_11010: # 0x13e3 +# Loop body line 942, nesting depth: 4, estimated iterations: 100 + l32i a14,a1,156 # [0] gra_spill_temp_169 + l32i a13,a1,172 # [1] gra_spill_temp_173 + ee.zero.qacc # [2] + bge a13,a14,.Lt_8_11266 # [3] + +.LBB15_esp_nn_depthwise_conv_s16_mult8_3x3: # 0x13ef +# Part of loop body line 942, head labeled .Lt_8_11010 + l32i a12,a1,124 # [0] gra_spill_temp_161 + l32i a8,a1,140 # [1] gra_spill_temp_165 + l32i a11,a1,120 # [2] gra_spill_temp_160 + l32i a14,a1,188 # [3] gra_spill_temp_177 + l32i a9,a1,136 # [4] gra_spill_temp_164 + mull a15,a4,a13 # [5] + add.n a9,a9,a13 # [6] + addx2 a15,a15,a15 # [7] + l32i a13,a1,148 # [8] gra_spill_temp_167 + add.n a14,a14,a15 # [9] + mull a9,a9,a11 # [10] + l32i a15,a1,144 # [11] gra_spill_temp_166 + add.n a8,a8,a9 # [12] + mull a15,a15,a8 # [13] + l32i a8,a1,128 # [14] gra_spill_temp_162 + add.n a13,a13,a15 # [15] + l32i a15,a1,116 # [16] gra_spill_temp_159 + addx2 a14,a14,a8 # [17] + addx2 a13,a13,a15 # [18] + add.n a11,a12,a13 # [19] + l32i a15,a1,132 # [20] gra_spill_temp_163 + add.n a12,a12,a11 # [21] + loopgtz a15,.LBB34_esp_nn_depthwise_conv_s16_mult8_3x3 # [22] + +.Lt_8_11778: # 0x142e + mov.n a15,a14 # [0] + mov.n a9,a14 # [1] + bnez.n a5,.Lt_8_12034 # [2] + + ee.vldbc.16 q3,a13 # [0] id:271 + mov.n a9,a14 # [1] + ee.vld.128.ip q4,a9,0 # [2] id:272 + ee.vmulas.s16.qacc q3,q4 # [4] + +.Lt_8_12034: # 0x143f + ee.vldbc.16 q5,a11 # [0] id:274 + addx2 a9,a4,a9 # [1] + ee.vld.128.ip q6,a9,0 # [2] id:275 + add.n a13,a13,a6 # [3] + ee.vmulas.s16.qacc q5,q6 # [4] + blti a3,3,.Lt_8_12546 # [5] + + ee.vldbc.16 q7,a12 # [0] id:277 + addx2 a14,a4,a9 # [1] + ee.vld.128.ip q0,a14,0 # [2] id:278 + ee.vmulas.s16.qacc q7,q0 # [4] + +.Lt_8_12546: # 0x145c +# Part of loop body line 953, head labeled .Lt_8_11778 + add.n a11,a11,a6 # [0] + add.n a12,a12,a6 # [1] + add.n a14,a7,a15 # [2] + +.LBB34_esp_nn_depthwise_conv_s16_mult8_3x3: # 0x1464 +.Lt_8_11266: # 0x1464 + + l32i 
a8,a1,180 # [0] gra_spill_temp_175 + ee.st.qacc_l.l.128.ip a8,16 # [2] id:280 + ee.st.qacc_l.h.32.ip a8,0 # [3] id:281 + l16ui a9,a1,10 # [4] qacc_scratch+10 + l8ui a11,a1,15 # [5] qacc_scratch+15 + l8ui a12,a1,5 # [6] qacc_scratch+5 + l8ui a13,a1,6 # [7] qacc_scratch+6 + l8ui a14,a1,16 # [8] qacc_scratch+16 + s8i a14,a1,7 # [9] qacc_scratch+7 + s8i a13,a1,3 # [10] qacc_scratch+3 + s8i a12,a1,2 # [11] qacc_scratch+2 + s8i a11,a1,6 # [12] qacc_scratch+6 + s16i a9,a1,4 # [13] qacc_scratch+4 + ee.st.qacc_h.l.128.ip a8,16 # [14] id:291 + ee.st.qacc_h.h.32.ip a8,-32 # [15] id:292 + l16ui a9,a1,16 # [16] qacc_scratch+16 + l8ui a15,a1,32 # [17] qacc_scratch+32 + l8ui a12,a1,22 # [18] qacc_scratch+22 + l8ui a11,a1,21 # [19] qacc_scratch+21 + l8ui a14,a1,31 # [20] qacc_scratch+31 + l16ui a13,a1,26 # [21] qacc_scratch+26 + s16i a13,a1,12 # [22] qacc_scratch+12 + s8i a14,a1,14 # [23] qacc_scratch+14 + s8i a11,a1,10 # [24] qacc_scratch+10 + s8i a12,a1,11 # [25] qacc_scratch+11 + s8i a15,a1,15 # [26] qacc_scratch+15 + s16i a9,a1,8 # [27] qacc_scratch+8 + l32i a15,a1,160 # [28] gra_spill_temp_170 + movi.n a9,16 # [29] + ee.srcmb.s16.qacc q1,a9,0 # [30] + ee.vld.128.ip q0,a8,0 # [31] id:304 + s32i a8,a1,180 # [32] gra_spill_temp_175 + ee.vzip.16 q0,q1 # [33] + bnez.n a15,.LBB23_esp_nn_depthwise_conv_s16_mult8_3x3 # [34] + + s32i.n a10,a1,48 # [0] gra_spill_temp_142 + addi a15,a1,112 # [1] + st.qr q1,a15,80 # [2] gra_spill_temp_178-112 + j .Lt_8_13314 # [3] + +.Lt_8_8194: # 0x14d3 + retw.n # [0] + + .size esp_nn_depthwise_conv_s16_mult8_3x3_esp32s3, . - esp_nn_depthwise_conv_s16_mult8_3x3_esp32s3 + +#elif defined(WIO_TERMINAL) +// dummy code, added for old ARM toolchain +.syntax unified +.thumb +.cpu cortex-m0 + +.section .text +#endif // EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3 diff --git a/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_depthwise_conv_s16_mult8_esp32s3.S b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_depthwise_conv_s16_mult8_esp32s3.S new file mode 100644 index 0000000..4f9143b --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_depthwise_conv_s16_mult8_esp32s3.S @@ -0,0 +1,432 @@ +#include "edge-impulse-sdk/classifier/ei_classifier_config.h" +#if EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3 +// Copyright 2021-2022 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
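+
+// handles the generic (non 3x3) filter case for ch_mult multiples of 8
+// inputs and filter are expected in s16 form: the s8 dispatcher in
+// esp_nn_depthwise_conv_s8_esp32s3.c expands them and calls this routine
+// when ch_mult % 8 == 0 and the filter is not 3x3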
+ + .text + .literal_position + + # Program Unit: esp_nn_depthwise_conv_s16_mult8_esp32s3 + .type esp_nn_depthwise_conv_s16_mult8_esp32s3, @function + .align 4 + .global esp_nn_depthwise_conv_s16_mult8_esp32s3 + +esp_nn_depthwise_conv_s16_mult8_esp32s3: # 0x14d7 + # qacc_scratch = 0 + # gra_spill_temp_183 = 48 + # gra_spill_temp_184 = 52 + # gra_spill_temp_185 = 56 + # gra_spill_temp_186 = 60 + # gra_spill_temp_187 = 64 + # gra_spill_temp_188 = 68 + # gra_spill_temp_189 = 72 + # gra_spill_temp_190 = 76 + # gra_spill_temp_191 = 80 + # gra_spill_temp_192 = 84 + # gra_spill_temp_193 = 88 + # gra_spill_temp_194 = 92 + # gra_spill_temp_195 = 96 + # gra_spill_temp_196 = 100 + # gra_spill_temp_197 = 104 + # gra_spill_temp_198 = 108 + # gra_spill_temp_199 = 112 + # gra_spill_temp_200 = 116 + # gra_spill_temp_201 = 120 + # gra_spill_temp_202 = 124 + # gra_spill_temp_203 = 128 + # gra_spill_temp_204 = 132 + # gra_spill_temp_205 = 136 + # gra_spill_temp_206 = 140 + # gra_spill_temp_207 = 144 + # gra_spill_temp_208 = 148 + # gra_spill_temp_209 = 152 + # gra_spill_temp_210 = 156 + # gra_spill_temp_211 = 160 + # gra_spill_temp_212 = 164 + # gra_spill_temp_213 = 168 + # gra_spill_temp_214 = 172 + # gra_spill_temp_215 = 176 + # gra_spill_temp_216 = 180 + # gra_spill_temp_217 = 184 + # gra_spill_temp_218 = 192 + # gra_spill_temp_219 = 208 + + // registers: + // a2: const int16_t *input_data + // a3: const uint16_t input_wd + // a4: const uint16_t input_ht + // a5: const uint16_t channels + // a6: const uint16_t pad_wd + // a7: const uint16_t pad_ht + + // on stack: + // const uint16_t stride_wd + // const uint16_t stride_ht + // const uint16_t ch_mult + // const int16_t *filter_data + // const uint16_t filter_wd + // const uint16_t filter_ht + // const int32_t *bias + // int8_t *out_data + // const uint16_t out_wd + // const uint16_t out_ht + // const int32_t out_offset + // const int32_t *out_shift + // const int32_t *out_mult + // const int32_t activation_min + // const int32_t activation_max + + entry a1,256 # + s32i a2,a1,144 # [0] gra_spill_temp_207 + s32i.n a4,a1,56 # [1] gra_spill_temp_185 + s32i a5,a1,172 # [2] gra_spill_temp_214 + l32i a9,a1,284 # [3] id:241 out_data+0x0 + + l16ui a8,a1,292 # [4] id:242 out_ht+0x0 + s32i a8,a1,64 # [5] gra_spill_temp_187 + s32i a9,a1,124 # [6] gra_spill_temp_202 + beqz.n a8,.Lt_9_8450 # [7] + + s32i a1,a1,128 # [0] gra_spill_temp_203 + neg a13,a7 # [1] + movi.n a4,0 # [2] + neg a12,a6 # [3] + l32i a9,a1,280 # [4] id:243 bias+0x0 + slli a11,a5,1 # [5] + l16ui a10,a1,264 # [6] id:244 ch_mult+0x0 + l32i a14,a1,268 # [7] id:245 filter_data+0x0 + s32i a14,a1,160 # [8] gra_spill_temp_211 + s32i a10,a1,92 # [9] gra_spill_temp_194 + s32i a11,a1,156 # [10] gra_spill_temp_210 + s32i a9,a1,112 # [11] gra_spill_temp_199 + sext a12,a12,15 # [12] + s32i a4,a1,68 # [13] gra_spill_temp_188 + sext a13,a13,15 # [14] + l16ui a4,a1,272 # [15] id:246 filter_wd+0x0 + s32i a13,a1,100 # [16] gra_spill_temp_196 + s32i.n a12,a1,48 # [17] gra_spill_temp_183 + mul16u a8,a5,a10 # [18] + extui a9,a9,0,4 # [19] + l32i a11,a1,304 # [20] id:249 out_mult+0x0 + s32i a11,a1,80 # [21] gra_spill_temp_191 + s32i a9,a1,104 # [22] gra_spill_temp_197 + s32i a8,a1,148 # [23] gra_spill_temp_208 + addi a10,a10,-7 # [24] + l32i a12,a1,300 # [25] id:248 out_shift+0x0 + l16ui a13,a1,256 # [26] id:247 stride_wd+0x0 + s32i a13,a1,72 # [27] gra_spill_temp_189 + s32i a12,a1,76 # [28] gra_spill_temp_190 + s32i a10,a1,116 # [29] gra_spill_temp_200 + slli a8,a8,1 # [30] + l16ui a9,a1,260 # [31] id:251 stride_ht+0x0 + 
s32i.n a9,a1,60 # [32] gra_spill_temp_186 + s32i a8,a1,152 # [33] gra_spill_temp_209 + l16ui a10,a1,276 # [34] id:250 filter_ht+0x0 + s32i.n a10,a1,52 # [35] gra_spill_temp_184 + l16ui a8,a1,288 # [36] id:252 out_wd+0x0 + s32i a8,a1,84 # [37] gra_spill_temp_192 + j .Lt_9_8962 # [38] + +.Lt_9_9218: # 0x1561 +# Part of loop body line 1083, head labeled .Lt_9_8962 + l32i a15,a1,64 # [0] gra_spill_temp_187 + l32i.n a9,a1,60 # [1] gra_spill_temp_186 + l32i a14,a1,68 # [2] gra_spill_temp_188 + l32i a8,a1,100 # [3] gra_spill_temp_196 + addi.n a14,a14,1 # [4] + s32i a14,a1,68 # [5] gra_spill_temp_188 + add.n a9,a8,a9 # [6] + sub a14,a14,a15 # [7] + sext a8,a9,15 # [8] + s32i a8,a1,100 # [9] gra_spill_temp_196 + beqz a14,.Lt_9_8450 # [10] + +.Lt_9_8962: # 0x157f + l32i a10,a1,84 # [0] gra_spill_temp_192 + beqz.n a10,.Lt_9_9218 # [2] + + l32i.n a7,a1,52 # [0] gra_spill_temp_184 + movi.n a11,0 # [1] + l32i.n a8,a1,56 # [2] gra_spill_temp_185 + l32i a9,a1,100 # [3] gra_spill_temp_196 + l32i.n a12,a1,48 # [4] gra_spill_temp_183 + s32i a12,a1,168 # [5] gra_spill_temp_213 + neg a10,a9 # [6] + sub a8,a8,a9 # [7] + max a10,a10,a11 # [8] + s32i a10,a1,108 # [9] gra_spill_temp_198 + min a7,a7,a8 # [10] + movi.n a11,0 # [11] + s32i a11,a1,88 # [12] gra_spill_temp_193 + j .Lt_9_9730 # [13] + +.Lt_9_9986: # 0x15a9 +# Part of loop body line 1085, head labeled .Lt_9_9730 + l32i a13,a1,84 # [0] gra_spill_temp_192 + l32i a15,a1,72 # [1] gra_spill_temp_189 + l32i a12,a1,88 # [2] gra_spill_temp_193 + l32i a14,a1,168 # [3] gra_spill_temp_213 + addi.n a12,a12,1 # [4] + s32i a12,a1,88 # [5] gra_spill_temp_193 + add.n a15,a14,a15 # [6] + sext a14,a15,15 # [7] + s32i a14,a1,168 # [8] gra_spill_temp_213 + beq a12,a13,.Lt_9_9218 # [9] + +.Lt_9_9730: # 0x15c5 +# Loop body line 1085, nesting depth: 2, estimated iterations: 100 + #1086 const int16_t base_x = (out_x * stride_wd) - pad_wd; + #1087 const int32_t *out_mult_ptr = out_mult; + #1088 const int32_t *out_shift_ptr = out_shift; + #1089 uint32_t bias_ptr = (uint32_t) (bias); + #1090 for (int ch_idx = 0; ch_idx < channels; ch_idx++) {//channel_loop + l32i a8,a1,172 # [0] gra_spill_temp_214 + l32i a9,a1,80 # [1] gra_spill_temp_191 + l32i a10,a1,76 # [2] gra_spill_temp_190 + l32i a11,a1,112 # [3] gra_spill_temp_199 + s32i a11,a1,120 # [4] gra_spill_temp_201 + s32i a10,a1,140 # [5] gra_spill_temp_206 + s32i a9,a1,136 # [6] gra_spill_temp_205 + beqz.n a8,.Lt_9_9986 # [7] + +.LBB9_esp_nn_depthwise_conv_s16_mult8: # 0x15dc +# Part of loop body line 1085, head labeled .Lt_9_9730 + movi.n a8,0 # [0] + l32i a5,a1,168 # [1] gra_spill_temp_213 + movi.n a13,0 # [2] + movi.n a14,0 # [3] + s32i a14,a1,96 # [4] gra_spill_temp_195 + s32i a13,a1,184 # [5] gra_spill_temp_217 + neg a6,a5 # [6] + max a6,a6,a8 # [7] + sub a5,a3,a5 # [8] + min a5,a4,a5 # [9] + sub a12,a5,a6 # [10] + s32i a12,a1,164 # [11] gra_spill_temp_212 + j .Lt_9_10498 # [12] + +.Lt_9_10754: # 0x1600 +# Part of loop body line 1090, head labeled .Lt_9_10498 + l32i a10,a1,172 # [0] gra_spill_temp_214 + l32i a12,a1,92 # [1] gra_spill_temp_194 + l32i a9,a1,184 # [2] gra_spill_temp_217 + l32i a11,a1,96 # [3] gra_spill_temp_195 + addi.n a9,a9,1 # [4] + s32i a9,a1,184 # [5] gra_spill_temp_217 + add.n a11,a11,a12 # [6] + s32i a11,a1,96 # [7] gra_spill_temp_195 + beq a9,a10,.Lt_9_9986 # [8] + +.Lt_9_10498: # 0x1619 +# Loop body line 1090, nesting depth: 3, estimated iterations: 100 + #1091 for (int ch_mult_idx = 0; ch_mult_idx < ch_mult - 7; ch_mult_idx += 8) { + l32i a13,a1,116 # [0] gra_spill_temp_200 + blti a13,1,.Lt_9_10754 # 
[2] + +.LBB12_esp_nn_depthwise_conv_s16_mult8: # 0x161f +# Part of loop body line 1090, head labeled .Lt_9_10498 + l32i a2,a1,96 # [0] gra_spill_temp_195 + movi.n a14,0 # [1] + s32i a14,a1,132 # [2] gra_spill_temp_204 + j .Lt_9_11266 # [3] + +.Lt_9_11522: # 0x162a + l32i a9,a1,128 # [0] gra_spill_temp_203 + ee.st.qacc_l.l.128.ip a9,16 # [2] id:257 + ee.st.qacc_l.h.32.ip a9,0 # [3] id:258 + l8ui a10,a1,15 # [4] qacc_scratch+15 + l16ui a8,a1,10 # [5] qacc_scratch+10 + l8ui a13,a1,16 # [6] qacc_scratch+16 + l8ui a12,a1,6 # [7] qacc_scratch+6 + l8ui a11,a1,5 # [8] qacc_scratch+5 + s8i a11,a1,2 # [9] qacc_scratch+2 + s8i a12,a1,3 # [10] qacc_scratch+3 + s8i a13,a1,7 # [11] qacc_scratch+7 + s16i a8,a1,4 # [12] qacc_scratch+4 + s8i a10,a1,6 # [13] qacc_scratch+6 + + movi.n a8,16 # [14] + ee.st.qacc_h.l.128.ip a9,16 # [15] id:268 + ee.st.qacc_h.h.32.ip a9,-32 # [16] id:269 + ee.srcmb.s16.qacc q1,a8,0 # [17] + l16ui a13,a1,26 # [18] qacc_scratch+26 + l8ui a15,a1,32 # [19] qacc_scratch+32 + l8ui a12,a1,22 # [20] qacc_scratch+22 + l8ui a11,a1,21 # [21] qacc_scratch+21 + l16ui a10,a1,16 # [22] qacc_scratch+16 + l8ui a14,a1,31 # [23] qacc_scratch+31 + s8i a14,a1,14 # [24] qacc_scratch+14 + s16i a10,a1,8 # [25] qacc_scratch+8 + s8i a11,a1,10 # [26] qacc_scratch+10 + s8i a12,a1,11 # [27] qacc_scratch+11 + s8i a15,a1,15 # [28] qacc_scratch+15 + s16i a13,a1,12 # [29] qacc_scratch+12 + #1138 EE_VZIP_16(q0, q1); /* 4x32 */ + #1139 + #1140 if (bias) { + l32i a15,a1,112 # [30] gra_spill_temp_199 + ee.vld.128.ip q0,a9,0 # [31] id:281 + s32i a9,a1,128 # [32] gra_spill_temp_203 + ee.vzip.16 q0,q1 # [33] + beqz.n a15,.Lt_9_13570 # [34] + +.LBB23_esp_nn_depthwise_conv_s16_mult8: # 0x168e +# Part of loop body line 1091, head labeled .Lt_9_11266 + addi a14,a1,112 # [0] + l32i a8,a1,104 # [1] gra_spill_temp_197 + l32i a15,a1,120 # [2] gra_spill_temp_201 + wur.sar_byte a8 # [3] + ee.vld.128.ip q3,a15,16 # [4] id:284 + ee.vld.128.ip q6,a15,16 # [5] id:285 + ee.vld.128.ip q4,a15,0 # [6] id:286 + s32i a15,a1,120 # [7] gra_spill_temp_201 + ee.src.q.qup q5,q3,q6 # [8] + ee.vadds.s32 q0,q0,q5 # [9] + ee.src.q.qup q2,q3,q4 # [10] + ee.vadds.s32 q1,q1,q2 # [11] + st.qr q1,a14,96 # [12] gra_spill_temp_219-112 + +.Lt_9_13570: # 0x16b5 + #1158 q0 = esp_nn_multiply_by_quantized_mult_ver1_esp32s3(q0, out_mult_ptr, out_shift_ptr); + l32i a10,a1,136 # [0] gra_spill_temp_205 + l32i a11,a1,140 # [1] gra_spill_temp_206 + addi a9,a1,112 # [2] + st.qr q1,a9,96 # [3] gra_spill_temp_219-112 + call8 esp_nn_multiply_by_quantized_mult_ver1_esp32s3 + + #1159 out_mult_ptr += 4; + #1160 out_shift_ptr += 4; + #1161 + #1162 q1 = esp_nn_multiply_by_quantized_mult_ver1_esp32s3(q1, out_mult_ptr, out_shift_ptr); + l32i a11,a1,140 # [0] gra_spill_temp_206 + addi a12,a1,112 # [1] + l32i a10,a1,136 # [2] gra_spill_temp_205 + st.qr q0,a12,80 # [3] gra_spill_temp_218-112 + ld.qr q0,a12,96 # [4] gra_spill_temp_219-112 + addi a10,a10,16 # [5] + addi a11,a11,16 # [6] + call8 esp_nn_multiply_by_quantized_mult_ver1_esp32s3 + + addi.n a2,a2,8 # [0] + l32i a14,a1,116 # [1] gra_spill_temp_200 + l32i a15,a1,124 # [2] gra_spill_temp_202 + l32i a13,a1,132 # [3] gra_spill_temp_204 + l32i a10,a1,140 # [4] gra_spill_temp_206 + l32i a11,a1,136 # [5] gra_spill_temp_205 + addmi a9,a1,256 # [6] + addi a8,a1,112 # [7] + ld.qr q7,a8,80 # [8] gra_spill_temp_218-112 + addi a9,a9,56 # [9] + ee.vldbc.32 q2,a9 # [10] id:290 activation_max + addi a11,a11,32 # [11] + addi a10,a10,32 # [12] + addi.n a13,a13,8 # [13] + s32i a13,a1,132 # [14] gra_spill_temp_204 + s32i a10,a1,140 # [15] 
gra_spill_temp_206 + s32i a11,a1,136 # [16] gra_spill_temp_205 + addmi a10,a1,256 # [17] + addmi a11,a1,256 # [18] + addi a11,a11,52 # [19] + addi a10,a10,40 # [20] + ee.vldbc.32 q3,a10 # [21] id:289 out_offset + ee.vldbc.32 q1,a11 # [22] id:291 activation_min + ee.vadds.s32 q0,q0,q3 # [23] + ee.vadds.s32 q7,q7,q3 # [24] + ee.vmin.s32 q7,q7,q2 # [25] + ee.vmin.s32 q0,q0,q2 # [26] + ee.vmax.s32 q0,q0,q1 # [27] + ee.vmax.s32 q7,q7,q1 # [28] + ee.vunzip.16 q7,q0 # [29] + ee.vunzip.8 q7,q0 # [30] + ee.vst.l.64.ip q7,a15,8 # [31] id:292 + s32i a15,a1,124 # [32] gra_spill_temp_202 + bge a13,a14,.Lt_9_10754 # [33] + +.Lt_9_11266: # 0x1740 + + ee.zero.qacc # [0] + l32i a12,a1,108 # [1] gra_spill_temp_198 + s32i a12,a1,180 # [2] gra_spill_temp_216 + bge a12,a7,.Lt_9_11522 # [3] + + mull a15,a12,a4 # [0] + l32i a14,a1,100 # [1] gra_spill_temp_196 + add.n a8,a15,a5 # [2] + add.n a14,a14,a12 # [3] + mull a14,a3,a14 # [4] + s32i a8,a1,176 # [5] gra_spill_temp_215 + bge a6,a5,.Lt_9_12290 # [6] + +.LBB18_esp_nn_depthwise_conv_s16_mult8: # 0x175f +# Part of loop body line 1091, head labeled .Lt_9_11266 + l32i a10,a1,184 # [0] gra_spill_temp_217 + l32i a11,a1,172 # [1] gra_spill_temp_214 + l32i a12,a1,168 # [2] gra_spill_temp_213 + l32i a8,a1,148 # [3] gra_spill_temp_208 + add.n a9,a15,a6 # [4] + mull a8,a8,a9 # [5] + add.n a12,a12,a6 # [6] + l32i a9,a1,160 # [7] gra_spill_temp_211 + add.n a12,a14,a12 # [8] + mull a11,a11,a12 # [9] + add.n a8,a2,a8 # [10] + l32i a12,a1,156 # [11] gra_spill_temp_210 + addx2 a8,a8,a9 # [12] + add.n a10,a10,a11 # [13] + l32i a11,a1,144 # [14] gra_spill_temp_207 + l32i a9,a1,164 # [15] gra_spill_temp_212 + addx2 a10,a10,a11 # [16] + l32i a11,a1,152 # [17] gra_spill_temp_209 + loopgtz a9,.LBB45_esp_nn_depthwise_conv_s16_mult8 # [18] + + mov.n a9,a8 # [0*II+0] + ee.vldbc.16 q0,a10 # [0*II+1] id:255 + ee.vld.128.ip q1,a9,0 # [0*II+2] id:254 + add.n a10,a10,a12 # [0*II+3] + add.n a8,a8,a11 # [0*II+4] + ee.vmulas.s16.qacc q0,q1 # [0*II+5] + +.LBB45_esp_nn_depthwise_conv_s16_mult8: # 0x17a2 + +.Lt_9_12290: # 0x17a2 + + add.n a14,a14,a3 # [0] + add.n a15,a15,a4 # [1] + l32i a10,a1,180 # [2] gra_spill_temp_216 + l32i a11,a1,176 # [3] gra_spill_temp_215 + addi.n a10,a10,1 # [4] + add.n a11,a11,a4 # [5] + s32i a11,a1,176 # [6] gra_spill_temp_215 + s32i a10,a1,180 # [7] gra_spill_temp_216 + sub a10,a7,a10 # [8] + beqz a10,.Lt_9_11522 # [9] + +.Lt_9_12034: # 0x17bc + blt a6,a5,.LBB18_esp_nn_depthwise_conv_s16_mult8 # [0] + + j .Lt_9_12290 # [0] + +.Lt_9_8450: # 0x17c2 + retw.n # [0] + + .size esp_nn_depthwise_conv_s16_mult8_esp32s3, . 
- esp_nn_depthwise_conv_s16_mult8_esp32s3 + +#elif defined(WIO_TERMINAL) +// dummy code, added for old ARM toolchain +.syntax unified +.thumb +.cpu cortex-m0 + +.section .text +#endif // EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3 diff --git a/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_depthwise_conv_s8_esp32s3.c b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_depthwise_conv_s8_esp32s3.c new file mode 100644 index 0000000..abb11d3 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_depthwise_conv_s8_esp32s3.c @@ -0,0 +1,547 @@ +#include "edge-impulse-sdk/classifier/ei_classifier_config.h" +#if EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN +// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +#include + +static int16_t *scratch_buffer = NULL; + +extern void esp_nn_depthwise_conv_s16_mult8_3x3_esp32s3(const int16_t *input_data, + const uint16_t input_wd, + const uint16_t input_ht, + const uint16_t channels, + const uint16_t pad_wd, + const uint16_t pad_ht, + const uint16_t stride_wd, + const uint16_t stride_ht, + const uint16_t ch_mult, + const int16_t *filter_data, + const int32_t *bias, + int8_t *out_data, + const uint16_t out_wd, + const uint16_t out_ht, + const int32_t out_offset, + const int32_t *out_shift, + const int32_t *out_mult, + const int32_t activation_min, + const int32_t activation_max); + +extern void esp_nn_depthwise_conv_s8_mult1_3x3_padded_esp32s3(const int8_t *input_data, + const uint16_t input_wd, + const uint16_t input_ht, + const uint16_t channels, + const int32_t input_offset, + const uint16_t stride_wd, + const uint16_t stride_ht, + const int8_t *filter_data, + const int32_t *bias, + int8_t *out_data, + const uint16_t out_wd, + const uint16_t out_ht, + const int32_t out_offset, + const int32_t *out_shift, + const int32_t *out_mult, + const int32_t activation_min, + const int32_t activation_max); + +extern void esp_nn_depthwise_conv_s16_mult1_3x3_no_pad_esp32s3(const int16_t *input_data, + const uint16_t input_wd, + const uint16_t input_ht, + const uint16_t channels, + const uint16_t stride_wd, + const uint16_t stride_ht, + const int16_t *filter_data, + const int32_t *bias, + int8_t *out_data, + const uint16_t out_wd, + const uint16_t out_ht, + const int32_t out_offset, + const int32_t *out_shift, + const int32_t *out_mult, + const int32_t activation_min, + const int32_t activation_max); + +extern void esp_nn_depthwise_conv_s16_mult8_esp32s3(const int16_t *input_data, + const uint16_t input_wd, + const uint16_t input_ht, + const uint16_t channels, + const uint16_t pad_wd, + const uint16_t pad_ht, + const uint16_t stride_wd, + const uint16_t stride_ht, + const uint16_t ch_mult, + const int16_t *filter_data, + const uint16_t filter_wd, + const uint16_t filter_ht, + const int32_t *bias, + int8_t *out_data, + const uint16_t out_wd, + const uint16_t out_ht, 
+ const int32_t out_offset, + const int32_t *out_shift, + const int32_t *out_mult, + const int32_t activation_min, + const int32_t activation_max); + +extern void esp_nn_depthwise_conv_s16_mult4_esp32s3(const int16_t *input_data, + const uint16_t input_wd, + const uint16_t input_ht, + const uint16_t channels, + const uint16_t pad_wd, + const uint16_t pad_ht, + const uint16_t stride_wd, + const uint16_t stride_ht, + const uint16_t ch_mult, + const int16_t *filter_data, + const uint16_t filter_wd, + const uint16_t filter_ht, + const int32_t *bias, + int8_t *out_data, + const uint16_t out_wd, + const uint16_t out_ht, + const int32_t out_offset, + const int32_t *out_shift, + const int32_t *out_mult, + const int32_t activation_min, + const int32_t activation_max); + +extern void esp_nn_depthwise_conv_s16_mult1_3x3_esp32s3(const int16_t *input_data, + const uint16_t input_wd, + const uint16_t input_ht, + const uint16_t channels, + const uint16_t pad_wd, + const uint16_t pad_ht, + const uint16_t stride_wd, + const uint16_t stride_ht, + const int16_t *filter_data, + const int32_t *bias, + int8_t *out_data, + const uint16_t out_wd, + const uint16_t out_ht, + const int32_t out_offset, + const int32_t *out_shift, + const int32_t *out_mult, + const int32_t activation_min, + const int32_t activation_max); + +extern void esp_nn_depthwise_conv_s16_mult1_esp32s3(const int16_t *input_data, + const uint16_t input_wd, + const uint16_t input_ht, + const uint16_t channels, + const uint16_t pad_wd, + const uint16_t pad_ht, + const uint16_t stride_wd, + const uint16_t stride_ht, + const int16_t *filter_data, + const uint16_t filter_wd, + const uint16_t filter_ht, + const int32_t *bias, + int8_t *out_data, + const uint16_t out_wd, + const uint16_t out_ht, + const int32_t out_offset, + const int32_t *out_shift, + const int32_t *out_mult, + const int32_t activation_min, + const int32_t activation_max); + +extern void esp_nn_s8_to_s16_esp32s3(const int8_t *src, int16_t *dst, const int size); + +extern void esp_nn_aligned_s8_to_s16_with_offset_esp32s3(const int8_t *src, int16_t *dst, + const int size, const int32_t offset); + +static void esp_nn_depthwise_conv_s8_unrolled(const int8_t *input_data, + const uint16_t input_wd, + const uint16_t input_ht, + const uint16_t channels, + const int32_t input_offset, + const uint16_t pad_wd, + const uint16_t pad_ht, + const uint16_t stride_wd, + const uint16_t stride_ht, + const uint16_t ch_mult, + const int8_t *filter_data, + const uint16_t filter_wd, + const uint16_t filter_ht, + const int32_t *bias, + int8_t *out_data, + const uint16_t out_wd, + const uint16_t out_ht, + const int32_t out_offset, + const int32_t *out_shift, + const int32_t *out_mult, + const int32_t activation_min, + const int32_t activation_max) +{ + int out_idx = 0; + for (int out_y = 0; out_y < out_ht; out_y++) { //height loop + const int16_t base_y = (out_y * stride_ht) - pad_ht; + for (int out_x = 0; out_x < out_wd; out_x++) { //width_loop + const int16_t base_x = (out_x * stride_wd) - pad_wd; + for (int ch_idx = 0; ch_idx < channels; ch_idx++) {//channel_loop + int ch_mult_idx = 0; + for (; ch_mult_idx < ch_mult - 3; ch_mult_idx += 4) { + int32_t result0 = 0, result1 = 0, result2 = 0, result3 = 0; + const int out_ch_idx = ch_mult_idx + ch_idx * ch_mult; + + /* Select filter so as the point doesn't lie outside block */ + int filter_y_start = max(0, -base_y); + int filter_x_start = max(0, -base_x); + int filter_y_end = min(filter_ht, input_ht - base_y); + int filter_x_end = min(filter_wd, input_wd - 
base_x); + + for (int filter_y_idx = filter_y_start; filter_y_idx < filter_y_end; filter_y_idx++) { + const int32_t idx_y = base_y + filter_y_idx; + for (int filter_x_idx = filter_x_start; filter_x_idx < filter_x_end; filter_x_idx++) { + const int32_t idx_x = base_x + filter_x_idx; + int32_t input_index = (idx_y * input_wd + idx_x) * channels + ch_idx; + int32_t filter_index = (filter_y_idx * filter_wd + filter_x_idx) * (channels * ch_mult) + out_ch_idx; + int32_t input_val = input_data[input_index] + input_offset; + int32_t filter_val0 = filter_data[filter_index + 0]; + int32_t filter_val1 = filter_data[filter_index + 1]; + int32_t filter_val2 = filter_data[filter_index + 2]; + int32_t filter_val3 = filter_data[filter_index + 3]; + result0 += input_val * filter_val0; + result1 += input_val * filter_val1; + result2 += input_val * filter_val2; + result3 += input_val * filter_val3; + } + } + if (bias) { + result0 += bias[out_ch_idx + 0]; + result1 += bias[out_ch_idx + 1]; + result2 += bias[out_ch_idx + 2]; + result3 += bias[out_ch_idx + 3]; + } + result0 = esp_nn_multiply_by_quantized_mult(result0, + out_mult[out_ch_idx + 0], out_shift[out_ch_idx + 0]); + result1 = esp_nn_multiply_by_quantized_mult(result1, + out_mult[out_ch_idx + 1], out_shift[out_ch_idx + 1]); + result2 = esp_nn_multiply_by_quantized_mult(result2, + out_mult[out_ch_idx + 2], out_shift[out_ch_idx + 2]); + result3 = esp_nn_multiply_by_quantized_mult(result3, + out_mult[out_ch_idx + 3], out_shift[out_ch_idx + 3]); + + result0 += out_offset; + result1 += out_offset; + result2 += out_offset; + result3 += out_offset; + + result0 = max(result0, activation_min); + result1 = max(result1, activation_min); + result2 = max(result2, activation_min); + result3 = max(result3, activation_min); + + result0 = min(result0, activation_max); + result1 = min(result1, activation_max); + result2 = min(result2, activation_max); + result3 = min(result3, activation_max); + + out_data[out_idx++] = result0; + out_data[out_idx++] = result1; + out_data[out_idx++] = result2; + out_data[out_idx++] = result3; + } + + /* left-over */ + for (; ch_mult_idx < ch_mult; ch_mult_idx++) { + int32_t result = 0; + const int out_ch_idx = ch_mult_idx + ch_idx * ch_mult; + + /* Select filter so as the point doesn't lie outside block */ + int filter_y_start = max(0, -base_y); + int filter_x_start = max(0, -base_x); + int filter_y_end = min(filter_ht, input_ht - base_y); + int filter_x_end = min(filter_wd, input_wd - base_x); + + for (int filter_y_idx = filter_y_start; filter_y_idx < filter_y_end; filter_y_idx++) { + const int32_t idx_y = base_y + filter_y_idx; + for (int filter_x_idx = filter_x_start; filter_x_idx < filter_x_end; filter_x_idx++) { + const int32_t idx_x = base_x + filter_x_idx; + int32_t input_index = (idx_y * input_wd + idx_x) * channels + ch_idx; + int32_t filter_index = (filter_y_idx * filter_wd + filter_x_idx) * (channels * ch_mult) + out_ch_idx; + int32_t input_val = input_data[input_index] + input_offset; + int32_t filter_val = filter_data[filter_index]; + result += input_val * filter_val; + } + } + if (bias) { + result += bias[out_ch_idx]; + } + result = esp_nn_multiply_by_quantized_mult(result, out_mult[out_ch_idx], out_shift[out_ch_idx]); + result += out_offset; + result = max(result, activation_min); + result = min(result, activation_max); + + out_data[out_idx++] = result; + } + } + } + } +} + +void esp_nn_depthwise_conv_s8_ch_mult1(const int8_t *input_data, + const uint16_t input_wd, + const uint16_t input_ht, + const uint16_t channels, + 
const int32_t input_offset, + const uint16_t pad_wd, + const uint16_t pad_ht, + const uint16_t stride_wd, + const uint16_t stride_ht, + const int8_t *filter_data, + const uint16_t filter_wd, + const uint16_t filter_ht, + const int32_t *bias, + int8_t *out_data, + const uint16_t out_wd, + const uint16_t out_ht, + const int32_t out_offset, + const int32_t *out_shift, + const int32_t *out_mult, + const int32_t activation_min, + const int32_t activation_max) +{ + int out_idx = 0; + for (int out_y = 0; out_y < out_ht; out_y++) { //height loop + const int16_t base_y = (out_y * stride_ht) - pad_ht; + for (int out_x = 0; out_x < out_wd; out_x++) { //width_loop + const int16_t base_x = (out_x * stride_wd) - pad_wd; + for (int ch_idx = 0; ch_idx < channels; ch_idx++) {//channel_loop + int32_t result = 0; + /* Select filter so as the point doesn't lie outside block */ + int filter_y_start = max(0, -base_y); + int filter_x_start = max(0, -base_x); + int filter_y_end = min(filter_ht, input_ht - base_y); + int filter_x_end = min(filter_wd, input_wd - base_x); + + for (int filter_y_idx = filter_y_start; filter_y_idx < filter_y_end; filter_y_idx++) { + const int32_t idx_y = base_y + filter_y_idx; + for (int filter_x_idx = filter_x_start; filter_x_idx < filter_x_end; filter_x_idx++) { + const int32_t idx_x = base_x + filter_x_idx; + int32_t input_index = (idx_y * input_wd + idx_x) * channels + ch_idx; + int32_t filter_index = (filter_y_idx * filter_wd + filter_x_idx) * channels + ch_idx; + int32_t input_val = input_data[input_index] + input_offset; + int32_t filter_val = filter_data[filter_index]; + result += input_val * filter_val; + } + } + if (bias) { + result += bias[ch_idx]; + } + result = esp_nn_multiply_by_quantized_mult(result, out_mult[ch_idx], out_shift[ch_idx]); + result += out_offset; + result = max(result, activation_min); + result = min(result, activation_max); + + out_data[out_idx++] = result; + } + } + } +} + +int esp_nn_get_depthwise_conv_scratch_size_esp32s3(const data_dims_t *input_dims, + const data_dims_t *filter_dims, + const data_dims_t *output_dims, + const dw_conv_params_t *conv_params) +{ + const uint16_t input_wd = input_dims->width; + const uint16_t input_ht = input_dims->height; + const uint16_t channels = input_dims->channels; + const uint16_t filter_wd = filter_dims->width; + const uint16_t filter_ht = filter_dims->height; + const uint16_t ch_mult = conv_params->ch_mult; + const uint16_t out_wd = output_dims->width; + const uint16_t out_ht = output_dims->height; + const uint16_t pad_wd = conv_params->padding.width; + const uint16_t pad_ht = conv_params->padding.height; + const uint16_t stride_wd = conv_params->stride.width; + const uint16_t stride_ht = conv_params->stride.height; + + int filter_size = filter_wd * filter_ht * channels * ch_mult; + int pad_width = 0, pad_height = 0; + + if ((ch_mult == 1) && (channels % 8 == 0) && (filter_wd == 3) && (filter_ht == 3)) { + if (channels % 16 == 0) { + if (pad_wd || pad_ht) { + pad_width = pad_wd * 2; + pad_height = pad_ht * 2; + } else { + // check if we need to pad additionally + pad_width = (out_wd * stride_wd + filter_wd - 1) - input_wd; + pad_height = (out_ht * stride_ht + filter_ht - 1) - input_ht; + // printf("in(%d %d %d), out(%d %d), filter (%d %d) stride (%d %d), pad (%d %d)", + // input_wd, input_ht, channels, out_wd, out_ht, filter_wd, filter_ht, + // stride_wd, stride_ht, pad_wd, pad_ht); + } + if (pad_width || pad_height) { + int input_size = (input_wd + pad_width) * (input_ht + pad_height) * channels; + // 
printf("ask1 %d\n", filter_size + input_size + 16); + return filter_size + input_size + 16; // 16 for alignment + } else { + // printf("ask2 %d\n", filter_size + 16); + return filter_size + 16; // 16 for alignment + } + } else { + int input_size = input_wd * input_ht * channels; + // printf("ask3 %d\n", 2 * (filter_size + input_size) + 16); + return 2 * (filter_size + input_size) + 16; // 16 for alignment + } + } else if (ch_mult % 4 == 0) { + int input_size = input_wd * input_ht * channels; + // printf("ask4 %d\n", 2 * (filter_size + input_size) + 16); + return 2 * (filter_size + input_size) + 16; // 16 for alignment + } + return 32; // just few bytes +} + +void esp_nn_set_depthwise_conv_scratch_buf_esp32s3(void *buf) +{ + scratch_buffer = (int16_t *) buf; +} + +/** + * Assumption 1: i/p channels == o/p channels + * Assumption 2: Pointers are valid + * Assumption 3: dialation width = 1 + */ + + + +void esp_nn_depthwise_conv_s8_esp32s3(const data_dims_t *input_dims, + const int8_t *input_data, + const data_dims_t *filter_dims, + const int8_t *filter_data, + const int32_t *bias, + const data_dims_t *output_dims, + int8_t *out_data, + const dw_conv_params_t *conv_params, + const quant_data_t *quant_data) +{ + const uint16_t input_wd = input_dims->width; + const uint16_t input_ht = input_dims->height; + const uint16_t channels = input_dims->channels; + const int32_t input_offset = conv_params->in_offset; + const int32_t out_offset = conv_params->out_offset; + const uint16_t pad_wd = conv_params->padding.width; + const uint16_t pad_ht = conv_params->padding.height; + const uint16_t stride_wd = conv_params->stride.width; + const uint16_t stride_ht = conv_params->stride.height; + const uint16_t filter_wd = filter_dims->width; + const uint16_t filter_ht = filter_dims->height; + const uint16_t out_wd = output_dims->width; + const uint16_t out_ht = output_dims->height; + const int32_t *out_shift = quant_data->shift; + const int32_t *out_mult = quant_data->mult; + const int32_t activation_min = conv_params->activation.min; + const int32_t activation_max = conv_params->activation.max; + const uint16_t ch_mult = conv_params->ch_mult; + + int filter_size = filter_wd * filter_ht * channels * ch_mult; + int align_len = 16 - (filter_size & 15); + int input_size = input_wd * input_ht * channels; + int16_t *filter_data16 = scratch_buffer; + int16_t *input_data16 = scratch_buffer + filter_size + align_len; + if (scratch_buffer == NULL) { + printf("esp_nn_depthwise_conv error! 
scratch_buffer not set!\n"); + return; + } + + if ((ch_mult == 1) && (channels % 8 == 0)) { + if ((filter_wd == 3) && (filter_ht == 3)) { + if ((channels % 16 == 0) && (pad_wd == 1) && (pad_ht == 1)) { + /* process in 8 bits */ + int8_t *filter_aligned = (int8_t *) scratch_buffer; + int8_t *input_padded = (int8_t *) scratch_buffer + filter_size + align_len; + memcpy(filter_aligned, filter_data, filter_size); + esp_nn_aligned_s8_pad_with_value(input_data, input_padded, input_wd, input_ht, channels, + -input_offset, pad_wd, pad_ht); + esp_nn_depthwise_conv_s8_mult1_3x3_padded_esp32s3(input_padded, input_wd + 2 * pad_wd, + input_ht + 2 * pad_ht, channels, input_offset, + stride_wd, stride_ht, filter_aligned, bias, + out_data, out_wd, out_ht, out_offset, out_shift, + out_mult, activation_min, activation_max); + } else if ((channels % 16 == 0) && (pad_wd == 0) && (pad_ht == 0)) { + /* process in 8 bits */ + int8_t *filter_aligned = (int8_t *) scratch_buffer; + int8_t *input_padded = (int8_t *) scratch_buffer + filter_size + align_len; + + // check if we need to pad additionally + int pad_right = (out_wd * stride_wd + filter_wd - 1) - input_wd; + int pad_bottom = (out_ht * stride_ht + filter_ht - 1) - input_ht; + if (pad_right || pad_bottom) { // pad right and bottom + esp_nn_aligned_s8_pad_end_with_value(input_data, input_padded, input_wd, input_ht, + channels, -input_offset, pad_right, pad_bottom); + } else { + input_padded = (int8_t *) input_data; + } + memcpy(filter_aligned, filter_data, filter_size); + esp_nn_depthwise_conv_s8_mult1_3x3_padded_esp32s3(input_padded, input_wd + pad_right, + input_ht + pad_bottom, channels, input_offset, + stride_wd, stride_ht, filter_aligned, bias, + out_data, out_wd, out_ht, out_offset, out_shift, + out_mult, activation_min, activation_max); + } else { /* (channels % 8) == 0 */ + esp_nn_s8_to_s16_esp32s3(filter_data, filter_data16, filter_size); + esp_nn_aligned_s8_to_s16_with_offset_esp32s3(input_data, input_data16, input_size, input_offset); + esp_nn_depthwise_conv_s16_mult1_3x3_esp32s3(input_data16, input_wd, input_ht, channels, + pad_wd, pad_ht, stride_wd, stride_ht, filter_data16, + bias, out_data, out_wd, out_ht, out_offset, out_shift, + out_mult, activation_min, activation_max); + } + } else { // all other ch_mult == 1, `channels % 8 == 0` + esp_nn_depthwise_conv_s8_ch_mult1(input_data, input_wd, input_ht, channels, input_offset, + pad_wd, pad_ht, stride_wd, stride_ht, + filter_data, filter_wd, filter_ht, + bias, out_data, out_wd, out_ht, out_offset, out_shift, + out_mult, activation_min, activation_max); + } + } else if (ch_mult % 8 == 0) { + esp_nn_s8_to_s16_esp32s3(filter_data, filter_data16, filter_size); + esp_nn_aligned_s8_to_s16_with_offset_esp32s3(input_data, input_data16, input_size, input_offset); + if (filter_wd == 3 && filter_ht == 3) { + esp_nn_depthwise_conv_s16_mult8_3x3_esp32s3(input_data16, input_wd, input_ht, channels, + pad_wd, pad_ht, stride_wd, stride_ht, ch_mult, + filter_data16, bias, + out_data, out_wd, out_ht, out_offset, out_shift, + out_mult, activation_min, activation_max); + } else { + esp_nn_depthwise_conv_s16_mult8_esp32s3(input_data16, input_wd, input_ht, channels, + pad_wd, pad_ht, stride_wd, stride_ht, ch_mult, + filter_data16, filter_wd, filter_ht, bias, + out_data, out_wd, out_ht, out_offset, out_shift, + out_mult, activation_min, activation_max); + } + } else if (ch_mult % 4 == 0) { + esp_nn_s8_to_s16_esp32s3(filter_data, filter_data16, filter_size); + esp_nn_aligned_s8_to_s16_with_offset_esp32s3(input_data, 
input_data16, input_size, input_offset); + esp_nn_depthwise_conv_s16_mult4_esp32s3(input_data16, input_wd, input_ht, channels, + pad_wd, pad_ht, stride_wd, stride_ht, ch_mult, + filter_data16, filter_wd, filter_ht, bias, + out_data, out_wd, out_ht, out_offset, out_shift, + out_mult, activation_min, activation_max); + } else { + esp_nn_depthwise_conv_s8_unrolled(input_data, input_wd, input_ht, channels, input_offset, + pad_wd, pad_ht, stride_wd, stride_ht, ch_mult, + filter_data, filter_wd, filter_ht, + bias, out_data, out_wd, out_ht, out_offset, out_shift, + out_mult, activation_min, activation_max); + } +} + +#endif // EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN diff --git a/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_depthwise_conv_s8_mult1_3x3_padded_esp32s3.S b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_depthwise_conv_s8_mult1_3x3_padded_esp32s3.S new file mode 100644 index 0000000..c9240d4 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_depthwise_conv_s8_mult1_3x3_padded_esp32s3.S @@ -0,0 +1,512 @@ +#include "edge-impulse-sdk/classifier/ei_classifier_config.h" +#if EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3 +// Copyright 2021-2022 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + .text + .literal_position + +// processes multiple of 16 channels +// already padded version. 
no additional padding needed +// simply keep sliding filter window by stride_size + + # Program Unit: esp_nn_depthwise_conv_s8_mult1_3x3_padded_esp32s3 + .type esp_nn_depthwise_conv_s8_mult1_3x3_padded_esp32s3, @function + .align 4 + .global esp_nn_depthwise_conv_s8_mult1_3x3_padded_esp32s3 + +esp_nn_depthwise_conv_s8_mult1_3x3_padded_esp32s3: # 0xccc + # qacc_scratch = 0 + # gra_spill_temp_103 = 40 // stride_wd*channels + # gra_spill_temp_104 = 44 // bias_align + # gra_spill_temp_107 = 48 // input_offset + # gra_spill_temp_105 = 52 // out_mult_ptr + # gra_spill_temp_106 = 56 // out_shift_ptr + # gra_spill_temp_108 = 60 // ch_idx + # gra_spill_temp_109 = 64 // out_ch + # gra_spill_temp_110 = 68 // bias_ptr + # gra_spill_temp_111 = 72 // 2 * (input_wd * channels) + # gra_spill_temp_112 = 76 // input_data + # gra_spill_temp_118 = 96 + # gra_spill_temp_119 = 100 + # gra_spill_temp_120 = 104 + # gra_spill_temp_121 = 108 + # gra_spill_temp_113 = 112 // input_wd * channels + # gra_spill_temp_114 = 116 // input_wd + # gra_spill_temp_130 = 120 + + # gra_spill_temp_141 = 0 + # gra_spill_temp_120 = 16 + # gra_spill_temp_137 = 80 + +// offset+bias factor + # gra_spill_temp_134 = 128 //256-128 + # gra_spill_temp_135 = 144 //256-112 + # gra_spill_temp_133 = 160 //256-96 + # gra_spill_temp_132 = 176 //256-80 + + + // registers: + // a2: input_data + // a3: input_wd + // a4: input_ht + // a5: channels + // a6: input_offset + // a7: stride_wd + + // on stack: + + // 320: stride_ht + // 324: filter_data + // 328: *bias + // 332: *out_data + // 336: out_wd + // 340: out_ht + // 344: out_offset + // 348: *out_shift + // 352: *out_mult + // 356: activation_min + // 360: activation_max + + entry a1,320 # + mul16u a7,a7,a5 + s32i a3,a1,116 # [0] gra_spill_temp_114, input_wd + s32i a6,a1,48 # [1] gra_spill_temp_107, input_offset + s32i a7,a1,40 # gra_spill_temp_103, stride_wd*channels + + addi a8,a5,-15 # [2] + s32i a2,a1,76 # [3] gra_spill_temp_112, input_data + l32i a9,a1,328 # [4] id:664 bias+0x0 + mov.n a2,a5 # [5] + s32i a8,a1,64 # [7] gra_spill_temp_109 + s32i a9,a1,68 # [8] gra_spill_temp_110, bias_ptr + blti a8,1,.Lt_7_4610 # [9] + + l32i a12,a1,348 # [4] id:666 out_shift+0x0 + mul16u a15,a3,a5 # [1] + movi.n a9,0 # [13] + s32i a12,a1,56 # [9] gra_spill_temp_106 // out_shift_ptr + s32i a9,a1,60 # [14] gra_spill_temp_108, ch_idx + s32i a15,a1,112 # [12] gra_spill_temp_113, input_wd*channels + l32i a9,a1,352 # [24] id:665 out_mult+0x0 + slli a15,a15,1 # [15] + s32i a15,a1,72 # [23] gra_spill_temp_111, 2 * (input_wd * channels) + s32i a9,a1,52 # [25] gra_spill_temp_105, out_mult_ptr + +// outer most out_ch loop +.Lt_7_5122: # 0xd57 + l32i a13,a1,324 # [1] filter_data + l32i a6,a1,60 # [2] gra_spill_temp_108, ch_idx + l32i a9,a1,48 # [0] gra_spill_temp_107, input_offset + ee.zero.q q2 # [3] + add.n a13,a6,a13 # [4] + s32i a13,a1,108 # [5] gra_spill_temp_121 + +// multiply accumulate filter points + ee.vld.128.xp q1,a13,a2 # [6] id:673 + ee.vld.128.xp q3,a13,a2 # [7] id:674 + ee.vcmp.lt.s8 q0,q1,q2 # [8] + ee.vcmp.lt.s8 q4,q3,q2 # [9] + ee.vzip.8 q1,q0 # [10] + ee.vzip.8 q3,q4 # [11] + ee.vadds.s16 q0,q0,q4 # [12] + ee.vld.128.xp q4,a13,a2 # [13] id:675 + ee.vadds.s16 q1,q1,q3 # [14] + ee.vcmp.lt.s8 q3,q4,q2 # [15] + ee.vzip.8 q4,q3 # [16] + ee.vadds.s16 q1,q1,q4 # [17] + ee.vld.128.xp q4,a13,a2 # [18] id:676 + ee.vadds.s16 q0,q0,q3 # [19] + ee.vcmp.lt.s8 q3,q4,q2 # [20] + ee.vzip.8 q4,q3 # [21] + ee.vadds.s16 q0,q0,q3 # [22] + ee.vld.128.xp q3,a13,a2 # [23] id:677 + ee.vadds.s16 q1,q1,q4 # [24] + 
ee.vcmp.lt.s8 q4,q3,q2 # [25] + ee.vzip.8 q3,q4 # [26] + ee.vadds.s16 q1,q1,q3 # [27] + ee.vld.128.xp q3,a13,a2 # [28] id:678 + ee.vadds.s16 q0,q0,q4 # [29] + ee.vcmp.lt.s8 q4,q3,q2 # [30] + ee.vzip.8 q3,q4 # [31] + ee.vadds.s16 q0,q0,q4 # [32] + ee.vld.128.xp q4,a13,a2 # [33] id:679 + ee.vadds.s16 q1,q1,q3 # [34] + ee.vcmp.lt.s8 q3,q4,q2 # [35] + ee.vzip.8 q4,q3 # [36] + ee.vadds.s16 q1,q1,q4 # [37] + ee.vld.128.xp q4,a13,a2 # [38] id:680 + ee.vadds.s16 q0,q0,q3 # [39] + ee.vcmp.lt.s8 q3,q4,q2 # [40] + ee.vzip.8 q4,q3 # [41] + ee.vadds.s16 q0,q0,q3 # [42] + ee.vld.128.xp q3,a13,a2 # [44] id:681 + ee.vadds.s16 q1,q1,q4 # [43] + ee.vcmp.lt.s8 q2,q3,q2 # [47] + ee.vzip.8 q3,q2 # [48] + ee.vadds.s16 q0,q0,q2 # [49] + ee.vadds.s16 q1,q1,q3 # [50] + + ee.movi.32.a q1,a15,1 # [51] + ee.movi.32.a q1,a8,3 # [52] + ee.movi.32.a q0,a10,3 # [54] + ee.movi.32.a q0,a13,1 # [55] + srai a11,a10,16 # [56] + srai a12,a8,16 # [57] + mull a12,a9,a12 # [58] + mull a11,a9,a11 # [59] + sext a8,a8,15 # [328] + sext a10,a10,15 # [61] + srai a14,a13,16 # [62] + mull a14,a9,a14 # [63] + mull a10,a9,a10 # [64] + mull a8,a9,a8 # [65] + sext a13,a13,15 # [66] + mull a13,a9,a13 # [67] + ee.movi.32.q q3,a11,3 # [68] + ee.movi.32.q q4,a12,3 # [69] + ee.movi.32.q q4,a8,2 # [70] + ee.movi.32.q q3,a10,2 # [71] + ee.movi.32.a q1,a11,2 # [72] + srai a12,a11,16 # [74] + srai a8,a15,16 # [75] + mull a8,a9,a8 # [76] + mull a12,a9,a12 # [77] + sext a15,a15,15 # [78] + sext a11,a11,15 # [79] + mull a11,a9,a11 # [80] + mull a15,a9,a15 # [81] + ee.movi.32.q q4,a12,1 # [82] + ee.movi.32.q q1,a8,3 # [83] + ee.movi.32.q q1,a15,2 # [84] + ee.movi.32.q q4,a11,0 # [85] + ee.movi.32.a q0,a15,2 # [86] + ee.movi.32.q q0,a14,3 # [88] + ee.movi.32.q q0,a13,2 # [91] + srai a8,a15,16 # [89] + mull a8,a9,a8 # [90] + sext a15,a15,15 # [92] + mull a15,a9,a15 # [93] + # 526 MUL_IN_OFFSET_EXPAND(q_sum2, 0, q_sum2, 0); + ee.movi.32.a q0,a11,0 # [94] + srai a13,a11,16 # [95] + ee.movi.32.q q3,a8,1 # [96] + ee.movi.32.q q3,a15,0 # [100] + sext a11,a11,15 # [97] + mull a13,a9,a13 # [98] + l32i a8,a1,332 # [99] + ee.movi.32.a q1,a10,0 # [103] + ee.movi.32.q q0,a13,1 # [100] + srai a12,a10,16 # [105] + sext a10,a10,15 # [106] + mull a12,a9,a12 # [107] + mull a10,a9,a10 # [108] + mull a9,a9,a11 # [109] + ee.movi.32.q q1,a12,1 # [110] + ee.movi.32.q q1,a10,0 # [111] + + l32i a11,a1,328 // load bias + add.n a6,a6,a8 # [102] + ee.movi.32.q q0,a9,0 # [113] + beqz.n a11,.Lt_7_5378 # [114] + +// add bias + l32i a8,a1,68 # [0] gra_spill_temp_110, bias_ptr + extui a11,a11,0,4 # [2] // bias_align + wur.sar_byte a11 # [4] + ee.vld.128.ip q5,a8,16 # [5] id:683 + ee.vld.128.ip q6,a8,16 # [6] id:684 + ee.vld.128.ip q7,a8,16 # [7] id:685 + addmi a10,a1,256 # [2] + ee.src.q.ld.ip q2,a8,16,q5,q6 # [9] + ee.vadds.s32 q1,q1,q5 # [12] + ee.src.q.ld.ip q5,a8,0,q6,q7 # [13] + s32i a8,a1,68 # [11] gra_spill_temp_110, bias_ptr + ee.vadds.s32 q4,q4,q6 # [18] + ee.src.q q7,q7,q2 # [9] + ee.src.q q2,q2,q5 # [13] + ee.vadds.s32 q0,q0,q7 # [12] + ee.vadds.s32 q3,q3,q2 # [12] +.Lt_7_5378: # 0xeef + +// store offset+bias factor (q1,q4,q0,q3) + st.qr q4,a10,-112 # [17] gra_spill_temp_135-256 + st.qr q3,a10,-128 # [21] gra_spill_temp_134-256 + st.qr q1,a10,-96 # [7] gra_spill_temp_133-256 + st.qr q0,a10,-80 # [8] gra_spill_temp_132-256 + +// prepare height loop + movi.n a15,0 # [1] + movi.n a8,0 # [2] + movi.n a9,0 # [3] + s32i a9,a1,100 # [4] gra_spill_temp_119 + s32i a8,a1,104 # [5] gra_spill_temp_120 + s32i a15,a1,96 # [6] gra_spill_temp_118 + +// height loop +.Lt_7_6402: # 0xf0c + 
l32i a4,a1,104 # [2] gra_spill_temp_120 // out_y * (input_wd * stride_ht) * channels) + l32i a8,a1,100 # [3] gra_spill_temp_119 // initialised to 0 before height loop + l32i a5,a1,76 # [1] gra_spill_temp_112, input_data + l32i a3,a1,60 # [0] gra_spill_temp_108, ch_idx + l32i a7,a1,112 # [1] gra_spill_temp_113, input_wd*channels + l32i a10,a1,336 # [0] out_wd + add.n a4,a4,a5 # [4] // input_data + (out_y * stride_ht) * input_wd * channels + mov.n a5,a8 # [5] // index + add.n a3,a3,a4 # [6] // input_row0 + l32i a4,a1,72 # [9] gra_spill_temp_111, 2 * (input_wd * channels) + add.n a7,a7,a3 # [7] // input_row1 = (input_wd * channels) + add.n a8,a8,a10 # [8] + s32i a8,a1,120 # [10] gra_spill_temp_130 + add.n a4,a4,a3 # [11] // input_row2 + +// width loop +.Lt_7_7170: # 0xf32 + l32i a9,a1,108 # [3] gra_spill_temp_121, filter_ptr + ee.zero.qacc # [2] + mov.n a12,a3 # [4] + mov.n a11,a7 # [1] + mov.n a10,a4 # [0] + ee.vld.128.xp q0,a12,a2 # [5] id:693 + ee.vld.128.xp q6,a12,a2 # [6] id:695 + ee.vld.128.xp q1,a9,a2 # [7] id:694 + ee.vld.128.xp q7,a9,a2 # [8] id:696 + ee.vld.128.xp q5,a9,a2 # [9] id:698 + ee.vld.128.xp q3,a9,a2 # [10] id:700 + ee.vmulas.s8.qacc.ld.xp q4,a12,a2,q0,q1 # [11] id:697 + ee.vmulas.s8.qacc.ld.xp q2,a11,a2,q6,q7 # [13] id:699 + ee.vld.128.xp q1,a9,a2 # [14] id:702 + ee.vmulas.s8.qacc.ld.xp q0,a11,a2,q4,q5 # [15] id:701 + ee.vmulas.s8.qacc.ld.xp q6,a11,a2,q2,q3 # [16] id:703 + ee.vld.128.xp q7,a9,a2 # [17] id:704 + ee.vld.128.xp q3,a9,a2 # [18] id:706 + ee.vmulas.s8.qacc.ld.xp q0,a10,a2,q0,q1 # [19] id:705 + ee.vmulas.s8.qacc.ld.xp q1,a10,a2,q6,q7 # [20] id:707 + ee.vmulas.s8.qacc.ld.xp q4,a10,a2,q0,q3 # [21] id:709 + ee.vld.128.xp q6,a9,a2 # [22] id:708 + ee.vld.128.xp q5,a9,a2 # [23] id:710 + ee.vmulas.s8.qacc q1,q6 # [24] + ee.vmulas.s8.qacc q4,q5 # [25] + + // extract data + mov a12,a1 //// scratch + ee.st.qacc_l.l.128.ip a12,16 # [27] id:713 + ee.st.qacc_l.h.32.ip a12,-16 # [28] id:714 + + l32i.n a9,a1,8 # [29] qacc_scratch+8 + l32i.n a11,a1,4 # [30] qacc_scratch+4 + l32i.n a15,a1,0 # [31] qacc_scratch + slli a14,a11,24 # [32] + sext a8,a15,19 # [33] + slli a10,a9,16 # [34] + slli a13,a11,4 # [35] + extui a9,a9,16,16 # [36] + srai a13,a13,12 # [37] + extui a15,a15,20,12 # [39] + srai a14,a14,12 # [40] + srai a10,a10,12 # [41] + extui a11,a11,28,4 # [42] + or a10,a10,a11 # [43] + or a14,a14,a15 # [44] + +// insert to q0 + ee.movi.32.q q0,a8,0 # [38] + ee.movi.32.q q0,a14,1 # [45] + ee.movi.32.q q0,a13,2 # [48] + ee.movi.32.q q0,a10,3 # [49] + + l32i.n a11,a1,16 # [46] qacc_scratch+16 + l32i.n a14,a1,12 # [47] qacc_scratch+12 + slli a13,a11,20 # [50] + + ee.st.qacc_h.l.128.ip a12,16 # [51] id:720 + ee.st.qacc_h.h.32.ip a12,-16 # [55] id:721 + srai a11,a11,12 # [52] + srai a13,a13,12 # [53] + slli a8,a14,28 # [54] + slli a15,a14,8 # [56] + srai a15,a15,12 # [57] + srai a8,a8,12 # [59] + + l32i.n a12,a1,8 # [328] qacc_scratch+8 + or a8,a8,a9 # [61] + extui a14,a14,24,8 # [62] + l32i.n a9,a1,0 # [63] qacc_scratch + or a13,a13,a14 # [64] +//insert to q3 + ee.movi.32.q q3,a8,0 # [65] + ee.movi.32.q q3,a15,1 # [67] + ee.movi.32.q q3,a13,2 # [69] + ee.movi.32.q q3,a11,3 # [70] + + l32i.n a14,a1,4 # [66] qacc_scratch+4 + sext a10,a9,19 # [68] + extui a9,a9,20,12 # [72] + slli a13,a12,16 # [73] + slli a8,a14,24 # [74] + extui a12,a12,16,16 # [75] + srai a13,a13,12 # [76] + srai a8,a8,12 # [77] + slli a15,a14,4 # [78] + srai a15,a15,12 # [79] + or a8,a8,a9 # [80] + extui a14,a14,28,4 # [81] + l32i.n a9,a1,12 # [82] qacc_scratch+12 + or a13,a13,a14 # [83] +// insert to q1 + 
ee.movi.32.q q1,a10,0 # [71] + ee.movi.32.q q1,a8,1 # [84] + ee.movi.32.q q1,a15,2 # [85] + ee.movi.32.q q1,a13,3 # [88] + +// load in_offset+bias factor + addmi a14,a1,256 # [86] + ld.qr q7,a14,-128 # [87] gra_spill_temp_134-256 + ld.qr q4,a14,-112 # [89] gra_spill_temp_135-256 + l32i.n a15,a1,16 # [90] qacc_scratch+16 + ld.qr q2,a14,-96 # [91] gra_spill_temp_133-256 + slli a11,a9,28 # [92] + slli a10,a9,8 # [93] + srai a10,a10,12 # [94] + srai a11,a11,12 # [95] + extui a9,a9,24,8 # [96] + or a11,a11,a12 # [97] + ee.vadds.s32 q0,q0,q2 # [98] + slli a8,a15,20 # [99] + ee.vadds.s32 q3,q3,q4 # [100] + st.qr q3,a1,80 # [101] gra_spill_temp_137-256 + srai a15,a15,12 # [102] + ld.qr q2,a14,-80 # [103] gra_spill_temp_132-256 + srai a8,a8,12 # [105] + or a8,a8,a9 # [108] + +// insert to q6 + ee.movi.32.q q6,a11,0 # [100] + ee.movi.32.q q6,a10,1 # [107] + ee.movi.32.q q6,a8,2 # [112] + ee.movi.32.q q6,a15,3 # [113] + + ee.vadds.s32 q1,q1,q2 # [110] + ee.vadds.s32 q6,q6,q7 # [114] + st.qr q1,a1,16 # [111] gra_spill_temp_120 + s32i.n a7,a1,32 # [0] // tmp + s32i.n a6,a1,36 # [106] // tmp + l32i a7,a1,52 # [109] gra_spill_temp_105, out_mult_ptr + l32i a6,a1,56 # [106] gra_spill_temp_106, out_shift_ptr + addi.n a10,a7,0 + addi.n a11,a6,0 + call8 esp_nn_multiply_by_quantized_mult_ver1_esp32s3 # [116] esp_nn_multiply_by_quantized_mult_ver1_esp32s3 + + mv.qr q5,q0 + ld.qr q0,a1,80 # [4] gra_spill_temp_137-256 + addi.n a10,a7,16 + addi.n a11,a6,16 + call8 esp_nn_multiply_by_quantized_mult_ver1_esp32s3 # [5] esp_nn_multiply_by_quantized_mult_ver1_esp32s3 + + mv.qr q4,q0 + ld.qr q0,a1,16 # [5] gra_spill_temp_120 + addi.n a10,a7,32 + addi.n a11,a6,32 + call8 esp_nn_multiply_by_quantized_mult_ver1_esp32s3 # [6] esp_nn_multiply_by_quantized_mult_ver1_esp32s3 + + st.qr q0,a1,0 # [3] gra_spill_temp_141 + mv.qr q0,q6 + addi.n a10,a7,48 + addi.n a11,a6,48 + call8 esp_nn_multiply_by_quantized_mult_ver1_esp32s3 # [6] esp_nn_multiply_by_quantized_mult_ver1_esp32s3 + + + l32i.n a6,a1,36 # [106] // tmp + l32i.n a7,a1,32 # [0] // tmp + l32i a15,a1,40 # gra_spill_temp_103, stride_wd * channels + l32i a11,a1,120 # [3] gra_spill_temp_130 + + add.n a3,a3,a15 # [0] + add.n a4,a4,a15 # [1] + add.n a7,a7,a15 # [2] + addi.n a5,a5,1 # [4] + + // add offset, apply activation and store + addmi a13,a1,256 # [8] + ld.qr q3,a1,0 # [10] gra_spill_temp_141 + mv.qr q2,q5 + addi a8,a13,88 # [14] + addi a9,a13,100 # [15] + addi a15,a13,104 # [13] + ee.vldbc.32 q6,a9 # [17] id:723 activation_min + ee.vldbc.32 q1,a8 # [18] id:722 out_offset + ee.vldbc.32 q7,a15 # [19] id:724 activation_max + ee.vadds.s32 q4,q4,q1 # [20] + ee.vadds.s32 q2,q2,q1 # [21] + ee.vadds.s32 q5,q0,q1 # [22] + ee.vadds.s32 q3,q3,q1 # [23] + ee.vmin.s32 q3,q3,q7 # [24] + ee.vmin.s32 q5,q5,q7 # [25] + ee.vmin.s32 q2,q2,q7 # [26] + ee.vmin.s32 q4,q4,q7 # [27] + ee.vmax.s32 q4,q4,q6 # [28] + ee.vmax.s32 q2,q2,q6 # [29] + ee.vmax.s32 q5,q5,q6 # [30] + ee.vmax.s32 q3,q3,q6 # [31] + ee.vunzip.16 q3,q5 # [32] + ee.vunzip.16 q2,q4 # [33] + ee.vunzip.8 q2,q3 # [34] + ee.vst.128.xp q2,a6,a2 # [35] id:725 + bne a5,a11,.Lt_7_7170 # [36] + +.Lt_7_6658: # 0x112f +# Part of loop body line 548, head labeled .Lt_7_6402 + l32i a15,a1,112 # [3] gra_spill_temp_113, input_wd*channels + l32i a10,a1,320 # gra_spill_temp_103 + l32i a13,a1,340 # [0] // out_ht + l32i a9,a1,116 # [1] gra_spill_temp_114, input_wd + l32i a12,a1,96 # [4] gra_spill_temp_118 + mull a15,a10,a15 # // (input_wd * stride_ht) * channels + l32i a14,a1,104 # [5] gra_spill_temp_120 + l32i a8,a1,100 # [2] gra_spill_temp_119 + 
+ addi.n a12,a12,1 # [6] + s32i a12,a1,96 # [7] gra_spill_temp_118 + add.n a14,a14,a15 # [8] + add.n a8,a8,a9 # [9] + s32i a8,a1,100 # [10] gra_spill_temp_119 + s32i a14,a1,104 # [11] gra_spill_temp_120, (input_wd * stride_wd) * channels + bne a12,a13,.Lt_7_6402 # [13] // iterate over height loop + +# Part of loop body line 348, head labeled .Lt_7_5122 + l32i a11,a1,56 # [6] gra_spill_temp_106 // out_shift_ptr + l32i a15,a1,52 # [2] gra_spill_temp_105, out_mult_ptr + l32i a10,a1,60 # [24] gra_spill_temp_108, ch_idx + addi a11,a11,64 # [8] + addi a15,a15,64 # [13] + s32i a11,a1,56 # [23] gra_spill_temp_106 + s32i a15,a1,52 # [18] gra_spill_temp_105, out_mult_ptr + l32i a11,a1,64 # [25] gra_spill_temp_109 + addi a10,a10,16 # [26] + s32i a10,a1,60 # [27] gra_spill_temp_108, ch_idx + blt a10,a11,.Lt_7_5122 # [28] // iterate over outer most out_ch loop + +.Lt_7_4610: # 0x11ad + retw.n # [0] + + .size esp_nn_depthwise_conv_s8_mult1_3x3_padded_esp32s3, . - esp_nn_depthwise_conv_s8_mult1_3x3_padded_esp32s3 + +#elif defined(WIO_TERMINAL) +// dummy code, added for old ARM toolchain +.syntax unified +.thumb +.cpu cortex-m0 + +.section .text +#endif // EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3 diff --git a/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/fully_connected/esp_nn_fully_connected_ansi.c b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/fully_connected/esp_nn_fully_connected_ansi.c new file mode 100644 index 0000000..788a65b --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/fully_connected/esp_nn_fully_connected_ansi.c @@ -0,0 +1,54 @@ +#include "edge-impulse-sdk/classifier/ei_classifier_config.h" +#if EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN +// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
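+
+// The reference kernel below folds the zero-point offsets into each product and
+// then rescales the int32 accumulator with esp_nn_multiply_by_quantized_mult(),
+// provided by the SDK's common code. Conceptually that helper performs the usual
+// TFLite-style fixed-point rescale; a rough sketch only (not the exact library
+// code, names illustrative):
+//
+//   static int32_t requantize(int32_t x, int32_t mult, int32_t shift)
+//   {
+//       int left  = shift > 0 ? shift : 0;           /* left_shift  */
+//       int right = left - shift;                    /* right_shift */
+//       int64_t v = (int64_t)(x << left) * mult;
+//       v = (v + (1LL << 30)) >> 31;                 /* rounded high 32 bits */
+//       if (right > 0) {                             /* round-to-nearest >> right */
+//           int32_t to_add = (1 << (right - 1)) - (v < 0 ? 1 : 0);
+//           v = (v + to_add) >> right;
+//       }
+//       return (int32_t)v;
+//   }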
+ +#include + +#include + +void esp_nn_fully_connected_s8_ansi(const int8_t *input_data, + const int32_t input_offset, + const uint16_t row_len, + const int8_t *filter_data, + const int32_t filter_offset, + const int32_t *bias, + int8_t *out_data, + const uint16_t out_channels, + const int32_t out_offset, + const int32_t out_shift, + const int32_t out_mult, + const int32_t activation_min, + const int32_t activation_max) +{ + for (int32_t out_c = 0; out_c < out_channels; ++out_c) { + int32_t result = 0; + for (int32_t data_idx = 0; data_idx < row_len; data_idx++) { + int32_t filter_index = row_len * out_c + data_idx; + int32_t input_val = input_data[data_idx]; + int32_t filter_val = filter_data[filter_index]; + result += (filter_val + filter_offset) * (input_val + input_offset); + } + if (bias) { + result += bias[out_c]; + } + result = esp_nn_multiply_by_quantized_mult(result, out_mult, out_shift); + result += out_offset; + result = max(result, activation_min); + result = min(result, activation_max); + out_data[out_c] = (int8_t) result; + } +} + +#endif // EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN diff --git a/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/fully_connected/esp_nn_fully_connected_s8_esp32s3.S b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/fully_connected/esp_nn_fully_connected_s8_esp32s3.S new file mode 100644 index 0000000..9c1a835 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/fully_connected/esp_nn_fully_connected_s8_esp32s3.S @@ -0,0 +1,220 @@ +#include "edge-impulse-sdk/classifier/ei_classifier_config.h" +#if EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3 +// +// SPDX-FileCopyrightText: 2020-2023 Espressif Systems (Shanghai) CO LTD +// +// SPDX-License-Identifier: Apache-2.0 +// + .text + .align 4 + .literal_position + .literal .LC3_26_101, 1073741824 // nudge (1 << 30) + + # Program Unit: esp_nn_fully_connected_s8_esp32s3 + .type esp_nn_fully_connected_s8_esp32s3, @function + .align 4 + .global esp_nn_fully_connected_s8_esp32s3 + +// a2: input_data +// a3: input_offset +// a4: row_len +// a5: filter_data +// a6: filter_offset +// a7: bias +// on stack: out_data +// on stack: out_channels +// on stack: out_offset +// on stack: out_shift +// on stack: out_mult +// on stack: activation_min +// on stack: activation_max + +esp_nn_fully_connected_s8_esp32s3: # 0x4 + # qacc_scratch = 0 + // 40, filter_offset + // 44, input_offset + # gra_spill_temp_7 = 48 + # gra_spill_temp_0 = 52 + # gra_spill_temp_1 = 56 + # gra_spill_temp_2 = 60 + # gra_spill_temp_3 = 64 + # gra_spill_temp_4 = 68 + # gra_spill_temp_5 = 72 + # gra_spill_temp_6 = 76 + + entry a1,112 # + s32i.n a5,a1,60 # [0] gra_spill_temp_2, filter_data + s32i a7,a1,48 # [1] gra_spill_temp_7, bias + s32i a6,a1,40 # [2] id:252 filter_offset+0x0 + s32i a3,a1,44 # [3] id:251 input_offset+0x0 + mov.n a13,a2 # [5] + mov.n a12,a4 # [6] + + // out_channel loop + l16ui a2,a1,116 # [7] id:255 out_channels+0x0 + addi a4,a1,40 # [8] + addi a8,a1,44 # [9] + ee.vldbc.16 q5,a8 # [10] id:253 input_offset + ee.vldbc.16 q6,a4 # [12] id:254 filter_offset + beqz.n a2,.Lt_0_7938 # [13] + + ee.zero.q q7 # [0] + srai a11,a12,3 # [2] + l32i a10,a1,128 # [5] id:257 out_mult+0x0 + l32i a8,a1,112 # [6] id:259 out_data+0x0 + addi a9,a12,-7 # [7] + s32i a9,a1,76 # [8] gra_spill_temp_6 + s32i a8,a1,72 # [9] gra_spill_temp_5 + s32i a11,a1,64 # [14] gra_spill_temp_3 + slli a11,a11,3 # [16] + s32i a11,a1,68 # [18] gra_spill_temp_4 + l32i a10,a1,124 # [25] id:256 out_shift+0x0 + 
movi.n a15,0 # [17] + mov.n a14,a7 # [15] + max a11,a10,a15 # [29] + s32i a11,a1,52 # [30] gra_spill_temp_0 // left_shift + sub a10,a11,a10 # // right_shift + s32i.n a10,a1,56 # [28] gra_spill_temp_1 // right_shift + mov.n a11,a5 # [31] + movi.n a10,0 # [32] + mov.n a2,a11 # [33] + +.Lt_0_8450: # 0x12b + + l32i a9,a1,76 # [2] gra_spill_temp_6 + extui a5,a11,0,3 # [34] + ee.zero.accx + slli a5,a5,1 # [3] + bgei a9,0,.LBB6_esp_nn_fully_connected_s8_esp32s3 # [9] + + mov.n a5,a10 # [6] + movi.n a2,0 # [0] + j .Lt_0_8706 # [1] + +.LBB6_esp_nn_fully_connected_s8_esp32s3: # 0x147 + wur.sar_byte a5 # [5] + ee.vld.l.64.ip q4,a2,8 # [4] id:267 + l32i a4,a1,64 # [0] gra_spill_temp_3 + mov.n a3,a13 # [1] + addx8 a5,a4,a10 # [2] + ee.vcmp.lt.s8 q2,q4,q7 # [7] + ee.vzip.8 q4,q2 # [8] + loopgtz a4,.LBB45_esp_nn_fully_connected_s8_esp32s3 # [3] + + ee.vld.l.64.ip q0,a2,8 # [0*II+0] id:268 + ee.vld.l.64.ip q1,a3,8 # [0*II+1] id:270 + ee.vcmp.lt.s8 q2,q0,q7 # [0*II+2] + ee.vcmp.lt.s8 q3,q1,q7 # [0*II+3] + ee.vzip.8 q0,q2 # [0*II+4] + ee.vzip.8 q1,q3 # [0*II+5] + ee.vadds.s16 q1,q1,q5 # [0*II+6] + ee.src.q.qup q2,q4,q0 # [0*II+7] + ee.vadds.s16 q2,q2,q6 # [0*II+8] + ee.vmulas.s16.accx q1,q2 # [0*II+9] + +.LBB45_esp_nn_fully_connected_s8_esp32s3: # 0x170 + l32i a2,a1,68 # [0] gra_spill_temp_4 + +.Lt_0_8706: # 0x173 + movi a9, 0 + ee.srs.accx a6, a9, 0 + + bge a2,a12,.Lt_0_9730 # [38] + +// prepare remaining loop + l32i a8,a1,44 # [0] id:251 input_offset+0x0 + l32i a7,a1,40 # [1] id:252 filter_offset+0x0 + sub a3,a12,a2 # [2] + l32i.n a4,a1,60 # [3] gra_spill_temp_2 + add.n a2,a2,a13 # [4] + add.n a4,a4,a5 # [5] + loopgtz a3,.LBB60_esp_nn_fully_connected_s8_esp32s3 # [6] + +// remaining c loop + l8ui a3,a2,0 # [0*II+0] id:299 + l8ui a5,a4,0 # [0*II+1] id:300 + sext a3,a3,7 # [0*II+2] + sext a5,a5,7 # [0*II+3] + add.n a5,a5,a7 # [0*II+5] + add.n a3,a3,a8 # [0*II+6] + mull a3,a3,a5 # [0*II+7] + addi.n a2,a2,1 # [0*II+8] + addi.n a4,a4,1 # [0*II+4] + add.n a6,a6,a3 # [0*II+9] + +.LBB60_esp_nn_fully_connected_s8_esp32s3: # 0x20f + +// add bias +.Lt_0_9730: # 0x20f + l32i a8,a1,48 # [0] gra_spill_temp_7, bias + beqz.n a8,.Lt_0_10754 # [2], skip_bias + + l32i.n a9,a14,0 # [0] id:301 + add.n a6,a6,a9 # [2] + +// apply quantization +.Lt_0_10754: # 0x218 + l32i a2,a1,52 # [1] gra_spill_temp_0 // left_shift + l32i a5,a1,56 # [2] gra_spill_temp_1 // right_shift + ssl a2 # [3] + sll a6,a6 # [5] // x * (1 << left_shift) + + l32r a3,.LC3_26_101 # [0] + + add.n a10,a10,a12 # [0] + addi.n a14,a14,4 # [1] + + l32i a4,a1,128 # [2] gra_spill_temp_10 //out_mult + add.n a11,a11,a12 # [6] + +// multiply add nudge and pick high32 + ssai 31 + mulsh a7,a4,a6 # [4] + mull a4,a4,a6 # [5] + + mov.n a2,a11 # [27] + add a4,a4,a3 + saltu a8,a4,a3 + add.n a7,a7,a8 + src a3,a7,a4 + +// divide_by_power_of2_step + blti a5,1,.skip_divide_by2 + movi.n a8,1 # [28] + addi a4,a5,-1 + ssl a4 // load left_shift + sll a8,a8 // to_add factor ( 1 << (exponent - 1)) + extui a6,a3,31,1 # [33] + sub a8,a8,a6 // modified to_add factor ( 1 << (exponent - 1) - (val < 0)) + add a3,a3,a8 // val + to_add + ssr a5 # [29] //load right_shift + sra a3,a3 # [31] +.skip_divide_by2: + + l32i a8,a1,120 # [41] out_offset + l32i a7,a1,132 # [44] // activation_min + l32i a4,a1,136 # [45] // activation_max + + add.n a8,a8,a3 # [46] // add out_offset + l32i a6,a1,72 # [47] gra_spill_temp_5 + l32i.n a3,a1,116 # [48] out_channels + max a7,a7,a8 # [49] + add.n a6,a15,a6 # [50] + min a4,a4,a7 # [51] + addi.n a15,a15,1 # [52] + s8i a4,a6,0 # [53] id:302 + bne a3,a15,.Lt_0_8450 # 
[55] + +.Lt_0_7938: # 0x25c + retw.n # [0] + + .size esp_nn_fully_connected_s8_esp32s3, . - esp_nn_fully_connected_s8_esp32s3 + +#elif defined(WIO_TERMINAL) +// dummy code, added for old ARM toolchain +.syntax unified +.thumb +.cpu cortex-m0 + +.section .text +#endif // EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3 diff --git a/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/pooling/esp_nn_avg_pool_ansi.c b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/pooling/esp_nn_avg_pool_ansi.c new file mode 100644 index 0000000..84bb786 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/pooling/esp_nn_avg_pool_ansi.c @@ -0,0 +1,76 @@ +#include "edge-impulse-sdk/classifier/ei_classifier_config.h" +#if EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN +// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include + +void esp_nn_avg_pool_s8_ansi(const int8_t *input, + const uint16_t input_wd, + const uint16_t input_ht, + int8_t *output, + const uint16_t output_wd, + const uint16_t output_ht, + const uint16_t stride_wd, + const uint16_t stride_ht, + const uint16_t filter_wd, + const uint16_t filter_ht, + const uint16_t pad_wd, + const uint16_t pad_ht, + const int32_t activation_min, + const int32_t activation_max, + const uint16_t channels) +{ + int32_t base_y = -pad_ht; + for (int32_t out_y = 0; out_y < output_ht; out_y++, base_y += stride_ht) { + int32_t base_x = -pad_wd; + for (int32_t out_x = 0; out_x < output_wd; out_x++, base_x += stride_wd) { + for (int32_t ch_idx = 0; ch_idx < channels; ch_idx++) { + int32_t result = 0; + int32_t filter_cnt = 0; + /* Make sure filter does not cross the input box */ + int32_t filter_y_start = max(0, -base_y); + int32_t filter_x_start = max(0, -base_x); + + int32_t filter_y_end = min(filter_ht, input_ht - base_y); + int32_t filter_x_end = min(filter_wd, input_wd - base_x); + + for (int32_t filter_y = filter_y_start; filter_y < filter_y_end; filter_y++) { + for (int32_t filter_x = filter_x_start; filter_x < filter_x_end; filter_x++) { + int32_t in_x_idx = base_x + filter_x; + int32_t in_y_idx = base_y + filter_y; + int32_t input_index = (in_y_idx * input_wd + in_x_idx) * channels + ch_idx; + result += input[input_index]; + filter_cnt++; + } + } + + /* Rounded average */ + result = result > 0 ? 
(result + filter_cnt / 2) / filter_cnt + : (result - filter_cnt / 2) / filter_cnt; + + /* Activation function */ + result = max(result, activation_min); + result = min(result, activation_max); + + int32_t output_index = (out_y * output_wd + out_x) * channels + ch_idx; + output[output_index] = (int8_t) result; + } + } + } +} + +#endif // EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN diff --git a/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/pooling/esp_nn_avg_pool_s8_esp32s3.S b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/pooling/esp_nn_avg_pool_s8_esp32s3.S new file mode 100644 index 0000000..9e76a1e --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/pooling/esp_nn_avg_pool_s8_esp32s3.S @@ -0,0 +1,686 @@ +#include "edge-impulse-sdk/classifier/ei_classifier_config.h" +#if EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3 +// Copyright 2021-2022 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + .text + .align 4 + .literal_position + + # Program Unit: esp_nn_avg_pool_s8_esp32s3 + .type esp_nn_avg_pool_s8_esp32s3, @function + .align 4 + .global esp_nn_avg_pool_s8_esp32s3 + +// no of channels must be multiple of 4. + +// a2: input +// a3: input_wd +// a4: input_ht +// a5: output +// a6: output_wd +// a7: output_ht +// on stack: stride_wd +// on stack: stride_ht +// on stack: filter_wd +// on stack: filter_ht +// on stack: pad_wd +// on stack: pad_ht +// on stack: activation_min +// on stack: activation_max +// on stack: channels + +esp_nn_avg_pool_s8_esp32s3: # 0x4 + # activation_min = 0 + # activation_max = 4 + # gra_spill_temp_0 = 8 + # gra_spill_temp_1 = 12 + # gra_spill_temp_2 = 16 + # gra_spill_temp_3 = 20 + # gra_spill_temp_4 = 24 + # gra_spill_temp_5 = 28 + # gra_spill_temp_6 = 32 + # gra_spill_temp_7 = 36 + # gra_spill_temp_8 = 40 + # gra_spill_temp_9 = 44 + # gra_spill_temp_10 = 48 + # gra_spill_temp_11 = 52 + # gra_spill_temp_12 = 56 + # gra_spill_temp_13 = 60 + # gra_spill_temp_14 = 64 + # gra_spill_temp_15 = 68 + # gra_spill_temp_16 = 72 + # gra_spill_temp_17 = 76 + # gra_spill_temp_18 = 80 + # gra_spill_temp_19 = 84 + # gra_spill_temp_20 = 88 + # gra_spill_temp_21 = 92 + # gra_spill_temp_22 = 96 + # gra_spill_temp_23 = 100 + # gra_spill_temp_24 = 104 + # gra_spill_temp_25 = 108 + # gra_spill_temp_26 = 112 + # gra_spill_temp_27 = 116 + # gra_spill_temp_28 = 120 + # gra_spill_temp_29 = 124 + # gra_spill_temp_30 = 128 + # gra_spill_temp_31 = 132 + # gra_spill_temp_32 = 136 + # gra_spill_temp_33 = 140 + # gra_spill_temp_34 = 144 + # gra_spill_temp_35 = 148 + # gra_spill_temp_36 = 152 + # gra_spill_temp_37 = 156 + # gra_spill_temp_38 = 160 + # gra_spill_temp_39 = 164 + # gra_spill_temp_40 = 168 + # gra_spill_temp_41 = 172 + # gra_spill_temp_43 = 180 + + entry a1,240 # + mov.n a11,a3 # [0] + mov.n a12,a2 # [1] + s32i a5,a1,136 # [4] gra_spill_temp_30 + s32i a6,a1,128 # [3] gra_spill_temp_32 + + l16ui a5,a1,272 # [5] id:663 channels+0x0 + s32i a7,a1,72 # [6] gra_spill_temp_16 
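+
+// Rounding note: as in the ANSI reference above, the pooled sum is rounded away
+// from zero before dividing by the number of valid taps:
+//
+//   int32_t half = filter_cnt / 2;
+//   result = (result > 0) ? (result + half) / filter_cnt
+//                         : (result - half) / filter_cnt;
+//
+// e.g. with filter_cnt = 4, a sum of 7 gives (7 + 2) / 4 = 2 and a sum of -7 gives
+// (-7 - 2) / 4 = -2 (C division truncates toward zero). The code below does the
+// same per lane: half = filter_cnt >> 1 (srai), added or subtracted depending on
+// the sign of the lane (blti), followed by a signed divide (quos).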
+ + l32i a9,a1,264 # [1] id:664 activation_min+0x0 + l32i a10,a1,268 # [2] id:666 activation_max+0x0 + s32i.n a9,a1,0 # [4] activation_min + s32i.n a10,a1,4 # [3] activation_max + addi.n a8,a1,4 # [0] activation_max + ee.vldbc.32 q7,a1 # [5] id:668 activation_min + ee.vldbc.32 q6,a8 # [6] id:669 activation_max + ee.zero.q q4 # [0] + + extui a10,a5,0,3 # [7] + beqz.n a10,.LBB3_esp_nn_avg_pool_s8_esp32s3 # [8], if (channels % 8 == 0) + + extui a13,a5,0,2 # [0] + beqz.n a13,.LBB52_esp_nn_avg_pool_s8_esp32s3 # [1], if (channels % 4 == 0) + +// exit +.Lt_0_44546: # 0x1e9 + retw.n # [0] + +.LBB3_esp_nn_avg_pool_s8_esp32s3: # 0x1eb // if (channels % 8 == 0) + + l16ui a7,a1,256 # [1] id:671 pad_wd+0x0 + l16ui a10,a1,260 # [5] id:670 pad_ht+0x0 + l32i a15,a1,72 # [12] gra_spill_temp_16 + movi.n a14,0 # [13] + movi.n a8,0 # [14] + neg a10,a10 # [15] + s32i a10,a1,56 # [16] gra_spill_temp_12 + s32i a8,a1,44 # [17] gra_spill_temp_9 + s32i.n a14,a1,20 # [18] gra_spill_temp_3 + sub a9,a4,a10 # [19] + s32i a9,a1,40 # [20] gra_spill_temp_8 + mul16u a15,a15,a5 # [21] + neg a13,a7 # [22] + s32i a13,a1,104 # [23] gra_spill_temp_24 + s32i.n a15,a1,16 # [24] gra_spill_temp_2 + sub a13,a3,a13 # [25] + s32i.n a13,a1,12 # [26] gra_spill_temp_1 + j .Lt_0_28162 # [27] + +.Lt_0_28418: # 0x24e +# Part of loop body line 44, head labeled .Lt_0_28162 + l32i a15,a1,260 # [0] pad_ht + l32i a14,a1,56 # [1] gra_spill_temp_12 + l32i.n a9,a1,16 # [2] gra_spill_temp_2 + l32i a13,a1,244 # [3] stride_ht + l32i a10,a1,40 # [4] gra_spill_temp_8 + l32i a8,a1,44 # [5] gra_spill_temp_9 + sub a10,a10,a13 # [6] + add.n a8,a8,a9 # [7] + add.n a14,a14,a13 # [8] + sub a15,a15,a13 # [9] + s32i a15,a1,260 # [10] pad_ht + s32i a14,a1,56 # [11] gra_spill_temp_12 + s32i a8,a1,44 # [12] gra_spill_temp_9 + s32i a10,a1,40 # [13] gra_spill_temp_8 + l32i.n a8,a1,20 # [14] gra_spill_temp_3 + l32i a9,a1,72 # [15] gra_spill_temp_16 + addi.n a8,a8,1 # [16] + s32i.n a8,a1,20 # [17] gra_spill_temp_3 + beq a8,a9,.Lt_0_44546 # [18] + +.Lt_0_28162: # 0x281 + l32i a10,a1,128 # [0] gra_spill_temp_32 + beqz.n a10,.Lt_0_28418 # [2] + +.LBB7_esp_nn_avg_pool_s8_esp32s3: # 0x286 +# Part of loop body line 44, head labeled .Lt_0_28162 + s32i a7,a1,112 # [0] gra_spill_temp_26 + movi.n a10,0 # [1] + l32i a9,a1,260 # [2] pad_ht + l32i.n a6,a1,12 # [3] gra_spill_temp_1 + l32i a8,a1,44 # [4] gra_spill_temp_9 + movi.n a13,0 # [5] + l32i a15,a1,104 # [6] gra_spill_temp_24 + s32i a15,a1,116 # [7] gra_spill_temp_27 + s32i a13,a1,48 # [8] gra_spill_temp_10 + s32i a8,a1,124 # [9] gra_spill_temp_29 + s32i a6,a1,120 # [10] gra_spill_temp_28 + l32i a8,a1,40 # [11] gra_spill_temp_8 + l32i a6,a1,252 # [12] filter_ht + movi.n a13,0 # [13] + max a9,a9,a10 # [14] + s32i a9,a1,160 # [15] gra_spill_temp_38 + s32i a13,a1,92 # [16] gra_spill_temp_21 + min a6,a6,a8 # [17] + bnez.n a5,.LBB10_esp_nn_avg_pool_s8_esp32s3 # [18] + +.Lt_0_29186: # 0x2ba + l32i a8,a1,116 # [0] gra_spill_temp_27 + l32i a15,a1,120 # [1] gra_spill_temp_28 + l32i a9,a1,48 # [2] gra_spill_temp_10 + l32i a14,a1,240 # [3] stride_wd + l32i a10,a1,124 # [4] gra_spill_temp_29 + l32i a13,a1,112 # [5] gra_spill_temp_26 + add.n a10,a10,a5 # [6] + s32i a10,a1,124 # [7] gra_spill_temp_29 + sub a13,a13,a14 # [8] + add.n a9,a9,a14 # [9] + sub a15,a15,a14 # [10] + add.n a8,a8,a14 # [11] + s32i a8,a1,116 # [12] gra_spill_temp_27 + s32i a15,a1,120 # [13] gra_spill_temp_28 + s32i a9,a1,48 # [14] gra_spill_temp_10 + s32i a13,a1,112 # [15] gra_spill_temp_26 + l32i a9,a1,92 # [16] gra_spill_temp_21 + l32i a10,a1,128 # [17] 
gra_spill_temp_32 + addi.n a9,a9,1 # [18] + s32i a9,a1,92 # [19] gra_spill_temp_21 + beq a9,a10,.Lt_0_28418 # [20] + +.Lt_0_28930: # 0x2f5 +# Part of loop body line 46, head labeled .Lt_0_29186 + beqz.n a5,.Lt_0_29186 # [0] + +.LBB10_esp_nn_avg_pool_s8_esp32s3: # 0x2f7 +# Part of loop body line 44, head labeled .Lt_0_28162 + l32i a14,a1,120 # [0] gra_spill_temp_28 + l32i a13,a1,248 # [1] filter_wd + l32i a9,a1,136 # [2] gra_spill_temp_30 + l32i a8,a1,124 # [3] gra_spill_temp_29 + movi.n a15,0 # [4] + s32i a15,a1,24 # [5] gra_spill_temp_60 + add.n a10,a8,a5 # [6] + movi.n a15,0 # [7] + add.n a8,a8,a9 # [8] + min a13,a13,a14 # [9] + add.n a10,a9,a10 # [10] + s32i a10,a1,180 # [11] gra_spill_temp_43 + s32i a13,a1,76 # [12] gra_spill_temp_17 + l32i a14,a1,112 # [13] gra_spill_temp_26 + s32i a8,a1,148 # [14] gra_spill_temp_45 + max a14,a14,a15 # [15] + l32i a15,a1,116 # [16] gra_spill_temp_27 + s32i a14,a1,152 # [17] gra_spill_temp_63 + add.n a8,a15,a14 # [18] + s32i a8,a1,36 # [19] gra_spill_temp_7 + add.n a15,a15,a13 # [20] + s32i a15,a1,204 # [21] gra_spill_temp_39 + sub a13,a13,a14 # [22] + s32i a13,a1,280 # [23] gra_spill_temp_58 + j .Lt_0_29698 # [24] + +.LBB13_esp_nn_avg_pool_s8_esp32s3: # 0x33b +# Part of loop body line 16, head labeled .Lt_0_29698 + l32i a10,a1,56 # [0] gra_spill_temp_12 + l32i a14,a1,204 # [1] gra_spill_temp_39 + add.n a10,a10,a15 # [2] + mull a10,a11,a10 # [3] + movi.n a15,0 # [4] + add.n a14,a10,a14 # [5] + +.Lt_0_30466: # 0x34a +# Loop body line 61, nesting depth: 4, estimated iterations: 252 + l32i a9,a1,76 # [0] gra_spill_temp_17 + l32i a8,a1,152 # [1] gra_spill_temp_63 + add.n a14,a14,a11 # [2] + bge a8,a9,.Lt_0_30722 # [3] + +.LBB16_esp_nn_avg_pool_s8_esp32s3: # 0x355 +# Part of loop body line 61, head labeled .Lt_0_30466 + l32i a3,a1,36 # [0] gra_spill_temp_7 + l32i a2,a1,24 # [1] gra_spill_temp_4 + add.n a3,a3,a10 # [2] + mull a3,a3,a5 # [3] + movi.n a8,0 # [4] + add.n a2,a2,a3 # [5] + l32i a3,a1,280 # [6] gra_spill_temp_58 + add.n a2,a12,a2 # [7] + loopgtz a3,.LBB140_esp_nn_avg_pool_s8_esp32s3 # [8] + + ee.vld.l.64.xp q0,a2,a5 # [0*II+1] id:677 + ee.vcmp.lt.s8 q1,q0,q4 # [0*II+3] + ee.vzip.8 q0,q1 # [0*II+4] + ee.vcmp.lt.s16 q1,q0,q4 # [0*II+5] + ee.vzip.16 q0,q1 # [0*II+6] + ee.vadds.s32 q2,q2,q1 # [0*II+7] + ee.vadds.s32 q3,q3,q0 # [0*II+8] + + +.LBB140_esp_nn_avg_pool_s8_esp32s3: # 0x385 +# Part of loop body line 61, head labeled .Lt_0_30466 + l32i a2,a1,48 # [0] gra_spill_temp_10 + sub a9,a7,a2 # [2] + sub a2,a2,a7 # [3] + max a9,a9,a8 # [4] + l32i a8,a1,248 # [5] filter_wd + sub a2,a11,a2 # [6] + min a8,a8,a2 # [7] + sub a8,a8,a9 # [8] + add.n a15,a15,a8 # [9] + +.Lt_0_30722: # 0x39f +# Part of loop body line 61, head labeled .Lt_0_30466 + add.n a10,a10,a11 # [0] + addi.n a13,a13,1 # [1] + bne a6,a13,.Lt_0_30466 # [2] + +.Lt_0_29954: # 0x3a6 + srai a2,a15,1 # [3] + +// move data to general purpose registers and average + ee.movi.32.a q3,a9,0 # [0] + ee.movi.32.a q3,a4,1 # [0] + + blti a9,1,.Lt_0_32258 # [4] + add.n a9,a9,a2 # [0] + j .Lt_0_32002 # [2] +.Lt_0_32258: # 0x45e + sub a9,a9,a2 # [0] +.Lt_0_32002: # 0x3b9 + + blti a4,1,.Lt_0_32770 # [1] + add.n a4,a2,a4 # [0] + j .Lt_0_32514 # [2] +.Lt_0_32770: + sub a4,a4,a2 # [0] +.Lt_0_32514: # 0x3c4 + + quos a9,a9,a15 # [1] + quos a4,a4,a15 # [1] + ee.movi.32.q q3,a9,0 # [0] + ee.movi.32.q q3,a4,1 # [1] + + ee.movi.32.a q3,a9,2 # [2] + ee.movi.32.a q3,a14,3 # [0] + + blti a9,1,.Lt_0_33282 # [3] + add.n a9,a9,a2 # [0] + j .Lt_0_33026 # [2] +.Lt_0_33282: # 0x470 + sub a9,a9,a2 # [0] +.Lt_0_33026: # 0x3d5 + + 
blti a14,1,.Lt_0_33794 # [1] + add.n a14,a2,a14 # [0] + j .Lt_0_33538 # [2] +.Lt_0_33794: # 0x479 + sub a14,a14,a2 # [0] +.Lt_0_33538: # 0x3e0 + + quos a9,a9,a15 # [1] + quos a14,a14,a15 # [1] + ee.movi.32.q q3,a9,2 # [0] + ee.movi.32.q q3,a14,3 # [1] + + + ee.movi.32.a q2,a9,0 # [0] + ee.movi.32.a q2,a4,1 # [0] + + blti a9,1,.Lt_0_34306 # [3] + add.n a9,a9,a2 # [0] + j .Lt_0_34050 # [2] +.Lt_0_34306: # 0x482 + sub a9,a9,a2 # [0] +.Lt_0_34050: # 0x3f1 + + blti a4,1,.Lt_0_34818 # [1] + add.n a4,a2,a4 # [0] + j .Lt_0_34562 # [2] +.Lt_0_34818: # 0x48b + sub a4,a4,a2 # [0] +.Lt_0_34562: # 0x3fc + + quos a9,a9,a15 # [1] + quos a4,a4,a15 # [1] + ee.movi.32.q q2,a9,0 # [0] + ee.movi.32.q q2,a4,1 # [1] + + ee.movi.32.a q2,a9,2 # [2] + ee.movi.32.a q2,a14,3 # [0] + + blti a9,1,.Lt_0_35330 # [3] + add.n a9,a9,a2 # [0] + j .Lt_0_35074 # [2] +.Lt_0_35330: # 0x494 + sub a9,a9,a2 # [0] +.Lt_0_35074: # 0x40d + + blti a14,1,.Lt_0_35842 # [1] + add.n a14,a2,a14 # [0] + j .Lt_0_35586 # [2] +.Lt_0_35842: # 0x49d + sub a14,a14,a2 # [0] +.Lt_0_35586: # 0x418 + + quos a9,a9,a15 # [1] + quos a14,a14,a15 # [1] + ee.movi.32.q q2,a9,2 # [0] + ee.movi.32.q q2,a14,3 # [1] + + + l32i a9,a1,180 # [0] gra_spill_temp_43 + l32i a14,a1,24 # [1] gra_spill_temp_4 + l32i a13,a1,148 # [2] gra_spill_temp_45 + ee.vmin.s32 q1,q3,q6 # [4] + ee.vmax.s32 q1,q1,q7 # [5] + ee.vmin.s32 q5,q2,q6 # [8] + addi.n a14,a14,8 # [9] + s32i a14,a1,24 # [10] gra_spill_temp_4 + ee.vmax.s32 q5,q5,q7 # [11] + addi.n a8,a13,8 # [12] + s32i a8,a1,148 # [13] gra_spill_temp_45 + ee.vunzip.16 q1,q5 # [14] + ee.vunzip.8 q1,q5 # [15] + ee.vst.l.64.ip q1,a13,0 # [16] id:678 + bge a8,a9,.Lt_0_29186 # [17] + +.Lt_0_29698: # 0x44b +# Loop body line 16, nesting depth: 3, estimated iterations: 252 + mv.qr q3,q4 # [0] + l32i a15,a1,160 # [1] gra_spill_temp_38 + mv.qr q2,q4 # [2] + mov.n a13,a15 # [3] + blt a15,a6,.LBB13_esp_nn_avg_pool_s8_esp32s3 # [4] + +.Lt_0_51458: # 0x459 +# Part of loop body line 16, head labeled .Lt_0_29698 + movi.n a15,0 # [0] + j .Lt_0_29954 # [1] + + +.LBB52_esp_nn_avg_pool_s8_esp32s3: # 0x4a6 // if (channels % 4 == 0) + + l16ui a7,a1,256 # [1] id:671 pad_wd+0x0 + l16ui a13,a1,260 # [5] id:670 pad_ht+0x0 + s32i a13,a1,64 # [8] gra_spill_temp_4 + l32i a8,a1,72 # [12] gra_spill_temp_16 + movi.n a15,0 # [13] + movi.n a9,0 # [14] + neg a13,a13 # [15] + s32i a13,a1,192 # [16] gra_spill_temp_36 + s32i a9,a1,32 # [17] gra_spill_temp_6 + s32i.n a15,a1,8 # [18] gra_spill_temp_0 + sub a10,a4,a13 # [19] + s32i a10,a1,28 # [20] gra_spill_temp_5 + mul16u a8,a8,a5 # [21] + neg a14,a7 # [22] + s32i a14,a1,104 # [23] gra_spill_temp_24 + s32i.n a8,a1,16 # [24] gra_spill_temp_2 + sub a14,a3,a14 # [25] + s32i.n a14,a1,12 # [26] gra_spill_temp_1 + j .Lt_0_37890 # [27] + +.Lt_0_38146: # 0x50b +# Part of loop body line 161, head labeled .Lt_0_37890 + l32i a15,a1,64 # [0] gra_spill_temp_4 + l32i a14,a1,192 # [1] gra_spill_temp_36 + l32i.n a9,a1,16 # [2] gra_spill_temp_2 + l32i a13,a1,244 # [3] stride_ht + l32i a10,a1,28 # [4] gra_spill_temp_5 + l32i a8,a1,32 # [5] gra_spill_temp_6 + sub a10,a10,a13 # [6] + add.n a8,a8,a9 # [7] + add.n a14,a14,a13 # [8] + sub a15,a15,a13 # [9] + s32i a15,a1,64 # [10] gra_spill_temp_4 + s32i a14,a1,192 # [11] gra_spill_temp_36 + s32i a8,a1,32 # [12] gra_spill_temp_6 + s32i a10,a1,28 # [13] gra_spill_temp_5 + l32i.n a8,a1,8 # [14] gra_spill_temp_0 + l32i a9,a1,72 # [15] gra_spill_temp_16 + addi.n a8,a8,1 # [16] + s32i.n a8,a1,8 # [17] gra_spill_temp_0 + sub a8,a8,a9 # [18] + beqz a8,.Lt_0_44546 # [19] + +.Lt_0_37890: # 0x541 +# 
Loop body line 161, nesting depth: 1, estimated iterations: 252 + l32i a10,a1,128 # [0] gra_spill_temp_32 + beqz.n a10,.Lt_0_38146 # [2] + +# Part of loop body line 161, head labeled .Lt_0_37890 + s32i a7,a1,96 # [0] gra_spill_temp_22 + movi.n a10,0 # [1] + l32i a9,a1,64 # [2] gra_spill_temp_4 + l32i.n a6,a1,12 # [3] gra_spill_temp_1 + l32i a8,a1,32 # [4] gra_spill_temp_6 + movi.n a13,0 # [5] + l32i a15,a1,104 # [6] gra_spill_temp_24 + s32i a15,a1,100 # [7] gra_spill_temp_23 + s32i a13,a1,148 # [8] gra_spill_temp_35 + s32i a8,a1,108 # [9] gra_spill_temp_25 + s32i a6,a1,144 # [10] gra_spill_temp_24 + l32i a8,a1,28 # [11] gra_spill_temp_5 + l32i a6,a1,252 # [12] filter_ht + max a9,a9,a10 # [14] + s32i a9,a1,168 # [15] gra_spill_temp_40 + s32i a13,a1,88 # [16] gra_spill_temp_20 + min a6,a6,a8 # [17] + bnez.n a5,.LBB59_esp_nn_avg_pool_s8_esp32s3 # [18] + +.Lt_0_38914: # 0x57a +# Loop body line 163 + l32i a8,a1,100 # [0] gra_spill_temp_23 + l32i a15,a1,144 # [1] gra_spill_temp_24 + l32i a9,a1,148 # [2] gra_spill_temp_35 + l32i a14,a1,240 # [3] stride_wd + l32i a10,a1,108 # [4] gra_spill_temp_25 + l32i a13,a1,96 # [5] gra_spill_temp_22 + add.n a10,a10,a5 # [6] + s32i a10,a1,108 # [7] gra_spill_temp_25 + sub a13,a13,a14 # [8] + add.n a9,a9,a14 # [9] + sub a15,a15,a14 # [10] + add.n a8,a8,a14 # [11] + s32i a8,a1,100 # [12] gra_spill_temp_23 + s32i a15,a1,144 # [13] gra_spill_temp_24 + s32i a9,a1,148 # [14] gra_spill_temp_35 + s32i a13,a1,96 # [15] gra_spill_temp_22 + l32i a9,a1,88 # [16] gra_spill_temp_20 + l32i a10,a1,128 # [17] gra_spill_temp_32 + addi.n a9,a9,1 # [18] + s32i a9,a1,88 # [19] gra_spill_temp_20 + beq a9,a10,.Lt_0_38146 # [20] + + beqz.n a5,.Lt_0_38914 # [0] + +.LBB59_esp_nn_avg_pool_s8_esp32s3: # 0x5b7 +# Part of loop body line 161, head labeled .Lt_0_37890 + l32i a14,a1,144 # [0] gra_spill_temp_24 + l32i a13,a1,248 # [1] filter_wd + l32i a9,a1,136 # [2] gra_spill_temp_30 + l32i a8,a1,108 # [3] gra_spill_temp_25 + movi.n a15,0 # [4] + s32i a15,a1,216 # [5] gra_spill_temp_52 + add.n a10,a8,a5 # [6] + add.n a8,a8,a9 # [8] + min a13,a13,a14 # [9] + add.n a10,a9,a10 # [10] + s32i a10,a1,172 # [11] gra_spill_temp_41 + s32i a13,a1,132 # [12] gra_spill_temp_31 + l32i a14,a1,96 # [13] gra_spill_temp_22 + s32i a8,a1,164 # [14] gra_spill_temp_39 + max a14,a14,a15 # [15] + l32i a15,a1,100 # [16] gra_spill_temp_23 + s32i a14,a1,208 # [17] gra_spill_temp_50 + add.n a8,a15,a14 # [18] + s32i a8,a1,60 # [19] gra_spill_temp_13 + add.n a15,a15,a13 # [20] + s32i a15,a1,196 # [21] gra_spill_temp_37 + sub a13,a13,a14 # [22] + s32i a13,a1,52 # [23] gra_spill_temp_11 + j .Lt_0_39426 # [24] + +.LBB62_esp_nn_avg_pool_s8_esp32s3: # 0x5fb +# Part of loop body line 173, head labeled .Lt_0_39426 + l32i a10,a1,192 # [0] gra_spill_temp_36 + l32i a14,a1,196 # [1] gra_spill_temp_37 + add.n a10,a10,a15 # [2] + mull a10,a11,a10 # [3] + movi.n a15,0 # [4] + add.n a14,a10,a14 # [5] + +.Lt_0_40194: # 0x60a +# Loop body line 178, nesting depth: 4, estimated iterations: 252 + l32i a9,a1,132 # [0] gra_spill_temp_31 + l32i a8,a1,208 # [1] gra_spill_temp_50 + add.n a14,a14,a11 # [2] + bge a8,a9,.Lt_0_40450 # [3] + +.LBB65_esp_nn_avg_pool_s8_esp32s3: # 0x615 +# Part of loop body line 178, head labeled .Lt_0_40194 + l32i a3,a1,60 # [0] gra_spill_temp_13 + l32i a2,a1,216 # [1] gra_spill_temp_52 + add.n a3,a3,a10 # [2] + mull a3,a3,a5 # [3] + l32i a4,a1,52 # [4] gra_spill_temp_11 + add.n a2,a2,a3 # [5] + add.n a2,a12,a2 # [6] + loopgtz a4,.LBB155_esp_nn_avg_pool_s8_esp32s3 # [7] + + ee.vldbc.32.xp q0,a2,a5 # [0*II+0] id:684 + 
ee.vcmp.lt.s8 q1,q0,q4 # [0*II+2] + ee.vzip.8 q0,q1 # [0*II+3] + ee.vcmp.lt.s16 q1,q0,q4 # [0*II+4] + ee.vzip.16 q0,q1 # [0*II+5] + ee.vadds.s32 q2,q2,q0 # [0*II+6] + +.LBB155_esp_nn_avg_pool_s8_esp32s3: # 0x63e +# Part of loop body line 178, head labeled .Lt_0_40194 + l32i a2,a1,148 # [0] gra_spill_temp_35 + movi.n a8,0 # [1] + sub a9,a7,a2 # [2] + sub a2,a2,a7 # [3] + max a9,a9,a8 # [4] + l32i a8,a1,248 # [5] filter_wd + sub a2,a11,a2 # [6] + min a8,a8,a2 # [7] + sub a8,a8,a9 # [8] + add.n a15,a15,a8 # [9] + +.Lt_0_40450: # 0x65a +# Part of loop body line 178, head labeled .Lt_0_40194 + add.n a10,a10,a11 # [0] + addi.n a13,a13,1 # [1] + bne a6,a13,.Lt_0_40194 # [2] + +.Lt_0_39682: # 0x661 +# Part of loop body line 173, head labeled .Lt_0_39426 + srai a2,a15,1 # [5] + +// move to gp registers and average + + ee.movi.32.a q2,a9,0 # [0] + ee.movi.32.a q2,a4,1 # [0] + + blti a9,1,.Lt_0_41986 # [3] + add.n a9,a9,a2 # [0] + j .Lt_0_41730 # [2] +.Lt_0_41986: # 0x482 + sub a9,a9,a2 # [0] +.Lt_0_41730: # 0x3f1 + + blti a4,1,.Lt_0_42498 # [1] + add.n a4,a2,a4 # [0] + j .Lt_0_42242 # [2] +.Lt_0_42498: # 0x48b + sub a4,a4,a2 # [0] +.Lt_0_42242: # 0x3fc + + + quos a9,a9,a15 # [1] + quos a4,a4,a15 # [1] + ee.movi.32.q q2,a9,0 # [0] + ee.movi.32.q q2,a4,1 # [1] + + ee.movi.32.a q2,a9,2 # [2] + ee.movi.32.a q2,a14,3 # [0] + + blti a9,1,.Lt_0_43010 # [3] + add.n a9,a9,a2 # [0] + j .Lt_0_42754 # [2] +.Lt_0_43010: # 0x494 + sub a9,a9,a2 # [0] +.Lt_0_42754: # 0x40d + + + blti a14,1,.Lt_0_43522 # [1] + add.n a14,a2,a14 # [0] + j .Lt_0_43266 # [2] +.Lt_0_43522: # 0x49d + sub a14,a14,a2 # [0] +.Lt_0_43266: # 0x418 + + quos a9,a9,a15 # [1] + quos a14,a14,a15 # [1] + ee.movi.32.q q2,a9,2 # [0] + ee.movi.32.q q2,a14,3 # [1] + + + l32i a9,a1,172 # [0] gra_spill_temp_41 + l32i a8,a1,164 # [1] gra_spill_temp_39 + l32i a14,a1,216 # [2] gra_spill_temp_52 + addi.n a14,a14,4 # [5] + ee.vmin.s32 q2,q2,q6 # [6] + s32i a14,a1,216 # [7] gra_spill_temp_52 + ee.vmax.s32 q2,q2,q7 # [8] + ee.vunzip.16 q2,q1 # [9] + ee.vunzip.8 q2,q1 # [10] + ee.vst.l.64.ip q2,a1,0 # [11] id:691 + l32i.n a13,a1,0 # [12] id:692 + s32i.n a13,a8,0 # [13] id:693 + addi.n a8,a8,4 # [14] + s32i a8,a1,164 # [15] gra_spill_temp_39 + bge a8,a9,.Lt_0_38914 # [16] + +.Lt_0_39426: # 0x6cb + l32i a15,a1,168 # [0] gra_spill_temp_40 + mv.qr q2,q4 # [1] + mov.n a13,a15 # [2] + blt a15,a6,.LBB62_esp_nn_avg_pool_s8_esp32s3 # [3] + +.Lt_0_52738: # 0x6d6 + movi.n a15,0 # [0] + j .Lt_0_39682 # [1] + + .size esp_nn_avg_pool_s8_esp32s3, . - esp_nn_avg_pool_s8_esp32s3 + +#elif defined(WIO_TERMINAL) +// dummy code, added for old ARM toolchain +.syntax unified +.thumb +.cpu cortex-m0 + +.section .text +#endif // EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3 diff --git a/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/pooling/esp_nn_max_pool_ansi.c b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/pooling/esp_nn_max_pool_ansi.c new file mode 100644 index 0000000..94d2344 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/pooling/esp_nn_max_pool_ansi.c @@ -0,0 +1,70 @@ +#include "edge-impulse-sdk/classifier/ei_classifier_config.h" +#if EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN +// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include + +void esp_nn_max_pool_s8_ansi(const int8_t *input, + const uint16_t input_wd, + const uint16_t input_ht, + int8_t *output, + const uint16_t output_wd, + const uint16_t output_ht, + const uint16_t stride_wd, + const uint16_t stride_ht, + const uint16_t filter_wd, + const uint16_t filter_ht, + const uint16_t pad_wd, + const uint16_t pad_ht, + const int32_t activation_min, + const int32_t activation_max, + const uint16_t channels) +{ + int32_t base_y = -pad_ht; + for (int32_t out_y = 0; out_y < output_ht; out_y++, base_y += stride_ht) { + int32_t base_x = -pad_wd; + for (int32_t out_x = 0; out_x < output_wd; out_x++, base_x += stride_wd) { + /* Make sure filter does not cross the input box */ + int32_t filter_y_start = max(0, -base_y); + int32_t filter_x_start = max(0, -base_x); + int32_t filter_y_end = min(filter_ht, input_ht - base_y); + int32_t filter_x_end = min(filter_wd, input_wd - base_x); + + for (int32_t ch_idx = 0; ch_idx < channels; ch_idx++) { + int8_t result = INT8_MIN; + + for (int32_t filter_y = filter_y_start; filter_y < filter_y_end; filter_y++) { + for (int32_t filter_x = filter_x_start; filter_x < filter_x_end; filter_x++) { + int32_t in_x_idx = base_x + filter_x; + int32_t in_y_idx = base_y + filter_y; + int32_t input_index = (in_y_idx * input_wd + in_x_idx) * channels + ch_idx; + result = max(input[input_index], result); + } + } + + /* Activation function */ + result = max(result, activation_min); + result = min(result, activation_max); + + int32_t output_index = (out_y * output_wd + out_x) * channels + ch_idx; + output[output_index] = result; + } + } + } +} + +#endif // EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN diff --git a/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/pooling/esp_nn_max_pool_s8_esp32s3.S b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/pooling/esp_nn_max_pool_s8_esp32s3.S new file mode 100644 index 0000000..722e0db --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/pooling/esp_nn_max_pool_s8_esp32s3.S @@ -0,0 +1,449 @@ +#include "edge-impulse-sdk/classifier/ei_classifier_config.h" +#if EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3 +// Copyright 2021-2022 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
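+
+// Rough shape of the vector kernel below, as C-like pseudocode (int8x8, splat8,
+// load8, max_s8, etc. are illustrative placeholders, not real intrinsics; the
+// real code keeps everything in q-registers). Channels are processed in groups
+// of 8 via ee.vld.l.64 / ee.vmax.s8, with a 4-channel fallback path:
+//
+//   for (int c = 0; c < channels; c += 8) {
+//       int8x8 acc = splat8(INT8_MIN);
+//       for (each valid (filter_y, filter_x) position) {
+//           acc = max_s8(acc, load8(&input[in_idx + c]));
+//       }
+//       acc = min_s8(acc, splat8(activation_max));   // clamp to activation range
+//       acc = max_s8(acc, splat8(activation_min));
+//       store8(&output[out_idx + c], acc);
+//   }
+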
+ + .text + .align 4 + .literal_position + + # Program Unit: esp_nn_max_pool_s8_esp32s3 + .type esp_nn_max_pool_s8_esp32s3, @function + .align 4 + .global esp_nn_max_pool_s8_esp32s3 + +// no of channels must be multiple of 4 + +esp_nn_max_pool_s8_esp32s3: # 0x4 + # int8_min = 0 + # gra_spill_temp_0 = 4 + # gra_spill_temp_1 = 8 + # gra_spill_temp_2 = 12 + # gra_spill_temp_3 = 16 + # gra_spill_temp_4 = 20 + # gra_spill_temp_5 = 24 + # gra_spill_temp_6 = 28 + # gra_spill_temp_7 = 32 + # gra_spill_temp_8 = 36 + # gra_spill_temp_9 = 40 + # gra_spill_temp_10 = 44 + # gra_spill_temp_11 = 48 + # gra_spill_temp_12 = 52 + # gra_spill_temp_13 = 56 + # gra_spill_temp_14 = 60 + # gra_spill_temp_15 = 64 + # gra_spill_temp_16 = 68 + # gra_spill_temp_17 = 72 + # gra_spill_temp_18 = 76 + # gra_spill_temp_19 = 80 + # gra_spill_temp_20 = 84 + # gra_spill_temp_21 = 88 + # gra_spill_temp_22 = 92 + # gra_spill_temp_23 = 96 + +// a2: input +// a3: input_wd +// a4: input_ht +// a5: output +// a6: output_wd +// a7: output_ht +// on stack: stride_wd = 120 +// on stack: stride_ht = 124 +// on stack: filter_wd = 128 +// on stack: filter_ht = 132 +// on stack: pad_wd = 136 +// on stack: pad_ht = 140 +// on stack: activation_min +// on stack: activation_max +// on stack: channels + + + entry a1,120 # + mov.n a12,a2 # [0] + s32i a6,a1,4 # [2] gra_spill_temp_0 + s32i a7,a1,68 # [3] gra_spill_temp_16 + mov.n a11,a3 # [4] + s32i a5,a1,96 # [5] gra_spill_temp_23 + + l16ui a5,a1,152 # [6] id:465 channels+0x0 + movi a3,-128 # [7] + s32i.n a3,a1,0 # [1] int8_min + + addi.n a9,a1,148 # [0] activation_max + addi.n a15,a1,144 # [1] activation_min + ee.vldbc.8 q3,a1 # [7] id:473 int8_min+0x0 + ee.vldbc.8 q5,a15 # [8] id:470 activation_min+0x0 + ee.vldbc.8 q4,a9 # [9] id:471 activation_max+0x0 + + extui a8,a5,0,3 # [8] + beqz.n a8,.LBB3_esp_nn_max_pool_s8_esp32s3 # [9] // if (channels % 8 == 0) + + extui a14,a5,0,2 # [0] + beqz.n a14,.LBB25_esp_nn_max_pool_s8_esp32s3 # [1] // if (channels % 4 == 0) + + retw.n # [0] // exit + +.LBB3_esp_nn_max_pool_s8_esp32s3: # 0x1c5 // if (channels % 8 == 0) + + l16ui a15,a1,136 # [1] id:475 pad_wd+0x0 + l16ui a14,a1,140 # [4] id:474 pad_ht+0x0 + movi.n a8,0 # [13] + movi.n a10,0 # [15] + s32i a14,a1,44 # [7] gra_spill_temp_10 + neg a15,a15 # [12] + mul16u a9,a6,a5 # [14] + neg a14,a14 # [16] + s32i a14,a1,92 # [17] gra_spill_temp_22 + s32i a10,a1,52 # [18] gra_spill_temp_12 + s32i a9,a1,60 # [19] gra_spill_temp_14 + s32i.n a8,a1,36 # [16] gra_spill_temp_8 + s32i a15,a1,56 # [21] gra_spill_temp_13 + sub a13,a4,a14 # [22] + s32i a13,a1,48 # [23] gra_spill_temp_11 + sub a15,a11,a15 # [24] + s32i.n a15,a1,40 # [25] gra_spill_temp_9 + +.Lt_0_21506: # 0x229 + l32i a8,a1,4 # [0] gra_spill_temp_0 + beqz.n a8,.Lt_0_21762 # [2] + + movi.n a10,0 # [0] + l32i a9,a1,44 # [1] gra_spill_temp_10 + l32i.n a15,a1,40 # [2] gra_spill_temp_9 + l32i a8,a1,52 # [3] gra_spill_temp_12 + l32i.n a13,a1,136 # [4] ,pad_wd + l32i a14,a1,56 # [5] gra_spill_temp_13 + s32i a14,a1,80 # [6] gra_spill_temp_19 + s32i a13,a1,76 # [7] gra_spill_temp_18 + s32i a8,a1,88 # [8] gra_spill_temp_21 + s32i a15,a1,84 # [9] gra_spill_temp_20 + l32i a8,a1,48 # [10] gra_spill_temp_11 + max a9,a9,a10 # [11] + l32i a15,a1,132 # [12] filter_ht + s32i a9,a1,8 # [13] gra_spill_temp_1 + movi.n a9,0 # [14] + min a15,a15,a8 # [15] + s32i a9,a1,64 # [16] gra_spill_temp_15 + +.Lt_0_22274: # 0x25d + beqz.n a5,.Lt_0_22530 # [0] + +.LBB10_esp_nn_max_pool_s8_esp32s3: # 0x25f +# Part of loop body line 46, head labeled .Lt_0_22274 + l32i a6,a1,76 # [0] 
gra_spill_temp_18 + l32i a13,a1,96 # [1] gra_spill_temp_23 + l32i a8,a1,84 # [2] gra_spill_temp_20 + l32i a7,a1,128 # [3] filter_wd + l32i a10,a1,88 # [4] gra_spill_temp_21 + movi.n a9,0 # [5] + s32i a9,a1,20 # [6] gra_spill_temp_4 + add.n a14,a10,a5 # [7] + min a7,a7,a8 # [8] + add.n a10,a10,a13 # [9] + add.n a14,a13,a14 # [10] + s32i a14,a1,12 # [11] gra_spill_temp_2 + s32i a10,a1,16 # [12] gra_spill_temp_3 + movi.n a8,0 # [13] + l32i a10,a1,80 # [14] gra_spill_temp_19 + max a6,a6,a8 # [15] + sub a9,a7,a6 # [16] + s32i a9,a1,28 # [17] gra_spill_temp_6 + add.n a13,a10,a6 # [18] + s32i a13,a1,24 # [19] gra_spill_temp_5 + add.n a10,a10,a7 # [16] + s32i a10,a1,72 # [21] gra_spill_temp_17 + +.Lt_0_23042: # 0x29a + l32i a8,a1,8 # [0] gra_spill_temp_1 + mv.qr q1,q3 # [1] + mov.n a13,a8 # [2] + bge a8,a15,.Lt_0_23298 # [3] + +.LBB13_esp_nn_max_pool_s8_esp32s3: # 0x2a5 +# Part of loop body line 40, head labeled .Lt_0_23042 + l32i a10,a1,92 # [0] gra_spill_temp_22 + l32i a14,a1,72 # [1] gra_spill_temp_17 + add.n a10,a10,a8 # [2] + mull a10,a11,a10 # [3] + add.n a14,a10,a14 # [5] + +.Lt_0_23810: # 0x2b2 + add.n a14,a14,a11 # [0] + addi.n a13,a13,1 # [1] + bge a6,a7,.Lt_0_24066 # [2] + +.LBB16_esp_nn_max_pool_s8_esp32s3: # 0x2b9 + l32i a3,a1,24 # [0] gra_spill_temp_5 + l32i a2,a1,20 # [1] gra_spill_temp_4 + add.n a3,a3,a10 # [2] + mull a3,a3,a5 # [3] + add.n a2,a2,a3 # [5] + l32i a3,a1,28 # [6] gra_spill_temp_6 + add.n a2,a12,a2 # [7] + loopgtz a3,.LBB93_esp_nn_max_pool_s8_esp32s3 # [8] + + ee.vld.l.64.ip q0,a2,0 # [0*II+1] id:481 + add.n a2,a2,a5 # [0*II+2] + ee.vmax.s8 q1,q1,q0 # [0*II+3] +.LBB93_esp_nn_max_pool_s8_esp32s3: # 0x2d8 + +.Lt_0_24066: # 0x2d8 + add.n a10,a10,a11 # [0] + bne a15,a13,.Lt_0_23810 # [1] + +.Lt_0_23298: # 0x2dd + l32i a9,a1,12 # [0] gra_spill_temp_2 + l32i a13,a1,20 # [1] gra_spill_temp_4 + l32i a8,a1,16 # [2] gra_spill_temp_3 + ee.vmin.s8 q2,q1,q4 # [3] + ee.vmax.s8 q2,q2,q5 # [4] + mov.n a10,a8 # [5] + addi.n a13,a13,8 # [6] + s32i a13,a1,20 # [7] gra_spill_temp_4 + ee.vst.l.64.ip q2,a10,0 # [8] id:482 + addi.n a8,a8,8 # [9] + s32i a8,a1,16 # [10] gra_spill_temp_3 + blt a8,a9,.Lt_0_23042 # [11] + +.Lt_0_22530: # 0x2fe + l32i a13,a1,84 # [0] gra_spill_temp_20 + l32i a14,a1,80 # [1] gra_spill_temp_19 + l32i a10,a1,120 # [2] stride_wd + l32i a8,a1,88 # [3] gra_spill_temp_21 + l32i a9,a1,76 # [4] gra_spill_temp_18 + add.n a8,a8,a5 # [5] + s32i a8,a1,88 # [6] gra_spill_temp_21 + sub a9,a9,a10 # [7] + add.n a14,a14,a10 # [8] + sub a13,a13,a10 # [9] + s32i a13,a1,84 # [10] gra_spill_temp_20 + s32i a14,a1,80 # [11] gra_spill_temp_19 + s32i a9,a1,76 # [12] gra_spill_temp_18 + l32i a14,a1,64 # [13] gra_spill_temp_15 + l32i a8,a1,4 # [14] gra_spill_temp_0 + addi.n a14,a14,1 # [15] + s32i a14,a1,64 # [16] gra_spill_temp_15 + sub a14,a14,a8 # [17] + bnez a14,.Lt_0_22274 # [18] + +.Lt_0_21762: # 0x334 +# Part of loop body line 20, head labeled .Lt_0_21506 + l32i a8,a1,44 # [0] gra_spill_temp_10 + l32i a15,a1,92 # [1] gra_spill_temp_22 + l32i a10,a1,60 # [2] gra_spill_temp_14 + l32i a14,a1,124 # [3] stride_ht + l32i a13,a1,48 # [4] gra_spill_temp_11 + l32i a9,a1,52 # [5] gra_spill_temp_12 + sub a13,a13,a14 # [6] + add.n a9,a9,a10 # [7] + add.n a15,a15,a14 # [8] + sub a8,a8,a14 # [9] + s32i a8,a1,44 # [10] gra_spill_temp_10 + s32i a15,a1,92 # [11] gra_spill_temp_22 + s32i a9,a1,52 # [12] gra_spill_temp_12 + s32i a13,a1,48 # [13] gra_spill_temp_11 + l32i.n a9,a1,36 # [14] gra_spill_temp_8 + l32i a10,a1,68 # [15] gra_spill_temp_16 + addi.n a9,a9,1 # [16] + s32i.n a9,a1,36 # [17] 
gra_spill_temp_8 + sub a9,a9,a10 # [18] + bnez a9,.Lt_0_21506 # [19] + + retw.n # [0] // exit + +.LBB25_esp_nn_max_pool_s8_esp32s3: # 0x36d // if (channels % 4 == 0) + + l16ui a10,a1,136 # [1] id:475 pad_wd+0x0 + l16ui a9,a1,140 # [4] id:474 pad_ht+0x0 + movi.n a13,0 # [13] + movi.n a15,0 # [15] + neg a10,a10 # [12] + s32i a9,a1,44 # [7] gra_spill_temp_10 + mul16u a14,a6,a5 # [14] + neg a9,a9 # [16] + s32i a9,a1,92 # [17] gra_spill_temp_22 + s32i a15,a1,52 # [18] gra_spill_temp_12 + s32i a14,a1,60 # [19] gra_spill_temp_14 + s32i.n a13,a1,36 # [16] gra_spill_temp_8 + s32i a10,a1,56 # [21] gra_spill_temp_13 + sub a8,a4,a9 # [22] + s32i a8,a1,48 # [23] gra_spill_temp_11 + sub a10,a11,a10 # [24] + s32i.n a10,a1,40 # [25] gra_spill_temp_9 + +.Lt_0_27138: # 0x3d5 + l32i a13,a1,4 # [0] gra_spill_temp_0 + beqz.n a13,.Lt_0_27394 # [2] + +.LBB29_esp_nn_max_pool_s8_esp32s3: # 0x3da +# Part of loop body line 107, head labeled .Lt_0_27138 + movi.n a10,0 # [0] + l32i a9,a1,44 # [1] gra_spill_temp_10 + l32i.n a15,a1,40 # [2] gra_spill_temp_9 + l32i a8,a1,52 # [3] gra_spill_temp_12 + l32i a14,a1,56 # [4] gra_spill_temp_13 + l32i.n a13,a1,136 # [5] pad_wd + s32i a13,a1,76 # [6] gra_spill_temp_18 + s32i a14,a1,80 # [7] gra_spill_temp_19 + s32i a8,a1,88 # [8] gra_spill_temp_21 + s32i a15,a1,84 # [9] gra_spill_temp_20 + l32i a8,a1,48 # [10] gra_spill_temp_11 + l32i a15,a1,132 # [11] filter_ht + movi.n a14,0 # [12] + max a9,a9,a10 # [13] + s32i a9,a1,8 # [14] gra_spill_temp_1 + s32i a14,a1,64 # [15] gra_spill_temp_15 + min a15,a15,a8 # [16] + +.Lt_0_27906: # 0x409 +# Loop body line 109, nesting depth: 2, estimated iterations: 56 + beqz.n a5,.Lt_0_28162 # [0] + +.LBB32_esp_nn_max_pool_s8_esp32s3: # 0x40b +# Part of loop body line 109, head labeled .Lt_0_27906 + l32i a6,a1,76 # [0] gra_spill_temp_18 + l32i a13,a1,96 # [1] gra_spill_temp_23 + l32i a8,a1,84 # [2] gra_spill_temp_20 + l32i a7,a1,128 # [3] filter_wd + l32i a10,a1,88 # [4] gra_spill_temp_21 + movi.n a9,0 # [5] + s32i a9,a1,32 # [6] gra_spill_temp_7 + add.n a14,a10,a5 # [7] + min a7,a7,a8 # [8] + add.n a10,a10,a13 # [9] + add.n a14,a13,a14 # [10] + s32i a14,a1,12 # [11] gra_spill_temp_2 + s32i a10,a1,16 # [12] gra_spill_temp_3 + movi.n a8,0 # [13] + l32i a10,a1,80 # [14] gra_spill_temp_19 + max a6,a6,a8 # [15] + sub a9,a7,a6 # [16] + s32i a9,a1,28 # [17] gra_spill_temp_6 + add.n a13,a10,a6 # [18] + s32i a13,a1,24 # [19] gra_spill_temp_5 + add.n a10,a10,a7 # [16] + s32i a10,a1,72 # [21] gra_spill_temp_17 + +.Lt_0_28674: # 0x446 +# Loop body line 8, nesting depth: 3, estimated iterations: 56 + l32i a8,a1,8 # [0] gra_spill_temp_1 + mv.qr q1,q3 # [1] + mov.n a13,a8 # [2] + bge a8,a15,.Lt_0_28930 # [3] + +.LBB35_esp_nn_max_pool_s8_esp32s3: # 0x451 +# Part of loop body line 8, head labeled .Lt_0_28674 + l32i a10,a1,92 # [0] gra_spill_temp_22 + l32i a14,a1,72 # [1] gra_spill_temp_17 + add.n a10,a10,a8 # [2] + mull a10,a11,a10 # [3] + add.n a14,a10,a14 # [5] + +.Lt_0_29442: # 0x45e + add.n a14,a14,a11 # [0] + addi.n a13,a13,1 # [1] + bge a6,a7,.Lt_0_29698 # [2] + +.LBB38_esp_nn_max_pool_s8_esp32s3: # 0x465 + l32i a3,a1,24 # [0] gra_spill_temp_5 + l32i a2,a1,32 # [1] gra_spill_temp_7 + add.n a3,a3,a10 # [2] + mull a3,a3,a5 # [3] + l32i a4,a1,28 # [4] gra_spill_temp_6 + add.n a2,a2,a3 # [5] + add.n a2,a12,a2 # [6] + loopgtz a4,.LBB108_esp_nn_max_pool_s8_esp32s3 # [7] + + ee.vldbc.32 q0,a2 # [0*II+0] id:489 + add.n a2,a2,a5 # [0*II+1] + ee.vmax.s8 q1,q1,q0 # [0*II+2] +.LBB108_esp_nn_max_pool_s8_esp32s3: # 0x482 + +.Lt_0_29698: # 0x482 + add.n a10,a10,a11 # [0] + 
bne a15,a13,.Lt_0_29442 # [1] + +.Lt_0_28930: # 0x487 +# Part of loop body line 8, head labeled .Lt_0_28674 + l32i a9,a1,12 # [0] gra_spill_temp_2 + l32i a8,a1,16 # [1] gra_spill_temp_3 + l32i a10,a1,32 # [3] gra_spill_temp_7 + + ee.vmin.s8 q5,q1,q4 # [4] + ee.vmax.s8 q5,q5,q5 # [5] + addi.n a10,a10,4 # [6] + ee.movi.32.a q5,a13,0 + s32i a10,a1,32 # [9] gra_spill_temp_7 + s32i.n a13,a8,0 # [10] id:492 + addi.n a8,a8,4 # [11] + s32i a8,a1,16 # [12] gra_spill_temp_3 + blt a8,a9,.Lt_0_28674 # [13] + +.Lt_0_28162: # 0x4ad +# Part of loop body line 109, head labeled .Lt_0_27906 + l32i a13,a1,84 # [0] gra_spill_temp_20 + l32i a14,a1,80 # [1] gra_spill_temp_19 + l32i a10,a1,120 # [2] stride_wd + l32i a8,a1,88 # [3] gra_spill_temp_21 + l32i a9,a1,76 # [4] gra_spill_temp_18 + add.n a8,a8,a5 # [5] + s32i a8,a1,88 # [6] gra_spill_temp_21 + sub a9,a9,a10 # [7] + add.n a14,a14,a10 # [8] + sub a13,a13,a10 # [9] + s32i a13,a1,84 # [10] gra_spill_temp_20 + s32i a14,a1,80 # [11] gra_spill_temp_19 + s32i a9,a1,76 # [12] gra_spill_temp_18 + l32i a14,a1,64 # [13] gra_spill_temp_15 + l32i a8,a1,4 # [14] gra_spill_temp_0 + addi.n a14,a14,1 # [15] + s32i a14,a1,64 # [16] gra_spill_temp_15 + sub a14,a14,a8 # [17] + bnez a14,.Lt_0_27906 # [18] + +.Lt_0_27394: # 0x4e3 +# Part of loop body line 107, head labeled .Lt_0_27138 + l32i a8,a1,44 # [0] gra_spill_temp_10 + l32i a15,a1,92 # [1] gra_spill_temp_22 + l32i a10,a1,60 # [2] gra_spill_temp_14 + l32i a14,a1,124 # [3] stride_ht + l32i a13,a1,48 # [4] gra_spill_temp_11 + l32i a9,a1,52 # [5] gra_spill_temp_12 + sub a13,a13,a14 # [6] + add.n a9,a9,a10 # [7] + add.n a15,a15,a14 # [8] + sub a8,a8,a14 # [9] + s32i a8,a1,44 # [10] gra_spill_temp_10 + s32i a15,a1,92 # [11] gra_spill_temp_22 + s32i a9,a1,52 # [12] gra_spill_temp_12 + s32i a13,a1,48 # [13] gra_spill_temp_11 + l32i.n a9,a1,36 # [14] gra_spill_temp_8 + l32i a10,a1,68 # [15] gra_spill_temp_16 + addi.n a9,a9,1 # [16] + s32i.n a9,a1,36 # [17] gra_spill_temp_8 + sub a9,a9,a10 # [18] + bnez a9,.Lt_0_27138 # [19] + + retw.n # [0] // exit + + .size esp_nn_max_pool_s8_esp32s3, . - esp_nn_max_pool_s8_esp32s3 + +#elif defined(WIO_TERMINAL) +// dummy code, added for old ARM toolchain +.syntax unified +.thumb +.cpu cortex-m0 + +.section .text +#endif // EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3 diff --git a/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/softmax/esp_nn_softmax_ansi.c b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/softmax/esp_nn_softmax_ansi.c new file mode 100644 index 0000000..219f3c0 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/softmax/esp_nn_softmax_ansi.c @@ -0,0 +1,92 @@ +#include "edge-impulse-sdk/classifier/ei_classifier_config.h" +#if EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN +// Copyright 2022 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
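+
+// The fixed-point routine below approximates the usual row-wise softmax. As a
+// float reference (for orientation only, not part of the implementation):
+//
+//   for each row:
+//       m   = max over col of in[col];
+//       sum = sum over col of exp(in[col] - m);
+//       out[col] = quantize(exp(in[col] - m) / sum);   // int8 output, offset by -128
+//
+// Because the row maximum is subtracted first, exp() only ever sees non-positive
+// arguments, which is why the helpers in softmax_common.h are written for x <= 0.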
+ +#include "softmax_common.h" + +int32_t esp_nn_get_softmax_scratch_size_ansi(const int32_t width, const int32_t height) +{ + (void) width; + (void) height; + return 0; +} + +void esp_nn_set_softmax_scratch_buf_ansi(void *buffer) +{ + (void) buffer; + return; +} + +void esp_nn_softmax_s8_ansi(const int8_t *input_data, + const int32_t height, + const int32_t width, + const int32_t mult, + const int32_t shift, + const int32_t diff_min, + int8_t *output_data) +{ + // The representation chosen for the input to the exp() function is Q5.26. + // We need to leave extra space since values that we skip might be as large as + // -32 before multiplying by input mult, and therefore as large as + // -16 afterwards. Note that exp(-8) is definitely not insignificant to + // accumulation, but exp(-16) definitely is. +#define ACCUM_BITS 12 +#define DIFF_BITS 5 + + const int32_t mask = (1 << shift); + int32_t col = 0; + const int8_t *in_ptr = input_data; + int8_t *out_ptr = output_data; + + for (int row_idx = 0; row_idx < height; row_idx++) { + int8_t max_in_row = in_ptr[0]; + for (col = 1; col < width; col++) { + max_in_row = max(max_in_row, in_ptr[col]); + } + + int32_t input_diff = 0; + int32_t sum_of_exps = 0; + + for (col = 0; col < width; col++) { + input_diff = in_ptr[col] - max_in_row; + if (input_diff >= diff_min) { + const int32_t input_diff_rescaled = SAT_HIGH_MUL(input_diff * mask, mult); + const int32_t exp_raw = esp_nn_exp_on_negative_values(input_diff_rescaled); + sum_of_exps += DIV_POW2(exp_raw, ACCUM_BITS); + } + } + + const int32_t headroom_plus1 = esp_nn_clz32((uint32_t) sum_of_exps); + const int32_t shifted_scale = ONE_OVER_ONE_X((sum_of_exps << headroom_plus1) - (1 << 31)); + const int32_t bits_over_unit = ACCUM_BITS - headroom_plus1 + 31 - sizeof(int8_t) * 8; + + for (col = 0; col < width; col++) { + input_diff = in_ptr[col] - max_in_row; + if (input_diff >= diff_min) { + const int32_t input_diff_rescaled = SAT_HIGH_MUL(input_diff * mask, mult); + const int32_t exp_raw = esp_nn_exp_on_negative_values(input_diff_rescaled); + const int32_t shifted_output = SAT_HIGH_MUL(shifted_scale, exp_raw); + const int32_t result = DIV_POW2(shifted_output, bits_over_unit) - 128; + out_ptr[col] = (int8_t) esp_nn_saturate8(result); + } else { + out_ptr[col] = -128; + } + } + in_ptr += width; + out_ptr += width; + } +} + +#endif // EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN diff --git a/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/softmax/esp_nn_softmax_opt.c b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/softmax/esp_nn_softmax_opt.c new file mode 100644 index 0000000..8d001eb --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/softmax/esp_nn_softmax_opt.c @@ -0,0 +1,112 @@ +#include "edge-impulse-sdk/classifier/ei_classifier_config.h" +#if EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN +// Copyright 2022 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
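+
+// Unlike the ANSI version, this variant caches the exp() terms in a caller-provided
+// scratch buffer so they are not recomputed in the second pass. A minimal usage
+// sketch (values such as mult/shift/diff_min come from the layer's quantization
+// parameters and are illustrative here):
+//
+//   int32_t size = esp_nn_get_softmax_scratch_size_opt(width, height);
+//   void *scratch = malloc(size);                  /* must be 4-byte aligned */
+//   esp_nn_set_softmax_scratch_buf_opt(scratch);
+//   esp_nn_softmax_s8_opt(input, height, width, mult, shift, diff_min, output);
+//   esp_nn_set_softmax_scratch_buf_opt(NULL);      /* optional: unset afterwards */
+//   free(scratch);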
+ +#include "softmax_common.h" +#include + +static int32_t *scratch_buf = NULL; + +/** + * @brief Get scratch buffer size needed by softmax function + * + * @param width + * @param height + * @return size in bytes + * + * @note buffer must be 4 byte aligned + */ +int32_t esp_nn_get_softmax_scratch_size_opt(const int32_t width, const int32_t height) +{ + (void) height; + return width * 4; +} + +/** + * @brief Set scratch buffer to be used by softmax function + * + * @param buffer this can be NULL if one needs to unset it + * must be aligned to 4 bytes + */ +void esp_nn_set_softmax_scratch_buf_opt(void *buffer) +{ + scratch_buf = (int32_t *) buffer; +} + +void esp_nn_softmax_s8_opt(const int8_t *input_data, + const int32_t height, + const int32_t width, + const int32_t mult, + const int32_t shift, + const int32_t diff_min, + int8_t *output_data) +{ + if (scratch_buf == NULL) { + printf("%s error! scratch buffer not set\n", __FUNCTION__); + return; + } + // The representation chosen for the input to the exp() function is Q5.26. + // We need to leave extra space since values that we skip might be as large as + // -32 before multiplying by input mult, and therefore as large as + // -16 afterwards. Note that exp(-8) is definitely not insignificant to + // accumulation, but exp(-16) definitely is. +#define ACCUM_BITS 12 +#define DIFF_BITS 5 + + const int32_t mask = (1 << shift); + int32_t col = 0; + const int8_t *in_ptr = input_data; + int8_t *out_ptr = output_data; + + for (int row_idx = 0; row_idx < height; row_idx++) { + int8_t max_in_row = in_ptr[0]; + for (col = 1; col < width; col++) { + max_in_row = max(max_in_row, in_ptr[col]); + } + + int32_t input_diff = 0; + int32_t sum_of_exps = 0; + + for (col = 0; col < width; col++) { + input_diff = in_ptr[col] - max_in_row; + if (input_diff >= diff_min) { + const int32_t input_diff_rescaled = SAT_HIGH_MUL(input_diff * mask, mult); + const int32_t exp_raw = esp_nn_exp_on_negative_values(input_diff_rescaled); + scratch_buf[col] = exp_raw; // store to avoid duplicate calculation later + sum_of_exps += DIV_POW2(exp_raw, ACCUM_BITS); + } + } + + const int32_t headroom_plus1 = esp_nn_clz32((uint32_t) sum_of_exps); + const int32_t shifted_scale = ONE_OVER_ONE_X((sum_of_exps << headroom_plus1) - (1 << 31)); + const int32_t bits_over_unit = ACCUM_BITS - headroom_plus1 + 31 - sizeof(int8_t) * 8; + + for (col = 0; col < width; col++) { + input_diff = in_ptr[col] - max_in_row; + if (input_diff >= diff_min) { + int32_t exp_raw = scratch_buf[col]; + const int32_t shifted_output = SAT_HIGH_MUL(shifted_scale, exp_raw); + const int32_t result = DIV_POW2(shifted_output, bits_over_unit) - 128; + out_ptr[col] = (int8_t) esp_nn_saturate8(result); + } else { + out_ptr[col] = -128; + } + } + in_ptr += width; + out_ptr += width; + } +} + +#endif // EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN diff --git a/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/softmax/softmax_common.h b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/softmax/softmax_common.h new file mode 100644 index 0000000..6d1847a --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/espressif/ESP-NN/src/softmax/softmax_common.h @@ -0,0 +1,104 @@ +// Copyright 2022 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +#define MASK_IF_ZERO(x) (x) == 0 ? ~0 : 0 +#define MASK_IF_NON_ZERO(x) (x) != 0 ? ~0 : 0 +#define SELECT_USING_MASK(mask, a, b) ((mask) & (a)) ^ (~(mask) & (b)) +#define SAT_HIGH_MUL(x, y) esp_nn_sat_round_doubling_high_mul((x), (y)) +#define DIV_POW2(x,y) esp_nn_div_by_power_of_two((x), (y)) + +__NN_FORCE_INLINE__ int32_t mul_power_of_2(int val, int exp) +{ + const int32_t thresh = ((1 << (31 - exp)) - 1); + int32_t result = val << exp; + result = SELECT_USING_MASK(MASK_IF_NON_ZERO(val > thresh), INT32_MAX, result); + result = SELECT_USING_MASK(MASK_IF_NON_ZERO(val < -thresh), INT32_MIN, result); + return result; +} + +/** + * @brief Calculate `1 / (1 + x)` for x in [0, 1] + * + * @param val input value to calculate `1/(1+x)` for + * @return `int32_t` result + * @note Newton-Raphson division + * + * https://en.wikipedia.org/wiki/Division_algorithm#Newton.E2.80.93Raphson_division + * Refer to that page for the logic behind the 48/17 and 32/17 constants. + * Pseudocode: https://en.wikipedia.org/wiki/Division_algorithm#Pseudocode + */ +__NN_FORCE_INLINE__ int32_t esp_nn_one_over_one_plus_x_for_x_in_0_1(int32_t val) +{ + const int64_t sum = (int64_t) val + INT32_MAX; + const int32_t half_denominator = (int32_t) ((sum + (sum >= 0 ? 1 : -1)) / 2L); + int32_t constant_48_over_17 = 1515870810; + int32_t constant_neg_32_over_17 = -1010580540; + int32_t x = constant_48_over_17 + SAT_HIGH_MUL(half_denominator, constant_neg_32_over_17); + const int32_t fixed_2_one = (1 << 29); + + x += mul_power_of_2(SAT_HIGH_MUL(x, fixed_2_one - SAT_HIGH_MUL(half_denominator, x)), 2); + x += mul_power_of_2(SAT_HIGH_MUL(x, fixed_2_one - SAT_HIGH_MUL(half_denominator, x)), 2); + x += mul_power_of_2(SAT_HIGH_MUL(x, fixed_2_one - SAT_HIGH_MUL(half_denominator, x)), 2); + + return mul_power_of_2(x, 1); +} + +#define ONE_OVER_ONE_X(x) esp_nn_one_over_one_plus_x_for_x_in_0_1((x)) + +/** + * @brief Return exp(x) for x < 0. 
+ * + */ +__NN_FORCE_INLINE__ int32_t esp_nn_exp_on_negative_values(int32_t val) +{ + int32_t shift = 24; + + const int32_t one_quarter = (1 << shift); + int32_t mask = one_quarter - 1; + const int32_t val_mod_minus_quarter = (val & mask) - one_quarter; + const int32_t remainder = val_mod_minus_quarter - val; + + // calculate exponent for x in [-1/4, 0) in `result` + const int32_t x = (val_mod_minus_quarter << 5) + (1 << 28); + const int32_t x2 = SAT_HIGH_MUL(x, x); + const int32_t x3 = SAT_HIGH_MUL(x2, x); + const int32_t x4 = SAT_HIGH_MUL(x2, x2); + const int32_t one_over_3 = 715827883; + const int32_t one_over_8 = 1895147668; + + const int32_t x4_over_4 = DIV_POW2(x4, 2); + const int32_t x4_over_4_plus_x3_over_6_plus_x2_over_2 = DIV_POW2(SAT_HIGH_MUL(x4_over_4 + x3, one_over_3) + x2, 1); + int32_t result = one_over_8 + SAT_HIGH_MUL(one_over_8, x + x4_over_4_plus_x3_over_6_plus_x2_over_2); + +#define SELECT_IF_NON_ZERO(x) { \ + mask = MASK_IF_NON_ZERO(remainder & (1 << shift++)); \ + result = SELECT_USING_MASK(mask, SAT_HIGH_MUL(result, x), result); \ +} + + SELECT_IF_NON_ZERO(1672461947) + SELECT_IF_NON_ZERO(1302514674) + SELECT_IF_NON_ZERO(790015084) + SELECT_IF_NON_ZERO(290630308) + SELECT_IF_NON_ZERO(39332535) + SELECT_IF_NON_ZERO(720401) + SELECT_IF_NON_ZERO(242) + +#undef SELECT_IF_NON_ZERO + + mask = MASK_IF_ZERO(val); + return SELECT_USING_MASK(mask, INT32_MAX, result); +} \ No newline at end of file diff --git a/edgeimpulse/edge-impulse-sdk/porting/espressif/debug_log.cpp b/edgeimpulse/edge-impulse-sdk/porting/espressif/debug_log.cpp new file mode 100644 index 0000000..be789c9 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/espressif/debug_log.cpp @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "../ei_classifier_porting.h" +#if EI_PORTING_ESPRESSIF == 1 + +#include "edge-impulse-sdk/tensorflow/lite/micro/debug_log.h" +#include +#include + +// On mbed platforms, we set up a serial port and write to it for debug logging. +#if defined(__cplusplus) && EI_C_LINKAGE == 1 +extern "C" +#endif // defined(__cplusplus) && EI_C_LINKAGE == 1 +void DebugLog(const char* s) { + ei_printf("%s", s); +} + +#endif // EI_PORTING_ESPRESSIF diff --git a/edgeimpulse/edge-impulse-sdk/porting/espressif/ei_classifier_porting.cpp b/edgeimpulse/edge-impulse-sdk/porting/espressif/ei_classifier_porting.cpp new file mode 100644 index 0000000..5e6e74c --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/espressif/ei_classifier_porting.cpp @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "../ei_classifier_porting.h" +#if EI_PORTING_ESPRESSIF == 1 + +#include +#include +#include +#include +// Include FreeRTOS for delay +#include +#include + +// for millis and micros +#include "esp_timer.h" + +#define EI_WEAK_FN __attribute__((weak)) + +EI_WEAK_FN EI_IMPULSE_ERROR ei_run_impulse_check_canceled() { + return EI_IMPULSE_OK; +} + +EI_WEAK_FN EI_IMPULSE_ERROR ei_sleep(int32_t time_ms) { + vTaskDelay(time_ms / portTICK_RATE_MS); + return EI_IMPULSE_OK; +} + +uint64_t ei_read_timer_ms() { + return esp_timer_get_time()/1000; +} + +uint64_t ei_read_timer_us() { + return esp_timer_get_time(); +} + +void ei_putchar(char c) +{ + /* Send char to serial output */ + putchar(c); +} + +/** + * Printf function uses vsnprintf and output using USB Serial + */ +__attribute__((weak)) void ei_printf(const char *format, ...) { + static char print_buf[1024] = { 0 }; + + va_list args; + va_start(args, format); + int r = vsnprintf(print_buf, sizeof(print_buf), format, args); + va_end(args); + + if (r > 0) { + printf(print_buf); + } +} + +__attribute__((weak)) void ei_printf_float(float f) { + ei_printf("%f", f); +} + +// we use alligned alloc instead of regular malloc +// due to https://github.com/espressif/esp-nn/issues/7 +__attribute__((weak)) void *ei_malloc(size_t size) { +#if defined(CONFIG_IDF_TARGET_ESP32S3) + return aligned_alloc(16, size); +#endif + return malloc(size); +} + +__attribute__((weak)) void *ei_calloc(size_t nitems, size_t size) { +#if defined(CONFIG_IDF_TARGET_ESP32S3) + void *p; + p = aligned_alloc(16, nitems * size); + if (p == nullptr) + return p; + + memset(p, '\0', nitems * size); + return p; +#endif + return calloc(nitems, size); +} + +__attribute__((weak)) void ei_free(void *ptr) { + free(ptr); +} + +#if defined(__cplusplus) && EI_C_LINKAGE == 1 +extern "C" +#endif +__attribute__((weak)) void DebugLog(const char* s) { + ei_printf("%s", s); +} + +#endif // EI_PORTING_ESPRESSIF == 1 diff --git a/edgeimpulse/edge-impulse-sdk/porting/ethos-core-driver/.clang-format b/edgeimpulse/edge-impulse-sdk/porting/ethos-core-driver/.clang-format new file mode 100644 index 0000000..9a7ae82 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/ethos-core-driver/.clang-format @@ -0,0 +1,144 @@ +# +# Copyright (c) 2019-2020 Arm Limited. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the License); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an AS IS BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +--- +Language: Cpp +AccessModifierOffset: -2 +AlignAfterOpenBracket: Align +AlignConsecutiveMacros: false +AlignConsecutiveAssignments: true +AlignConsecutiveDeclarations: false +AlignEscapedNewlines: Right +AlignOperands: true +AlignTrailingComments: true +AllowAllArgumentsOnNextLine: true +AllowAllConstructorInitializersOnNextLine: true +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortBlocksOnASingleLine: false +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: Empty +AllowShortLambdasOnASingleLine: All +AllowShortIfStatementsOnASingleLine: Never +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: false +AlwaysBreakTemplateDeclarations: MultiLine +BinPackArguments: false +BinPackParameters: false +BraceWrapping: + AfterCaseLabel: true + AfterClass: true + AfterControlStatement: true + AfterEnum: true + AfterFunction: true + AfterNamespace: true + AfterObjCDeclaration: true + AfterStruct: true + AfterUnion: true + AfterExternBlock: false + BeforeCatch: true + BeforeElse: true + IndentBraces: false + SplitEmptyFunction: true + SplitEmptyRecord: true + SplitEmptyNamespace: true +BreakBeforeBinaryOperators: None +BreakBeforeBraces: Custom +BreakBeforeInheritanceComma: true +BreakInheritanceList: AfterColon +BreakBeforeTernaryOperators: false +BreakConstructorInitializersBeforeComma: true +BreakConstructorInitializers: AfterColon +BreakAfterJavaFieldAnnotations: false +BreakStringLiterals: true +ColumnLimit: 120 +CommentPragmas: '^ IWYU pragma:' +CompactNamespaces: false +ConstructorInitializerAllOnOneLineOrOnePerLine: false +ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: true +DerivePointerAlignment: false +DisableFormat: false +ExperimentalAutoDetectBinPacking: false +FixNamespaceComments: true +ForEachMacros: + - foreach + - Q_FOREACH + - BOOST_FOREACH +IncludeBlocks: Preserve +IncludeCategories: + - Regex: '^"(llvm|llvm-c|clang|clang-c)/' + Priority: 2 + - Regex: '^(<|"(gtest|gmock|isl|json)/)' + Priority: 3 + - Regex: '.*' + Priority: 1 +IncludeIsMainRegex: '(Test)?$' +IndentCaseLabels: false +IndentPPDirectives: None +IndentWidth: 4 +IndentWrappedFunctionNames: false +JavaScriptQuotes: Leave +JavaScriptWrapImports: true +KeepEmptyLinesAtTheStartOfBlocks: true +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: None +ObjCBinPackProtocolList: Auto +ObjCBlockIndentWidth: 2 +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: true +PenaltyBreakAssignment: 2 +PenaltyBreakBeforeFirstCallParameter: 19 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakString: 1000 +PenaltyBreakTemplateDeclaration: 10 +PenaltyExcessCharacter: 1000000 +PenaltyReturnTypeOnItsOwnLine: 1000000 +PointerAlignment: Right +ReflowComments: true +SortIncludes: true +SortUsingDeclarations: true +SpaceAfterCStyleCast: false +SpaceAfterLogicalNot: false +SpaceAfterTemplateKeyword: true +SpaceBeforeAssignmentOperators: true +SpaceBeforeCpp11BracedList: false +SpaceBeforeCtorInitializerColon: true +SpaceBeforeInheritanceColon: true +SpaceBeforeParens: ControlStatements +SpaceBeforeRangeBasedForLoopColon: true +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 1 +SpacesInAngles: false +SpacesInContainerLiterals: false +SpacesInCStyleCastParentheses: false +SpacesInParentheses: false +SpacesInSquareBrackets: false +Standard: Cpp11 +StatementMacros: + - Q_UNUSED + - 
QT_REQUIRE_VERSION +TabWidth: 8 +UseTab: Never +... + diff --git a/edgeimpulse/edge-impulse-sdk/porting/ethos-core-driver/CMakeLists.txt b/edgeimpulse/edge-impulse-sdk/porting/ethos-core-driver/CMakeLists.txt new file mode 100644 index 0000000..1dd1e54 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/ethos-core-driver/CMakeLists.txt @@ -0,0 +1,89 @@ +# +# Copyright (c) 2019-2020 Arm Limited. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the License); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an AS IS BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +cmake_minimum_required(VERSION 3.15.6) + +macro(test_and_convert_error_level log_level ethos_level) + if(${log_level} STREQUAL ${LOG_LEVEL}) + set(ETHOSU_LOG_SEVERITY ${ethos_level}) + endif() +endmacro() + +project(ethosu_core_driver VERSION 0.0.1) + +# +# Build options +# + +option(DRIVER_PMU_AUTOINIT "Enable PMU boot auto-initialization" OFF) + +set(CMSIS_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../cmsis" CACHE PATH "Path to CMSIS.") + +set(LOG_NAMES emerg alert crit err warning notice info debug) +if(DEFINED LOG_LEVEL) + test_and_convert_error_level( LOG_LEVEL_ERROR err ) + test_and_convert_error_level( LOG_LEVEL_DEBUG debug ) + test_and_convert_error_level( LOG_LEVEL_TRACE debug ) + test_and_convert_error_level( LOG_LEVEL_INFO info ) + test_and_convert_error_level( LOG_LEVEL_WARN warning ) +else() + set(ETHOSU_LOG_SEVERITY "info" CACHE STRING "Driver log severity level ${LOG_NAMES}") + set_property(CACHE ETHOSU_LOG_SEVERITY PROPERTY STRINGS ${LOG_NAMES}) +endif() + +# +# Global settings +# + +# Check that ETHOSU_LOG_SEVERITY has one of the supported levels +list(FIND LOG_NAMES ${ETHOSU_LOG_SEVERITY} LOG_SEVERITY) +if (${LOG_SEVERITY} EQUAL -1) + message(FATAL_ERROR "Unsupported log level ${ETHOSU_LOG_SEVERITY}") +endif() + +# Make include directories available for current- and sub projects +include_directories(include src) +include_directories(${CMSIS_PATH}/CMSIS/Core/Include) + +# +# Build libraries +# + +# Build driver library +add_library(ethosu_core_driver STATIC) +target_include_directories(ethosu_core_driver PUBLIC include) +target_sources(ethosu_core_driver PRIVATE src/ethosu_driver.c src/ethosu_device.c src/ethosu_pmu.c) + +# Set the log level for the target +target_compile_definitions(ethosu_core_driver PRIVATE ETHOSU_LOG_SEVERITY=${LOG_SEVERITY}) + +# Install library and include files +install(TARGETS ethosu_core_driver LIBRARY DESTINATION "lib") +install(FILES include/ethosu_device.h include/ethosu_driver.h include/pmu_ethosu.h + DESTINATION "include") + +# +# Print build status +# + +message(STATUS "*******************************************************") +message(STATUS "PROJECT_NAME : ${PROJECT_NAME}") +message(STATUS "CMAKE_SYSTEM_PROCESSOR : ${CMAKE_SYSTEM_PROCESSOR}") +message(STATUS "CMSIS_PATH : ${CMSIS_PATH}") +message(STATUS "ETHOSU_LOG_SEVERITY : ${ETHOSU_LOG_SEVERITY}") +message(STATUS "*******************************************************") diff --git a/edgeimpulse/edge-impulse-sdk/porting/ethos-core-driver/LICENSE.txt 
b/edgeimpulse/edge-impulse-sdk/porting/ethos-core-driver/LICENSE.txt new file mode 100644 index 0000000..8dada3e --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/ethos-core-driver/LICENSE.txt @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. 
Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/edgeimpulse/edge-impulse-sdk/porting/ethos-core-driver/README.md b/edgeimpulse/edge-impulse-sdk/porting/ethos-core-driver/README.md
new file mode 100644
index 0000000..e23ed2e
--- /dev/null
+++ b/edgeimpulse/edge-impulse-sdk/porting/ethos-core-driver/README.md
@@ -0,0 +1,86 @@
+# Arm(R) Ethos(TM)-U core driver
+
+This repository contains a device driver for the Arm(R) Ethos(TM)-U NPU.
+
+## Building
+
+The source code comes with a CMake-based build system. The driver is expected to
+be cross-compiled for any of the supported Arm Cortex(R)-M CPUs, which will
+require the user to set up a custom toolchain file.
+
+The user is also required to define `CMAKE_SYSTEM_PROCESSOR` for the target CPU,
+for example cortex-m55+nodsp+nofp. This can be done either in the toolchain
+file or on the command line.
+
+```
+$ mkdir build
+$ cd build
+$ cmake .. -DCMAKE_TOOLCHAIN_FILE= -DCMAKE_SYSTEM_PROCESSOR=cortex-m
+$ make
+```
+
+For running the driver on Arm CPUs which are configured with a data cache, the
+cache maintenance functions in the driver are exported with weakly linked
+symbols that should be overridden. An example implementation using the CMSIS
+primitives found in cachel1_armv7.h could look as below:
+
+```
+extern "C" {
+void ethosu_flush_dcache(uint32_t *p, size_t bytes) {
+    if (p)
+        SCB_CleanDCache_by_Addr(p, bytes);
+    else
+        SCB_CleanDCache();
+}
+
+void ethosu_invalidate_dcache(uint32_t *p, size_t bytes) {
+    if (p)
+        SCB_InvalidateDCache_by_Addr(p, bytes);
+    else
+        SCB_InvalidateDCache();
+}
+}
+```
+
+# License
+
+The Arm Ethos-U core driver is provided under an Apache-2.0 license. Please see
+[LICENSE.txt](LICENSE.txt) for more information.
+
+# Contributions
+
+The Arm Ethos-U project welcomes contributions under the Apache-2.0 license.
+
+Before we can accept your contribution, you need to certify its origin and give
+us your permission. For this process we use the Developer Certificate of Origin
+(DCO) V1.1 (https://developercertificate.org).
+
+To indicate that you agree to the terms of the DCO, you "sign off" your
+contribution by adding a line with your name and e-mail address to every git
+commit message. You must use your real name; no pseudonyms or anonymous
+contributions are accepted. If there is more than one contributor, everyone
+adds their name and e-mail to the commit message.
+
+```
+Author: John Doe
+Date: Mon Feb 29 12:12:12 2016 +0000
+
+Title of the commit
+
+Short description of the change.
+
+Signed-off-by: John Doe john.doe@example.org
+Signed-off-by: Foo Bar foo.bar@example.org
+```
+
+The contributions will be code reviewed by Arm before they can be accepted into
+the repository.
+
+# Security
+
+Please see [Security](SECURITY.md).
+
+# Trademark notice
+
+Arm, Cortex and Ethos are registered trademarks of Arm Limited (or its
+subsidiaries) in the US and/or elsewhere.
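+
+# Driver usage sketch
+
+The prototypes below come from `ethosu_driver.h` in this directory. The NPU base
+address, the Vela command stream and the tensor base address arrays are assumed
+to be provided by the application, and the `secure_enable`/`privilege_enable`
+arguments shown are placeholders; treat this as a minimal sketch rather than a
+complete integration.
+
+```
+#include <stddef.h>
+#include <stdint.h>
+
+#include "ethosu_driver.h"
+
+int run_inference(void *npu_base,
+                  const uint8_t *cmd_stream, int cmd_stream_size,
+                  const uint64_t *base_addrs, const size_t *base_addrs_size,
+                  int num_base_addrs)
+{
+    /* Bring up the default driver instance once; no fast memory in this sketch. */
+    if (ethosu_init(&ethosu_drv, npu_base, NULL, 0, 1, 1) != 0) {
+        return -1;
+    }
+
+    /* Reserve a driver, run the command stream, release the driver. */
+    return ethosu_invoke_v2(cmd_stream, cmd_stream_size,
+                            base_addrs, base_addrs_size, num_base_addrs);
+}
+```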
diff --git a/edgeimpulse/edge-impulse-sdk/porting/ethos-core-driver/SECURITY.md b/edgeimpulse/edge-impulse-sdk/porting/ethos-core-driver/SECURITY.md new file mode 100644 index 0000000..29c6ce4 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/ethos-core-driver/SECURITY.md @@ -0,0 +1,85 @@ +# Security + +If you believe you have identified a security related issue or vulnerability, +then we encourage you to responsibly disclose it to us as soon as possible. + +## Reporting vulnerabilities + +Arm takes security issues seriously and welcomes feedback from researchers and +the security community in order to improve the security of its products and +services. We operate a coordinated disclosure policy for disclosing +vulnerabilities and other security issues. + +Security issues can be complex and one single timescale doesn't fit all +circumstances. We will make best endeavours to inform you when we expect +security notifications and fixes to be available and facilitate coordinated +disclosure when notifications and patches/mitigations are available. + +### Report + +For all security issues, contact Arm by email at +[arm-security@arm.com](mailto:arm-security@arm.com). In the body of the email +include as much information as possible about the issue or vulnerability and any +additional contact details. + +### Secure submission using PGP + +We support and encourage secure submission of vulnerability reports using PGP, +using the key below. If you would like replies to be encrypted, please provide +your own public key through a secure mechanism. + +~~~none +-----BEGIN PGP PUBLIC KEY BLOCK----- +mQINBFr7/RMBEACjHR5QZL/z1t2aLCRNXLE4KJiQmCo7edU5Be+7MTjIJDzZNu68 +lNEUYRoLexeayif8eC4T19bUsSbGpxHiYsFFjV8ewLXDyDJRRuaBGPfQ5rn/mE6X +Nvu+9Pputr+mB1R3CXcvrNkhmzPkK7zVM15oeyBMhogqPssuT4OeMduQdip8smfK +xTMk91RrJTLb+G3eE1tf+81kXBYvzp2e24Sn0/VeYe0IWnBobjVBZk3TmcYxDvz5 +Y47fU9V6cNj3Zq4VYrgxuLoFCA2VtetyiFQm5IYa3Bt3SWcAwihr8nbR2HoNdWyA +u8wJYYVzSq3hvT5l/IjTHxEcY+6RBq8poDSsftzvX386u9hmw7sJQFlTw6/pUjdr +gbsZ2ZzRBzKtU17ercpn4kU6VgVP3WRB5HiTFFkEpZuqAznOYaHbMq4dfd/g7Quq +C0VTbWiJnhku2i+g4BdHHRDtIF6U3aVQAfbrDb1LjVTa65p5ULOeY3HRAWtMNtu/ +Cj8cD98JDanzXtcnisds8vMQ8LZ6iMFChEnF8K4V0eLw9Ju6CMNiFYY7SEBndD/H +M4KcU4li7mROSbJcshgEbe1SYkxdMuI9eY4DNYxl3VjxoPUGzeqXo/ADFKE9bHsi +GTyEoij4ku0HspLVKnYHXn/LqHGwEcwjF8zphS+w5cn/e01akYwz5EVSQwARAQAB +tB1Bcm0gU3VwcG9ydCA8c3VwcG9ydEBhcm0uY29tPokCTgQTAQgAOBYhBN9zqDwZ +RL/vF0ihcdfNKdz4bBRiBQJa+/0TAhsDBQsJCAcCBhUKCQgLAgQWAgMBAh4BAheA +AAoJENfNKdz4bBRibK0P/jLlJR/QYypcjb+8BnHT9tCDgcV2KFYXS15VpbSNviN9 +Xs/UOnSadoGUMGCXDyb1PRNn98yUn7JlNR9rtsqPRmkpbo5cuw46ehgxjVlfcHnk +CttaE8Davx6zo0fyrBib2+oVVW8usi9+uRK4vhhPUicO3oXwzldsVFz+RbpubZxc +Bg/CZ+dQ2jMKOv1zDtInOG6OBnbQZRAeiWXgGhcIoPZ4zBQOi8nr0+bLcfvMeZi2 +uz6uKnylpXwZbl4ijcG8MKV/ei+7du+SzA9NY0WOT2g3FXDREWUhjKs8bmEZgIx4 +QgvDNpxAUThF+TqQ7zrsA8nT8POvDD0MhN/Z+A3QdPTdcaZFaXzIdxbDg+0FKmzu +OgtQBH4C01RWrkmZlhO5w7/Qjt0vLlhfyQIL9BW/HeEPtjnH2Hnq8xYnZhlVqJEh +FJU7F9sMvyiJiKviobFTd6AmpVkhxhcJ3k2L2C03imTsmUwAoejQCXwiYcOhyQ2t +Z9Nk8YIZTEw2urGFi4HSQPwPq2j/2j7ABJ4rlzJvO6vs5ppGkumvzIIP9JnpVXbp +wcbK6Ev6KdkX4s14Mzd6Hsd8LpL8t5nHhxUey6G0xKe2eSlHVm5Mlfhoete9UmIZ +dzIOZkgTgWXlYXRIxwGQ2Pss7pURtofykvLklq4jcobQuHxurl9cteodETfbWk/J +uQINBFr7/RMBEADWZG8eqt5D22g3T7ehnH/T3fuTX8LPUBnODMWGAEUY8uv64To8 +46odvrXFgWBgCf0sROEyJchc3SGLyR9S4lJsVJRde3QLN3WZkHlB4pSn4IQHFyQd +wsLQi+S9uggHMPlQ6MNvc5n0P3k5bT9fLUmtJWJ3QVjW7k963ZXpzf1zbQJqs30w +rlqGUZllfRoYQTfcYxFEaUFhwRJ//skNImWH8Vz+PTnqg2zRtFn3usrBV4GpNvsM +6jy+YEsSvUa7IY8k4wpPzEhIfqDjGbZxFSQ1H1G+mLUL+DD7oGffej/ZoC86TIdM 
+p6ew1rGhJdQBLh9nx+1ADOLWjNo2R0h60u7VR5q/K6V4fwWmeGFipPXZCD92I+nR +t/cjznwNyD/6J9YrBMF7mbGrS1TyfLaLt4tpdcBnsgqDTodd5OmG65mroXsg/lNO +7YZdecLZ34krfaLrWTtKkqULXbppB+uQvbVj8p8ONRImn6bZ+iAhnNaH9wJ06ico +b1F0imJ2SJWnFr6PzPRr0gPStLgu9wrRKheaORwF/H/HxSyPZxNVxFqu81q518A/ +plhub9INQLaxHf/TTjXpqZCcfdNTYUAW8rwbQfW9doSIT4lHY8bJXktb6BsVjkFj +PzDeYpXeOoTWetQqsEuTdg/F+qg041QBLtNj9Lr3Vy3StgMciRUIP8m0XwARAQAB +iQI2BBgBCAAgFiEE33OoPBlEv+8XSKFx180p3PhsFGIFAlr7/RMCGwwACgkQ180p +3PhsFGLWMA//V/XKrnI2YBh/SptUrgg7knPXva45bb7tGSH1fJg8f/wqycOSFFCY +ES45boA5jlQ3z8uw6BYCz5KeOucGhxAMw+x5EDdxZ33ksY5zqXB35WaMXzEwGYYb +E113/yhOsTbzu4bBKABSXbJO98MdAWvWpyCpp2MHIR3S9+ycM7/FMZ5xi3czZNRg +9+WZP+7W4qWhJptQ0kBh5C3N/tiltju5WQ2Y7XIn+5dMOJdtseFS7CNerxXZGAtH +nfRxaD/4ENdbWOwaVJiVW7+ioUJz09OWgy0gLYSDW+hciDnW1QAaJLpdAbniGZ0S +JsTmaZla8JnUKqZPgbFfA2OcnH9H+DWc0pHv17c5tJzTMP7rgirgGRX/U2LOzmFZ +1UxjQj5nn3Oa5frXbIAzb8xKiR0VDaquCM/3sti1AesYiS0Gw0Sqnw8qpFypgFXN +CKVgYXppIT+TmbDbNJDOB2UycxeI4vbiBwU8fI4qSpW12WsGdAJt/rx3UsyhZ+02 +4aSqDHzhJmtDPQ6lnaKe1fUkC90tgp8loVGmriWQx82jAQMqATVjIklTpE4vm00f +ocQIWOKEE90mKNEoV6rNbfl5QevmapTVdV/pmrRBzhbsa1uAUS4HZdH0Nf/OXEyv +yYCr2gCFPymkkRYhPr2w5EgbWyzLaBIwqjyIbXaveuB3DYi2Lhbf64I= +=EaN7 +-----END PGP PUBLIC KEY BLOCK----- +~~~ + +For more information visit + \ No newline at end of file diff --git a/edgeimpulse/edge-impulse-sdk/porting/ethos-core-driver/include/ethosu_device.h b/edgeimpulse/edge-impulse-sdk/porting/ethos-core-driver/include/ethosu_device.h new file mode 100644 index 0000000..c3bcecb --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/ethos-core-driver/include/ethosu_device.h @@ -0,0 +1,430 @@ +/* + * Copyright (c) 2022 Arm Limited. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef ETHOSU_DEVICE_H +#define ETHOSU_DEVICE_H + +/****************************************************************************** + * Includes + ******************************************************************************/ + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/****************************************************************************** + * Defines + ******************************************************************************/ + +#define ETHOSU_DRIVER_VERSION_MAJOR 0 ///< Driver major version +#define ETHOSU_DRIVER_VERSION_MINOR 16 ///< Driver minor version +#define ETHOSU_DRIVER_VERSION_PATCH 0 ///< Driver patch version +#define ETHOSU_DRIVER_BASEP_INDEXES 8 ///< Number of base pointer indexes + +#ifndef ETHOSU_PMU_NCOUNTERS +#define ETHOSU_PMU_NCOUNTERS 4 +#endif + +/****************************************************************************** + * Types + ******************************************************************************/ + +enum ethosu_error_codes +{ + ETHOSU_SUCCESS = 0, ///< Success + ETHOSU_GENERIC_FAILURE = -1, ///< Generic failure + ETHOSU_INVALID_PARAM = -2 ///< Invalid parameter +}; + +struct ethosu_device +{ + volatile uintptr_t base_address; + uint32_t proto; + uint32_t pmcr; + uint32_t pmccntr[2]; + uint32_t pmcnten; + uint32_t pmint; + uint32_t pmccntr_cfg; + uint32_t pmu_evcntr[ETHOSU_PMU_NCOUNTERS]; + uint32_t pmu_evtypr[ETHOSU_PMU_NCOUNTERS]; + uint32_t secure; + uint32_t privileged; +}; + +struct ethosu_id +{ + uint32_t version_status; ///< Version status + uint32_t version_minor; ///< Version minor + uint32_t version_major; ///< Version major + uint32_t product_major; ///< Product major + uint32_t arch_patch_rev; ///< Architecture version patch + uint32_t arch_minor_rev; ///< Architecture version minor + uint32_t arch_major_rev; ///< Architecture version major +}; + +struct ethosu_config +{ + struct + { + uint32_t macs_per_cc; ///< MACs per clock cycle + uint32_t cmd_stream_version; ///< NPU command stream version + uint32_t shram_size; ///< SHRAM size + uint32_t custom_dma; ///< Custom DMA enabled + }; +}; + +/** + * Memory type parameter for set_regioncfg_reg: + * Counter{0,1}: Outstanding transactions for + * AXI port 0 for memory type/region a=0,b=1 + * Counter{2,3}: Outstanding transactions for + * AXI port 1 for memory type/region a=2,b=3 + */ +enum ethosu_memory_type +{ + ETHOSU_AXI0_OUTSTANDING_COUNTER0 = 0, ///< NPU axi0_outstanding_counter0 + ETHOSU_AXI0_OUTSTANDING_COUNTER1 = 1, ///< NPU axi0_outstanding_counter1 + ETHOSU_AXI1_OUTSTANDING_COUNTER2 = 2, ///< NPU axi1_outstanding_counter2 + ETHOSU_AXI1_OUTSTANDING_COUNTER3 = 3 ///< NPU axi1_outstanding_counter3 +}; + +enum ethosu_axi_limit_beats +{ + ETHOSU_AXI_LIMIT_64_BYTES = 0, ///< NPU AXI limit 64 byte burst split alignment. + ETHOSU_AXI_LIMIT_128_BYTES = 1, ///< NPU AXI limit 128 byte burst split alignment. + ETHOSU_AXI_LIMIT_256_BYTES = 2 ///< NPU AXI limit 256 byte burst split alignment. 
+}; + +enum ethosu_axi_limit_mem_type +{ + ETHOSU_MEM_TYPE_DEVICE_NON_BUFFERABLE = 0, + ETHOSU_MEM_TYPE_DEVICE_BUFFERABLE = 1, + ETHOSU_MEM_TYPE_NORMAL_NON_CACHEABLE_NON_BUFFERABLE = 2, + ETHOSU_MEM_TYPE_NORMAL_NON_CACHEABLE_BUFFERABLE = 3, + ETHOSU_MEM_TYPE_WRITE_THROUGH_NO_ALLOCATE = 4, + ETHOSU_MEM_TYPE_WRITE_THROUGH_READ_ALLOCATE = 5, + ETHOSU_MEM_TYPE_WRITE_THROUGH_WRITE_ALLOCATE = 6, + ETHOSU_MEM_TYPE_WRITE_THROUGH_READ_AND_WRITE_ALLOCATE = 7, + ETHOSU_MEM_TYPE_WRITE_BACK_NO_ALLOCATE = 8, + ETHOSU_MEM_TYPE_WRITE_BACK_READ_ALLOCATE = 9, + ETHOSU_MEM_TYPE_WRITE_BACK_WRITE_ALLOCATE = 10, + ETHOSU_MEM_TYPE_WRITE_BACK_READ_AND_WRITE_ALLOCATE = 11 +}; + +enum ethosu_clock_q_request +{ + ETHOSU_CLOCK_Q_DISABLE = 0, ///< Disble NPU signal ready for clock off. + ETHOSU_CLOCK_Q_ENABLE = 1 ///< Enable NPU signal ready for clock off when stop+idle state reached. +}; + +enum ethosu_power_q_request +{ + ETHOSU_POWER_Q_DISABLE = 0, ///< Disble NPU signal ready for power off. + ETHOSU_POWER_Q_ENABLE = 1 ///< Enable NPU signal ready for power off when stop+idle state reached. +}; + +/****************************************************************************** + * Prototypes + ******************************************************************************/ + +/** + * Initialize the device. + */ +enum ethosu_error_codes ethosu_dev_init(struct ethosu_device *dev, + const void *base_address, + uint32_t secure_enable, + uint32_t privilege_enable); + +/** + * Get device id. + */ +enum ethosu_error_codes ethosu_get_id(struct ethosu_device *dev, struct ethosu_id *id); + +/** + * Get device configuration. + */ +enum ethosu_error_codes ethosu_get_config(struct ethosu_device *dev, struct ethosu_config *config); + +/** + * Execute a given command stream on NPU. + * \param[in] cmd_stream_ptr Pointer to the command stream + * \param[in] cms_length Command stream length + * \param[in] base_addr Pointer to array of base addresses + * - 0: weight tensor + * - 1: scratch tensor + * - All input tensors + * - All output tensors + * \param[in] num_base_addr Number of base addresses. + * \return \ref ethosu_error_codes + */ +enum ethosu_error_codes ethosu_run_command_stream(struct ethosu_device *dev, + const uint8_t *cmd_stream_ptr, + uint32_t cms_length, + const uint64_t *base_addr, + int num_base_addr); + +/** + * Check if IRQ is raised. + * \param[out] irq_status Pointer to IRQ status + * - 0 IRQ not raised + * - 1 IRQ raised + * \return \ref ethosu_error_codes + */ +enum ethosu_error_codes ethosu_is_irq_raised(struct ethosu_device *dev, uint8_t *irq_status); + +/** + * Clear IRQ status. + * \return \ref ethosu_error_codes + */ +enum ethosu_error_codes ethosu_clear_irq_status(struct ethosu_device *dev); + +/** + * Get the 16 bit status mask. + * \param[out] irq_status_mask Pointer to the status mask. + * The lower 16 bits of status reg are returned. + * bit0: state + * bit1: irq_raised + * bit2: bus_status + * bit3: reset_status + * bit4: cmd_parse_error + * bit5: cmd_end_reached + * bit6: pmu_irq_raised + * bit7-15: reserved + * \return \ref ethosu_error_codes + */ +enum ethosu_error_codes ethosu_get_status_mask(struct ethosu_device *dev, uint16_t *status_mask); + +/** + * Get the 16 bit IRQ history mask. + * \param[out] irq_history_mask Pointer to the IRQ history mask. + * \return \ref ethosu_error_codes + */ +enum ethosu_error_codes ethosu_get_irq_history_mask(struct ethosu_device *dev, uint16_t *irq_history_mask); + +/** + * Clear the given bits in the + * IRQ history mask. 
+ * \param[in] irq_history_clear_mask 16 bit mask indicating which bits to + * clear in the IRQ history mask. + * \return \ref ethosu_error_codes + */ +enum ethosu_error_codes ethosu_clear_irq_history_mask(struct ethosu_device *dev, uint16_t irq_history_clear_mask); + +/** + * Perform a NPU soft reset. + * \return \ref ethosu_error_codes + */ +enum ethosu_error_codes ethosu_soft_reset(struct ethosu_device *dev); + +/** + * Wait for reset ready. + * \return \ref ethosu_error_codes + */ +enum ethosu_error_codes ethosu_wait_for_reset(struct ethosu_device *dev); + +/** + * Read and return the content of a given NPU APB + * register range. + * \param[in] start_address Start address. + * \param[in] num_reg Number of registers to read. + * \param[out] reg_p Pointer to a output area, allocated by the + * caller, where the register content shall be + * written. + * \return \ref ethosu_error_codes + */ +enum ethosu_error_codes ethosu_read_apb_reg(struct ethosu_device *dev, + uint32_t start_address, + uint16_t num_reg, + uint32_t *reg_p); + +/** + * Set qconfig register. I.e. + * AXI configuration for the command stream. + * \param[in] memory_type Memory_type to use for command stream: + * enum ethosu_memory_type. + * \return \ref ethosu_error_codes + */ +enum ethosu_error_codes ethosu_set_qconfig(struct ethosu_device *dev, enum ethosu_memory_type memory_type); + +/** + * Set register REGIONCFG. + * Base pointer configuration. + * Bits[2*k+1:2*k] give the memory type for BASEP[k]. + * \param[in] region Region field to set: 0 - 7. + * \param[in] memory_type Memory_type to use for region: enum ethosu_memory_type. + * \return \ref ethosu_error_codes + */ +enum ethosu_error_codes ethosu_set_regioncfg(struct ethosu_device *dev, + uint8_t region, + enum ethosu_memory_type memory_type); + +/** + * Set AXI limit parameters for port 0 counter 0. + * \param[in] max_beats Burst split alignment, \ref ethosu_axi_limit_beats. + * \param[in] memtype Cache policy \ref ethosu_axi_limit_mem_type + * \param[in] max_reads Maximum number of outstanding reads. + * \param[in] max_writes Maximum number of outstanding writes. + * \return \ref ethosu_error_codes + */ +enum ethosu_error_codes ethosu_set_axi_limit0(struct ethosu_device *dev, + enum ethosu_axi_limit_beats max_beats, + enum ethosu_axi_limit_mem_type memtype, + uint8_t max_reads, + uint8_t max_writes); +/** + * Set AXI limit parameters for port 0 counter 1. + * \param[in] max_beats Burst split alignment, \ref ethosu_axi_limit_beats. + * \param[in] memtype Cache policy \ref ethosu_axi_limit_mem_type + * \param[in] max_reads Maximum number of outstanding reads. + * \param[in] max_writes Maximum number of outstanding writes. + * \return \ref ethosu_error_codes + */ +enum ethosu_error_codes ethosu_set_axi_limit1(struct ethosu_device *dev, + enum ethosu_axi_limit_beats max_beats, + enum ethosu_axi_limit_mem_type memtype, + uint8_t max_reads, + uint8_t max_writes); +/** + * Set AXI limit parameters for port 1 counter 2. + * \param[in] max_beats Burst split alignment, \ref ethosu_axi_limit_beats. + * \param[in] memtype Cache policy \ref ethosu_axi_limit_mem_type + * \param[in] max_reads Maximum number of outstanding reads. + * \param[in] max_writes Maximum number of outstanding writes. 
+ * \return \ref ethosu_error_codes + */ +enum ethosu_error_codes ethosu_set_axi_limit2(struct ethosu_device *dev, + enum ethosu_axi_limit_beats max_beats, + enum ethosu_axi_limit_mem_type memtype, + uint8_t max_reads, + uint8_t max_writes); +/** + * Set AXI limit parameters for port 1 counter 3. + * \param[in] max_beats Burst split alignment, \ref ethosu_axi_limit_beats. + * \param[in] memtype Cache policy \ref ethosu_axi_limit_mem_type + * \param[in] max_reads Maximum number of outstanding reads. + * \param[in] max_writes Maximum number of outstanding writes. + * \return \ref ethosu_error_codes + */ +enum ethosu_error_codes ethosu_set_axi_limit3(struct ethosu_device *dev, + enum ethosu_axi_limit_beats max_beats, + enum ethosu_axi_limit_mem_type memtype, + uint8_t max_reads, + uint8_t max_writes); + +/** + * Get current command stream queue read position. + * \param[out] qread Pointer to queue read. + * \return \ref ethosu_error_codes + */ +enum ethosu_error_codes ethosu_get_qread(struct ethosu_device *dev, uint32_t *qread); + +/** + * Get revision of NPU + * \param[out] revision Pointer to revision read. + * \return \ref ethosu_error_codes + */ +enum ethosu_error_codes ethosu_get_revision(struct ethosu_device *dev, uint32_t *revision); + +/** + * Issue run command for the currently programmed + * command stream, starting at current queue read + * position. + * \return \ref ethosu_error_codes + */ +enum ethosu_error_codes ethosu_set_command_run(struct ethosu_device *dev); + +/** + * Dump a 1KB section of SHRAM. + * \param[in] section Section offset to 1KB section in SHRAM. + * \param[out] shram_p Pointer to a output area, allocated by the + * caller, where the SHRAM content shall be + * written. + * \return \ref ethosu_error_codes + */ +enum ethosu_error_codes ethosu_get_shram_data(struct ethosu_device *dev, int section, uint32_t *shram_p); + +/** + * Set clock and power q request enable bits. + * \param[in] clock_q Clock q ENABLE/DISABLE \ref clock_q_request. + * \param[in] power_q Power q ENABLE/DISABLE \ref power_q_request. + * \return \ref ethosu_error_codes + */ +enum ethosu_error_codes ethosu_set_clock_and_power(struct ethosu_device *dev, + enum ethosu_clock_q_request clock_q, + enum ethosu_power_q_request power_q); + +/** + * Read register. + * \param[in] address Address to read. + * \return Register value. + */ +uint32_t ethosu_read_reg(struct ethosu_device *dev, uint32_t address); + +/** + * Write register. + * \param[in] address Address to read. + * \param[in] value Value to be written. + */ +void ethosu_write_reg(struct ethosu_device *dev, uint32_t address, uint32_t value); + +/** + * Write register with shadow variable. + * \param[in] address Address to read. + * \param[in] value Value to be written. + */ +void ethosu_write_reg_shadow(struct ethosu_device *dev, uint32_t address, uint32_t value, uint32_t *shadow); + +/** + * Save the PMU configuration to ethosu_device struct. + * \param[in] dev Ethos-U device where the PMU configuration is + * saved. + * \return \ref ethosu_error_codes + */ +enum ethosu_error_codes ethosu_save_pmu_config(struct ethosu_device *dev); + +/** + * Restore the PMU configuration from a ethosu_device struct. + * \param[in] dev Ethos-U device where the PMU configuration is + * stored. + * \return \ref ethosu_error_codes + */ +enum ethosu_error_codes ethosu_restore_pmu_config(struct ethosu_device *dev); + +/** + * Save PMU counters to shadow variables in memory. + * \param[in] dev Ethos-U device where the PMU configuration is + * stored. 
+ * \return \ref ethosu_error_codes + */ +enum ethosu_error_codes ethosu_save_pmu_counters(struct ethosu_device *dev); + +/** + * Check if the STATUS register has any error bits set or not. + * \param[in] dev Ethos-U device to check. + * \return true if any error bits set, + * false otherwise. + */ +bool ethosu_status_has_error(struct ethosu_device *dev); + +#ifdef __cplusplus +} +#endif + +#endif // ETHOSU_DEVICE_H diff --git a/edgeimpulse/edge-impulse-sdk/porting/ethos-core-driver/include/ethosu_driver.h b/edgeimpulse/edge-impulse-sdk/porting/ethos-core-driver/include/ethosu_driver.h new file mode 100644 index 0000000..ff040fc --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/ethos-core-driver/include/ethosu_driver.h @@ -0,0 +1,189 @@ +/* + * Copyright (c) 2022 Arm Limited. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef ETHOSU_DRIVER_H +#define ETHOSU_DRIVER_H + +/****************************************************************************** + * Includes + ******************************************************************************/ + +#include "ethosu_device.h" + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/****************************************************************************** + * Types + ******************************************************************************/ + +struct ethosu_driver +{ + struct ethosu_device dev; + bool abort_inference; + uint64_t fast_memory; + size_t fast_memory_size; + bool status_error; + bool dev_power_always_on; + struct ethosu_driver *next; + bool reserved; + volatile bool irq_triggered; + void *semaphore; + uint8_t clock_request; + uint8_t power_request; +}; + +struct ethosu_version_id +{ + // Ethos-U id + uint8_t version_status; + uint8_t version_minor; + uint8_t version_major; + uint8_t product_major; + uint8_t arch_patch_rev; + uint8_t arch_minor_rev; + uint8_t arch_major_rev; + + // Driver Version + uint8_t driver_patch_rev; + uint8_t driver_minor_rev; + uint8_t driver_major_rev; +}; + +struct ethosu_version_config +{ + uint8_t macs_per_cc; + uint8_t cmd_stream_version; + uint8_t shram_size; + uint8_t custom_dma; +}; + +struct ethosu_version +{ + struct ethosu_version_id id; + struct ethosu_version_config cfg; +}; + +enum ethosu_request_clients +{ + ETHOSU_PMU_REQUEST = 0, + ETHOSU_INFERENCE_REQUEST = 1, +}; + +/****************************************************************************** + * Variables + ******************************************************************************/ + +extern struct ethosu_driver ethosu_drv; + +/****************************************************************************** + * Prototypes + ******************************************************************************/ + +/** + * Initialize the Ethos-U driver. 
+ */ +int ethosu_init(struct ethosu_driver *drv, + const void *base_address, + const void *fast_memory, + const size_t fast_memory_size, + uint32_t secure_enable, + uint32_t privilege_enable); + +/** + * Get Ethos-U version. + */ +int ethosu_get_version(struct ethosu_driver *drv, struct ethosu_version *version); + +/** + * Invoke Vela command stream. + */ +int ethosu_invoke(struct ethosu_driver *drv, + const void *custom_data_ptr, + const int custom_data_size, + const uint64_t *base_addr, + const size_t *base_addr_size, + const int num_base_addr); + +/** + * Abort Ethos-U inference. + */ +void ethosu_abort(struct ethosu_driver *drv); + +/** + * Interrupt handler do be called on IRQ from Ethos-U + */ +void ethosu_irq_handler(struct ethosu_driver *drv); + +/** + * Set Ethos-U power mode. + */ +void ethosu_set_power_mode(struct ethosu_driver *drv, bool always_on); + +/** + * Register a driver for multiNPU usage + */ +int ethosu_register_driver(struct ethosu_driver *drv); + +/** + * Deregister a driver from multiNPU usage + */ +int ethosu_deregister_driver(struct ethosu_driver *drv); + +/** + * Reserves a driver to execute inference with + */ +struct ethosu_driver *ethosu_reserve_driver(void); + +/** + * Change driver status to available + */ +void ethosu_release_driver(struct ethosu_driver *drv); + +/** + * Set clock and power request bits + */ +enum ethosu_error_codes set_clock_and_power_request(struct ethosu_driver *drv, + enum ethosu_request_clients client, + enum ethosu_clock_q_request clock_request, + enum ethosu_power_q_request power_request); + +/** + * Static inline for backwards-compatibility + */ +static inline int ethosu_invoke_v2(const void *custom_data_ptr, + const int custom_data_size, + const uint64_t *base_addr, + const size_t *base_addr_size, + const int num_base_addr) +{ + struct ethosu_driver *drv = ethosu_reserve_driver(); + int result = ethosu_invoke(drv, custom_data_ptr, custom_data_size, base_addr, base_addr_size, num_base_addr); + ethosu_release_driver(drv); + return result; +} + +#ifdef __cplusplus +} +#endif + +#endif // ETHOSU_DRIVER_H diff --git a/edgeimpulse/edge-impulse-sdk/porting/ethos-core-driver/include/pmu_ethosu.h b/edgeimpulse/edge-impulse-sdk/porting/ethos-core-driver/include/pmu_ethosu.h new file mode 100644 index 0000000..acd2a94 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/ethos-core-driver/include/pmu_ethosu.h @@ -0,0 +1,309 @@ +/* + * Copyright (c) 2022 Arm Limited. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef PMU_ETHOSU_H +#define PMU_ETHOSU_H + +/***************************************************************************** + * Includes + *****************************************************************************/ + +#include + +#include "ethosu_driver.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/***************************************************************************** + * Defines + *****************************************************************************/ + +#define ETHOSU_PMU_NCOUNTERS 4 + +#define ETHOSU_PMU_CNT1_Msk (1UL << 0) +#define ETHOSU_PMU_CNT2_Msk (1UL << 1) +#define ETHOSU_PMU_CNT3_Msk (1UL << 2) +#define ETHOSU_PMU_CNT4_Msk (1UL << 3) +#define ETHOSU_PMU_CCNT_Msk (1UL << 31) + +/***************************************************************************** + * Types + *****************************************************************************/ + +/** \brief HW Supported ETHOSU PMU Events + * + * Note: These values are symbolic. Actual HW-values may change. I.e. always use API + * to set/get actual event-type value. + * */ +enum ethosu_pmu_event_type +{ + ETHOSU_PMU_NO_EVENT = 0, + ETHOSU_PMU_CYCLE, + ETHOSU_PMU_NPU_IDLE, + ETHOSU_PMU_CC_STALLED_ON_BLOCKDEP, + ETHOSU_PMU_CC_STALLED_ON_SHRAM_RECONFIG, + ETHOSU_PMU_NPU_ACTIVE, + ETHOSU_PMU_MAC_ACTIVE, + ETHOSU_PMU_MAC_ACTIVE_8BIT, + ETHOSU_PMU_MAC_ACTIVE_16BIT, + ETHOSU_PMU_MAC_DPU_ACTIVE, + ETHOSU_PMU_MAC_STALLED_BY_WD_ACC, + ETHOSU_PMU_MAC_STALLED_BY_WD, + ETHOSU_PMU_MAC_STALLED_BY_ACC, + ETHOSU_PMU_MAC_STALLED_BY_IB, + ETHOSU_PMU_MAC_ACTIVE_32BIT, + ETHOSU_PMU_MAC_STALLED_BY_INT_W, + ETHOSU_PMU_MAC_STALLED_BY_INT_ACC, + ETHOSU_PMU_AO_ACTIVE, + ETHOSU_PMU_AO_ACTIVE_8BIT, + ETHOSU_PMU_AO_ACTIVE_16BIT, + ETHOSU_PMU_AO_STALLED_BY_OFMP_OB, + ETHOSU_PMU_AO_STALLED_BY_OFMP, + ETHOSU_PMU_AO_STALLED_BY_OB, + ETHOSU_PMU_AO_STALLED_BY_ACC_IB, + ETHOSU_PMU_AO_STALLED_BY_ACC, + ETHOSU_PMU_AO_STALLED_BY_IB, + ETHOSU_PMU_WD_ACTIVE, + ETHOSU_PMU_WD_STALLED, + ETHOSU_PMU_WD_STALLED_BY_WS, + ETHOSU_PMU_WD_STALLED_BY_WD_BUF, + ETHOSU_PMU_WD_PARSE_ACTIVE, + ETHOSU_PMU_WD_PARSE_STALLED, + ETHOSU_PMU_WD_PARSE_STALLED_IN, + ETHOSU_PMU_WD_PARSE_STALLED_OUT, + ETHOSU_PMU_WD_TRANS_WS, + ETHOSU_PMU_WD_TRANS_WB, + ETHOSU_PMU_WD_TRANS_DW0, + ETHOSU_PMU_WD_TRANS_DW1, + ETHOSU_PMU_AXI0_RD_TRANS_ACCEPTED, + ETHOSU_PMU_AXI0_RD_TRANS_COMPLETED, + ETHOSU_PMU_AXI0_RD_DATA_BEAT_RECEIVED, + ETHOSU_PMU_AXI0_RD_TRAN_REQ_STALLED, + ETHOSU_PMU_AXI0_WR_TRANS_ACCEPTED, + ETHOSU_PMU_AXI0_WR_TRANS_COMPLETED_M, + ETHOSU_PMU_AXI0_WR_TRANS_COMPLETED_S, + ETHOSU_PMU_AXI0_WR_DATA_BEAT_WRITTEN, + ETHOSU_PMU_AXI0_WR_TRAN_REQ_STALLED, + ETHOSU_PMU_AXI0_WR_DATA_BEAT_STALLED, + ETHOSU_PMU_AXI0_ENABLED_CYCLES, + ETHOSU_PMU_AXI0_RD_STALL_LIMIT, + ETHOSU_PMU_AXI0_WR_STALL_LIMIT, + ETHOSU_PMU_AXI1_RD_TRANS_ACCEPTED, + ETHOSU_PMU_AXI1_RD_TRANS_COMPLETED, + ETHOSU_PMU_AXI1_RD_DATA_BEAT_RECEIVED, + ETHOSU_PMU_AXI1_RD_TRAN_REQ_STALLED, + ETHOSU_PMU_AXI1_WR_TRANS_ACCEPTED, + ETHOSU_PMU_AXI1_WR_TRANS_COMPLETED_M, + ETHOSU_PMU_AXI1_WR_TRANS_COMPLETED_S, + ETHOSU_PMU_AXI1_WR_DATA_BEAT_WRITTEN, + ETHOSU_PMU_AXI1_WR_TRAN_REQ_STALLED, + ETHOSU_PMU_AXI1_WR_DATA_BEAT_STALLED, + ETHOSU_PMU_AXI1_ENABLED_CYCLES, + ETHOSU_PMU_AXI1_RD_STALL_LIMIT, + ETHOSU_PMU_AXI1_WR_STALL_LIMIT, + ETHOSU_PMU_AXI_LATENCY_ANY, + ETHOSU_PMU_AXI_LATENCY_32, + ETHOSU_PMU_AXI_LATENCY_64, + ETHOSU_PMU_AXI_LATENCY_128, + ETHOSU_PMU_AXI_LATENCY_256, + ETHOSU_PMU_AXI_LATENCY_512, + ETHOSU_PMU_AXI_LATENCY_1024, + ETHOSU_PMU_ECC_DMA, + ETHOSU_PMU_ECC_SB0, + 
ETHOSU_PMU_ECC_SB1, + + ETHOSU_PMU_SENTINEL // End-marker (not event) +}; + +/***************************************************************************** + * Functions + *****************************************************************************/ + +/** + * \brief Enable the PMU + */ +void ETHOSU_PMU_Enable(struct ethosu_driver *drv); + +/** + * \brief Disable the PMU + */ +void ETHOSU_PMU_Disable(struct ethosu_driver *drv); + +/** + * \brief Set event to count for PMU eventer counter + * \param [in] num Event counter (0-ETHOSU_PMU_NCOUNTERS) to configure + * \param [in] type Event to count + */ +void ETHOSU_PMU_Set_EVTYPER(struct ethosu_driver *drv, uint32_t num, enum ethosu_pmu_event_type type); + +/** + * \brief Get event to count for PMU eventer counter + * \param [in] num Event counter (0-ETHOSU_PMU_NCOUNTERS) to configure + * \return type Event to count + */ +enum ethosu_pmu_event_type ETHOSU_PMU_Get_EVTYPER(struct ethosu_driver *drv, uint32_t num); + +/** + * \brief Reset cycle counter + */ +void ETHOSU_PMU_CYCCNT_Reset(struct ethosu_driver *drv); + +/** + * \brief Reset all event counters + */ +void ETHOSU_PMU_EVCNTR_ALL_Reset(struct ethosu_driver *drv); + +/** + * \brief Enable counters + * \param [in] mask Counters to enable + * \note Enables one or more of the following: + * - event counters (bit 0-ETHOSU_PMU_NCOUNTERS) + * - cycle counter (bit 31) + */ +void ETHOSU_PMU_CNTR_Enable(struct ethosu_driver *drv, uint32_t mask); + +/** + * \brief Disable counters + * \param [in] mask Counters to disable + * \note Disables one or more of the following: + * - event counters (bit 0-ETHOSU_PMU_NCOUNTERS) + * - cycle counter (bit 31) + */ +void ETHOSU_PMU_CNTR_Disable(struct ethosu_driver *drv, uint32_t mask); + +/** + * \brief Determine counters activation + * + * \return Event count + * \param [in] mask Counters to enable + * \return a bitmask where bit-set means: + * - event counters activated (bit 0-ETHOSU_PMU_NCOUNTERS) + * - cycle counter activate (bit 31) + * \note ETHOSU specific. Usage breaks CMSIS complience + */ +uint32_t ETHOSU_PMU_CNTR_Status(struct ethosu_driver *drv); + +/** + * \brief Read cycle counter (64 bit) + * \return Cycle count + * \note Two HW 32-bit registers that can increment independently in-between reads. + * To work-around raciness yet still avoid turning + * off the event both are read as one value twice. If the latter read + * is not greater than the former, it means overflow of LSW without + * incrementing MSW has occurred, in which case the former value is used. + */ +uint64_t ETHOSU_PMU_Get_CCNTR(struct ethosu_driver *drv); + +/** + * \brief Set cycle counter (64 bit) + * \param [in] val Conter value + * \note Two HW 32-bit registers that can increment independently in-between reads. + * To work-around raciness, counter is temporary disabled if enabled. + * \note ETHOSU specific. Usage breaks CMSIS complience + */ +void ETHOSU_PMU_Set_CCNTR(struct ethosu_driver *drv, uint64_t val); + +/** + * \brief Read event counter + * \param [in] num Event counter (0-ETHOSU_PMU_NCOUNTERS) + * \return Event count + */ +uint32_t ETHOSU_PMU_Get_EVCNTR(struct ethosu_driver *drv, uint32_t num); + +/** + * \brief Set event counter value + * \param [in] num Event counter (0-ETHOSU_PMU_NCOUNTERS) + * \param [in] val Conter value + * \note ETHOSU specific. 
Usage breaks CMSIS complience + */ +void ETHOSU_PMU_Set_EVCNTR(struct ethosu_driver *drv, uint32_t num, uint32_t val); + +/** + * \brief Read counter overflow status + * \return Counter overflow status bits for the following: + * - event counters (bit 0-ETHOSU_PMU_NCOUNTERS)) + * - cycle counter (bit 31) + */ +uint32_t ETHOSU_PMU_Get_CNTR_OVS(struct ethosu_driver *drv); + +/** + * \brief Clear counter overflow status + * \param [in] mask Counter overflow status bits to clear + * \note Clears overflow status bits for one or more of the following: + * - event counters (bit 0-ETHOSU_PMU_NCOUNTERS) + * - cycle counter (bit 31) + */ +void ETHOSU_PMU_Set_CNTR_OVS(struct ethosu_driver *drv, uint32_t mask); + +/** + * \brief Enable counter overflow interrupt request + * \param [in] mask Counter overflow interrupt request bits to set + * \note Sets overflow interrupt request bits for one or more of the following: + * - event counters (bit 0-ETHOSU_PMU_NCOUNTERS) + * - cycle counter (bit 31) + */ +void ETHOSU_PMU_Set_CNTR_IRQ_Enable(struct ethosu_driver *drv, uint32_t mask); + +/** + * \brief Disable counter overflow interrupt request + * \param [in] mask Counter overflow interrupt request bits to clear + * \note Clears overflow interrupt request bits for one or more of the following: + * - event counters (bit 0-ETHOSU_PMU_NCOUNTERS) + * - cycle counter (bit 31) + */ +void ETHOSU_PMU_Set_CNTR_IRQ_Disable(struct ethosu_driver *drv, uint32_t mask); + +/** + * \brief Get counters overflow interrupt request stiinings + * \return mask Counter overflow interrupt request bits + * \note Sets overflow interrupt request bits for one or more of the following: + * - event counters (bit 0-ETHOSU_PMU_NCOUNTERS) + * - cycle counter (bit 31) + * \note ETHOSU specific. Usage breaks CMSIS compliance + */ +uint32_t ETHOSU_PMU_Get_IRQ_Enable(struct ethosu_driver *drv); + +/** + * \brief Software increment event counter + * \param [in] mask Counters to increment + * - event counters (bit 0-ETHOSU_PMU_NCOUNTERS) + * - cycle counter (bit 31) + * \note Software increment bits for one or more event counters. + */ +void ETHOSU_PMU_CNTR_Increment(struct ethosu_driver *drv, uint32_t mask); + +/** + * \brief Set start event number for the cycle counter + * \param [in] start_event Event to trigger start of the cycle counter + * \note Sets the event number that starts the cycle counter. + */ +void ETHOSU_PMU_PMCCNTR_CFG_Set_Start_Event(struct ethosu_driver *drv, enum ethosu_pmu_event_type start_event); + +/** + * \brief Set stop event number for the cycle counter + * \param [in] stop_event Event number + * \note Sets the event number that stops the cycle counter. + */ +void ETHOSU_PMU_PMCCNTR_CFG_Set_Stop_Event(struct ethosu_driver *drv, enum ethosu_pmu_event_type stop_event); + +#ifdef __cplusplus +} +#endif + +#endif /* PMU_ETHOSU_H */ diff --git a/edgeimpulse/edge-impulse-sdk/porting/ethos-core-driver/src/ethosu55_interface.h b/edgeimpulse/edge-impulse-sdk/porting/ethos-core-driver/src/ethosu55_interface.h new file mode 100644 index 0000000..0d1ee6c --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/ethos-core-driver/src/ethosu55_interface.h @@ -0,0 +1,14126 @@ +/* + * Copyright (c) 2019-2021 Arm Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ETHOSU55_INTERFACE_H +#define ETHOSU55_INTERFACE_H + +#ifdef __KERNEL__ +#include +#else +#include +#endif + +#if !defined(__cplusplus) || __cplusplus < 201402L +#define CONSTEXPR +#else +#define CONSTEXPR constexpr +#endif + +#ifndef __cplusplus +#define STRUCT struct +#else +#define STRUCT +#include +#endif + +#define NNX_ARCH_VERSION_MAJOR 1 +#define NNX_ARCH_VERSION_MINOR 0 +#define NNX_ARCH_VERSION_PATCH 6 + +// Register offsets + +// +// Register subpage BASE +// +#define NPU_REG_ID 0x0000 +#define NPU_REG_STATUS 0x0004 +#define NPU_REG_CMD 0x0008 +#define NPU_REG_RESET 0x000C +#define NPU_REG_QBASE0 0x0010 +#define NPU_REG_QBASE1 0x0014 +#define NPU_REG_QREAD 0x0018 +#define NPU_REG_QCONFIG 0x001C +#define NPU_REG_QSIZE 0x0020 +#define NPU_REG_PROT 0x0024 +#define NPU_REG_CONFIG 0x0028 +#define NPU_REG_LOCK 0x002C +#define NPU_REG_REGIONCFG 0x003C +#define NPU_REG_AXI_LIMIT0 0x0040 +#define NPU_REG_AXI_LIMIT1 0x0044 +#define NPU_REG_AXI_LIMIT2 0x0048 +#define NPU_REG_AXI_LIMIT3 0x004C +#define BASE_REGISTERS_SIZE 0x0050 + +// +// Register subpage BASE_POINTERS +// +#define NPU_REG_BASEP0 0x0080 +#define NPU_REG_BASEP1 0x0084 +#define NPU_REG_BASEP2 0x0088 +#define NPU_REG_BASEP3 0x008C +#define NPU_REG_BASEP4 0x0090 +#define NPU_REG_BASEP5 0x0094 +#define NPU_REG_BASEP6 0x0098 +#define NPU_REG_BASEP7 0x009C +#define NPU_REG_BASEP8 0x00A0 +#define NPU_REG_BASEP9 0x00A4 +#define NPU_REG_BASEP10 0x00A8 +#define NPU_REG_BASEP11 0x00AC +#define NPU_REG_BASEP12 0x00B0 +#define NPU_REG_BASEP13 0x00B4 +#define NPU_REG_BASEP14 0x00B8 +#define NPU_REG_BASEP15 0x00BC +#define BASE_POINTERS_REGISTERS_SIZE 0x00C0 + +// +// Register subpage DEBUG +// +#define NPU_REG_WD_STATUS 0x0100 +#define NPU_REG_MAC_STATUS 0x0104 +#define NPU_REG_AO_STATUS 0x0108 +#define NPU_REG_DMA_STATUS0 0x0110 +#define NPU_REG_DMA_STATUS1 0x0114 +#define NPU_REG_CLKFORCE 0x0140 +#define NPU_REG_DEBUG_ADDRESS 0x0144 +#define NPU_REG_DEBUG_MISC 0x0148 +#define NPU_REG_DEBUGCORE 0x014C +#define NPU_REG_DEBUG_BLOCK 0x0150 +#define DEBUG_REGISTERS_SIZE 0x0154 + +// +// Register subpage ID +// +#define NPU_REG_REVISION 0x0FC0 +#define NPU_REG_PID4 0x0FD0 +#define NPU_REG_PID5 0x0FD4 +#define NPU_REG_PID6 0x0FD8 +#define NPU_REG_PID7 0x0FDC +#define NPU_REG_PID0 0x0FE0 +#define NPU_REG_PID1 0x0FE4 +#define NPU_REG_PID2 0x0FE8 +#define NPU_REG_PID3 0x0FEC +#define NPU_REG_CID0 0x0FF0 +#define NPU_REG_CID1 0x0FF4 +#define NPU_REG_CID2 0x0FF8 +#define NPU_REG_CID3 0x0FFC +#define ID_REGISTERS_SIZE 0x1000 + +// +// Register subpage PMU +// +#define NPU_REG_PMCR 0x0180 +#define NPU_REG_PMCNTENSET 0x0184 +#define NPU_REG_PMCNTENCLR 0x0188 +#define NPU_REG_PMOVSSET 0x018C +#define NPU_REG_PMOVSCLR 0x0190 +#define NPU_REG_PMINTSET 0x0194 +#define NPU_REG_PMINTCLR 0x0198 +#define NPU_REG_PMCCNTR_LO 0x01A0 +#define NPU_REG_PMCCNTR_HI 0x01A4 +#define NPU_REG_PMCCNTR_CFG 0x01A8 +#define NPU_REG_PMCAXI_CHAN 0x01AC +#define NPU_REG_PMEVCNTR0 0x0300 +#define NPU_REG_PMEVCNTR1 0x0304 +#define NPU_REG_PMEVCNTR2 0x0308 +#define NPU_REG_PMEVCNTR3 0x030C +#define NPU_REG_PMEVTYPER0 0x0380 +#define NPU_REG_PMEVTYPER1 0x0384 
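Editorial aside (hedged sketch, not part of the upstream header): the PMU registers in this subpage are normally driven through the pmu_ethosu.h API declared earlier in this patch rather than by direct MMIO writes. A minimal profiling sketch, assuming a single NPU, `#include "pmu_ethosu.h"` (which pulls in ethosu_driver.h), and caller-provided Vela command-stream and tensor-region arguments, might look like this; the hypothetical helper name and error handling are illustrative only:

/* Hypothetical helper: run one inference and report NPU cycle counts.
 * cms_data/cms_size describe the Vela command stream; base_addrs,
 * base_addr_sizes and num_base_addrs describe the tensor regions. */
static int profile_one_inference(const void *cms_data, int cms_size,
                                 const uint64_t *base_addrs,
                                 const size_t *base_addr_sizes,
                                 int num_base_addrs,
                                 uint64_t *out_cycles, uint32_t *out_npu_active)
{
    struct ethosu_driver *drv = ethosu_reserve_driver();      /* claim a free NPU driver instance */

    ETHOSU_PMU_Enable(drv);
    ETHOSU_PMU_Set_EVTYPER(drv, 0, ETHOSU_PMU_NPU_ACTIVE);    /* event counter 0: NPU-active cycles */
    ETHOSU_PMU_CYCCNT_Reset(drv);
    ETHOSU_PMU_CNTR_Enable(drv, ETHOSU_PMU_CNT1_Msk | ETHOSU_PMU_CCNT_Msk);

    int ret = ethosu_invoke(drv, cms_data, cms_size,
                            base_addrs, base_addr_sizes, num_base_addrs);

    *out_cycles     = ETHOSU_PMU_Get_CCNTR(drv);              /* 64-bit cycle counter */
    *out_npu_active = ETHOSU_PMU_Get_EVCNTR(drv, 0);          /* event counter 0 */

    ETHOSU_PMU_Disable(drv);
    ethosu_release_driver(drv);
    return ret;                                               /* treat non-zero as an invoke failure */
}

This only illustrates the call order (reserve, configure PMU, invoke, read counters, release); whether ethosu_reserve_driver() blocks and how invoke errors should be handled are platform-specific decisions.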
+#define NPU_REG_PMEVTYPER2 0x0388 +#define NPU_REG_PMEVTYPER3 0x038C +#define PMU_REGISTERS_SIZE 0x0390 + +// +// Register subpage SHARED_BUFFER +// +#define NPU_REG_SHARED_BUFFER0 0x0400 +#define NPU_REG_SHARED_BUFFER1 0x0404 +#define NPU_REG_SHARED_BUFFER2 0x0408 +#define NPU_REG_SHARED_BUFFER3 0x040C +#define NPU_REG_SHARED_BUFFER4 0x0410 +#define NPU_REG_SHARED_BUFFER5 0x0414 +#define NPU_REG_SHARED_BUFFER6 0x0418 +#define NPU_REG_SHARED_BUFFER7 0x041C +#define NPU_REG_SHARED_BUFFER8 0x0420 +#define NPU_REG_SHARED_BUFFER9 0x0424 +#define NPU_REG_SHARED_BUFFER10 0x0428 +#define NPU_REG_SHARED_BUFFER11 0x042C +#define NPU_REG_SHARED_BUFFER12 0x0430 +#define NPU_REG_SHARED_BUFFER13 0x0434 +#define NPU_REG_SHARED_BUFFER14 0x0438 +#define NPU_REG_SHARED_BUFFER15 0x043C +#define NPU_REG_SHARED_BUFFER16 0x0440 +#define NPU_REG_SHARED_BUFFER17 0x0444 +#define NPU_REG_SHARED_BUFFER18 0x0448 +#define NPU_REG_SHARED_BUFFER19 0x044C +#define NPU_REG_SHARED_BUFFER20 0x0450 +#define NPU_REG_SHARED_BUFFER21 0x0454 +#define NPU_REG_SHARED_BUFFER22 0x0458 +#define NPU_REG_SHARED_BUFFER23 0x045C +#define NPU_REG_SHARED_BUFFER24 0x0460 +#define NPU_REG_SHARED_BUFFER25 0x0464 +#define NPU_REG_SHARED_BUFFER26 0x0468 +#define NPU_REG_SHARED_BUFFER27 0x046C +#define NPU_REG_SHARED_BUFFER28 0x0470 +#define NPU_REG_SHARED_BUFFER29 0x0474 +#define NPU_REG_SHARED_BUFFER30 0x0478 +#define NPU_REG_SHARED_BUFFER31 0x047C +#define NPU_REG_SHARED_BUFFER32 0x0480 +#define NPU_REG_SHARED_BUFFER33 0x0484 +#define NPU_REG_SHARED_BUFFER34 0x0488 +#define NPU_REG_SHARED_BUFFER35 0x048C +#define NPU_REG_SHARED_BUFFER36 0x0490 +#define NPU_REG_SHARED_BUFFER37 0x0494 +#define NPU_REG_SHARED_BUFFER38 0x0498 +#define NPU_REG_SHARED_BUFFER39 0x049C +#define NPU_REG_SHARED_BUFFER40 0x04A0 +#define NPU_REG_SHARED_BUFFER41 0x04A4 +#define NPU_REG_SHARED_BUFFER42 0x04A8 +#define NPU_REG_SHARED_BUFFER43 0x04AC +#define NPU_REG_SHARED_BUFFER44 0x04B0 +#define NPU_REG_SHARED_BUFFER45 0x04B4 +#define NPU_REG_SHARED_BUFFER46 0x04B8 +#define NPU_REG_SHARED_BUFFER47 0x04BC +#define NPU_REG_SHARED_BUFFER48 0x04C0 +#define NPU_REG_SHARED_BUFFER49 0x04C4 +#define NPU_REG_SHARED_BUFFER50 0x04C8 +#define NPU_REG_SHARED_BUFFER51 0x04CC +#define NPU_REG_SHARED_BUFFER52 0x04D0 +#define NPU_REG_SHARED_BUFFER53 0x04D4 +#define NPU_REG_SHARED_BUFFER54 0x04D8 +#define NPU_REG_SHARED_BUFFER55 0x04DC +#define NPU_REG_SHARED_BUFFER56 0x04E0 +#define NPU_REG_SHARED_BUFFER57 0x04E4 +#define NPU_REG_SHARED_BUFFER58 0x04E8 +#define NPU_REG_SHARED_BUFFER59 0x04EC +#define NPU_REG_SHARED_BUFFER60 0x04F0 +#define NPU_REG_SHARED_BUFFER61 0x04F4 +#define NPU_REG_SHARED_BUFFER62 0x04F8 +#define NPU_REG_SHARED_BUFFER63 0x04FC +#define NPU_REG_SHARED_BUFFER64 0x0500 +#define NPU_REG_SHARED_BUFFER65 0x0504 +#define NPU_REG_SHARED_BUFFER66 0x0508 +#define NPU_REG_SHARED_BUFFER67 0x050C +#define NPU_REG_SHARED_BUFFER68 0x0510 +#define NPU_REG_SHARED_BUFFER69 0x0514 +#define NPU_REG_SHARED_BUFFER70 0x0518 +#define NPU_REG_SHARED_BUFFER71 0x051C +#define NPU_REG_SHARED_BUFFER72 0x0520 +#define NPU_REG_SHARED_BUFFER73 0x0524 +#define NPU_REG_SHARED_BUFFER74 0x0528 +#define NPU_REG_SHARED_BUFFER75 0x052C +#define NPU_REG_SHARED_BUFFER76 0x0530 +#define NPU_REG_SHARED_BUFFER77 0x0534 +#define NPU_REG_SHARED_BUFFER78 0x0538 +#define NPU_REG_SHARED_BUFFER79 0x053C +#define NPU_REG_SHARED_BUFFER80 0x0540 +#define NPU_REG_SHARED_BUFFER81 0x0544 +#define NPU_REG_SHARED_BUFFER82 0x0548 +#define NPU_REG_SHARED_BUFFER83 0x054C +#define NPU_REG_SHARED_BUFFER84 0x0550 +#define 
NPU_REG_SHARED_BUFFER85 0x0554 +#define NPU_REG_SHARED_BUFFER86 0x0558 +#define NPU_REG_SHARED_BUFFER87 0x055C +#define NPU_REG_SHARED_BUFFER88 0x0560 +#define NPU_REG_SHARED_BUFFER89 0x0564 +#define NPU_REG_SHARED_BUFFER90 0x0568 +#define NPU_REG_SHARED_BUFFER91 0x056C +#define NPU_REG_SHARED_BUFFER92 0x0570 +#define NPU_REG_SHARED_BUFFER93 0x0574 +#define NPU_REG_SHARED_BUFFER94 0x0578 +#define NPU_REG_SHARED_BUFFER95 0x057C +#define NPU_REG_SHARED_BUFFER96 0x0580 +#define NPU_REG_SHARED_BUFFER97 0x0584 +#define NPU_REG_SHARED_BUFFER98 0x0588 +#define NPU_REG_SHARED_BUFFER99 0x058C +#define NPU_REG_SHARED_BUFFER100 0x0590 +#define NPU_REG_SHARED_BUFFER101 0x0594 +#define NPU_REG_SHARED_BUFFER102 0x0598 +#define NPU_REG_SHARED_BUFFER103 0x059C +#define NPU_REG_SHARED_BUFFER104 0x05A0 +#define NPU_REG_SHARED_BUFFER105 0x05A4 +#define NPU_REG_SHARED_BUFFER106 0x05A8 +#define NPU_REG_SHARED_BUFFER107 0x05AC +#define NPU_REG_SHARED_BUFFER108 0x05B0 +#define NPU_REG_SHARED_BUFFER109 0x05B4 +#define NPU_REG_SHARED_BUFFER110 0x05B8 +#define NPU_REG_SHARED_BUFFER111 0x05BC +#define NPU_REG_SHARED_BUFFER112 0x05C0 +#define NPU_REG_SHARED_BUFFER113 0x05C4 +#define NPU_REG_SHARED_BUFFER114 0x05C8 +#define NPU_REG_SHARED_BUFFER115 0x05CC +#define NPU_REG_SHARED_BUFFER116 0x05D0 +#define NPU_REG_SHARED_BUFFER117 0x05D4 +#define NPU_REG_SHARED_BUFFER118 0x05D8 +#define NPU_REG_SHARED_BUFFER119 0x05DC +#define NPU_REG_SHARED_BUFFER120 0x05E0 +#define NPU_REG_SHARED_BUFFER121 0x05E4 +#define NPU_REG_SHARED_BUFFER122 0x05E8 +#define NPU_REG_SHARED_BUFFER123 0x05EC +#define NPU_REG_SHARED_BUFFER124 0x05F0 +#define NPU_REG_SHARED_BUFFER125 0x05F4 +#define NPU_REG_SHARED_BUFFER126 0x05F8 +#define NPU_REG_SHARED_BUFFER127 0x05FC +#define NPU_REG_SHARED_BUFFER128 0x0600 +#define NPU_REG_SHARED_BUFFER129 0x0604 +#define NPU_REG_SHARED_BUFFER130 0x0608 +#define NPU_REG_SHARED_BUFFER131 0x060C +#define NPU_REG_SHARED_BUFFER132 0x0610 +#define NPU_REG_SHARED_BUFFER133 0x0614 +#define NPU_REG_SHARED_BUFFER134 0x0618 +#define NPU_REG_SHARED_BUFFER135 0x061C +#define NPU_REG_SHARED_BUFFER136 0x0620 +#define NPU_REG_SHARED_BUFFER137 0x0624 +#define NPU_REG_SHARED_BUFFER138 0x0628 +#define NPU_REG_SHARED_BUFFER139 0x062C +#define NPU_REG_SHARED_BUFFER140 0x0630 +#define NPU_REG_SHARED_BUFFER141 0x0634 +#define NPU_REG_SHARED_BUFFER142 0x0638 +#define NPU_REG_SHARED_BUFFER143 0x063C +#define NPU_REG_SHARED_BUFFER144 0x0640 +#define NPU_REG_SHARED_BUFFER145 0x0644 +#define NPU_REG_SHARED_BUFFER146 0x0648 +#define NPU_REG_SHARED_BUFFER147 0x064C +#define NPU_REG_SHARED_BUFFER148 0x0650 +#define NPU_REG_SHARED_BUFFER149 0x0654 +#define NPU_REG_SHARED_BUFFER150 0x0658 +#define NPU_REG_SHARED_BUFFER151 0x065C +#define NPU_REG_SHARED_BUFFER152 0x0660 +#define NPU_REG_SHARED_BUFFER153 0x0664 +#define NPU_REG_SHARED_BUFFER154 0x0668 +#define NPU_REG_SHARED_BUFFER155 0x066C +#define NPU_REG_SHARED_BUFFER156 0x0670 +#define NPU_REG_SHARED_BUFFER157 0x0674 +#define NPU_REG_SHARED_BUFFER158 0x0678 +#define NPU_REG_SHARED_BUFFER159 0x067C +#define NPU_REG_SHARED_BUFFER160 0x0680 +#define NPU_REG_SHARED_BUFFER161 0x0684 +#define NPU_REG_SHARED_BUFFER162 0x0688 +#define NPU_REG_SHARED_BUFFER163 0x068C +#define NPU_REG_SHARED_BUFFER164 0x0690 +#define NPU_REG_SHARED_BUFFER165 0x0694 +#define NPU_REG_SHARED_BUFFER166 0x0698 +#define NPU_REG_SHARED_BUFFER167 0x069C +#define NPU_REG_SHARED_BUFFER168 0x06A0 +#define NPU_REG_SHARED_BUFFER169 0x06A4 +#define NPU_REG_SHARED_BUFFER170 0x06A8 +#define NPU_REG_SHARED_BUFFER171 0x06AC +#define 
NPU_REG_SHARED_BUFFER172 0x06B0 +#define NPU_REG_SHARED_BUFFER173 0x06B4 +#define NPU_REG_SHARED_BUFFER174 0x06B8 +#define NPU_REG_SHARED_BUFFER175 0x06BC +#define NPU_REG_SHARED_BUFFER176 0x06C0 +#define NPU_REG_SHARED_BUFFER177 0x06C4 +#define NPU_REG_SHARED_BUFFER178 0x06C8 +#define NPU_REG_SHARED_BUFFER179 0x06CC +#define NPU_REG_SHARED_BUFFER180 0x06D0 +#define NPU_REG_SHARED_BUFFER181 0x06D4 +#define NPU_REG_SHARED_BUFFER182 0x06D8 +#define NPU_REG_SHARED_BUFFER183 0x06DC +#define NPU_REG_SHARED_BUFFER184 0x06E0 +#define NPU_REG_SHARED_BUFFER185 0x06E4 +#define NPU_REG_SHARED_BUFFER186 0x06E8 +#define NPU_REG_SHARED_BUFFER187 0x06EC +#define NPU_REG_SHARED_BUFFER188 0x06F0 +#define NPU_REG_SHARED_BUFFER189 0x06F4 +#define NPU_REG_SHARED_BUFFER190 0x06F8 +#define NPU_REG_SHARED_BUFFER191 0x06FC +#define NPU_REG_SHARED_BUFFER192 0x0700 +#define NPU_REG_SHARED_BUFFER193 0x0704 +#define NPU_REG_SHARED_BUFFER194 0x0708 +#define NPU_REG_SHARED_BUFFER195 0x070C +#define NPU_REG_SHARED_BUFFER196 0x0710 +#define NPU_REG_SHARED_BUFFER197 0x0714 +#define NPU_REG_SHARED_BUFFER198 0x0718 +#define NPU_REG_SHARED_BUFFER199 0x071C +#define NPU_REG_SHARED_BUFFER200 0x0720 +#define NPU_REG_SHARED_BUFFER201 0x0724 +#define NPU_REG_SHARED_BUFFER202 0x0728 +#define NPU_REG_SHARED_BUFFER203 0x072C +#define NPU_REG_SHARED_BUFFER204 0x0730 +#define NPU_REG_SHARED_BUFFER205 0x0734 +#define NPU_REG_SHARED_BUFFER206 0x0738 +#define NPU_REG_SHARED_BUFFER207 0x073C +#define NPU_REG_SHARED_BUFFER208 0x0740 +#define NPU_REG_SHARED_BUFFER209 0x0744 +#define NPU_REG_SHARED_BUFFER210 0x0748 +#define NPU_REG_SHARED_BUFFER211 0x074C +#define NPU_REG_SHARED_BUFFER212 0x0750 +#define NPU_REG_SHARED_BUFFER213 0x0754 +#define NPU_REG_SHARED_BUFFER214 0x0758 +#define NPU_REG_SHARED_BUFFER215 0x075C +#define NPU_REG_SHARED_BUFFER216 0x0760 +#define NPU_REG_SHARED_BUFFER217 0x0764 +#define NPU_REG_SHARED_BUFFER218 0x0768 +#define NPU_REG_SHARED_BUFFER219 0x076C +#define NPU_REG_SHARED_BUFFER220 0x0770 +#define NPU_REG_SHARED_BUFFER221 0x0774 +#define NPU_REG_SHARED_BUFFER222 0x0778 +#define NPU_REG_SHARED_BUFFER223 0x077C +#define NPU_REG_SHARED_BUFFER224 0x0780 +#define NPU_REG_SHARED_BUFFER225 0x0784 +#define NPU_REG_SHARED_BUFFER226 0x0788 +#define NPU_REG_SHARED_BUFFER227 0x078C +#define NPU_REG_SHARED_BUFFER228 0x0790 +#define NPU_REG_SHARED_BUFFER229 0x0794 +#define NPU_REG_SHARED_BUFFER230 0x0798 +#define NPU_REG_SHARED_BUFFER231 0x079C +#define NPU_REG_SHARED_BUFFER232 0x07A0 +#define NPU_REG_SHARED_BUFFER233 0x07A4 +#define NPU_REG_SHARED_BUFFER234 0x07A8 +#define NPU_REG_SHARED_BUFFER235 0x07AC +#define NPU_REG_SHARED_BUFFER236 0x07B0 +#define NPU_REG_SHARED_BUFFER237 0x07B4 +#define NPU_REG_SHARED_BUFFER238 0x07B8 +#define NPU_REG_SHARED_BUFFER239 0x07BC +#define NPU_REG_SHARED_BUFFER240 0x07C0 +#define NPU_REG_SHARED_BUFFER241 0x07C4 +#define NPU_REG_SHARED_BUFFER242 0x07C8 +#define NPU_REG_SHARED_BUFFER243 0x07CC +#define NPU_REG_SHARED_BUFFER244 0x07D0 +#define NPU_REG_SHARED_BUFFER245 0x07D4 +#define NPU_REG_SHARED_BUFFER246 0x07D8 +#define NPU_REG_SHARED_BUFFER247 0x07DC +#define NPU_REG_SHARED_BUFFER248 0x07E0 +#define NPU_REG_SHARED_BUFFER249 0x07E4 +#define NPU_REG_SHARED_BUFFER250 0x07E8 +#define NPU_REG_SHARED_BUFFER251 0x07EC +#define NPU_REG_SHARED_BUFFER252 0x07F0 +#define NPU_REG_SHARED_BUFFER253 0x07F4 +#define NPU_REG_SHARED_BUFFER254 0x07F8 +#define NPU_REG_SHARED_BUFFER255 0x07FC +#define SHARED_BUFFER_REGISTERS_SIZE 0x0800 + +// +// Register subpage TSU +// +#define NPU_REG_IFM_PAD_TOP 0x0800 
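Editorial aside (hedged): application code does not normally write the TSU registers in this subpage directly. They are loaded by the NPU itself as it executes a Vela-generated command stream (the NPU_SET_* commands in the cmd0/cmd1 enums further down correspond to these offsets); host software only programs the BASE subpage (QBASE0/QBASE1, QSIZE, BASEP*) to describe that stream and then starts execution through the CMD register. A rough sketch of that handoff follows, assuming <stdint.h>, a hypothetical MMIO helper npu_write_reg(), a device base address npu_base, and a 48-bit physical command-stream address cms_paddr; the real sequence (base-pointer setup, cache maintenance, IRQ handling) lives in the driver's device layer.

/* Hypothetical 32-bit MMIO write helper (illustrative only). */
static inline void npu_write_reg(uintptr_t npu_base, uint32_t offset, uint32_t value)
{
    *(volatile uint32_t *)(npu_base + offset) = value;
}

/* Sketch: point the NPU at a Vela command stream and start it. */
static void npu_start_command_stream(uintptr_t npu_base, uint64_t cms_paddr, uint32_t cms_size)
{
    npu_write_reg(npu_base, NPU_REG_QBASE0, (uint32_t)(cms_paddr & 0xFFFFFFFFu)); /* base bits [31:0]  */
    npu_write_reg(npu_base, NPU_REG_QBASE1, (uint32_t)(cms_paddr >> 32));         /* base bits [47:32] */
    npu_write_reg(npu_base, NPU_REG_QSIZE,  cms_size);                            /* command stream size */
    npu_write_reg(npu_base, NPU_REG_CMD,
                  (1u << 0)    /* transition_to_running_state (see cmd_r below)      */
                | (1u << 2)    /* keep clock_q_enable at its documented reset value   */
                | (1u << 3));  /* keep power_q_enable at its documented reset value   */
}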
+#define NPU_REG_IFM_PAD_LEFT 0x0804 +#define NPU_REG_IFM_PAD_RIGHT 0x0808 +#define NPU_REG_IFM_PAD_BOTTOM 0x080C +#define NPU_REG_IFM_DEPTH_M1 0x0810 +#define NPU_REG_IFM_PRECISION 0x0814 +#define NPU_REG_IFM_UPSCALE 0x081C +#define NPU_REG_IFM_ZERO_POINT 0x0824 +#define NPU_REG_IFM_WIDTH0_M1 0x0828 +#define NPU_REG_IFM_HEIGHT0_M1 0x082C +#define NPU_REG_IFM_HEIGHT1_M1 0x0830 +#define NPU_REG_IFM_IB_END 0x0834 +#define NPU_REG_IFM_REGION 0x083C +#define NPU_REG_OFM_WIDTH_M1 0x0844 +#define NPU_REG_OFM_HEIGHT_M1 0x0848 +#define NPU_REG_OFM_DEPTH_M1 0x084C +#define NPU_REG_OFM_PRECISION 0x0850 +#define NPU_REG_OFM_BLK_WIDTH_M1 0x0854 +#define NPU_REG_OFM_BLK_HEIGHT_M1 0x0858 +#define NPU_REG_OFM_BLK_DEPTH_M1 0x085C +#define NPU_REG_OFM_ZERO_POINT 0x0860 +#define NPU_REG_OFM_WIDTH0_M1 0x0868 +#define NPU_REG_OFM_HEIGHT0_M1 0x086C +#define NPU_REG_OFM_HEIGHT1_M1 0x0870 +#define NPU_REG_OFM_REGION 0x087C +#define NPU_REG_KERNEL_WIDTH_M1 0x0880 +#define NPU_REG_KERNEL_HEIGHT_M1 0x0884 +#define NPU_REG_KERNEL_STRIDE 0x0888 +#define NPU_REG_PARALLEL_MODE 0x088C +#define NPU_REG_ACC_FORMAT 0x0890 +#define NPU_REG_ACTIVATION 0x0894 +#define NPU_REG_ACTIVATION_MIN 0x0898 +#define NPU_REG_ACTIVATION_MAX 0x089C +#define NPU_REG_WEIGHT_REGION 0x08A0 +#define NPU_REG_SCALE_REGION 0x08A4 +#define NPU_REG_AB_START 0x08B4 +#define NPU_REG_BLOCKDEP 0x08BC +#define NPU_REG_DMA0_SRC_REGION 0x08C0 +#define NPU_REG_DMA0_DST_REGION 0x08C4 +#define NPU_REG_DMA0_SIZE0 0x08C8 +#define NPU_REG_DMA0_SIZE1 0x08CC +#define NPU_REG_IFM2_BROADCAST 0x0900 +#define NPU_REG_IFM2_SCALAR 0x0904 +#define NPU_REG_IFM2_PRECISION 0x0914 +#define NPU_REG_IFM2_ZERO_POINT 0x0924 +#define NPU_REG_IFM2_WIDTH0_M1 0x0928 +#define NPU_REG_IFM2_HEIGHT0_M1 0x092C +#define NPU_REG_IFM2_HEIGHT1_M1 0x0930 +#define NPU_REG_IFM2_IB_START 0x0934 +#define NPU_REG_IFM2_REGION 0x093C +#define NPU_REG_IFM_BASE0 0x0A00 +#define NPU_REG_IFM_BASE0_HI 0x0A04 +#define NPU_REG_IFM_BASE1 0x0A08 +#define NPU_REG_IFM_BASE1_HI 0x0A0C +#define NPU_REG_IFM_BASE2 0x0A10 +#define NPU_REG_IFM_BASE2_HI 0x0A14 +#define NPU_REG_IFM_BASE3 0x0A18 +#define NPU_REG_IFM_BASE3_HI 0x0A1C +#define NPU_REG_IFM_STRIDE_X 0x0A20 +#define NPU_REG_IFM_STRIDE_X_HI 0x0A24 +#define NPU_REG_IFM_STRIDE_Y 0x0A28 +#define NPU_REG_IFM_STRIDE_Y_HI 0x0A2C +#define NPU_REG_IFM_STRIDE_C 0x0A30 +#define NPU_REG_IFM_STRIDE_C_HI 0x0A34 +#define NPU_REG_OFM_BASE0 0x0A40 +#define NPU_REG_OFM_BASE0_HI 0x0A44 +#define NPU_REG_OFM_BASE1 0x0A48 +#define NPU_REG_OFM_BASE1_HI 0x0A4C +#define NPU_REG_OFM_BASE2 0x0A50 +#define NPU_REG_OFM_BASE2_HI 0x0A54 +#define NPU_REG_OFM_BASE3 0x0A58 +#define NPU_REG_OFM_BASE3_HI 0x0A5C +#define NPU_REG_OFM_STRIDE_X 0x0A60 +#define NPU_REG_OFM_STRIDE_X_HI 0x0A64 +#define NPU_REG_OFM_STRIDE_Y 0x0A68 +#define NPU_REG_OFM_STRIDE_Y_HI 0x0A6C +#define NPU_REG_OFM_STRIDE_C 0x0A70 +#define NPU_REG_OFM_STRIDE_C_HI 0x0A74 +#define NPU_REG_WEIGHT_BASE 0x0A80 +#define NPU_REG_WEIGHT_BASE_HI 0x0A84 +#define NPU_REG_WEIGHT_LENGTH 0x0A88 +#define NPU_REG_SCALE_BASE 0x0A90 +#define NPU_REG_SCALE_BASE_HI 0x0A94 +#define NPU_REG_SCALE_LENGTH 0x0A98 +#define NPU_REG_OFM_SCALE 0x0AA0 +#define NPU_REG_OFM_SCALE_SHIFT 0x0AA4 +#define NPU_REG_OPA_SCALE 0x0AA8 +#define NPU_REG_OPA_SCALE_SHIFT 0x0AAC +#define NPU_REG_OPB_SCALE 0x0AB0 +#define NPU_REG_DMA0_SRC 0x0AC0 +#define NPU_REG_DMA0_SRC_HI 0x0AC4 +#define NPU_REG_DMA0_DST 0x0AC8 +#define NPU_REG_DMA0_DST_HI 0x0ACC +#define NPU_REG_DMA0_LEN 0x0AD0 +#define NPU_REG_DMA0_LEN_HI 0x0AD4 +#define NPU_REG_DMA0_SKIP0 0x0AD8 +#define 
NPU_REG_DMA0_SKIP0_HI 0x0ADC +#define NPU_REG_DMA0_SKIP1 0x0AE0 +#define NPU_REG_DMA0_SKIP1_HI 0x0AE4 +#define NPU_REG_IFM2_BASE0 0x0B00 +#define NPU_REG_IFM2_BASE0_HI 0x0B04 +#define NPU_REG_IFM2_BASE1 0x0B08 +#define NPU_REG_IFM2_BASE1_HI 0x0B0C +#define NPU_REG_IFM2_BASE2 0x0B10 +#define NPU_REG_IFM2_BASE2_HI 0x0B14 +#define NPU_REG_IFM2_BASE3 0x0B18 +#define NPU_REG_IFM2_BASE3_HI 0x0B1C +#define NPU_REG_IFM2_STRIDE_X 0x0B20 +#define NPU_REG_IFM2_STRIDE_X_HI 0x0B24 +#define NPU_REG_IFM2_STRIDE_Y 0x0B28 +#define NPU_REG_IFM2_STRIDE_Y_HI 0x0B2C +#define NPU_REG_IFM2_STRIDE_C 0x0B30 +#define NPU_REG_IFM2_STRIDE_C_HI 0x0B34 +#define NPU_REG_WEIGHT1_BASE 0x0B40 +#define NPU_REG_WEIGHT1_BASE_HI 0x0B44 +#define NPU_REG_WEIGHT1_LENGTH 0x0B48 +#define NPU_REG_SCALE1_BASE 0x0B50 +#define NPU_REG_SCALE1_BASE_HI 0x0B54 +#define NPU_REG_SCALE1_LENGTH 0x0B58 +#define TSU_REGISTERS_SIZE 0x0B5C + +// +// Register subpage TSU_DEBUG +// +#define NPU_REG_KERNEL_X 0x0200 +#define NPU_REG_KERNEL_Y 0x0204 +#define NPU_REG_KERNEL_W_M1 0x0208 +#define NPU_REG_KERNEL_H_M1 0x020C +#define NPU_REG_OFM_CBLK_WIDTH_M1 0x0210 +#define NPU_REG_OFM_CBLK_HEIGHT_M1 0x0214 +#define NPU_REG_OFM_CBLK_DEPTH_M1 0x0218 +#define NPU_REG_IFM_CBLK_DEPTH_M1 0x021C +#define NPU_REG_OFM_X 0x0220 +#define NPU_REG_OFM_Y 0x0224 +#define NPU_REG_OFM_Z 0x0228 +#define NPU_REG_IFM_Z 0x022C +#define NPU_REG_PAD_TOP 0x0230 +#define NPU_REG_PAD_LEFT 0x0234 +#define NPU_REG_IFM_CBLK_WIDTH 0x0238 +#define NPU_REG_IFM_CBLK_HEIGHT 0x023C +#define NPU_REG_DMA_IFM_SRC 0x0240 +#define NPU_REG_DMA_IFM_SRC_HI 0x0244 +#define NPU_REG_DMA_IFM_DST 0x0248 +#define NPU_REG_DMA_OFM_SRC 0x024C +#define NPU_REG_DMA_OFM_DST 0x0250 +#define NPU_REG_DMA_OFM_DST_HI 0x0254 +#define NPU_REG_DMA_WEIGHT_SRC 0x0258 +#define NPU_REG_DMA_WEIGHT_SRC_HI 0x025C +#define NPU_REG_DMA_CMD_SRC 0x0260 +#define NPU_REG_DMA_CMD_SRC_HI 0x0264 +#define NPU_REG_DMA_CMD_SIZE 0x0268 +#define NPU_REG_DMA_M2M_SRC 0x026C +#define NPU_REG_DMA_M2M_SRC_HI 0x0270 +#define NPU_REG_DMA_M2M_DST 0x0274 +#define NPU_REG_DMA_M2M_DST_HI 0x0278 +#define NPU_REG_CURRENT_QREAD 0x027C +#define NPU_REG_DMA_SCALE_SRC 0x0280 +#define NPU_REG_DMA_SCALE_SRC_HI 0x0284 +#define NPU_REG_CURRENT_BLOCK 0x02B4 +#define NPU_REG_CURRENT_OP 0x02B8 +#define NPU_REG_CURRENT_CMD 0x02BC +#define TSU_DEBUG_REGISTERS_SIZE 0x02C0 + +#ifdef __cplusplus + +// Enum types + +enum class acc_format : uint16_t +{ + INT_32BIT = 0, + INT_40BIT = 1, + FP_S5_10 = 2, +}; + +enum class activation : uint16_t +{ + NONE = 0, + TANH = 3, + SIGMOID = 4, + LUT_START = 16, + LUT_END = 23, +}; + +enum class axi_mem_encoding_type : uint8_t +{ + DEVICE_NON_BUFFERABLE = 0x0, + DEVICE_BUFFERABLE = 0x1, + NORMAL_NON_CACHEABLE_NON_BUFFERABLE = 0x2, + NORMAL_NON_CACHEABLE_BUFFERABLE = 0x3, + WRITE_THROUGH_NO_ALLOCATE = 0x4, + WRITE_THROUGH_READ_ALLOCATE = 0x5, + WRITE_THROUGH_WRITE_ALLOCATE = 0x6, + WRITE_THROUGH_READ_AND_WRITE_ALLOCATE = 0x7, + WRITE_BACK_NO_ALLOCATE = 0x8, + WRITE_BACK_READ_ALLOCATE = 0x9, + WRITE_BACK_WRITE_ALLOCATE = 0xA, + WRITE_BACK_READ_AND_WRITE_ALLOCATE = 0xB, + RESERVED_12 = 0xC, + RESERVED_13 = 0xD, + RESERVED_14 = 0xE, + RESERVED_15 = 0xF, +}; + +enum class clip_range : uint8_t +{ + OFM_PRECISION = 0, + FORCE_UINT8 = 2, + FORCE_INT8 = 3, + FORCE_INT16 = 5, +}; + +enum class cmd0 : uint16_t +{ + NPU_OP_STOP = 0x000, + NPU_OP_IRQ = 0x001, + NPU_OP_CONV = 0x002, + NPU_OP_DEPTHWISE = 0x003, + NPU_OP_POOL = 0x005, + NPU_OP_ELEMENTWISE = 0x006, + NPU_OP_DMA_START = 0x010, + NPU_OP_DMA_WAIT = 0x011, + NPU_OP_KERNEL_WAIT = 
0x012, + NPU_OP_PMU_MASK = 0x013, + NPU_SET_IFM_PAD_TOP = 0x100, + NPU_SET_IFM_PAD_LEFT = 0x101, + NPU_SET_IFM_PAD_RIGHT = 0x102, + NPU_SET_IFM_PAD_BOTTOM = 0x103, + NPU_SET_IFM_DEPTH_M1 = 0x104, + NPU_SET_IFM_PRECISION = 0x105, + NPU_SET_IFM_UPSCALE = 0x107, + NPU_SET_IFM_ZERO_POINT = 0x109, + NPU_SET_IFM_WIDTH0_M1 = 0x10A, + NPU_SET_IFM_HEIGHT0_M1 = 0x10B, + NPU_SET_IFM_HEIGHT1_M1 = 0x10C, + NPU_SET_IFM_IB_END = 0x10D, + NPU_SET_IFM_REGION = 0x10F, + NPU_SET_OFM_WIDTH_M1 = 0x111, + NPU_SET_OFM_HEIGHT_M1 = 0x112, + NPU_SET_OFM_DEPTH_M1 = 0x113, + NPU_SET_OFM_PRECISION = 0x114, + NPU_SET_OFM_BLK_WIDTH_M1 = 0x115, + NPU_SET_OFM_BLK_HEIGHT_M1 = 0x116, + NPU_SET_OFM_BLK_DEPTH_M1 = 0x117, + NPU_SET_OFM_ZERO_POINT = 0x118, + NPU_SET_OFM_WIDTH0_M1 = 0x11A, + NPU_SET_OFM_HEIGHT0_M1 = 0x11B, + NPU_SET_OFM_HEIGHT1_M1 = 0x11C, + NPU_SET_OFM_REGION = 0x11F, + NPU_SET_KERNEL_WIDTH_M1 = 0x120, + NPU_SET_KERNEL_HEIGHT_M1 = 0x121, + NPU_SET_KERNEL_STRIDE = 0x122, + NPU_SET_PARALLEL_MODE = 0x123, + NPU_SET_ACC_FORMAT = 0x124, + NPU_SET_ACTIVATION = 0x125, + NPU_SET_ACTIVATION_MIN = 0x126, + NPU_SET_ACTIVATION_MAX = 0x127, + NPU_SET_WEIGHT_REGION = 0x128, + NPU_SET_SCALE_REGION = 0x129, + NPU_SET_AB_START = 0x12D, + NPU_SET_BLOCKDEP = 0x12F, + NPU_SET_DMA0_SRC_REGION = 0x130, + NPU_SET_DMA0_DST_REGION = 0x131, + NPU_SET_DMA0_SIZE0 = 0x132, + NPU_SET_DMA0_SIZE1 = 0x133, + NPU_SET_IFM2_BROADCAST = 0x180, + NPU_SET_IFM2_SCALAR = 0x181, + NPU_SET_IFM2_PRECISION = 0x185, + NPU_SET_IFM2_ZERO_POINT = 0x189, + NPU_SET_IFM2_WIDTH0_M1 = 0x18A, + NPU_SET_IFM2_HEIGHT0_M1 = 0x18B, + NPU_SET_IFM2_HEIGHT1_M1 = 0x18C, + NPU_SET_IFM2_IB_START = 0x18D, + NPU_SET_IFM2_REGION = 0x18F, +}; + +enum class cmd1 : uint16_t +{ + NPU_SET_IFM_BASE0 = 0x000, + NPU_SET_IFM_BASE1 = 0x001, + NPU_SET_IFM_BASE2 = 0x002, + NPU_SET_IFM_BASE3 = 0x003, + NPU_SET_IFM_STRIDE_X = 0x004, + NPU_SET_IFM_STRIDE_Y = 0x005, + NPU_SET_IFM_STRIDE_C = 0x006, + NPU_SET_OFM_BASE0 = 0x010, + NPU_SET_OFM_BASE1 = 0x011, + NPU_SET_OFM_BASE2 = 0x012, + NPU_SET_OFM_BASE3 = 0x013, + NPU_SET_OFM_STRIDE_X = 0x014, + NPU_SET_OFM_STRIDE_Y = 0x015, + NPU_SET_OFM_STRIDE_C = 0x016, + NPU_SET_WEIGHT_BASE = 0x020, + NPU_SET_WEIGHT_LENGTH = 0x021, + NPU_SET_SCALE_BASE = 0x022, + NPU_SET_SCALE_LENGTH = 0x023, + NPU_SET_OFM_SCALE = 0x024, + NPU_SET_OPA_SCALE = 0x025, + NPU_SET_OPB_SCALE = 0x026, + NPU_SET_DMA0_SRC = 0x030, + NPU_SET_DMA0_DST = 0x031, + NPU_SET_DMA0_LEN = 0x032, + NPU_SET_DMA0_SKIP0 = 0x033, + NPU_SET_DMA0_SKIP1 = 0x034, + NPU_SET_IFM2_BASE0 = 0x080, + NPU_SET_IFM2_BASE1 = 0x081, + NPU_SET_IFM2_BASE2 = 0x082, + NPU_SET_IFM2_BASE3 = 0x083, + NPU_SET_IFM2_STRIDE_X = 0x084, + NPU_SET_IFM2_STRIDE_Y = 0x085, + NPU_SET_IFM2_STRIDE_C = 0x086, + NPU_SET_WEIGHT1_BASE = 0x090, + NPU_SET_WEIGHT1_LENGTH = 0x091, + NPU_SET_SCALE1_BASE = 0x092, + NPU_SET_SCALE1_LENGTH = 0x093, +}; + +enum class data_format : uint8_t +{ + NHWC = 0, + NHCWB16 = 1, +}; + +enum class elementwise_mode : uint16_t +{ + MUL = 0, + ADD = 1, + SUB = 2, + MIN = 3, + MAX = 4, + LRELU = 5, + ABS = 6, + CLZ = 7, + SHR = 8, + SHL = 9, +}; + +enum class ifm_precision : uint8_t +{ + U8 = 0, + S8 = 1, + U16 = 4, + S16 = 5, + S32 = 9, +}; + +enum class ifm_scale_mode : uint8_t +{ + SCALE_16BIT = 0, + SCALE_OPA_32BIT = 1, + SCALE_OPB_32BIT = 2, +}; + +enum class macs_per_cc : uint8_t +{ + MACS_PER_CC_IS_5 = 0x5, + MACS_PER_CC_IS_6 = 0x6, + MACS_PER_CC_IS_7 = 0x7, + MACS_PER_CC_IS_8 = 0x8, +}; + +enum class memory_type : uint8_t +{ + AXI0_OUTSTANDING_COUNTER0 = 0, + AXI0_OUTSTANDING_COUNTER1 = 1, + 
AXI1_OUTSTANDING_COUNTER2 = 2, + AXI1_OUTSTANDING_COUNTER3 = 3, +}; + +enum class ofm_precision : uint8_t +{ + U8 = 0, + S8 = 1, + U16 = 2, + S16 = 3, + S32 = 5, +}; + +enum class pmu_event_type : uint16_t +{ + NO_EVENT = 0x00, + CYCLE = 0x11, + NPU_IDLE = 0x20, + CC_STALLED_ON_BLOCKDEP = 0x21, + CC_STALLED_ON_SHRAM_RECONFIG = 0x22, + NPU_ACTIVE = 0x23, + MAC_ACTIVE = 0x30, + MAC_ACTIVE_8BIT = 0x31, + MAC_ACTIVE_16BIT = 0x32, + MAC_DPU_ACTIVE = 0x33, + MAC_STALLED_BY_WD_ACC = 0x34, + MAC_STALLED_BY_WD = 0x35, + MAC_STALLED_BY_ACC = 0x36, + MAC_STALLED_BY_IB = 0x37, + MAC_ACTIVE_32BIT = 0x38, + MAC_STALLED_BY_INT_W = 0x39, + MAC_STALLED_BY_INT_ACC = 0x3A, + AO_ACTIVE = 0x40, + AO_ACTIVE_8BIT = 0x41, + AO_ACTIVE_16BIT = 0x42, + AO_STALLED_BY_OFMP_OB = 0x43, + AO_STALLED_BY_OFMP = 0x44, + AO_STALLED_BY_OB = 0x45, + AO_STALLED_BY_ACC_IB = 0x46, + AO_STALLED_BY_ACC = 0x47, + AO_STALLED_BY_IB = 0x48, + WD_ACTIVE = 0x50, + WD_STALLED = 0x51, + WD_STALLED_BY_WS = 0x52, + WD_STALLED_BY_WD_BUF = 0x53, + WD_PARSE_ACTIVE = 0x54, + WD_PARSE_STALLED = 0x55, + WD_PARSE_STALLED_IN = 0x56, + WD_PARSE_STALLED_OUT = 0x57, + WD_TRANS_WS = 0x58, + WD_TRANS_WB = 0x59, + WD_TRANS_DW0 = 0x5a, + WD_TRANS_DW1 = 0x5b, + AXI0_RD_TRANS_ACCEPTED = 0x80, + AXI0_RD_TRANS_COMPLETED = 0x81, + AXI0_RD_DATA_BEAT_RECEIVED = 0x82, + AXI0_RD_TRAN_REQ_STALLED = 0x83, + AXI0_WR_TRANS_ACCEPTED = 0x84, + AXI0_WR_TRANS_COMPLETED_M = 0x85, + AXI0_WR_TRANS_COMPLETED_S = 0x86, + AXI0_WR_DATA_BEAT_WRITTEN = 0x87, + AXI0_WR_TRAN_REQ_STALLED = 0x88, + AXI0_WR_DATA_BEAT_STALLED = 0x89, + AXI0_ENABLED_CYCLES = 0x8c, + AXI0_RD_STALL_LIMIT = 0x8e, + AXI0_WR_STALL_LIMIT = 0x8f, + AXI1_RD_TRANS_ACCEPTED = 0x180, + AXI1_RD_TRANS_COMPLETED = 0x181, + AXI1_RD_DATA_BEAT_RECEIVED = 0x182, + AXI1_RD_TRAN_REQ_STALLED = 0x183, + AXI1_WR_TRANS_ACCEPTED = 0x184, + AXI1_WR_TRANS_COMPLETED_M = 0x185, + AXI1_WR_TRANS_COMPLETED_S = 0x186, + AXI1_WR_DATA_BEAT_WRITTEN = 0x187, + AXI1_WR_TRAN_REQ_STALLED = 0x188, + AXI1_WR_DATA_BEAT_STALLED = 0x189, + AXI1_ENABLED_CYCLES = 0x18c, + AXI1_RD_STALL_LIMIT = 0x18e, + AXI1_WR_STALL_LIMIT = 0x18f, + AXI_LATENCY_ANY = 0xa0, + AXI_LATENCY_32 = 0xa1, + AXI_LATENCY_64 = 0xa2, + AXI_LATENCY_128 = 0xa3, + AXI_LATENCY_256 = 0xa4, + AXI_LATENCY_512 = 0xa5, + AXI_LATENCY_1024 = 0xa6, + ECC_DMA = 0xb0, + ECC_SB0 = 0xb1, + ECC_SB1 = 0x1b1, +}; + +enum class pooling_mode : uint16_t +{ + MAX = 0, + AVERAGE = 1, + REDUCE_SUM = 2, +}; + +enum class privilege_level : uint8_t +{ + USER = 0, + PRIVILEGED = 1, +}; + +enum class resampling_mode : uint8_t +{ + NONE = 0, + NEAREST = 1, + TRANSPOSE = 2, +}; + +enum class rounding : uint8_t +{ + TFL = 0, + TRUNCATE = 1, + NATURAL = 2, +}; + +enum class security_level : uint8_t +{ + SECURE = 0, + NON_SECURE = 1, +}; + +enum class shram_size : uint8_t +{ + SHRAM_96KB = 0x60, + SHRAM_48KB = 0x30, + SHRAM_24KB = 0x18, + SHRAM_16KB = 0x10, +}; + +enum class state : uint8_t +{ + STOPPED = 0, + RUNNING = 1, +}; + +enum class stride_mode : uint8_t +{ + STRIDE_MODE_1D = 0, + STRIDE_MODE_2D = 1, + STRIDE_MODE_3D = 2, +}; + +#else + +enum acc_format +{ + ACC_FORMAT_INT_32BIT = 0, + ACC_FORMAT_INT_40BIT = 1, + ACC_FORMAT_FP_S5_10 = 2, +}; + +enum activation +{ + ACTIVATION_NONE = 0, + ACTIVATION_TANH = 3, + ACTIVATION_SIGMOID = 4, + ACTIVATION_LUT_START = 16, + ACTIVATION_LUT_END = 23, +}; + +enum axi_mem_encoding_type +{ + AXI_MEM_ENCODING_TYPE_DEVICE_NON_BUFFERABLE = 0x0, + AXI_MEM_ENCODING_TYPE_DEVICE_BUFFERABLE = 0x1, + AXI_MEM_ENCODING_TYPE_NORMAL_NON_CACHEABLE_NON_BUFFERABLE = 0x2, + 
AXI_MEM_ENCODING_TYPE_NORMAL_NON_CACHEABLE_BUFFERABLE = 0x3, + AXI_MEM_ENCODING_TYPE_WRITE_THROUGH_NO_ALLOCATE = 0x4, + AXI_MEM_ENCODING_TYPE_WRITE_THROUGH_READ_ALLOCATE = 0x5, + AXI_MEM_ENCODING_TYPE_WRITE_THROUGH_WRITE_ALLOCATE = 0x6, + AXI_MEM_ENCODING_TYPE_WRITE_THROUGH_READ_AND_WRITE_ALLOCATE = 0x7, + AXI_MEM_ENCODING_TYPE_WRITE_BACK_NO_ALLOCATE = 0x8, + AXI_MEM_ENCODING_TYPE_WRITE_BACK_READ_ALLOCATE = 0x9, + AXI_MEM_ENCODING_TYPE_WRITE_BACK_WRITE_ALLOCATE = 0xA, + AXI_MEM_ENCODING_TYPE_WRITE_BACK_READ_AND_WRITE_ALLOCATE = 0xB, + AXI_MEM_ENCODING_TYPE_RESERVED_12 = 0xC, + AXI_MEM_ENCODING_TYPE_RESERVED_13 = 0xD, + AXI_MEM_ENCODING_TYPE_RESERVED_14 = 0xE, + AXI_MEM_ENCODING_TYPE_RESERVED_15 = 0xF, +}; + +enum clip_range +{ + CLIP_RANGE_OFM_PRECISION = 0, + CLIP_RANGE_FORCE_UINT8 = 2, + CLIP_RANGE_FORCE_INT8 = 3, + CLIP_RANGE_FORCE_INT16 = 5, +}; + +enum cmd0 +{ + CMD0_NPU_OP_STOP = 0x000, + CMD0_NPU_OP_IRQ = 0x001, + CMD0_NPU_OP_CONV = 0x002, + CMD0_NPU_OP_DEPTHWISE = 0x003, + CMD0_NPU_OP_POOL = 0x005, + CMD0_NPU_OP_ELEMENTWISE = 0x006, + CMD0_NPU_OP_DMA_START = 0x010, + CMD0_NPU_OP_DMA_WAIT = 0x011, + CMD0_NPU_OP_KERNEL_WAIT = 0x012, + CMD0_NPU_OP_PMU_MASK = 0x013, + CMD0_NPU_SET_IFM_PAD_TOP = 0x100, + CMD0_NPU_SET_IFM_PAD_LEFT = 0x101, + CMD0_NPU_SET_IFM_PAD_RIGHT = 0x102, + CMD0_NPU_SET_IFM_PAD_BOTTOM = 0x103, + CMD0_NPU_SET_IFM_DEPTH_M1 = 0x104, + CMD0_NPU_SET_IFM_PRECISION = 0x105, + CMD0_NPU_SET_IFM_UPSCALE = 0x107, + CMD0_NPU_SET_IFM_ZERO_POINT = 0x109, + CMD0_NPU_SET_IFM_WIDTH0_M1 = 0x10A, + CMD0_NPU_SET_IFM_HEIGHT0_M1 = 0x10B, + CMD0_NPU_SET_IFM_HEIGHT1_M1 = 0x10C, + CMD0_NPU_SET_IFM_IB_END = 0x10D, + CMD0_NPU_SET_IFM_REGION = 0x10F, + CMD0_NPU_SET_OFM_WIDTH_M1 = 0x111, + CMD0_NPU_SET_OFM_HEIGHT_M1 = 0x112, + CMD0_NPU_SET_OFM_DEPTH_M1 = 0x113, + CMD0_NPU_SET_OFM_PRECISION = 0x114, + CMD0_NPU_SET_OFM_BLK_WIDTH_M1 = 0x115, + CMD0_NPU_SET_OFM_BLK_HEIGHT_M1 = 0x116, + CMD0_NPU_SET_OFM_BLK_DEPTH_M1 = 0x117, + CMD0_NPU_SET_OFM_ZERO_POINT = 0x118, + CMD0_NPU_SET_OFM_WIDTH0_M1 = 0x11A, + CMD0_NPU_SET_OFM_HEIGHT0_M1 = 0x11B, + CMD0_NPU_SET_OFM_HEIGHT1_M1 = 0x11C, + CMD0_NPU_SET_OFM_REGION = 0x11F, + CMD0_NPU_SET_KERNEL_WIDTH_M1 = 0x120, + CMD0_NPU_SET_KERNEL_HEIGHT_M1 = 0x121, + CMD0_NPU_SET_KERNEL_STRIDE = 0x122, + CMD0_NPU_SET_PARALLEL_MODE = 0x123, + CMD0_NPU_SET_ACC_FORMAT = 0x124, + CMD0_NPU_SET_ACTIVATION = 0x125, + CMD0_NPU_SET_ACTIVATION_MIN = 0x126, + CMD0_NPU_SET_ACTIVATION_MAX = 0x127, + CMD0_NPU_SET_WEIGHT_REGION = 0x128, + CMD0_NPU_SET_SCALE_REGION = 0x129, + CMD0_NPU_SET_AB_START = 0x12D, + CMD0_NPU_SET_BLOCKDEP = 0x12F, + CMD0_NPU_SET_DMA0_SRC_REGION = 0x130, + CMD0_NPU_SET_DMA0_DST_REGION = 0x131, + CMD0_NPU_SET_DMA0_SIZE0 = 0x132, + CMD0_NPU_SET_DMA0_SIZE1 = 0x133, + CMD0_NPU_SET_IFM2_BROADCAST = 0x180, + CMD0_NPU_SET_IFM2_SCALAR = 0x181, + CMD0_NPU_SET_IFM2_PRECISION = 0x185, + CMD0_NPU_SET_IFM2_ZERO_POINT = 0x189, + CMD0_NPU_SET_IFM2_WIDTH0_M1 = 0x18A, + CMD0_NPU_SET_IFM2_HEIGHT0_M1 = 0x18B, + CMD0_NPU_SET_IFM2_HEIGHT1_M1 = 0x18C, + CMD0_NPU_SET_IFM2_IB_START = 0x18D, + CMD0_NPU_SET_IFM2_REGION = 0x18F, +}; + +enum cmd1 +{ + CMD1_NPU_SET_IFM_BASE0 = 0x000, + CMD1_NPU_SET_IFM_BASE1 = 0x001, + CMD1_NPU_SET_IFM_BASE2 = 0x002, + CMD1_NPU_SET_IFM_BASE3 = 0x003, + CMD1_NPU_SET_IFM_STRIDE_X = 0x004, + CMD1_NPU_SET_IFM_STRIDE_Y = 0x005, + CMD1_NPU_SET_IFM_STRIDE_C = 0x006, + CMD1_NPU_SET_OFM_BASE0 = 0x010, + CMD1_NPU_SET_OFM_BASE1 = 0x011, + CMD1_NPU_SET_OFM_BASE2 = 0x012, + CMD1_NPU_SET_OFM_BASE3 = 0x013, + CMD1_NPU_SET_OFM_STRIDE_X = 0x014, + CMD1_NPU_SET_OFM_STRIDE_Y = 
0x015, + CMD1_NPU_SET_OFM_STRIDE_C = 0x016, + CMD1_NPU_SET_WEIGHT_BASE = 0x020, + CMD1_NPU_SET_WEIGHT_LENGTH = 0x021, + CMD1_NPU_SET_SCALE_BASE = 0x022, + CMD1_NPU_SET_SCALE_LENGTH = 0x023, + CMD1_NPU_SET_OFM_SCALE = 0x024, + CMD1_NPU_SET_OPA_SCALE = 0x025, + CMD1_NPU_SET_OPB_SCALE = 0x026, + CMD1_NPU_SET_DMA0_SRC = 0x030, + CMD1_NPU_SET_DMA0_DST = 0x031, + CMD1_NPU_SET_DMA0_LEN = 0x032, + CMD1_NPU_SET_DMA0_SKIP0 = 0x033, + CMD1_NPU_SET_DMA0_SKIP1 = 0x034, + CMD1_NPU_SET_IFM2_BASE0 = 0x080, + CMD1_NPU_SET_IFM2_BASE1 = 0x081, + CMD1_NPU_SET_IFM2_BASE2 = 0x082, + CMD1_NPU_SET_IFM2_BASE3 = 0x083, + CMD1_NPU_SET_IFM2_STRIDE_X = 0x084, + CMD1_NPU_SET_IFM2_STRIDE_Y = 0x085, + CMD1_NPU_SET_IFM2_STRIDE_C = 0x086, + CMD1_NPU_SET_WEIGHT1_BASE = 0x090, + CMD1_NPU_SET_WEIGHT1_LENGTH = 0x091, + CMD1_NPU_SET_SCALE1_BASE = 0x092, + CMD1_NPU_SET_SCALE1_LENGTH = 0x093, +}; + +enum data_format +{ + DATA_FORMAT_NHWC = 0, + DATA_FORMAT_NHCWB16 = 1, +}; + +enum elementwise_mode +{ + ELEMENTWISE_MODE_MUL = 0, + ELEMENTWISE_MODE_ADD = 1, + ELEMENTWISE_MODE_SUB = 2, + ELEMENTWISE_MODE_MIN = 3, + ELEMENTWISE_MODE_MAX = 4, + ELEMENTWISE_MODE_LRELU = 5, + ELEMENTWISE_MODE_ABS = 6, + ELEMENTWISE_MODE_CLZ = 7, + ELEMENTWISE_MODE_SHR = 8, + ELEMENTWISE_MODE_SHL = 9, +}; + +enum ifm_precision +{ + IFM_PRECISION_U8 = 0, + IFM_PRECISION_S8 = 1, + IFM_PRECISION_U16 = 4, + IFM_PRECISION_S16 = 5, + IFM_PRECISION_S32 = 9, +}; + +enum ifm_scale_mode +{ + IFM_SCALE_MODE_SCALE_16BIT = 0, + IFM_SCALE_MODE_SCALE_OPA_32BIT = 1, + IFM_SCALE_MODE_SCALE_OPB_32BIT = 2, +}; + +enum macs_per_cc +{ + MACS_PER_CC_MACS_PER_CC_IS_5 = 0x5, + MACS_PER_CC_MACS_PER_CC_IS_6 = 0x6, + MACS_PER_CC_MACS_PER_CC_IS_7 = 0x7, + MACS_PER_CC_MACS_PER_CC_IS_8 = 0x8, +}; + +enum memory_type +{ + MEMORY_TYPE_AXI0_OUTSTANDING_COUNTER0 = 0, + MEMORY_TYPE_AXI0_OUTSTANDING_COUNTER1 = 1, + MEMORY_TYPE_AXI1_OUTSTANDING_COUNTER2 = 2, + MEMORY_TYPE_AXI1_OUTSTANDING_COUNTER3 = 3, +}; + +enum ofm_precision +{ + OFM_PRECISION_U8 = 0, + OFM_PRECISION_S8 = 1, + OFM_PRECISION_U16 = 2, + OFM_PRECISION_S16 = 3, + OFM_PRECISION_S32 = 5, +}; + +enum pmu_event_type +{ + PMU_EVENT_TYPE_NO_EVENT = 0x00, + PMU_EVENT_TYPE_CYCLE = 0x11, + PMU_EVENT_TYPE_NPU_IDLE = 0x20, + PMU_EVENT_TYPE_CC_STALLED_ON_BLOCKDEP = 0x21, + PMU_EVENT_TYPE_CC_STALLED_ON_SHRAM_RECONFIG = 0x22, + PMU_EVENT_TYPE_NPU_ACTIVE = 0x23, + PMU_EVENT_TYPE_MAC_ACTIVE = 0x30, + PMU_EVENT_TYPE_MAC_ACTIVE_8BIT = 0x31, + PMU_EVENT_TYPE_MAC_ACTIVE_16BIT = 0x32, + PMU_EVENT_TYPE_MAC_DPU_ACTIVE = 0x33, + PMU_EVENT_TYPE_MAC_STALLED_BY_WD_ACC = 0x34, + PMU_EVENT_TYPE_MAC_STALLED_BY_WD = 0x35, + PMU_EVENT_TYPE_MAC_STALLED_BY_ACC = 0x36, + PMU_EVENT_TYPE_MAC_STALLED_BY_IB = 0x37, + PMU_EVENT_TYPE_MAC_ACTIVE_32BIT = 0x38, + PMU_EVENT_TYPE_MAC_STALLED_BY_INT_W = 0x39, + PMU_EVENT_TYPE_MAC_STALLED_BY_INT_ACC = 0x3A, + PMU_EVENT_TYPE_AO_ACTIVE = 0x40, + PMU_EVENT_TYPE_AO_ACTIVE_8BIT = 0x41, + PMU_EVENT_TYPE_AO_ACTIVE_16BIT = 0x42, + PMU_EVENT_TYPE_AO_STALLED_BY_OFMP_OB = 0x43, + PMU_EVENT_TYPE_AO_STALLED_BY_OFMP = 0x44, + PMU_EVENT_TYPE_AO_STALLED_BY_OB = 0x45, + PMU_EVENT_TYPE_AO_STALLED_BY_ACC_IB = 0x46, + PMU_EVENT_TYPE_AO_STALLED_BY_ACC = 0x47, + PMU_EVENT_TYPE_AO_STALLED_BY_IB = 0x48, + PMU_EVENT_TYPE_WD_ACTIVE = 0x50, + PMU_EVENT_TYPE_WD_STALLED = 0x51, + PMU_EVENT_TYPE_WD_STALLED_BY_WS = 0x52, + PMU_EVENT_TYPE_WD_STALLED_BY_WD_BUF = 0x53, + PMU_EVENT_TYPE_WD_PARSE_ACTIVE = 0x54, + PMU_EVENT_TYPE_WD_PARSE_STALLED = 0x55, + PMU_EVENT_TYPE_WD_PARSE_STALLED_IN = 0x56, + PMU_EVENT_TYPE_WD_PARSE_STALLED_OUT = 0x57, + 
PMU_EVENT_TYPE_WD_TRANS_WS = 0x58, + PMU_EVENT_TYPE_WD_TRANS_WB = 0x59, + PMU_EVENT_TYPE_WD_TRANS_DW0 = 0x5a, + PMU_EVENT_TYPE_WD_TRANS_DW1 = 0x5b, + PMU_EVENT_TYPE_AXI0_RD_TRANS_ACCEPTED = 0x80, + PMU_EVENT_TYPE_AXI0_RD_TRANS_COMPLETED = 0x81, + PMU_EVENT_TYPE_AXI0_RD_DATA_BEAT_RECEIVED = 0x82, + PMU_EVENT_TYPE_AXI0_RD_TRAN_REQ_STALLED = 0x83, + PMU_EVENT_TYPE_AXI0_WR_TRANS_ACCEPTED = 0x84, + PMU_EVENT_TYPE_AXI0_WR_TRANS_COMPLETED_M = 0x85, + PMU_EVENT_TYPE_AXI0_WR_TRANS_COMPLETED_S = 0x86, + PMU_EVENT_TYPE_AXI0_WR_DATA_BEAT_WRITTEN = 0x87, + PMU_EVENT_TYPE_AXI0_WR_TRAN_REQ_STALLED = 0x88, + PMU_EVENT_TYPE_AXI0_WR_DATA_BEAT_STALLED = 0x89, + PMU_EVENT_TYPE_AXI0_ENABLED_CYCLES = 0x8c, + PMU_EVENT_TYPE_AXI0_RD_STALL_LIMIT = 0x8e, + PMU_EVENT_TYPE_AXI0_WR_STALL_LIMIT = 0x8f, + PMU_EVENT_TYPE_AXI1_RD_TRANS_ACCEPTED = 0x180, + PMU_EVENT_TYPE_AXI1_RD_TRANS_COMPLETED = 0x181, + PMU_EVENT_TYPE_AXI1_RD_DATA_BEAT_RECEIVED = 0x182, + PMU_EVENT_TYPE_AXI1_RD_TRAN_REQ_STALLED = 0x183, + PMU_EVENT_TYPE_AXI1_WR_TRANS_ACCEPTED = 0x184, + PMU_EVENT_TYPE_AXI1_WR_TRANS_COMPLETED_M = 0x185, + PMU_EVENT_TYPE_AXI1_WR_TRANS_COMPLETED_S = 0x186, + PMU_EVENT_TYPE_AXI1_WR_DATA_BEAT_WRITTEN = 0x187, + PMU_EVENT_TYPE_AXI1_WR_TRAN_REQ_STALLED = 0x188, + PMU_EVENT_TYPE_AXI1_WR_DATA_BEAT_STALLED = 0x189, + PMU_EVENT_TYPE_AXI1_ENABLED_CYCLES = 0x18c, + PMU_EVENT_TYPE_AXI1_RD_STALL_LIMIT = 0x18e, + PMU_EVENT_TYPE_AXI1_WR_STALL_LIMIT = 0x18f, + PMU_EVENT_TYPE_AXI_LATENCY_ANY = 0xa0, + PMU_EVENT_TYPE_AXI_LATENCY_32 = 0xa1, + PMU_EVENT_TYPE_AXI_LATENCY_64 = 0xa2, + PMU_EVENT_TYPE_AXI_LATENCY_128 = 0xa3, + PMU_EVENT_TYPE_AXI_LATENCY_256 = 0xa4, + PMU_EVENT_TYPE_AXI_LATENCY_512 = 0xa5, + PMU_EVENT_TYPE_AXI_LATENCY_1024 = 0xa6, + PMU_EVENT_TYPE_ECC_DMA = 0xb0, + PMU_EVENT_TYPE_ECC_SB0 = 0xb1, + PMU_EVENT_TYPE_ECC_SB1 = 0x1b1, +}; + +enum pooling_mode +{ + POOLING_MODE_MAX = 0, + POOLING_MODE_AVERAGE = 1, + POOLING_MODE_REDUCE_SUM = 2, +}; + +enum privilege_level +{ + PRIVILEGE_LEVEL_USER = 0, + PRIVILEGE_LEVEL_PRIVILEGED = 1, +}; + +enum resampling_mode +{ + RESAMPLING_MODE_NONE = 0, + RESAMPLING_MODE_NEAREST = 1, + RESAMPLING_MODE_TRANSPOSE = 2, +}; + +enum rounding +{ + ROUNDING_TFL = 0, + ROUNDING_TRUNCATE = 1, + ROUNDING_NATURAL = 2, +}; + +enum security_level +{ + SECURITY_LEVEL_SECURE = 0, + SECURITY_LEVEL_NON_SECURE = 1, +}; + +enum shram_size +{ + SHRAM_SIZE_SHRAM_96KB = 0x60, + SHRAM_SIZE_SHRAM_48KB = 0x30, + SHRAM_SIZE_SHRAM_24KB = 0x18, + SHRAM_SIZE_SHRAM_16KB = 0x10, +}; + +enum state +{ + STATE_STOPPED = 0, + STATE_RUNNING = 1, +}; + +enum stride_mode +{ + STRIDE_MODE_STRIDE_MODE_1D = 0, + STRIDE_MODE_STRIDE_MODE_2D = 1, + STRIDE_MODE_STRIDE_MODE_3D = 2, +}; + +#endif + +// id_r - ID register +struct id_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + struct + { + uint32_t version_status : 4; // This is the version of the product + uint32_t version_minor : 4; // This is the n for the P part of an RnPn release number + uint32_t version_major : 4; // This is the n for the R part of an RnPn release number + uint32_t product_major : 4; // This is the X part of the ML00X product number + uint32_t arch_patch_rev : 4; // This is the patch number of the architecture version a.b + uint32_t + arch_minor_rev : 8; // This is the minor architecture version number, b in the architecture version a.b + uint32_t + arch_major_rev : 4; // This is the major architecture version number, a in the architecture version a.b + }; + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR id_r() : + 
version_status(static_cast(1)), version_minor(static_cast(0x0)), + version_major(static_cast(0x1)), product_major(static_cast(4)), + arch_patch_rev(static_cast(6)), arch_minor_rev(static_cast(0)), + arch_major_rev(static_cast(1)) + { + } + CONSTEXPR id_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + id_r copy() volatile + { + return *this; + } + CONSTEXPR uint32_t get_version_status() const + { + uint32_t value = static_cast(version_status); + return value; + } + uint32_t get_version_status() const volatile + { + uint32_t value = static_cast(version_status); + return value; + } + CONSTEXPR id_r &set_version_status(uint32_t value) + { + version_status = ((1u << 4) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_version_minor() const + { + uint32_t value = static_cast(version_minor); + return value; + } + uint32_t get_version_minor() const volatile + { + uint32_t value = static_cast(version_minor); + return value; + } + CONSTEXPR id_r &set_version_minor(uint32_t value) + { + version_minor = ((1u << 4) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_version_major() const + { + uint32_t value = static_cast(version_major); + return value; + } + uint32_t get_version_major() const volatile + { + uint32_t value = static_cast(version_major); + return value; + } + CONSTEXPR id_r &set_version_major(uint32_t value) + { + version_major = ((1u << 4) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_product_major() const + { + uint32_t value = static_cast(product_major); + return value; + } + uint32_t get_product_major() const volatile + { + uint32_t value = static_cast(product_major); + return value; + } + CONSTEXPR id_r &set_product_major(uint32_t value) + { + product_major = ((1u << 4) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_arch_patch_rev() const + { + uint32_t value = static_cast(arch_patch_rev); + return value; + } + uint32_t get_arch_patch_rev() const volatile + { + uint32_t value = static_cast(arch_patch_rev); + return value; + } + CONSTEXPR id_r &set_arch_patch_rev(uint32_t value) + { + arch_patch_rev = ((1u << 4) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_arch_minor_rev() const + { + uint32_t value = static_cast(arch_minor_rev); + return value; + } + uint32_t get_arch_minor_rev() const volatile + { + uint32_t value = static_cast(arch_minor_rev); + return value; + } + CONSTEXPR id_r &set_arch_minor_rev(uint32_t value) + { + arch_minor_rev = ((1u << 8) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_arch_major_rev() const + { + uint32_t value = static_cast(arch_major_rev); + return value; + } + uint32_t get_arch_major_rev() const volatile + { + uint32_t value = static_cast(arch_major_rev); + return value; + } + CONSTEXPR id_r &set_arch_major_rev(uint32_t value) + { + arch_major_rev = ((1u << 4) - 1) & static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// status_r - Register describes the current operating status of the NPU +struct status_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + struct + { + uint32_t state : 1; // NPU state, 0 = Stopped, 1 = Running + uint32_t irq_raised : 1; // Raw IRQ status, 0 = IRQ not raised, 1 = IRQ raised. 
IRQ is cleared using command + // register bit 1 + uint32_t + bus_status : 1; // 0=OK, 1=Bus abort detected and processing halted (NPU will reach IDLE state and not + // to start process any more commands/AXI transactions). Can only be cleared by a reset + uint32_t reset_status : 1; // Reset is ongoing and only this register can be read (other registers read as 0 + // and writes are ignored.) A value of 0 means NPU is not being reset and can be + // accessed as normal + uint32_t + cmd_parse_error : 1; // 0=No error 1=Command stream parsing error detected. Can only be cleared by reset + uint32_t cmd_end_reached : 1; // 0=Not reached, 1=Reached. Cleared by writing QBASE or QSIZE when NPU is in + // stopped state + uint32_t pmu_irq_raised : 1; // 0=No PMU IRQ, 1=PMU IRQ raised. Cleared by using command register bit 1 + uint32_t wd_fault : 1; // Weight decoder state: 0=no fault 1=weight decoder decompression fault. Can only be + // cleared by reset + uint32_t ecc_fault : 1; // ECC state for internal RAMs: 0=no fault 1=ECC fault signalled. Can only be + // cleared by reset + uint32_t reserved0 : 2; + uint32_t faulting_interface : 1; // Faulting interface on bus abort. 0=AXI-M0 1=AXI-M1 + uint32_t faulting_channel : 4; // Faulting channel on a bus abort. Read: 0=Cmd 1=IFM 2=Weights 3=Scale+Bias + // 4=Mem2Mem; Write: 8=OFM 9=Mem2Mem + uint32_t irq_history_mask : 16; // IRQ History mask + }; + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR status_r() : + state(static_cast(::state::STOPPED)), irq_raised(static_cast(0x0)), + bus_status(static_cast(0x0)), reset_status(static_cast(0x1)), + cmd_parse_error(static_cast(0x0)), cmd_end_reached(static_cast(0x0)), + pmu_irq_raised(static_cast(0x0)), wd_fault(static_cast(0x0)), + ecc_fault(static_cast(0x0)), reserved0(static_cast(0)), + faulting_interface(static_cast(0x0)), faulting_channel(static_cast(0x0)), + irq_history_mask(static_cast(0x0)) + { + } + CONSTEXPR status_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + status_r copy() volatile + { + return *this; + } + CONSTEXPR ::state get_state() const + { + ::state value = static_cast<::state>(state); + return value; + } + ::state get_state() const volatile + { + ::state value = static_cast<::state>(state); + return value; + } + CONSTEXPR status_r &set_state(::state value) + { + state = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_irq_raised() const + { + uint32_t value = static_cast(irq_raised); + return value; + } + uint32_t get_irq_raised() const volatile + { + uint32_t value = static_cast(irq_raised); + return value; + } + CONSTEXPR status_r &set_irq_raised(uint32_t value) + { + irq_raised = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_bus_status() const + { + uint32_t value = static_cast(bus_status); + return value; + } + uint32_t get_bus_status() const volatile + { + uint32_t value = static_cast(bus_status); + return value; + } + CONSTEXPR status_r &set_bus_status(uint32_t value) + { + bus_status = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_reset_status() const + { + uint32_t value = static_cast(reset_status); + return value; + } + uint32_t get_reset_status() const volatile + { + uint32_t value = static_cast(reset_status); + return value; + } + 
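    // Editorial note (sketch, not part of the generated header): host code typically
    // polls this register after requesting a reset, waiting for reset_status to read
    // back 0 before touching any other register, and then checks the sticky fault
    // bits. Assuming a hypothetical 32-bit MMIO read helper npu_read_reg():
    //
    //   status_r s(npu_read_reg(npu_base, NPU_REG_STATUS));
    //   while (s.get_reset_status() != 0)                  // reset still ongoing
    //       s = status_r(npu_read_reg(npu_base, NPU_REG_STATUS));
    //   bool faulted = s.get_bus_status() || s.get_cmd_parse_error();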
CONSTEXPR status_r &set_reset_status(uint32_t value) + { + reset_status = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_cmd_parse_error() const + { + uint32_t value = static_cast(cmd_parse_error); + return value; + } + uint32_t get_cmd_parse_error() const volatile + { + uint32_t value = static_cast(cmd_parse_error); + return value; + } + CONSTEXPR status_r &set_cmd_parse_error(uint32_t value) + { + cmd_parse_error = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_cmd_end_reached() const + { + uint32_t value = static_cast(cmd_end_reached); + return value; + } + uint32_t get_cmd_end_reached() const volatile + { + uint32_t value = static_cast(cmd_end_reached); + return value; + } + CONSTEXPR status_r &set_cmd_end_reached(uint32_t value) + { + cmd_end_reached = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_pmu_irq_raised() const + { + uint32_t value = static_cast(pmu_irq_raised); + return value; + } + uint32_t get_pmu_irq_raised() const volatile + { + uint32_t value = static_cast(pmu_irq_raised); + return value; + } + CONSTEXPR status_r &set_pmu_irq_raised(uint32_t value) + { + pmu_irq_raised = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_wd_fault() const + { + uint32_t value = static_cast(wd_fault); + return value; + } + uint32_t get_wd_fault() const volatile + { + uint32_t value = static_cast(wd_fault); + return value; + } + CONSTEXPR status_r &set_wd_fault(uint32_t value) + { + wd_fault = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_ecc_fault() const + { + uint32_t value = static_cast(ecc_fault); + return value; + } + uint32_t get_ecc_fault() const volatile + { + uint32_t value = static_cast(ecc_fault); + return value; + } + CONSTEXPR status_r &set_ecc_fault(uint32_t value) + { + ecc_fault = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_faulting_interface() const + { + uint32_t value = static_cast(faulting_interface); + return value; + } + uint32_t get_faulting_interface() const volatile + { + uint32_t value = static_cast(faulting_interface); + return value; + } + CONSTEXPR status_r &set_faulting_interface(uint32_t value) + { + faulting_interface = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_faulting_channel() const + { + uint32_t value = static_cast(faulting_channel); + return value; + } + uint32_t get_faulting_channel() const volatile + { + uint32_t value = static_cast(faulting_channel); + return value; + } + CONSTEXPR status_r &set_faulting_channel(uint32_t value) + { + faulting_channel = ((1u << 4) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_irq_history_mask() const + { + uint32_t value = static_cast(irq_history_mask); + return value; + } + uint32_t get_irq_history_mask() const volatile + { + uint32_t value = static_cast(irq_history_mask); + return value; + } + CONSTEXPR status_r &set_irq_history_mask(uint32_t value) + { + irq_history_mask = ((1u << 16) - 1) & static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// cmd_r - Command register, reads as last written command +struct cmd_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + struct + { + uint32_t transition_to_running_state : 1; // Write 1 to transition the NPU to running state. Writing 0 has + // no effect + uint32_t clear_irq : 1; // Write 1 to clear the IRQ status in the STATUS register. 
Writing 0 has no effect + uint32_t clock_q_enable : 1; // Write 1 to this bit to enable clock off using clock q-interface and enable + // the master clock gate + uint32_t power_q_enable : 1; // Write 1 to this bit to enable power off using power q-interface + uint32_t + stop_request : 1; // Write 1 to this bit to request STOP after completing any already-started commands + uint32_t reserved0 : 11; + uint32_t clear_irq_history : 16; // Clears the IRQ history mask + }; + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR cmd_r() : + transition_to_running_state(static_cast(0x0)), clear_irq(static_cast(0x0)), + clock_q_enable(static_cast(0x1)), power_q_enable(static_cast(0x1)), + stop_request(static_cast(0x0)), reserved0(static_cast(0)), + clear_irq_history(static_cast(0x0)) + { + } + CONSTEXPR cmd_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + cmd_r copy() volatile + { + return *this; + } + CONSTEXPR uint32_t get_transition_to_running_state() const + { + uint32_t value = static_cast(transition_to_running_state); + return value; + } + uint32_t get_transition_to_running_state() const volatile + { + uint32_t value = static_cast(transition_to_running_state); + return value; + } + CONSTEXPR cmd_r &set_transition_to_running_state(uint32_t value) + { + transition_to_running_state = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_clear_irq() const + { + uint32_t value = static_cast(clear_irq); + return value; + } + uint32_t get_clear_irq() const volatile + { + uint32_t value = static_cast(clear_irq); + return value; + } + CONSTEXPR cmd_r &set_clear_irq(uint32_t value) + { + clear_irq = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_clock_q_enable() const + { + uint32_t value = static_cast(clock_q_enable); + return value; + } + uint32_t get_clock_q_enable() const volatile + { + uint32_t value = static_cast(clock_q_enable); + return value; + } + CONSTEXPR cmd_r &set_clock_q_enable(uint32_t value) + { + clock_q_enable = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_power_q_enable() const + { + uint32_t value = static_cast(power_q_enable); + return value; + } + uint32_t get_power_q_enable() const volatile + { + uint32_t value = static_cast(power_q_enable); + return value; + } + CONSTEXPR cmd_r &set_power_q_enable(uint32_t value) + { + power_q_enable = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_stop_request() const + { + uint32_t value = static_cast(stop_request); + return value; + } + uint32_t get_stop_request() const volatile + { + uint32_t value = static_cast(stop_request); + return value; + } + CONSTEXPR cmd_r &set_stop_request(uint32_t value) + { + stop_request = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_clear_irq_history() const + { + uint32_t value = static_cast(clear_irq_history); + return value; + } + uint32_t get_clear_irq_history() const volatile + { + uint32_t value = static_cast(clear_irq_history); + return value; + } + CONSTEXPR cmd_r &set_clear_irq_history(uint32_t value) + { + clear_irq_history = ((1u << 16) - 1) & static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// reset_r - Request Reset and new security mode +struct reset_r +{ +#ifdef 
__cplusplus + private: +#endif //__cplusplus + union + { + struct + { + uint32_t pending_CPL : 1; // Current privilege level 0=User 1=Privileged + uint32_t pending_CSL : 1; // Current security level 0=Secure 1=Non secure + uint32_t reserved0 : 30; + }; + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR reset_r() : + pending_CPL(static_cast(::privilege_level::USER)), + pending_CSL(static_cast(::security_level::SECURE)), reserved0(static_cast(0)) + { + } + CONSTEXPR reset_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + reset_r copy() volatile + { + return *this; + } + CONSTEXPR ::privilege_level get_pending_CPL() const + { + ::privilege_level value = static_cast<::privilege_level>(pending_CPL); + return value; + } + ::privilege_level get_pending_CPL() const volatile + { + ::privilege_level value = static_cast<::privilege_level>(pending_CPL); + return value; + } + CONSTEXPR reset_r &set_pending_CPL(::privilege_level value) + { + pending_CPL = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR ::security_level get_pending_CSL() const + { + ::security_level value = static_cast<::security_level>(pending_CSL); + return value; + } + ::security_level get_pending_CSL() const volatile + { + ::security_level value = static_cast<::security_level>(pending_CSL); + return value; + } + CONSTEXPR reset_r &set_pending_CSL(::security_level value) + { + pending_CSL = ((1u << 1) - 1) & static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// qbase0_r - Base address of command queue bits [31:0]. The address is 4 byte aligned +struct qbase0_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + uint32_t QBASE0; // The 4 byte aligned lower bytes of the base address value for the command stream + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR qbase0_r() : QBASE0(static_cast(0x00000000)) {} + CONSTEXPR qbase0_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + qbase0_r copy() volatile + { + return *this; + } + CONSTEXPR uint32_t get_QBASE0() const + { + uint32_t value = static_cast(QBASE0); + return value; + } + uint32_t get_QBASE0() const volatile + { + uint32_t value = static_cast(QBASE0); + return value; + } + CONSTEXPR qbase0_r &set_QBASE0(uint32_t value) + { + QBASE0 = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// qbase1_r - Address extension bits [47:32] bits for queue base +struct qbase1_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + uint32_t QBASE1; // The 4 byte aligned upper bytes of the base address value for the command stream + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR qbase1_r() : QBASE1(static_cast(0x00000000)) {} + CONSTEXPR qbase1_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + qbase1_r copy() volatile + { + return *this; + } + CONSTEXPR uint32_t get_QBASE1() const + { + uint32_t 
value = static_cast(QBASE1); + return value; + } + uint32_t get_QBASE1() const volatile + { + uint32_t value = static_cast(QBASE1); + return value; + } + CONSTEXPR qbase1_r &set_QBASE1(uint32_t value) + { + QBASE1 = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// qread_r - Read offset in the command stream in bytes. Multiple of 4 in the range 0 to 16 MB +struct qread_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + uint32_t QREAD; // The read offset of the current command under execution + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR qread_r() : QREAD(static_cast(0x00000000)) {} + CONSTEXPR qread_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + qread_r copy() volatile + { + return *this; + } + CONSTEXPR uint32_t get_QREAD() const + { + uint32_t value = static_cast(QREAD); + return value; + } + uint32_t get_QREAD() const volatile + { + uint32_t value = static_cast(QREAD); + return value; + } + CONSTEXPR qread_r &set_QREAD(uint32_t value) + { + QREAD = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// qconfig_r - AXI configuration for the command stream in the range 0-3. Same encoding as for REGIONCFG +struct qconfig_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + uint32_t QCONFIG; // AXI configuration for the command stream in the range 0-3 + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR qconfig_r() : QCONFIG(static_cast(0x00000000)) {} + CONSTEXPR qconfig_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + qconfig_r copy() volatile + { + return *this; + } + CONSTEXPR uint32_t get_QCONFIG() const + { + uint32_t value = static_cast(QCONFIG); + return value; + } + uint32_t get_QCONFIG() const volatile + { + uint32_t value = static_cast(QCONFIG); + return value; + } + CONSTEXPR qconfig_r &set_QCONFIG(uint32_t value) + { + QCONFIG = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// qsize_r - Size of the command stream in bytes. 
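// Illustrative sketch: QBASE0 and QBASE1 above carry the low and high words of
// a single 4 byte aligned command stream base address (bits [31:0] and [47:32]
// respectively). The helper below only composes the two register words; its
// name and the way the result is written out are assumptions for illustration,
// not part of this interface.
static inline void npu_make_qbase(uint64_t cmd_stream_addr, uint32_t &qbase0_word, uint32_t &qbase1_word)
{
    qbase0_word = qbase0_r().set_QBASE0(static_cast<uint32_t>(cmd_stream_addr & 0xFFFFFFFFu));
    qbase1_word = qbase1_r().set_QBASE1(static_cast<uint32_t>((cmd_stream_addr >> 32) & 0xFFFFu));
}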
Multiple of 4 in the range 0 to 16 MB +struct qsize_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + uint32_t QSIZE; // Size of the next command stream to be executed by the NPU + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR qsize_r() : QSIZE(static_cast(0x00000000)) {} + CONSTEXPR qsize_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + qsize_r copy() volatile + { + return *this; + } + CONSTEXPR uint32_t get_QSIZE() const + { + uint32_t value = static_cast(QSIZE); + return value; + } + uint32_t get_QSIZE() const volatile + { + uint32_t value = static_cast(QSIZE); + return value; + } + CONSTEXPR qsize_r &set_QSIZE(uint32_t value) + { + QSIZE = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// prot_r - Protection level configured for the NPU when acting as an AXI master +struct prot_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + struct + { + uint32_t active_CPL : 1; // Current privilege level 0=User 1=Privileged + uint32_t active_CSL : 1; // Current security level 0=Secure 1=Non secure + uint32_t reserved0 : 30; + }; + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR prot_r() : + active_CPL(static_cast(::privilege_level::USER)), + active_CSL(static_cast(::security_level::SECURE)), reserved0(static_cast(0)) + { + } + CONSTEXPR prot_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + prot_r copy() volatile + { + return *this; + } + CONSTEXPR ::privilege_level get_active_CPL() const + { + ::privilege_level value = static_cast<::privilege_level>(active_CPL); + return value; + } + ::privilege_level get_active_CPL() const volatile + { + ::privilege_level value = static_cast<::privilege_level>(active_CPL); + return value; + } + CONSTEXPR prot_r &set_active_CPL(::privilege_level value) + { + active_CPL = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR ::security_level get_active_CSL() const + { + ::security_level value = static_cast<::security_level>(active_CSL); + return value; + } + ::security_level get_active_CSL() const volatile + { + ::security_level value = static_cast<::security_level>(active_CSL); + return value; + } + CONSTEXPR prot_r &set_active_CSL(::security_level value) + { + active_CSL = ((1u << 1) - 1) & static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// config_r - RTL configuration +struct config_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + struct + { + uint32_t macs_per_cc : 4; // The log2(macs/clock cycle). Valid encoding range is 5 to 8 for 32 to 256 + // MACs/clock cycle. + uint32_t cmd_stream_version : 4; // command stream version accepted by this NPU. + uint32_t shram_size : 8; // Size in KB of SHRAM in the range 8 to 48. + uint32_t reserved0 : 11; + uint32_t custom_dma : 1; // Custom DMA enable bit. 
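// Illustrative sketch: QREAD above reports the byte offset of the command
// currently being executed, so comparing it with QSIZE gives a rough measure
// of how much of the command stream is still outstanding. The helper is for
// illustration only and takes raw register snapshots supplied by the caller.
static inline uint32_t npu_cmd_stream_bytes_left(uint32_t qread_word, uint32_t qsize_word)
{
    uint32_t read_offset = qread_r(qread_word).get_QREAD(); // byte offset, multiple of 4
    uint32_t total_size  = qsize_r(qsize_word).get_QSIZE(); // total size in bytes, multiple of 4
    return (read_offset <= total_size) ? (total_size - read_offset) : 0u;
}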
+ uint32_t product : 4; // Product configuration + }; + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR config_r() : + macs_per_cc(static_cast(0)), cmd_stream_version(static_cast(0x0)), + shram_size(static_cast(0)), reserved0(static_cast(0)), product(static_cast(0)) + { + } + CONSTEXPR config_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + config_r copy() volatile + { + return *this; + } + CONSTEXPR ::macs_per_cc get_macs_per_cc() const + { + ::macs_per_cc value = static_cast<::macs_per_cc>(macs_per_cc); + return value; + } + ::macs_per_cc get_macs_per_cc() const volatile + { + ::macs_per_cc value = static_cast<::macs_per_cc>(macs_per_cc); + return value; + } + CONSTEXPR config_r &set_macs_per_cc(::macs_per_cc value) + { + macs_per_cc = ((1u << 4) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_cmd_stream_version() const + { + uint32_t value = static_cast(cmd_stream_version); + return value; + } + uint32_t get_cmd_stream_version() const volatile + { + uint32_t value = static_cast(cmd_stream_version); + return value; + } + CONSTEXPR config_r &set_cmd_stream_version(uint32_t value) + { + cmd_stream_version = ((1u << 4) - 1) & static_cast(value); + return *this; + } + CONSTEXPR ::shram_size get_shram_size() const + { + ::shram_size value = static_cast<::shram_size>(shram_size); + return value; + } + ::shram_size get_shram_size() const volatile + { + ::shram_size value = static_cast<::shram_size>(shram_size); + return value; + } + CONSTEXPR config_r &set_shram_size(::shram_size value) + { + shram_size = ((1u << 8) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_product() const + { + uint32_t value = static_cast(product); + return value; + } + uint32_t get_product() const volatile + { + uint32_t value = static_cast(product); + return value; + } + CONSTEXPR config_r &set_product(uint32_t value) + { + product = ((1u << 4) - 1) & static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// lock_r - Lock register. This register is designed for driver use and does not affect NPU functionality +struct lock_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + uint32_t LOCK; // 32 bit value for LOCK configuration + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR lock_r() : LOCK(static_cast(0x00000000)) {} + CONSTEXPR lock_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + lock_r copy() volatile + { + return *this; + } + CONSTEXPR uint32_t get_LOCK() const + { + uint32_t value = static_cast(LOCK); + return value; + } + uint32_t get_LOCK() const volatile + { + uint32_t value = static_cast(LOCK); + return value; + } + CONSTEXPR lock_r &set_LOCK(uint32_t value) + { + LOCK = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// regioncfg_r - Region memory type configuration. 
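// Illustrative sketch: decoding the RTL configuration word above. macs_per_cc
// is log2(MACs per clock cycle), so the MAC count is 1 << macs_per_cc, and
// shram_size is reported directly in KB. The helper name and output-parameter
// style are illustrative assumptions only.
static inline void npu_describe_config(uint32_t config_word, uint32_t &macs_per_cycle, uint32_t &shram_kb)
{
    config_r cfg(config_word);
    macs_per_cycle = 1u << static_cast<uint32_t>(cfg.get_macs_per_cc()); // e.g. 7 -> 128 MACs per cycle
    shram_kb       = static_cast<uint32_t>(cfg.get_shram_size());        // SHRAM size in KB (range 8 to 48)
}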
Bits[2*k+1:2*k] give the memory type for REGION[k] +struct regioncfg_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + struct + { + uint32_t region0 : 2; // Bits for Region0 Configurion + uint32_t region1 : 2; // Bits for Region1 Configurion + uint32_t region2 : 2; // Bits for Region2 Configurion + uint32_t region3 : 2; // Bits for Region3 Configurion + uint32_t region4 : 2; // Bits for Region4 Configurion + uint32_t region5 : 2; // Bits for Region5 Configurion + uint32_t region6 : 2; // Bits for Region6 Configurion + uint32_t region7 : 2; // Bits for Region7 Configurion + uint32_t reserved0 : 16; + }; + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR regioncfg_r() : + region0(static_cast(::memory_type::AXI0_OUTSTANDING_COUNTER0)), + region1(static_cast(::memory_type::AXI0_OUTSTANDING_COUNTER0)), + region2(static_cast(::memory_type::AXI0_OUTSTANDING_COUNTER0)), + region3(static_cast(::memory_type::AXI0_OUTSTANDING_COUNTER0)), + region4(static_cast(::memory_type::AXI0_OUTSTANDING_COUNTER0)), + region5(static_cast(::memory_type::AXI0_OUTSTANDING_COUNTER0)), + region6(static_cast(::memory_type::AXI0_OUTSTANDING_COUNTER0)), + region7(static_cast(::memory_type::AXI0_OUTSTANDING_COUNTER0)), reserved0(static_cast(0)) + { + } + CONSTEXPR regioncfg_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + regioncfg_r copy() volatile + { + return *this; + } + CONSTEXPR ::memory_type get_region0() const + { + ::memory_type value = static_cast<::memory_type>(region0); + return value; + } + ::memory_type get_region0() const volatile + { + ::memory_type value = static_cast<::memory_type>(region0); + return value; + } + CONSTEXPR regioncfg_r &set_region0(::memory_type value) + { + region0 = ((1u << 2) - 1) & static_cast(value); + return *this; + } + CONSTEXPR ::memory_type get_region1() const + { + ::memory_type value = static_cast<::memory_type>(region1); + return value; + } + ::memory_type get_region1() const volatile + { + ::memory_type value = static_cast<::memory_type>(region1); + return value; + } + CONSTEXPR regioncfg_r &set_region1(::memory_type value) + { + region1 = ((1u << 2) - 1) & static_cast(value); + return *this; + } + CONSTEXPR ::memory_type get_region2() const + { + ::memory_type value = static_cast<::memory_type>(region2); + return value; + } + ::memory_type get_region2() const volatile + { + ::memory_type value = static_cast<::memory_type>(region2); + return value; + } + CONSTEXPR regioncfg_r &set_region2(::memory_type value) + { + region2 = ((1u << 2) - 1) & static_cast(value); + return *this; + } + CONSTEXPR ::memory_type get_region3() const + { + ::memory_type value = static_cast<::memory_type>(region3); + return value; + } + ::memory_type get_region3() const volatile + { + ::memory_type value = static_cast<::memory_type>(region3); + return value; + } + CONSTEXPR regioncfg_r &set_region3(::memory_type value) + { + region3 = ((1u << 2) - 1) & static_cast(value); + return *this; + } + CONSTEXPR ::memory_type get_region4() const + { + ::memory_type value = static_cast<::memory_type>(region4); + return value; + } + ::memory_type get_region4() const volatile + { + ::memory_type value = static_cast<::memory_type>(region4); + return value; + } + CONSTEXPR regioncfg_r &set_region4(::memory_type value) + { + region4 = ((1u << 2) - 1) & 
static_cast(value); + return *this; + } + CONSTEXPR ::memory_type get_region5() const + { + ::memory_type value = static_cast<::memory_type>(region5); + return value; + } + ::memory_type get_region5() const volatile + { + ::memory_type value = static_cast<::memory_type>(region5); + return value; + } + CONSTEXPR regioncfg_r &set_region5(::memory_type value) + { + region5 = ((1u << 2) - 1) & static_cast(value); + return *this; + } + CONSTEXPR ::memory_type get_region6() const + { + ::memory_type value = static_cast<::memory_type>(region6); + return value; + } + ::memory_type get_region6() const volatile + { + ::memory_type value = static_cast<::memory_type>(region6); + return value; + } + CONSTEXPR regioncfg_r &set_region6(::memory_type value) + { + region6 = ((1u << 2) - 1) & static_cast(value); + return *this; + } + CONSTEXPR ::memory_type get_region7() const + { + ::memory_type value = static_cast<::memory_type>(region7); + return value; + } + ::memory_type get_region7() const volatile + { + ::memory_type value = static_cast<::memory_type>(region7); + return value; + } + CONSTEXPR regioncfg_r &set_region7(::memory_type value) + { + region7 = ((1u << 2) - 1) & static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// axi_limit0_r - AXI limits for port 0 counter 0 +struct axi_limit0_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + struct + { + uint32_t max_beats : 2; // Burst split alignment: 0=64 bytes, 1=128 bytes, 2=256 bytes, 3=reserved + uint32_t reserved0 : 2; + uint32_t memtype : 4; // Memtype to be used to encode AxCACHE signals + uint32_t reserved1 : 8; + uint32_t + max_outstanding_read_m1 : 8; // Maximum number of outstanding AXI read transactions - 1 in range 0 to 31 + uint32_t max_outstanding_write_m1 : 8; // Maximum number of outstanding AXI write transactions - 1 in range + // 0 to 15 + }; + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR axi_limit0_r() : + max_beats(static_cast(0x0)), reserved0(static_cast(0)), + memtype(static_cast(::axi_mem_encoding_type::DEVICE_NON_BUFFERABLE)), + reserved1(static_cast(0)), max_outstanding_read_m1(static_cast(0x00)), + max_outstanding_write_m1(static_cast(0x00)) + { + } + CONSTEXPR axi_limit0_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + axi_limit0_r copy() volatile + { + return *this; + } + CONSTEXPR uint32_t get_max_beats() const + { + uint32_t value = static_cast(max_beats); + return value; + } + uint32_t get_max_beats() const volatile + { + uint32_t value = static_cast(max_beats); + return value; + } + CONSTEXPR axi_limit0_r &set_max_beats(uint32_t value) + { + max_beats = ((1u << 2) - 1) & static_cast(value); + return *this; + } + CONSTEXPR ::axi_mem_encoding_type get_memtype() const + { + ::axi_mem_encoding_type value = static_cast<::axi_mem_encoding_type>(memtype); + return value; + } + ::axi_mem_encoding_type get_memtype() const volatile + { + ::axi_mem_encoding_type value = static_cast<::axi_mem_encoding_type>(memtype); + return value; + } + CONSTEXPR axi_limit0_r &set_memtype(::axi_mem_encoding_type value) + { + memtype = ((1u << 4) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_max_outstanding_read_m1() const + { + uint32_t value = static_cast(max_outstanding_read_m1); + return value; + } + uint32_t 
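// Illustrative sketch: REGIONCFG above holds two memory-type bits per region.
// For brevity this helper maps all eight regions to the same memory type; a
// real driver would normally choose the type per region. The helper name is
// an assumption for illustration only.
static inline uint32_t npu_make_regioncfg(::memory_type mem_type)
{
    return regioncfg_r()
        .set_region0(mem_type)
        .set_region1(mem_type)
        .set_region2(mem_type)
        .set_region3(mem_type)
        .set_region4(mem_type)
        .set_region5(mem_type)
        .set_region6(mem_type)
        .set_region7(mem_type); // converted through regioncfg_r::operator uint32_t()
}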
get_max_outstanding_read_m1() const volatile + { + uint32_t value = static_cast(max_outstanding_read_m1); + return value; + } + CONSTEXPR axi_limit0_r &set_max_outstanding_read_m1(uint32_t value) + { + max_outstanding_read_m1 = ((1u << 8) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_max_outstanding_write_m1() const + { + uint32_t value = static_cast(max_outstanding_write_m1); + return value; + } + uint32_t get_max_outstanding_write_m1() const volatile + { + uint32_t value = static_cast(max_outstanding_write_m1); + return value; + } + CONSTEXPR axi_limit0_r &set_max_outstanding_write_m1(uint32_t value) + { + max_outstanding_write_m1 = ((1u << 8) - 1) & static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// axi_limit1_r - AXI limits for port 0 counter 1 +struct axi_limit1_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + struct + { + uint32_t max_beats : 2; // Burst split alignment: 0=64 bytes, 1=128 bytes, 2=256 bytes, 3=reserved + uint32_t reserved0 : 2; + uint32_t memtype : 4; // Memtype to be used to encode AxCACHE signals + uint32_t reserved1 : 8; + uint32_t + max_outstanding_read_m1 : 8; // Maximum number of outstanding AXI read transactions - 1 in range 0 to 31 + uint32_t max_outstanding_write_m1 : 8; // Maximum number of outstanding AXI write transactions - 1 in range + // 0 to 15 + }; + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR axi_limit1_r() : + max_beats(static_cast(0x0)), reserved0(static_cast(0)), + memtype(static_cast(::axi_mem_encoding_type::DEVICE_NON_BUFFERABLE)), + reserved1(static_cast(0)), max_outstanding_read_m1(static_cast(0x00)), + max_outstanding_write_m1(static_cast(0x00)) + { + } + CONSTEXPR axi_limit1_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + axi_limit1_r copy() volatile + { + return *this; + } + CONSTEXPR uint32_t get_max_beats() const + { + uint32_t value = static_cast(max_beats); + return value; + } + uint32_t get_max_beats() const volatile + { + uint32_t value = static_cast(max_beats); + return value; + } + CONSTEXPR axi_limit1_r &set_max_beats(uint32_t value) + { + max_beats = ((1u << 2) - 1) & static_cast(value); + return *this; + } + CONSTEXPR ::axi_mem_encoding_type get_memtype() const + { + ::axi_mem_encoding_type value = static_cast<::axi_mem_encoding_type>(memtype); + return value; + } + ::axi_mem_encoding_type get_memtype() const volatile + { + ::axi_mem_encoding_type value = static_cast<::axi_mem_encoding_type>(memtype); + return value; + } + CONSTEXPR axi_limit1_r &set_memtype(::axi_mem_encoding_type value) + { + memtype = ((1u << 4) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_max_outstanding_read_m1() const + { + uint32_t value = static_cast(max_outstanding_read_m1); + return value; + } + uint32_t get_max_outstanding_read_m1() const volatile + { + uint32_t value = static_cast(max_outstanding_read_m1); + return value; + } + CONSTEXPR axi_limit1_r &set_max_outstanding_read_m1(uint32_t value) + { + max_outstanding_read_m1 = ((1u << 8) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_max_outstanding_write_m1() const + { + uint32_t value = static_cast(max_outstanding_write_m1); + return value; + } + uint32_t get_max_outstanding_write_m1() const volatile + { + uint32_t value = 
static_cast(max_outstanding_write_m1); + return value; + } + CONSTEXPR axi_limit1_r &set_max_outstanding_write_m1(uint32_t value) + { + max_outstanding_write_m1 = ((1u << 8) - 1) & static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// axi_limit2_r - AXI limits for port 1 counter 2 +struct axi_limit2_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + struct + { + uint32_t max_beats : 2; // Burst split alignment: 0=64 bytes, 1=128 bytes, 2=256 bytes, 3=reserved + uint32_t reserved0 : 2; + uint32_t memtype : 4; // Memtype to be used to encode AxCACHE signals + uint32_t reserved1 : 8; + uint32_t + max_outstanding_read_m1 : 8; // Maximum number of outstanding AXI read transactions - 1 in range 0 to 31 + uint32_t max_outstanding_write_m1 : 8; // Maximum number of outstanding AXI write transactions - 1 in range + // 0 to 15 + }; + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR axi_limit2_r() : + max_beats(static_cast(0x0)), reserved0(static_cast(0)), + memtype(static_cast(::axi_mem_encoding_type::DEVICE_NON_BUFFERABLE)), + reserved1(static_cast(0)), max_outstanding_read_m1(static_cast(0x00)), + max_outstanding_write_m1(static_cast(0x00)) + { + } + CONSTEXPR axi_limit2_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + axi_limit2_r copy() volatile + { + return *this; + } + CONSTEXPR uint32_t get_max_beats() const + { + uint32_t value = static_cast(max_beats); + return value; + } + uint32_t get_max_beats() const volatile + { + uint32_t value = static_cast(max_beats); + return value; + } + CONSTEXPR axi_limit2_r &set_max_beats(uint32_t value) + { + max_beats = ((1u << 2) - 1) & static_cast(value); + return *this; + } + CONSTEXPR ::axi_mem_encoding_type get_memtype() const + { + ::axi_mem_encoding_type value = static_cast<::axi_mem_encoding_type>(memtype); + return value; + } + ::axi_mem_encoding_type get_memtype() const volatile + { + ::axi_mem_encoding_type value = static_cast<::axi_mem_encoding_type>(memtype); + return value; + } + CONSTEXPR axi_limit2_r &set_memtype(::axi_mem_encoding_type value) + { + memtype = ((1u << 4) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_max_outstanding_read_m1() const + { + uint32_t value = static_cast(max_outstanding_read_m1); + return value; + } + uint32_t get_max_outstanding_read_m1() const volatile + { + uint32_t value = static_cast(max_outstanding_read_m1); + return value; + } + CONSTEXPR axi_limit2_r &set_max_outstanding_read_m1(uint32_t value) + { + max_outstanding_read_m1 = ((1u << 8) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_max_outstanding_write_m1() const + { + uint32_t value = static_cast(max_outstanding_write_m1); + return value; + } + uint32_t get_max_outstanding_write_m1() const volatile + { + uint32_t value = static_cast(max_outstanding_write_m1); + return value; + } + CONSTEXPR axi_limit2_r &set_max_outstanding_write_m1(uint32_t value) + { + max_outstanding_write_m1 = ((1u << 8) - 1) & static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// axi_limit3_r - AXI limits for port 1 counter 3 +struct axi_limit3_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + struct + { + uint32_t max_beats : 2; // Burst split alignment: 0=64 bytes, 1=128 bytes, 2=256 bytes, 3=reserved + uint32_t 
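// Illustrative sketch: the four AXI_LIMIT registers share the layout above, so
// one composed word can be reused per port/counter as needed. The specific
// values below (128 byte burst alignment, device non-bufferable memory, a few
// outstanding transactions) are arbitrary example settings, not recommended
// defaults, and the helper name is an assumption for illustration only.
static inline uint32_t npu_make_axi_limit_example()
{
    axi_limit0_r limit;
    limit.set_max_beats(1u);                                            // 1 = 128 byte burst split alignment
    limit.set_memtype(::axi_mem_encoding_type::DEVICE_NON_BUFFERABLE);  // AxCACHE encoding
    limit.set_max_outstanding_read_m1(3u);                              // up to 4 outstanding AXI reads
    limit.set_max_outstanding_write_m1(1u);                             // up to 2 outstanding AXI writes
    return limit;                                                       // axi_limit0_r::operator uint32_t()
}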
reserved0 : 2; + uint32_t memtype : 4; // Memtype to be used to encode AxCACHE signals + uint32_t reserved1 : 8; + uint32_t + max_outstanding_read_m1 : 8; // Maximum number of outstanding AXI read transactions - 1 in range 0 to 31 + uint32_t max_outstanding_write_m1 : 8; // Maximum number of outstanding AXI write transactions - 1 in range + // 0 to 15 + }; + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR axi_limit3_r() : + max_beats(static_cast(0x0)), reserved0(static_cast(0)), + memtype(static_cast(::axi_mem_encoding_type::DEVICE_NON_BUFFERABLE)), + reserved1(static_cast(0)), max_outstanding_read_m1(static_cast(0x00)), + max_outstanding_write_m1(static_cast(0x00)) + { + } + CONSTEXPR axi_limit3_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + axi_limit3_r copy() volatile + { + return *this; + } + CONSTEXPR uint32_t get_max_beats() const + { + uint32_t value = static_cast(max_beats); + return value; + } + uint32_t get_max_beats() const volatile + { + uint32_t value = static_cast(max_beats); + return value; + } + CONSTEXPR axi_limit3_r &set_max_beats(uint32_t value) + { + max_beats = ((1u << 2) - 1) & static_cast(value); + return *this; + } + CONSTEXPR ::axi_mem_encoding_type get_memtype() const + { + ::axi_mem_encoding_type value = static_cast<::axi_mem_encoding_type>(memtype); + return value; + } + ::axi_mem_encoding_type get_memtype() const volatile + { + ::axi_mem_encoding_type value = static_cast<::axi_mem_encoding_type>(memtype); + return value; + } + CONSTEXPR axi_limit3_r &set_memtype(::axi_mem_encoding_type value) + { + memtype = ((1u << 4) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_max_outstanding_read_m1() const + { + uint32_t value = static_cast(max_outstanding_read_m1); + return value; + } + uint32_t get_max_outstanding_read_m1() const volatile + { + uint32_t value = static_cast(max_outstanding_read_m1); + return value; + } + CONSTEXPR axi_limit3_r &set_max_outstanding_read_m1(uint32_t value) + { + max_outstanding_read_m1 = ((1u << 8) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_max_outstanding_write_m1() const + { + uint32_t value = static_cast(max_outstanding_write_m1); + return value; + } + uint32_t get_max_outstanding_write_m1() const volatile + { + uint32_t value = static_cast(max_outstanding_write_m1); + return value; + } + CONSTEXPR axi_limit3_r &set_max_outstanding_write_m1(uint32_t value) + { + max_outstanding_write_m1 = ((1u << 8) - 1) & static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// basep0_r - Lower 32 bits of the Base pointer for region index 0 +struct basep0_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + uint32_t addr_word; // The low word of the 64-bit address + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR basep0_r() : addr_word(static_cast(0)) {} + CONSTEXPR basep0_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + basep0_r copy() volatile + { + return *this; + } + CONSTEXPR uint32_t get_addr_word() const + { + uint32_t value = static_cast(addr_word); + return value; + } + uint32_t 
get_addr_word() const volatile + { + uint32_t value = static_cast(addr_word); + return value; + } + CONSTEXPR basep0_r &set_addr_word(uint32_t value) + { + addr_word = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// basep1_r - Upper 32 bits of the Base pointer for region index 0 +struct basep1_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + uint32_t addr_word; // The high word of the 64-bit address + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR basep1_r() : addr_word(static_cast(0)) {} + CONSTEXPR basep1_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + basep1_r copy() volatile + { + return *this; + } + CONSTEXPR uint32_t get_addr_word() const + { + uint32_t value = static_cast(addr_word); + return value; + } + uint32_t get_addr_word() const volatile + { + uint32_t value = static_cast(addr_word); + return value; + } + CONSTEXPR basep1_r &set_addr_word(uint32_t value) + { + addr_word = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// basep2_r - Lower 32 bits of the Base pointer for region index 1 +struct basep2_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + uint32_t addr_word; // The low word of the 64-bit address + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR basep2_r() : addr_word(static_cast(0)) {} + CONSTEXPR basep2_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + basep2_r copy() volatile + { + return *this; + } + CONSTEXPR uint32_t get_addr_word() const + { + uint32_t value = static_cast(addr_word); + return value; + } + uint32_t get_addr_word() const volatile + { + uint32_t value = static_cast(addr_word); + return value; + } + CONSTEXPR basep2_r &set_addr_word(uint32_t value) + { + addr_word = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// basep3_r - Upper 32 bits of the Base pointer for region index 1 +struct basep3_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + uint32_t addr_word; // The high word of the 64-bit address + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR basep3_r() : addr_word(static_cast(0)) {} + CONSTEXPR basep3_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + basep3_r copy() volatile + { + return *this; + } + CONSTEXPR uint32_t get_addr_word() const + { + uint32_t value = static_cast(addr_word); + return value; + } + uint32_t get_addr_word() const volatile + { + uint32_t value = static_cast(addr_word); + return value; + } + CONSTEXPR basep3_r &set_addr_word(uint32_t value) + { + addr_word = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// basep4_r - Lower 32 bits of the Base pointer for region index 2 +struct basep4_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + uint32_t addr_word; // The low word of the 64-bit address + uint32_t word; + }; +#ifdef 
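// Illustrative sketch: the BASEP registers are consumed as low/high pairs, so
// region 0 is described by BASEP0 (bits [31:0]) and BASEP1 (bits [63:32]).
// The helper only composes the two register words and leaves writing them to
// the caller; its name is an assumption for illustration only.
static inline void npu_make_region0_basep(uint64_t region_base, uint32_t &basep0_word, uint32_t &basep1_word)
{
    basep0_word = basep0_r().set_addr_word(static_cast<uint32_t>(region_base & 0xFFFFFFFFu));
    basep1_word = basep1_r().set_addr_word(static_cast<uint32_t>(region_base >> 32));
}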
__cplusplus + public: + CONSTEXPR basep4_r() : addr_word(static_cast(0)) {} + CONSTEXPR basep4_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + basep4_r copy() volatile + { + return *this; + } + CONSTEXPR uint32_t get_addr_word() const + { + uint32_t value = static_cast(addr_word); + return value; + } + uint32_t get_addr_word() const volatile + { + uint32_t value = static_cast(addr_word); + return value; + } + CONSTEXPR basep4_r &set_addr_word(uint32_t value) + { + addr_word = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// basep5_r - Upper 32 bits of the Base pointer for region index 2 +struct basep5_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + uint32_t addr_word; // The high word of the 64-bit address + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR basep5_r() : addr_word(static_cast(0)) {} + CONSTEXPR basep5_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + basep5_r copy() volatile + { + return *this; + } + CONSTEXPR uint32_t get_addr_word() const + { + uint32_t value = static_cast(addr_word); + return value; + } + uint32_t get_addr_word() const volatile + { + uint32_t value = static_cast(addr_word); + return value; + } + CONSTEXPR basep5_r &set_addr_word(uint32_t value) + { + addr_word = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// basep6_r - Lower 32 bits of the Base pointer for region index 3 +struct basep6_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + uint32_t addr_word; // The low word of the 64-bit address + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR basep6_r() : addr_word(static_cast(0)) {} + CONSTEXPR basep6_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + basep6_r copy() volatile + { + return *this; + } + CONSTEXPR uint32_t get_addr_word() const + { + uint32_t value = static_cast(addr_word); + return value; + } + uint32_t get_addr_word() const volatile + { + uint32_t value = static_cast(addr_word); + return value; + } + CONSTEXPR basep6_r &set_addr_word(uint32_t value) + { + addr_word = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// basep7_r - Upper 32 bits of the Base pointer for region index 3 +struct basep7_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + uint32_t addr_word; // The high word of the 64-bit address + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR basep7_r() : addr_word(static_cast(0)) {} + CONSTEXPR basep7_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + basep7_r copy() volatile + { + return *this; + } + CONSTEXPR uint32_t get_addr_word() const + { + uint32_t value = 
static_cast(addr_word); + return value; + } + uint32_t get_addr_word() const volatile + { + uint32_t value = static_cast(addr_word); + return value; + } + CONSTEXPR basep7_r &set_addr_word(uint32_t value) + { + addr_word = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// basep8_r - Lower 32 bits of the Base pointer for region index 4 +struct basep8_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + uint32_t addr_word; // The low word of the 64-bit address + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR basep8_r() : addr_word(static_cast(0)) {} + CONSTEXPR basep8_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + basep8_r copy() volatile + { + return *this; + } + CONSTEXPR uint32_t get_addr_word() const + { + uint32_t value = static_cast(addr_word); + return value; + } + uint32_t get_addr_word() const volatile + { + uint32_t value = static_cast(addr_word); + return value; + } + CONSTEXPR basep8_r &set_addr_word(uint32_t value) + { + addr_word = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// basep9_r - Upper 32 bits of the Base pointer for region index 4 +struct basep9_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + uint32_t addr_word; // The high word of the 64-bit address + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR basep9_r() : addr_word(static_cast(0)) {} + CONSTEXPR basep9_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + basep9_r copy() volatile + { + return *this; + } + CONSTEXPR uint32_t get_addr_word() const + { + uint32_t value = static_cast(addr_word); + return value; + } + uint32_t get_addr_word() const volatile + { + uint32_t value = static_cast(addr_word); + return value; + } + CONSTEXPR basep9_r &set_addr_word(uint32_t value) + { + addr_word = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// basep10_r - Lower 32 bits of the Base pointer for region index 5 +struct basep10_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + uint32_t addr_word; // The low word of the 64-bit address + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR basep10_r() : addr_word(static_cast(0)) {} + CONSTEXPR basep10_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + basep10_r copy() volatile + { + return *this; + } + CONSTEXPR uint32_t get_addr_word() const + { + uint32_t value = static_cast(addr_word); + return value; + } + uint32_t get_addr_word() const volatile + { + uint32_t value = static_cast(addr_word); + return value; + } + CONSTEXPR basep10_r &set_addr_word(uint32_t value) + { + addr_word = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// basep11_r - Upper 32 bits of the Base pointer for region index 5 +struct basep11_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + uint32_t addr_word; // The high word 
of the 64-bit address + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR basep11_r() : addr_word(static_cast(0)) {} + CONSTEXPR basep11_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + basep11_r copy() volatile + { + return *this; + } + CONSTEXPR uint32_t get_addr_word() const + { + uint32_t value = static_cast(addr_word); + return value; + } + uint32_t get_addr_word() const volatile + { + uint32_t value = static_cast(addr_word); + return value; + } + CONSTEXPR basep11_r &set_addr_word(uint32_t value) + { + addr_word = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// basep12_r - Lower 32 bits of the Base pointer for region index 6 +struct basep12_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + uint32_t addr_word; // The low word of the 64-bit address + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR basep12_r() : addr_word(static_cast(0)) {} + CONSTEXPR basep12_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + basep12_r copy() volatile + { + return *this; + } + CONSTEXPR uint32_t get_addr_word() const + { + uint32_t value = static_cast(addr_word); + return value; + } + uint32_t get_addr_word() const volatile + { + uint32_t value = static_cast(addr_word); + return value; + } + CONSTEXPR basep12_r &set_addr_word(uint32_t value) + { + addr_word = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// basep13_r - Upper 32 bits of the Base pointer for region index 6 +struct basep13_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + uint32_t addr_word; // The high word of the 64-bit address + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR basep13_r() : addr_word(static_cast(0)) {} + CONSTEXPR basep13_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + basep13_r copy() volatile + { + return *this; + } + CONSTEXPR uint32_t get_addr_word() const + { + uint32_t value = static_cast(addr_word); + return value; + } + uint32_t get_addr_word() const volatile + { + uint32_t value = static_cast(addr_word); + return value; + } + CONSTEXPR basep13_r &set_addr_word(uint32_t value) + { + addr_word = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// basep14_r - Lower 32 bits of the Base pointer for region index 7 +struct basep14_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + uint32_t addr_word; // The low word of the 64-bit address + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR basep14_r() : addr_word(static_cast(0)) {} + CONSTEXPR basep14_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + basep14_r copy() volatile + { + return *this; 
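// Illustrative sketch: the sixteen BASEP registers form eight low/high pairs,
// one pair per region, each holding a full 32-bit address word. Given eight
// 64-bit region base addresses, this fills the sixteen register words in
// order (BASEP0, BASEP1, ..., BASEP15); writing them to the register file is
// left to the caller. The helper is an illustrative assumption only.
static inline void npu_make_basep_words(const uint64_t (&region_base)[8], uint32_t (&basep_word)[16])
{
    for (int region = 0; region < 8; ++region)
    {
        basep_word[2 * region]     = static_cast<uint32_t>(region_base[region] & 0xFFFFFFFFu); // low word
        basep_word[2 * region + 1] = static_cast<uint32_t>(region_base[region] >> 32);         // high word
    }
}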
+ } + CONSTEXPR uint32_t get_addr_word() const + { + uint32_t value = static_cast(addr_word); + return value; + } + uint32_t get_addr_word() const volatile + { + uint32_t value = static_cast(addr_word); + return value; + } + CONSTEXPR basep14_r &set_addr_word(uint32_t value) + { + addr_word = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// basep15_r - Upper 32 bits of the Base pointer for region index 7 +struct basep15_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + uint32_t addr_word; // The high word of the 64-bit address + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR basep15_r() : addr_word(static_cast(0)) {} + CONSTEXPR basep15_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + basep15_r copy() volatile + { + return *this; + } + CONSTEXPR uint32_t get_addr_word() const + { + uint32_t value = static_cast(addr_word); + return value; + } + uint32_t get_addr_word() const volatile + { + uint32_t value = static_cast(addr_word); + return value; + } + CONSTEXPR basep15_r &set_addr_word(uint32_t value) + { + addr_word = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// wd_status_r - WD_STATUS of core DEBUGCORE +struct wd_status_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + struct + { + uint32_t core_slice_state : 2; // STATE_HEADER=0, STATE_PALETTE=1, STATE_WEIGHTS=2 + uint32_t core_idle : 1; // Core idle + uint32_t ctrl_state : 2; // IDLE=0, DRAIN=1, OFD_INIT=2, OFD_RUN=3 + uint32_t ctrl_idle : 1; // All stripe jobs idle (all weights consumed) + uint32_t write_buf_index0 : 3; // current write index for next data from core + uint32_t write_buf_valid0 : 1; // write buf valid (full) + uint32_t write_buf_idle0 : 1; // write buf idle (empty) + uint32_t write_buf_index1 : 3; // current write index for next data from core + uint32_t write_buf_valid1 : 1; // write buf valid (full) + uint32_t write_buf_idle1 : 1; // write buf idle (empty) + uint32_t events : 12; // WD events mapped as appendix A + uint32_t reserved0 : 4; + }; + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR wd_status_r() : + core_slice_state(static_cast(0)), core_idle(static_cast(0)), + ctrl_state(static_cast(0)), ctrl_idle(static_cast(0)), + write_buf_index0(static_cast(0)), write_buf_valid0(static_cast(0)), + write_buf_idle0(static_cast(0)), write_buf_index1(static_cast(0)), + write_buf_valid1(static_cast(0)), write_buf_idle1(static_cast(0)), + events(static_cast(0)), reserved0(static_cast(0)) + { + } + CONSTEXPR wd_status_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + wd_status_r copy() volatile + { + return *this; + } + CONSTEXPR uint32_t get_core_slice_state() const + { + uint32_t value = static_cast(core_slice_state); + return value; + } + uint32_t get_core_slice_state() const volatile + { + uint32_t value = static_cast(core_slice_state); + return value; + } + CONSTEXPR wd_status_r &set_core_slice_state(uint32_t value) + { + core_slice_state = ((1u << 2) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_core_idle() const + { + uint32_t 
value = static_cast(core_idle); + return value; + } + uint32_t get_core_idle() const volatile + { + uint32_t value = static_cast(core_idle); + return value; + } + CONSTEXPR wd_status_r &set_core_idle(uint32_t value) + { + core_idle = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_ctrl_state() const + { + uint32_t value = static_cast(ctrl_state); + return value; + } + uint32_t get_ctrl_state() const volatile + { + uint32_t value = static_cast(ctrl_state); + return value; + } + CONSTEXPR wd_status_r &set_ctrl_state(uint32_t value) + { + ctrl_state = ((1u << 2) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_ctrl_idle() const + { + uint32_t value = static_cast(ctrl_idle); + return value; + } + uint32_t get_ctrl_idle() const volatile + { + uint32_t value = static_cast(ctrl_idle); + return value; + } + CONSTEXPR wd_status_r &set_ctrl_idle(uint32_t value) + { + ctrl_idle = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_write_buf_index0() const + { + uint32_t value = static_cast(write_buf_index0); + return value; + } + uint32_t get_write_buf_index0() const volatile + { + uint32_t value = static_cast(write_buf_index0); + return value; + } + CONSTEXPR wd_status_r &set_write_buf_index0(uint32_t value) + { + write_buf_index0 = ((1u << 3) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_write_buf_valid0() const + { + uint32_t value = static_cast(write_buf_valid0); + return value; + } + uint32_t get_write_buf_valid0() const volatile + { + uint32_t value = static_cast(write_buf_valid0); + return value; + } + CONSTEXPR wd_status_r &set_write_buf_valid0(uint32_t value) + { + write_buf_valid0 = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_write_buf_idle0() const + { + uint32_t value = static_cast(write_buf_idle0); + return value; + } + uint32_t get_write_buf_idle0() const volatile + { + uint32_t value = static_cast(write_buf_idle0); + return value; + } + CONSTEXPR wd_status_r &set_write_buf_idle0(uint32_t value) + { + write_buf_idle0 = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_write_buf_index1() const + { + uint32_t value = static_cast(write_buf_index1); + return value; + } + uint32_t get_write_buf_index1() const volatile + { + uint32_t value = static_cast(write_buf_index1); + return value; + } + CONSTEXPR wd_status_r &set_write_buf_index1(uint32_t value) + { + write_buf_index1 = ((1u << 3) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_write_buf_valid1() const + { + uint32_t value = static_cast(write_buf_valid1); + return value; + } + uint32_t get_write_buf_valid1() const volatile + { + uint32_t value = static_cast(write_buf_valid1); + return value; + } + CONSTEXPR wd_status_r &set_write_buf_valid1(uint32_t value) + { + write_buf_valid1 = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_write_buf_idle1() const + { + uint32_t value = static_cast(write_buf_idle1); + return value; + } + uint32_t get_write_buf_idle1() const volatile + { + uint32_t value = static_cast(write_buf_idle1); + return value; + } + CONSTEXPR wd_status_r &set_write_buf_idle1(uint32_t value) + { + write_buf_idle1 = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_events() const + { + uint32_t value = static_cast(events); + return value; + } + uint32_t get_events() const volatile + { + uint32_t value = static_cast(events); + return value; + } + 
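// Illustrative sketch: WD_STATUS above is a debug view of the weight decoder,
// so a driver investigating a stall would typically just snapshot it and
// inspect the idle and event bits. The helper name and return convention are
// assumptions for illustration only.
static inline bool npu_weight_decoder_idle(uint32_t wd_status_word, uint32_t &wd_events)
{
    wd_status_r wd(wd_status_word);
    wd_events = wd.get_events();                                     // WD events, mapped as appendix A
    return (wd.get_core_idle() != 0u) && (wd.get_ctrl_idle() != 0u); // core idle and all stripe jobs idle
}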
CONSTEXPR wd_status_r &set_events(uint32_t value) + { + events = ((1u << 12) - 1) & static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// mac_status_r - MAC_STATUS of core DEBUGCORE +struct mac_status_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + struct + { + uint32_t block_cfg_valid : 1; // MAC has a valid block configuration + uint32_t trav_en : 1; // MAC is doing block traversal + uint32_t wait_for_ib : 1; // MAC is waiting for an Input Buffer to become available + uint32_t wait_for_acc_buf : 1; // MAC is waiting for an Accumulator Buffer to become available + uint32_t wait_for_weights : 1; // MAC is waiting for a Weight Block to become available + uint32_t stall_stripe : 1; // MAC is stalling between two stripes + uint32_t dw_sel : 1; // Currently used weight interface in MAC AI + uint32_t wait_for_dw0_ready : 1; // MAC AI is waiting for MAC DPU to send dw0_ready to WD + uint32_t wait_for_dw1_ready : 1; // MAC AI is waiting for MAC DPU to send dw1_ready to WD + uint32_t acc_buf_sel_ai : 1; // Currently used AccBuf interface in MAC AI + uint32_t wait_for_acc0_ready : 1; // MAC AI is waiting for acc0_ready from AO + uint32_t wait_for_acc1_ready : 1; // MAC AI is waiting for acc1_ready from AO + uint32_t acc_buf_sel_aa : 1; // Currently used AccBuf interface in MAC ADDER_ARRAY + uint32_t acc0_valid : 1; // MAC outgoing value of acc0_valid + uint32_t acc1_valid : 1; // MAC outgoing value of acc1_valid + uint32_t reserved0 : 1; + uint32_t events : 11; // Mapped to MAC events described in Appendix A + uint32_t reserved1 : 5; + }; + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR mac_status_r() : + block_cfg_valid(static_cast(0)), trav_en(static_cast(0)), + wait_for_ib(static_cast(0)), wait_for_acc_buf(static_cast(0)), + wait_for_weights(static_cast(0)), stall_stripe(static_cast(0)), + dw_sel(static_cast(0)), wait_for_dw0_ready(static_cast(0)), + wait_for_dw1_ready(static_cast(0)), acc_buf_sel_ai(static_cast(0)), + wait_for_acc0_ready(static_cast(0)), wait_for_acc1_ready(static_cast(0)), + acc_buf_sel_aa(static_cast(0)), acc0_valid(static_cast(0)), + acc1_valid(static_cast(0)), reserved0(static_cast(0)), events(static_cast(0)), + reserved1(static_cast(0)) + { + } + CONSTEXPR mac_status_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + mac_status_r copy() volatile + { + return *this; + } + CONSTEXPR uint32_t get_block_cfg_valid() const + { + uint32_t value = static_cast(block_cfg_valid); + return value; + } + uint32_t get_block_cfg_valid() const volatile + { + uint32_t value = static_cast(block_cfg_valid); + return value; + } + CONSTEXPR mac_status_r &set_block_cfg_valid(uint32_t value) + { + block_cfg_valid = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_trav_en() const + { + uint32_t value = static_cast(trav_en); + return value; + } + uint32_t get_trav_en() const volatile + { + uint32_t value = static_cast(trav_en); + return value; + } + CONSTEXPR mac_status_r &set_trav_en(uint32_t value) + { + trav_en = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_wait_for_ib() const + { + uint32_t value = static_cast(wait_for_ib); + return value; + } + uint32_t get_wait_for_ib() const volatile + { + uint32_t value = static_cast(wait_for_ib); + return 
value; + } + CONSTEXPR mac_status_r &set_wait_for_ib(uint32_t value) + { + wait_for_ib = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_wait_for_acc_buf() const + { + uint32_t value = static_cast(wait_for_acc_buf); + return value; + } + uint32_t get_wait_for_acc_buf() const volatile + { + uint32_t value = static_cast(wait_for_acc_buf); + return value; + } + CONSTEXPR mac_status_r &set_wait_for_acc_buf(uint32_t value) + { + wait_for_acc_buf = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_wait_for_weights() const + { + uint32_t value = static_cast(wait_for_weights); + return value; + } + uint32_t get_wait_for_weights() const volatile + { + uint32_t value = static_cast(wait_for_weights); + return value; + } + CONSTEXPR mac_status_r &set_wait_for_weights(uint32_t value) + { + wait_for_weights = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_stall_stripe() const + { + uint32_t value = static_cast(stall_stripe); + return value; + } + uint32_t get_stall_stripe() const volatile + { + uint32_t value = static_cast(stall_stripe); + return value; + } + CONSTEXPR mac_status_r &set_stall_stripe(uint32_t value) + { + stall_stripe = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_dw_sel() const + { + uint32_t value = static_cast(dw_sel); + return value; + } + uint32_t get_dw_sel() const volatile + { + uint32_t value = static_cast(dw_sel); + return value; + } + CONSTEXPR mac_status_r &set_dw_sel(uint32_t value) + { + dw_sel = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_wait_for_dw0_ready() const + { + uint32_t value = static_cast(wait_for_dw0_ready); + return value; + } + uint32_t get_wait_for_dw0_ready() const volatile + { + uint32_t value = static_cast(wait_for_dw0_ready); + return value; + } + CONSTEXPR mac_status_r &set_wait_for_dw0_ready(uint32_t value) + { + wait_for_dw0_ready = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_wait_for_dw1_ready() const + { + uint32_t value = static_cast(wait_for_dw1_ready); + return value; + } + uint32_t get_wait_for_dw1_ready() const volatile + { + uint32_t value = static_cast(wait_for_dw1_ready); + return value; + } + CONSTEXPR mac_status_r &set_wait_for_dw1_ready(uint32_t value) + { + wait_for_dw1_ready = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_acc_buf_sel_ai() const + { + uint32_t value = static_cast(acc_buf_sel_ai); + return value; + } + uint32_t get_acc_buf_sel_ai() const volatile + { + uint32_t value = static_cast(acc_buf_sel_ai); + return value; + } + CONSTEXPR mac_status_r &set_acc_buf_sel_ai(uint32_t value) + { + acc_buf_sel_ai = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_wait_for_acc0_ready() const + { + uint32_t value = static_cast(wait_for_acc0_ready); + return value; + } + uint32_t get_wait_for_acc0_ready() const volatile + { + uint32_t value = static_cast(wait_for_acc0_ready); + return value; + } + CONSTEXPR mac_status_r &set_wait_for_acc0_ready(uint32_t value) + { + wait_for_acc0_ready = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_wait_for_acc1_ready() const + { + uint32_t value = static_cast(wait_for_acc1_ready); + return value; + } + uint32_t get_wait_for_acc1_ready() const volatile + { + uint32_t value = static_cast(wait_for_acc1_ready); + return value; + } + CONSTEXPR mac_status_r &set_wait_for_acc1_ready(uint32_t 
value) + { + wait_for_acc1_ready = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_acc_buf_sel_aa() const + { + uint32_t value = static_cast(acc_buf_sel_aa); + return value; + } + uint32_t get_acc_buf_sel_aa() const volatile + { + uint32_t value = static_cast(acc_buf_sel_aa); + return value; + } + CONSTEXPR mac_status_r &set_acc_buf_sel_aa(uint32_t value) + { + acc_buf_sel_aa = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_acc0_valid() const + { + uint32_t value = static_cast(acc0_valid); + return value; + } + uint32_t get_acc0_valid() const volatile + { + uint32_t value = static_cast(acc0_valid); + return value; + } + CONSTEXPR mac_status_r &set_acc0_valid(uint32_t value) + { + acc0_valid = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_acc1_valid() const + { + uint32_t value = static_cast(acc1_valid); + return value; + } + uint32_t get_acc1_valid() const volatile + { + uint32_t value = static_cast(acc1_valid); + return value; + } + CONSTEXPR mac_status_r &set_acc1_valid(uint32_t value) + { + acc1_valid = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_events() const + { + uint32_t value = static_cast(events); + return value; + } + uint32_t get_events() const volatile + { + uint32_t value = static_cast(events); + return value; + } + CONSTEXPR mac_status_r &set_events(uint32_t value) + { + events = ((1u << 11) - 1) & static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// ao_status_r - AO_STATUS of core DEBUGCORE +struct ao_status_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + struct + { + uint32_t cmd_sbw_valid : 1; // Block command to shared buffer write module is valid. + uint32_t cmd_act_valid : 1; // Block command to activation function module is valid. + uint32_t cmd_ctl_valid : 1; // Block command to control module is valid. + uint32_t cmd_scl_valid : 1; // Block command to scale module is valid. + uint32_t cmd_sbr_valid : 1; // Block command to shared buffer read module is valid. + uint32_t cmd_ofm_valid : 1; // Block command to ofm parameter module is valid. + uint32_t blk_cmd_ready : 1; // Ready to accept block command. + uint32_t blk_cmd_valid : 1; // Block command from CC is valid. + uint32_t reserved0 : 8; + uint32_t events : 8; // Mapped to AO events described in Appendix A. 
+ uint32_t reserved1 : 8; + }; + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR ao_status_r() : + cmd_sbw_valid(static_cast(0)), cmd_act_valid(static_cast(0)), + cmd_ctl_valid(static_cast(0)), cmd_scl_valid(static_cast(0)), + cmd_sbr_valid(static_cast(0)), cmd_ofm_valid(static_cast(0)), + blk_cmd_ready(static_cast(0)), blk_cmd_valid(static_cast(0)), + reserved0(static_cast(0)), events(static_cast(0)), reserved1(static_cast(0)) + { + } + CONSTEXPR ao_status_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + ao_status_r copy() volatile + { + return *this; + } + CONSTEXPR uint32_t get_cmd_sbw_valid() const + { + uint32_t value = static_cast(cmd_sbw_valid); + return value; + } + uint32_t get_cmd_sbw_valid() const volatile + { + uint32_t value = static_cast(cmd_sbw_valid); + return value; + } + CONSTEXPR ao_status_r &set_cmd_sbw_valid(uint32_t value) + { + cmd_sbw_valid = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_cmd_act_valid() const + { + uint32_t value = static_cast(cmd_act_valid); + return value; + } + uint32_t get_cmd_act_valid() const volatile + { + uint32_t value = static_cast(cmd_act_valid); + return value; + } + CONSTEXPR ao_status_r &set_cmd_act_valid(uint32_t value) + { + cmd_act_valid = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_cmd_ctl_valid() const + { + uint32_t value = static_cast(cmd_ctl_valid); + return value; + } + uint32_t get_cmd_ctl_valid() const volatile + { + uint32_t value = static_cast(cmd_ctl_valid); + return value; + } + CONSTEXPR ao_status_r &set_cmd_ctl_valid(uint32_t value) + { + cmd_ctl_valid = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_cmd_scl_valid() const + { + uint32_t value = static_cast(cmd_scl_valid); + return value; + } + uint32_t get_cmd_scl_valid() const volatile + { + uint32_t value = static_cast(cmd_scl_valid); + return value; + } + CONSTEXPR ao_status_r &set_cmd_scl_valid(uint32_t value) + { + cmd_scl_valid = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_cmd_sbr_valid() const + { + uint32_t value = static_cast(cmd_sbr_valid); + return value; + } + uint32_t get_cmd_sbr_valid() const volatile + { + uint32_t value = static_cast(cmd_sbr_valid); + return value; + } + CONSTEXPR ao_status_r &set_cmd_sbr_valid(uint32_t value) + { + cmd_sbr_valid = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_cmd_ofm_valid() const + { + uint32_t value = static_cast(cmd_ofm_valid); + return value; + } + uint32_t get_cmd_ofm_valid() const volatile + { + uint32_t value = static_cast(cmd_ofm_valid); + return value; + } + CONSTEXPR ao_status_r &set_cmd_ofm_valid(uint32_t value) + { + cmd_ofm_valid = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_blk_cmd_ready() const + { + uint32_t value = static_cast(blk_cmd_ready); + return value; + } + uint32_t get_blk_cmd_ready() const volatile + { + uint32_t value = static_cast(blk_cmd_ready); + return value; + } + CONSTEXPR ao_status_r &set_blk_cmd_ready(uint32_t value) + { + blk_cmd_ready = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_blk_cmd_valid() const + { + uint32_t value = static_cast(blk_cmd_valid); + return value; + } + uint32_t 
get_blk_cmd_valid() const volatile + { + uint32_t value = static_cast(blk_cmd_valid); + return value; + } + CONSTEXPR ao_status_r &set_blk_cmd_valid(uint32_t value) + { + blk_cmd_valid = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_events() const + { + uint32_t value = static_cast(events); + return value; + } + uint32_t get_events() const volatile + { + uint32_t value = static_cast(events); + return value; + } + CONSTEXPR ao_status_r &set_events(uint32_t value) + { + events = ((1u << 8) - 1) & static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// dma_status0_r - DMA_STATUS0 of core DEBUGCORE +struct dma_status0_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + struct + { + uint32_t CMD_IDLE : 1; // When this bit is high means that the CMD block is not busy in generating addresses + // for a CMD job. + uint32_t IFM_IDLE : 1; // When this bit is high means that there are no ongoing IFM jobs + uint32_t WGT_IDLE_C0 : 1; // When this bit is high means that the WGT block is not busy in generating + // addresses for a WGT job + uint32_t BAS_IDLE_C0 : 1; // When this bit is high means that the BAS block is not busy in generating + // addresses for a BAS job + uint32_t M2M_IDLE : 1; // When this bit is high means that there are no ongoing M2M jobs + uint32_t OFM_IDLE : 1; // When this bit is high means that there are no ongoing OFM jobs + uint32_t HALT_REQ : 1; // CPM has requested to HALT AXI bus before soft reset + uint32_t HALT_ACK : 1; // DMA is in condition to halt the AXI bus since there are no pending transactions + uint32_t PAUSE_REQ : 1; // CC has requested to pause the AXI + uint32_t PAUSE_ACK : 1; // DMA is in condition to pause the AXI bus since there are no pending transactions + uint32_t IB0_AI_VALID_C0 : 1; // Data for AI to be read in IFM input buffer 0 - Core 0 + uint32_t IB0_AI_READY_C0 : 1; // Data consumed from AI in IFM input buffer 0 - Core 0 + uint32_t IB1_AI_VALID_C0 : 1; // Data for AI to be read in IFM input buffer 1 - Core 0 + uint32_t IB1_AI_READY_C0 : 1; // Data consumed from AI in IFM input buffer 1 - Core 0 + uint32_t IB0_AO_VALID_C0 : 1; // Data for AO to be read in IFM input buffer 0 - Core 0 + uint32_t IB0_AO_READY_C0 : 1; // Data consumed from AO in IFM input buffer 0 - Core 0 + uint32_t IB1_AO_VALID_C0 : 1; // Data for AO to be read in IFM input buffer 0 - Core 0 + uint32_t IB1_AO_READY_C0 : 1; // Data consumed from AO in IFM input buffer 1 - Core 0 + uint32_t OB0_VALID_C0 : 1; // Data for DMA ready to be consumed in OFM output buffer 0 - Core 0 + uint32_t OB0_READY_C0 : 1; // Data consumed from DMA in OFM output buffer 0 - Core 0 + uint32_t OB1_VALID_C0 : 1; // Data for DMA ready to be consumed in OFM output buffer 1 - Core 0 + uint32_t OB1_READY_C0 : 1; // Data consumed from DMA in OFM output buffer 1 - Core 0 + uint32_t CMD_VALID : 1; // New command word for CC to be consumed + uint32_t CMD_READY : 1; // command word consumed by CC + uint32_t WD_BITSTREAM_VALID_C0 : 1; // New weight word for WD to be consumed - Core 0 + uint32_t WD_BITSTREAM_READY_C0 : 1; // Weight word consumed by WD - Core 0 + uint32_t BS_BITSTREAM_VALID_C0 : 1; // New BaS word for AO to be consumed - Core 0 + uint32_t BS_BITSTREAM_READY_C0 : 1; // BaS word consumed by AO - Core 0 + uint32_t AXI0_AR_STALLED : 1; // Read transfer request stalled on arready low AXI0 (due to memory system) + uint32_t AXI0_RD_LIMIT_STALL : 1; // Read stalled due to one AXI0 limit counter being reached + uint32_t AXI0_AW_STALLED : 1; // 
Write transfer request stalled on awready low AXI0 (due to memory system) + uint32_t AXI0_W_STALLED : 1; // Write transfer stalled on awready low AXI0 (due to memory system) + }; + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR dma_status0_r() : + CMD_IDLE(static_cast(0)), IFM_IDLE(static_cast(0)), WGT_IDLE_C0(static_cast(0)), + BAS_IDLE_C0(static_cast(0)), M2M_IDLE(static_cast(0)), OFM_IDLE(static_cast(0)), + HALT_REQ(static_cast(0)), HALT_ACK(static_cast(0)), PAUSE_REQ(static_cast(0)), + PAUSE_ACK(static_cast(0)), IB0_AI_VALID_C0(static_cast(0)), + IB0_AI_READY_C0(static_cast(0)), IB1_AI_VALID_C0(static_cast(0)), + IB1_AI_READY_C0(static_cast(0)), IB0_AO_VALID_C0(static_cast(0)), + IB0_AO_READY_C0(static_cast(0)), IB1_AO_VALID_C0(static_cast(0)), + IB1_AO_READY_C0(static_cast(0)), OB0_VALID_C0(static_cast(0)), + OB0_READY_C0(static_cast(0)), OB1_VALID_C0(static_cast(0)), + OB1_READY_C0(static_cast(0)), CMD_VALID(static_cast(0)), + CMD_READY(static_cast(0)), WD_BITSTREAM_VALID_C0(static_cast(0)), + WD_BITSTREAM_READY_C0(static_cast(0)), BS_BITSTREAM_VALID_C0(static_cast(0)), + BS_BITSTREAM_READY_C0(static_cast(0)), AXI0_AR_STALLED(static_cast(0)), + AXI0_RD_LIMIT_STALL(static_cast(0)), AXI0_AW_STALLED(static_cast(0)), + AXI0_W_STALLED(static_cast(0)) + { + } + CONSTEXPR dma_status0_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + dma_status0_r copy() volatile + { + return *this; + } + CONSTEXPR uint32_t get_CMD_IDLE() const + { + uint32_t value = static_cast(CMD_IDLE); + return value; + } + uint32_t get_CMD_IDLE() const volatile + { + uint32_t value = static_cast(CMD_IDLE); + return value; + } + CONSTEXPR dma_status0_r &set_CMD_IDLE(uint32_t value) + { + CMD_IDLE = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_IFM_IDLE() const + { + uint32_t value = static_cast(IFM_IDLE); + return value; + } + uint32_t get_IFM_IDLE() const volatile + { + uint32_t value = static_cast(IFM_IDLE); + return value; + } + CONSTEXPR dma_status0_r &set_IFM_IDLE(uint32_t value) + { + IFM_IDLE = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_WGT_IDLE_C0() const + { + uint32_t value = static_cast(WGT_IDLE_C0); + return value; + } + uint32_t get_WGT_IDLE_C0() const volatile + { + uint32_t value = static_cast(WGT_IDLE_C0); + return value; + } + CONSTEXPR dma_status0_r &set_WGT_IDLE_C0(uint32_t value) + { + WGT_IDLE_C0 = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_BAS_IDLE_C0() const + { + uint32_t value = static_cast(BAS_IDLE_C0); + return value; + } + uint32_t get_BAS_IDLE_C0() const volatile + { + uint32_t value = static_cast(BAS_IDLE_C0); + return value; + } + CONSTEXPR dma_status0_r &set_BAS_IDLE_C0(uint32_t value) + { + BAS_IDLE_C0 = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_M2M_IDLE() const + { + uint32_t value = static_cast(M2M_IDLE); + return value; + } + uint32_t get_M2M_IDLE() const volatile + { + uint32_t value = static_cast(M2M_IDLE); + return value; + } + CONSTEXPR dma_status0_r &set_M2M_IDLE(uint32_t value) + { + M2M_IDLE = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_OFM_IDLE() const + { + uint32_t value = static_cast(OFM_IDLE); + return value; + } + uint32_t get_OFM_IDLE() 
const volatile + { + uint32_t value = static_cast(OFM_IDLE); + return value; + } + CONSTEXPR dma_status0_r &set_OFM_IDLE(uint32_t value) + { + OFM_IDLE = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_HALT_REQ() const + { + uint32_t value = static_cast(HALT_REQ); + return value; + } + uint32_t get_HALT_REQ() const volatile + { + uint32_t value = static_cast(HALT_REQ); + return value; + } + CONSTEXPR dma_status0_r &set_HALT_REQ(uint32_t value) + { + HALT_REQ = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_HALT_ACK() const + { + uint32_t value = static_cast(HALT_ACK); + return value; + } + uint32_t get_HALT_ACK() const volatile + { + uint32_t value = static_cast(HALT_ACK); + return value; + } + CONSTEXPR dma_status0_r &set_HALT_ACK(uint32_t value) + { + HALT_ACK = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_PAUSE_REQ() const + { + uint32_t value = static_cast(PAUSE_REQ); + return value; + } + uint32_t get_PAUSE_REQ() const volatile + { + uint32_t value = static_cast(PAUSE_REQ); + return value; + } + CONSTEXPR dma_status0_r &set_PAUSE_REQ(uint32_t value) + { + PAUSE_REQ = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_PAUSE_ACK() const + { + uint32_t value = static_cast(PAUSE_ACK); + return value; + } + uint32_t get_PAUSE_ACK() const volatile + { + uint32_t value = static_cast(PAUSE_ACK); + return value; + } + CONSTEXPR dma_status0_r &set_PAUSE_ACK(uint32_t value) + { + PAUSE_ACK = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_IB0_AI_VALID_C0() const + { + uint32_t value = static_cast(IB0_AI_VALID_C0); + return value; + } + uint32_t get_IB0_AI_VALID_C0() const volatile + { + uint32_t value = static_cast(IB0_AI_VALID_C0); + return value; + } + CONSTEXPR dma_status0_r &set_IB0_AI_VALID_C0(uint32_t value) + { + IB0_AI_VALID_C0 = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_IB0_AI_READY_C0() const + { + uint32_t value = static_cast(IB0_AI_READY_C0); + return value; + } + uint32_t get_IB0_AI_READY_C0() const volatile + { + uint32_t value = static_cast(IB0_AI_READY_C0); + return value; + } + CONSTEXPR dma_status0_r &set_IB0_AI_READY_C0(uint32_t value) + { + IB0_AI_READY_C0 = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_IB1_AI_VALID_C0() const + { + uint32_t value = static_cast(IB1_AI_VALID_C0); + return value; + } + uint32_t get_IB1_AI_VALID_C0() const volatile + { + uint32_t value = static_cast(IB1_AI_VALID_C0); + return value; + } + CONSTEXPR dma_status0_r &set_IB1_AI_VALID_C0(uint32_t value) + { + IB1_AI_VALID_C0 = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_IB1_AI_READY_C0() const + { + uint32_t value = static_cast(IB1_AI_READY_C0); + return value; + } + uint32_t get_IB1_AI_READY_C0() const volatile + { + uint32_t value = static_cast(IB1_AI_READY_C0); + return value; + } + CONSTEXPR dma_status0_r &set_IB1_AI_READY_C0(uint32_t value) + { + IB1_AI_READY_C0 = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_IB0_AO_VALID_C0() const + { + uint32_t value = static_cast(IB0_AO_VALID_C0); + return value; + } + uint32_t get_IB0_AO_VALID_C0() const volatile + { + uint32_t value = static_cast(IB0_AO_VALID_C0); + return value; + } + CONSTEXPR dma_status0_r &set_IB0_AO_VALID_C0(uint32_t value) + { + IB0_AO_VALID_C0 = ((1u << 1) - 1) & static_cast(value); + return *this; + } + 
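
For the status-polling case, here is a minimal sketch of reading the DMA_STATUS0 register being defined here through a memory-mapped, volatile-qualified view. npu_regs_t, wait_for_dma_idle and NPU_REG_BASE are assumed names used only for illustration and are not defined by this header; a real register map would place every register at its architectural offset.

#include <cstdint>

// Hypothetical register-file fragment; a real layout would cover all registers at their offsets.
struct npu_regs_t
{
    dma_status0_r DMA_STATUS0;
};

// Spin until the DMA command and OFM channels report idle.
// The const volatile getter overloads generated above are what make these reads
// legal through a volatile-qualified object.
static void wait_for_dma_idle(volatile npu_regs_t *regs)
{
    while (regs->DMA_STATUS0.get_CMD_IDLE() == 0 || regs->DMA_STATUS0.get_OFM_IDLE() == 0)
    {
        // busy-wait; each iteration re-reads the live register word
    }
}

// Example call with an assumed, platform-specific base address:
//   wait_for_dma_idle(reinterpret_cast<volatile npu_regs_t *>(NPU_REG_BASE));

This is also why every accessor is generated twice, once const and once const volatile: the same wrapper type can be used both on plain copies of a register word and directly on a memory-mapped, volatile-qualified instance.
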
CONSTEXPR uint32_t get_IB0_AO_READY_C0() const + { + uint32_t value = static_cast(IB0_AO_READY_C0); + return value; + } + uint32_t get_IB0_AO_READY_C0() const volatile + { + uint32_t value = static_cast(IB0_AO_READY_C0); + return value; + } + CONSTEXPR dma_status0_r &set_IB0_AO_READY_C0(uint32_t value) + { + IB0_AO_READY_C0 = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_IB1_AO_VALID_C0() const + { + uint32_t value = static_cast(IB1_AO_VALID_C0); + return value; + } + uint32_t get_IB1_AO_VALID_C0() const volatile + { + uint32_t value = static_cast(IB1_AO_VALID_C0); + return value; + } + CONSTEXPR dma_status0_r &set_IB1_AO_VALID_C0(uint32_t value) + { + IB1_AO_VALID_C0 = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_IB1_AO_READY_C0() const + { + uint32_t value = static_cast(IB1_AO_READY_C0); + return value; + } + uint32_t get_IB1_AO_READY_C0() const volatile + { + uint32_t value = static_cast(IB1_AO_READY_C0); + return value; + } + CONSTEXPR dma_status0_r &set_IB1_AO_READY_C0(uint32_t value) + { + IB1_AO_READY_C0 = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_OB0_VALID_C0() const + { + uint32_t value = static_cast(OB0_VALID_C0); + return value; + } + uint32_t get_OB0_VALID_C0() const volatile + { + uint32_t value = static_cast(OB0_VALID_C0); + return value; + } + CONSTEXPR dma_status0_r &set_OB0_VALID_C0(uint32_t value) + { + OB0_VALID_C0 = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_OB0_READY_C0() const + { + uint32_t value = static_cast(OB0_READY_C0); + return value; + } + uint32_t get_OB0_READY_C0() const volatile + { + uint32_t value = static_cast(OB0_READY_C0); + return value; + } + CONSTEXPR dma_status0_r &set_OB0_READY_C0(uint32_t value) + { + OB0_READY_C0 = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_OB1_VALID_C0() const + { + uint32_t value = static_cast(OB1_VALID_C0); + return value; + } + uint32_t get_OB1_VALID_C0() const volatile + { + uint32_t value = static_cast(OB1_VALID_C0); + return value; + } + CONSTEXPR dma_status0_r &set_OB1_VALID_C0(uint32_t value) + { + OB1_VALID_C0 = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_OB1_READY_C0() const + { + uint32_t value = static_cast(OB1_READY_C0); + return value; + } + uint32_t get_OB1_READY_C0() const volatile + { + uint32_t value = static_cast(OB1_READY_C0); + return value; + } + CONSTEXPR dma_status0_r &set_OB1_READY_C0(uint32_t value) + { + OB1_READY_C0 = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_CMD_VALID() const + { + uint32_t value = static_cast(CMD_VALID); + return value; + } + uint32_t get_CMD_VALID() const volatile + { + uint32_t value = static_cast(CMD_VALID); + return value; + } + CONSTEXPR dma_status0_r &set_CMD_VALID(uint32_t value) + { + CMD_VALID = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_CMD_READY() const + { + uint32_t value = static_cast(CMD_READY); + return value; + } + uint32_t get_CMD_READY() const volatile + { + uint32_t value = static_cast(CMD_READY); + return value; + } + CONSTEXPR dma_status0_r &set_CMD_READY(uint32_t value) + { + CMD_READY = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_WD_BITSTREAM_VALID_C0() const + { + uint32_t value = static_cast(WD_BITSTREAM_VALID_C0); + return value; + } + uint32_t get_WD_BITSTREAM_VALID_C0() const volatile + { + uint32_t value 
= static_cast(WD_BITSTREAM_VALID_C0); + return value; + } + CONSTEXPR dma_status0_r &set_WD_BITSTREAM_VALID_C0(uint32_t value) + { + WD_BITSTREAM_VALID_C0 = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_WD_BITSTREAM_READY_C0() const + { + uint32_t value = static_cast(WD_BITSTREAM_READY_C0); + return value; + } + uint32_t get_WD_BITSTREAM_READY_C0() const volatile + { + uint32_t value = static_cast(WD_BITSTREAM_READY_C0); + return value; + } + CONSTEXPR dma_status0_r &set_WD_BITSTREAM_READY_C0(uint32_t value) + { + WD_BITSTREAM_READY_C0 = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_BS_BITSTREAM_VALID_C0() const + { + uint32_t value = static_cast(BS_BITSTREAM_VALID_C0); + return value; + } + uint32_t get_BS_BITSTREAM_VALID_C0() const volatile + { + uint32_t value = static_cast(BS_BITSTREAM_VALID_C0); + return value; + } + CONSTEXPR dma_status0_r &set_BS_BITSTREAM_VALID_C0(uint32_t value) + { + BS_BITSTREAM_VALID_C0 = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_BS_BITSTREAM_READY_C0() const + { + uint32_t value = static_cast(BS_BITSTREAM_READY_C0); + return value; + } + uint32_t get_BS_BITSTREAM_READY_C0() const volatile + { + uint32_t value = static_cast(BS_BITSTREAM_READY_C0); + return value; + } + CONSTEXPR dma_status0_r &set_BS_BITSTREAM_READY_C0(uint32_t value) + { + BS_BITSTREAM_READY_C0 = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_AXI0_AR_STALLED() const + { + uint32_t value = static_cast(AXI0_AR_STALLED); + return value; + } + uint32_t get_AXI0_AR_STALLED() const volatile + { + uint32_t value = static_cast(AXI0_AR_STALLED); + return value; + } + CONSTEXPR dma_status0_r &set_AXI0_AR_STALLED(uint32_t value) + { + AXI0_AR_STALLED = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_AXI0_RD_LIMIT_STALL() const + { + uint32_t value = static_cast(AXI0_RD_LIMIT_STALL); + return value; + } + uint32_t get_AXI0_RD_LIMIT_STALL() const volatile + { + uint32_t value = static_cast(AXI0_RD_LIMIT_STALL); + return value; + } + CONSTEXPR dma_status0_r &set_AXI0_RD_LIMIT_STALL(uint32_t value) + { + AXI0_RD_LIMIT_STALL = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_AXI0_AW_STALLED() const + { + uint32_t value = static_cast(AXI0_AW_STALLED); + return value; + } + uint32_t get_AXI0_AW_STALLED() const volatile + { + uint32_t value = static_cast(AXI0_AW_STALLED); + return value; + } + CONSTEXPR dma_status0_r &set_AXI0_AW_STALLED(uint32_t value) + { + AXI0_AW_STALLED = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_AXI0_W_STALLED() const + { + uint32_t value = static_cast(AXI0_W_STALLED); + return value; + } + uint32_t get_AXI0_W_STALLED() const volatile + { + uint32_t value = static_cast(AXI0_W_STALLED); + return value; + } + CONSTEXPR dma_status0_r &set_AXI0_W_STALLED(uint32_t value) + { + AXI0_W_STALLED = ((1u << 1) - 1) & static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// dma_status1_r - DMA_STATUS1 of core DEBUGCORE +struct dma_status1_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + struct + { + uint32_t AXI0_WR_LIMIT_STALL : 1; // Write stalled due to one AXI0 limit counter being reached + uint32_t AXI1_AR_STALLED : 1; // Read transfer request stalled on arready low AXI1 (due to memory system) + uint32_t AXI1_RD_LIMIT_STALL : 1; // Read stalled due to one AXI1 limit counter being reached + uint32_t 
AXI1_WR_STALLED : 1; // Write transfer request stalled on awready low AXI1 (due to memory system) + uint32_t AXI1_W_STALLED : 1; // Write transfer stalled on wready low AXI1 (due to memory system) + uint32_t AXI1_WR_LIMIT_STALL : 1; // Write stalled due to one AXI1 limit counter being reached + uint32_t WGT_IDLE_C1 : 1; // When this bit is high means that the WGT block is not busy in generating + // addresses for a WGT job + uint32_t BAS_IDLE_C1 : 1; // When this bit is high means that the BAS block is not busy in generating + // addresses for a BAS job. + uint32_t IB0_AI_VALID_C1 : 1; // Data for AI to be read in IFM input buffer 0 - Core 1 + uint32_t IB0_AI_READY_C1 : 1; // Data consumed from AI in IFM input buffer 0 - Core 1 + uint32_t IB1_AI_VALID_C1 : 1; // Data for AI to be read in IFM input buffer 1 - Core 1 + uint32_t IB1_AI_READY_C1 : 1; // Data consumed from AI in IFM input buffer 1 - Core 1 + uint32_t IB0_AO_VALID_C1 : 1; // Data for AO to be read in IFM input buffer 0 - Core 1 + uint32_t IB0_AO_READY_C1 : 1; // Data consumed from AO in IFM input buffer 0 - Core 1 + uint32_t IB1_AO_VALID_C1 : 1; // Data for AO to be read in IFM input buffer 0 - Core 1 + uint32_t IB1_AO_READY_C1 : 1; // Data consumed from AO in IFM input buffer 1 - Core 1 + uint32_t OB0_VALID_C1 : 1; // Data for DMA ready to be consumed in OFM output buffer 0 - Core 1 + uint32_t OB0_READY_C1 : 1; // Data consumed from DMA in OFM output buffer 0 - Core 1 + uint32_t OB1_VALID_C1 : 1; // Data for DMA ready to be consumed in OFM output buffer 1 - Core 1 + uint32_t OB1_READY_C1 : 1; // Data consumed from DMA in OFM output buffer 1 - Core 1 + uint32_t WD_BITSTREAM_VALID_C1 : 1; // New weight word for WD to be consumed - Core 1 + uint32_t WD_BITSTREAM_READY_C1 : 1; // Weight word consumed by WD - Core 1 + uint32_t BS_BITSTREAM_VALID_C1 : 1; // New BaS word for AO to be consumed - Core 1 + uint32_t BS_BITSTREAM_READY_C1 : 1; // BaS word consumed by AO - Core 1 + uint32_t reserved0 : 8; + }; + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR dma_status1_r() : + AXI0_WR_LIMIT_STALL(static_cast(0)), AXI1_AR_STALLED(static_cast(0)), + AXI1_RD_LIMIT_STALL(static_cast(0)), AXI1_WR_STALLED(static_cast(0)), + AXI1_W_STALLED(static_cast(0)), AXI1_WR_LIMIT_STALL(static_cast(0)), + WGT_IDLE_C1(static_cast(0)), BAS_IDLE_C1(static_cast(0)), + IB0_AI_VALID_C1(static_cast(0)), IB0_AI_READY_C1(static_cast(0)), + IB1_AI_VALID_C1(static_cast(0)), IB1_AI_READY_C1(static_cast(0)), + IB0_AO_VALID_C1(static_cast(0)), IB0_AO_READY_C1(static_cast(0)), + IB1_AO_VALID_C1(static_cast(0)), IB1_AO_READY_C1(static_cast(0)), + OB0_VALID_C1(static_cast(0)), OB0_READY_C1(static_cast(0)), + OB1_VALID_C1(static_cast(0)), OB1_READY_C1(static_cast(0)), + WD_BITSTREAM_VALID_C1(static_cast(0)), WD_BITSTREAM_READY_C1(static_cast(0)), + BS_BITSTREAM_VALID_C1(static_cast(0)), BS_BITSTREAM_READY_C1(static_cast(0)), + reserved0(static_cast(0)) + { + } + CONSTEXPR dma_status1_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + dma_status1_r copy() volatile + { + return *this; + } + CONSTEXPR uint32_t get_AXI0_WR_LIMIT_STALL() const + { + uint32_t value = static_cast(AXI0_WR_LIMIT_STALL); + return value; + } + uint32_t get_AXI0_WR_LIMIT_STALL() const volatile + { + uint32_t value = static_cast(AXI0_WR_LIMIT_STALL); + return value; + 
} + CONSTEXPR dma_status1_r &set_AXI0_WR_LIMIT_STALL(uint32_t value) + { + AXI0_WR_LIMIT_STALL = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_AXI1_AR_STALLED() const + { + uint32_t value = static_cast(AXI1_AR_STALLED); + return value; + } + uint32_t get_AXI1_AR_STALLED() const volatile + { + uint32_t value = static_cast(AXI1_AR_STALLED); + return value; + } + CONSTEXPR dma_status1_r &set_AXI1_AR_STALLED(uint32_t value) + { + AXI1_AR_STALLED = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_AXI1_RD_LIMIT_STALL() const + { + uint32_t value = static_cast(AXI1_RD_LIMIT_STALL); + return value; + } + uint32_t get_AXI1_RD_LIMIT_STALL() const volatile + { + uint32_t value = static_cast(AXI1_RD_LIMIT_STALL); + return value; + } + CONSTEXPR dma_status1_r &set_AXI1_RD_LIMIT_STALL(uint32_t value) + { + AXI1_RD_LIMIT_STALL = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_AXI1_WR_STALLED() const + { + uint32_t value = static_cast(AXI1_WR_STALLED); + return value; + } + uint32_t get_AXI1_WR_STALLED() const volatile + { + uint32_t value = static_cast(AXI1_WR_STALLED); + return value; + } + CONSTEXPR dma_status1_r &set_AXI1_WR_STALLED(uint32_t value) + { + AXI1_WR_STALLED = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_AXI1_W_STALLED() const + { + uint32_t value = static_cast(AXI1_W_STALLED); + return value; + } + uint32_t get_AXI1_W_STALLED() const volatile + { + uint32_t value = static_cast(AXI1_W_STALLED); + return value; + } + CONSTEXPR dma_status1_r &set_AXI1_W_STALLED(uint32_t value) + { + AXI1_W_STALLED = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_AXI1_WR_LIMIT_STALL() const + { + uint32_t value = static_cast(AXI1_WR_LIMIT_STALL); + return value; + } + uint32_t get_AXI1_WR_LIMIT_STALL() const volatile + { + uint32_t value = static_cast(AXI1_WR_LIMIT_STALL); + return value; + } + CONSTEXPR dma_status1_r &set_AXI1_WR_LIMIT_STALL(uint32_t value) + { + AXI1_WR_LIMIT_STALL = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_WGT_IDLE_C1() const + { + uint32_t value = static_cast(WGT_IDLE_C1); + return value; + } + uint32_t get_WGT_IDLE_C1() const volatile + { + uint32_t value = static_cast(WGT_IDLE_C1); + return value; + } + CONSTEXPR dma_status1_r &set_WGT_IDLE_C1(uint32_t value) + { + WGT_IDLE_C1 = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_BAS_IDLE_C1() const + { + uint32_t value = static_cast(BAS_IDLE_C1); + return value; + } + uint32_t get_BAS_IDLE_C1() const volatile + { + uint32_t value = static_cast(BAS_IDLE_C1); + return value; + } + CONSTEXPR dma_status1_r &set_BAS_IDLE_C1(uint32_t value) + { + BAS_IDLE_C1 = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_IB0_AI_VALID_C1() const + { + uint32_t value = static_cast(IB0_AI_VALID_C1); + return value; + } + uint32_t get_IB0_AI_VALID_C1() const volatile + { + uint32_t value = static_cast(IB0_AI_VALID_C1); + return value; + } + CONSTEXPR dma_status1_r &set_IB0_AI_VALID_C1(uint32_t value) + { + IB0_AI_VALID_C1 = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_IB0_AI_READY_C1() const + { + uint32_t value = static_cast(IB0_AI_READY_C1); + return value; + } + uint32_t get_IB0_AI_READY_C1() const volatile + { + uint32_t value = static_cast(IB0_AI_READY_C1); + return value; + } + CONSTEXPR dma_status1_r &set_IB0_AI_READY_C1(uint32_t 
value) + { + IB0_AI_READY_C1 = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_IB1_AI_VALID_C1() const + { + uint32_t value = static_cast(IB1_AI_VALID_C1); + return value; + } + uint32_t get_IB1_AI_VALID_C1() const volatile + { + uint32_t value = static_cast(IB1_AI_VALID_C1); + return value; + } + CONSTEXPR dma_status1_r &set_IB1_AI_VALID_C1(uint32_t value) + { + IB1_AI_VALID_C1 = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_IB1_AI_READY_C1() const + { + uint32_t value = static_cast(IB1_AI_READY_C1); + return value; + } + uint32_t get_IB1_AI_READY_C1() const volatile + { + uint32_t value = static_cast(IB1_AI_READY_C1); + return value; + } + CONSTEXPR dma_status1_r &set_IB1_AI_READY_C1(uint32_t value) + { + IB1_AI_READY_C1 = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_IB0_AO_VALID_C1() const + { + uint32_t value = static_cast(IB0_AO_VALID_C1); + return value; + } + uint32_t get_IB0_AO_VALID_C1() const volatile + { + uint32_t value = static_cast(IB0_AO_VALID_C1); + return value; + } + CONSTEXPR dma_status1_r &set_IB0_AO_VALID_C1(uint32_t value) + { + IB0_AO_VALID_C1 = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_IB0_AO_READY_C1() const + { + uint32_t value = static_cast(IB0_AO_READY_C1); + return value; + } + uint32_t get_IB0_AO_READY_C1() const volatile + { + uint32_t value = static_cast(IB0_AO_READY_C1); + return value; + } + CONSTEXPR dma_status1_r &set_IB0_AO_READY_C1(uint32_t value) + { + IB0_AO_READY_C1 = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_IB1_AO_VALID_C1() const + { + uint32_t value = static_cast(IB1_AO_VALID_C1); + return value; + } + uint32_t get_IB1_AO_VALID_C1() const volatile + { + uint32_t value = static_cast(IB1_AO_VALID_C1); + return value; + } + CONSTEXPR dma_status1_r &set_IB1_AO_VALID_C1(uint32_t value) + { + IB1_AO_VALID_C1 = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_IB1_AO_READY_C1() const + { + uint32_t value = static_cast(IB1_AO_READY_C1); + return value; + } + uint32_t get_IB1_AO_READY_C1() const volatile + { + uint32_t value = static_cast(IB1_AO_READY_C1); + return value; + } + CONSTEXPR dma_status1_r &set_IB1_AO_READY_C1(uint32_t value) + { + IB1_AO_READY_C1 = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_OB0_VALID_C1() const + { + uint32_t value = static_cast(OB0_VALID_C1); + return value; + } + uint32_t get_OB0_VALID_C1() const volatile + { + uint32_t value = static_cast(OB0_VALID_C1); + return value; + } + CONSTEXPR dma_status1_r &set_OB0_VALID_C1(uint32_t value) + { + OB0_VALID_C1 = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_OB0_READY_C1() const + { + uint32_t value = static_cast(OB0_READY_C1); + return value; + } + uint32_t get_OB0_READY_C1() const volatile + { + uint32_t value = static_cast(OB0_READY_C1); + return value; + } + CONSTEXPR dma_status1_r &set_OB0_READY_C1(uint32_t value) + { + OB0_READY_C1 = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_OB1_VALID_C1() const + { + uint32_t value = static_cast(OB1_VALID_C1); + return value; + } + uint32_t get_OB1_VALID_C1() const volatile + { + uint32_t value = static_cast(OB1_VALID_C1); + return value; + } + CONSTEXPR dma_status1_r &set_OB1_VALID_C1(uint32_t value) + { + OB1_VALID_C1 = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t 
get_OB1_READY_C1() const + { + uint32_t value = static_cast(OB1_READY_C1); + return value; + } + uint32_t get_OB1_READY_C1() const volatile + { + uint32_t value = static_cast(OB1_READY_C1); + return value; + } + CONSTEXPR dma_status1_r &set_OB1_READY_C1(uint32_t value) + { + OB1_READY_C1 = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_WD_BITSTREAM_VALID_C1() const + { + uint32_t value = static_cast(WD_BITSTREAM_VALID_C1); + return value; + } + uint32_t get_WD_BITSTREAM_VALID_C1() const volatile + { + uint32_t value = static_cast(WD_BITSTREAM_VALID_C1); + return value; + } + CONSTEXPR dma_status1_r &set_WD_BITSTREAM_VALID_C1(uint32_t value) + { + WD_BITSTREAM_VALID_C1 = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_WD_BITSTREAM_READY_C1() const + { + uint32_t value = static_cast(WD_BITSTREAM_READY_C1); + return value; + } + uint32_t get_WD_BITSTREAM_READY_C1() const volatile + { + uint32_t value = static_cast(WD_BITSTREAM_READY_C1); + return value; + } + CONSTEXPR dma_status1_r &set_WD_BITSTREAM_READY_C1(uint32_t value) + { + WD_BITSTREAM_READY_C1 = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_BS_BITSTREAM_VALID_C1() const + { + uint32_t value = static_cast(BS_BITSTREAM_VALID_C1); + return value; + } + uint32_t get_BS_BITSTREAM_VALID_C1() const volatile + { + uint32_t value = static_cast(BS_BITSTREAM_VALID_C1); + return value; + } + CONSTEXPR dma_status1_r &set_BS_BITSTREAM_VALID_C1(uint32_t value) + { + BS_BITSTREAM_VALID_C1 = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_BS_BITSTREAM_READY_C1() const + { + uint32_t value = static_cast(BS_BITSTREAM_READY_C1); + return value; + } + uint32_t get_BS_BITSTREAM_READY_C1() const volatile + { + uint32_t value = static_cast(BS_BITSTREAM_READY_C1); + return value; + } + CONSTEXPR dma_status1_r &set_BS_BITSTREAM_READY_C1(uint32_t value) + { + BS_BITSTREAM_READY_C1 = ((1u << 1) - 1) & static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// clkforce_r - Force clocks on for clock gating +struct clkforce_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + struct + { + uint32_t top_level_clk : 1; // set to 1 to force on TOP level clock + uint32_t cc_clk : 1; // set to 1 to force on CC clock + uint32_t dma_clk : 1; // set to 1 to force on DMA clock + uint32_t mac_clk : 1; // set to 1 to force on MAC clock + uint32_t ao_clk : 1; // set to 1 to force on AO clock + uint32_t wd_clk : 1; // set to 1 to force on WD clock + uint32_t reserved0 : 26; + }; + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR clkforce_r() : + top_level_clk(static_cast(0)), cc_clk(static_cast(0)), dma_clk(static_cast(0)), + mac_clk(static_cast(0)), ao_clk(static_cast(0)), wd_clk(static_cast(0)), + reserved0(static_cast(0)) + { + } + CONSTEXPR clkforce_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + clkforce_r copy() volatile + { + return *this; + } + CONSTEXPR uint32_t get_top_level_clk() const + { + uint32_t value = static_cast(top_level_clk); + return value; + } + uint32_t get_top_level_clk() const volatile + { + uint32_t value = static_cast(top_level_clk); + return value; + } + CONSTEXPR clkforce_r &set_top_level_clk(uint32_t value) + { + top_level_clk = ((1u << 
1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_cc_clk() const + { + uint32_t value = static_cast(cc_clk); + return value; + } + uint32_t get_cc_clk() const volatile + { + uint32_t value = static_cast(cc_clk); + return value; + } + CONSTEXPR clkforce_r &set_cc_clk(uint32_t value) + { + cc_clk = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_dma_clk() const + { + uint32_t value = static_cast(dma_clk); + return value; + } + uint32_t get_dma_clk() const volatile + { + uint32_t value = static_cast(dma_clk); + return value; + } + CONSTEXPR clkforce_r &set_dma_clk(uint32_t value) + { + dma_clk = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_mac_clk() const + { + uint32_t value = static_cast(mac_clk); + return value; + } + uint32_t get_mac_clk() const volatile + { + uint32_t value = static_cast(mac_clk); + return value; + } + CONSTEXPR clkforce_r &set_mac_clk(uint32_t value) + { + mac_clk = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_ao_clk() const + { + uint32_t value = static_cast(ao_clk); + return value; + } + uint32_t get_ao_clk() const volatile + { + uint32_t value = static_cast(ao_clk); + return value; + } + CONSTEXPR clkforce_r &set_ao_clk(uint32_t value) + { + ao_clk = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_wd_clk() const + { + uint32_t value = static_cast(wd_clk); + return value; + } + uint32_t get_wd_clk() const volatile + { + uint32_t value = static_cast(wd_clk); + return value; + } + CONSTEXPR clkforce_r &set_wd_clk(uint32_t value) + { + wd_clk = ((1u << 1) - 1) & static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// pid4_r - Peripheral ID byte 4 (Arm=code 4) +struct pid4_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + uint32_t PID4; // Byte 4 of Peripheral ID (Lower 8 bits valid) + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR pid4_r() : PID4(static_cast(0x04)) {} + CONSTEXPR pid4_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + pid4_r copy() volatile + { + return *this; + } + CONSTEXPR uint32_t get_PID4() const + { + uint32_t value = static_cast(PID4); + return value; + } + uint32_t get_PID4() const volatile + { + uint32_t value = static_cast(PID4); + return value; + } + CONSTEXPR pid4_r &set_PID4(uint32_t value) + { + PID4 = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// pid5_r - Peripheral ID byte 5 (reserved) +struct pid5_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + uint32_t PID5; // Byte 5 of Peripheral ID (Lower 8 bits valid) + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR pid5_r() : PID5(static_cast(0x00)) {} + CONSTEXPR pid5_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + pid5_r copy() volatile + { + return *this; + } + CONSTEXPR uint32_t get_PID5() const + { + uint32_t value = static_cast(PID5); + return value; + } + uint32_t get_PID5() const volatile + { + uint32_t value = static_cast(PID5); + return value; + } + CONSTEXPR 
pid5_r &set_PID5(uint32_t value) + { + PID5 = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// pid6_r - Peripheral ID byte 6 (reserved) +struct pid6_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + uint32_t PID6; // Byte 6 of Peripheral ID (Lower 8 bits valid) + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR pid6_r() : PID6(static_cast(0x00)) {} + CONSTEXPR pid6_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + pid6_r copy() volatile + { + return *this; + } + CONSTEXPR uint32_t get_PID6() const + { + uint32_t value = static_cast(PID6); + return value; + } + uint32_t get_PID6() const volatile + { + uint32_t value = static_cast(PID6); + return value; + } + CONSTEXPR pid6_r &set_PID6(uint32_t value) + { + PID6 = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// pid7_r - Peripheral ID byte 7 (reserved) +struct pid7_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + uint32_t PID7; // Byte 7 of Peripheral ID (Lower 8 bits valid) + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR pid7_r() : PID7(static_cast(0x00)) {} + CONSTEXPR pid7_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + pid7_r copy() volatile + { + return *this; + } + CONSTEXPR uint32_t get_PID7() const + { + uint32_t value = static_cast(PID7); + return value; + } + uint32_t get_PID7() const volatile + { + uint32_t value = static_cast(PID7); + return value; + } + CONSTEXPR pid7_r &set_PID7(uint32_t value) + { + PID7 = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// pid0_r - Peripheral ID byte 0. This is bits[7:0] of the part number. +struct pid0_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + uint32_t PID0; // Byte 0 of Peripheral ID (Lower 8 bits valid) + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR pid0_r() : PID0(static_cast(0x80)) {} + CONSTEXPR pid0_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + pid0_r copy() volatile + { + return *this; + } + CONSTEXPR uint32_t get_PID0() const + { + uint32_t value = static_cast(PID0); + return value; + } + uint32_t get_PID0() const volatile + { + uint32_t value = static_cast(PID0); + return value; + } + CONSTEXPR pid0_r &set_PID0(uint32_t value) + { + PID0 = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// pid1_r - Peripheral ID byte 1. This is bits[11:8] of the part number in bits[3:0], and bits[3:0] of the Arm ID in +// bits[7:4]. 
+struct pid1_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + uint32_t PID1; // Byte 1 of Peripheral ID (Lower 8 bits valid) + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR pid1_r() : PID1(static_cast(0xB5)) {} + CONSTEXPR pid1_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + pid1_r copy() volatile + { + return *this; + } + CONSTEXPR uint32_t get_PID1() const + { + uint32_t value = static_cast(PID1); + return value; + } + uint32_t get_PID1() const volatile + { + uint32_t value = static_cast(PID1); + return value; + } + CONSTEXPR pid1_r &set_PID1(uint32_t value) + { + PID1 = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// pid2_r - Peripheral ID byte 2. This is bits[6:4] of the Arm ID in bits[2:0], and bit 3 indicates format B. +struct pid2_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + uint32_t PID2; // Byte 2 of Peripheral ID (Lower 8 bits valid) + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR pid2_r() : PID2(static_cast(0x0B)) {} + CONSTEXPR pid2_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + pid2_r copy() volatile + { + return *this; + } + CONSTEXPR uint32_t get_PID2() const + { + uint32_t value = static_cast(PID2); + return value; + } + uint32_t get_PID2() const volatile + { + uint32_t value = static_cast(PID2); + return value; + } + CONSTEXPR pid2_r &set_PID2(uint32_t value) + { + PID2 = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// pid3_r - Peripheral ID byte 3. +struct pid3_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + uint32_t PID3; // Byte 1 of Peripheral ID (Lower 8 bits valid) + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR pid3_r() : PID3(static_cast(0x0)) {} + CONSTEXPR pid3_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + pid3_r copy() volatile + { + return *this; + } + CONSTEXPR uint32_t get_PID3() const + { + uint32_t value = static_cast(PID3); + return value; + } + uint32_t get_PID3() const volatile + { + uint32_t value = static_cast(PID3); + return value; + } + CONSTEXPR pid3_r &set_PID3(uint32_t value) + { + PID3 = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// cid0_r - Component ID byte 0. 
+struct cid0_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + uint32_t CID0; // Byte 0 of Component ID (Lower 8 bits valid) + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR cid0_r() : CID0(static_cast(0x0D)) {} + CONSTEXPR cid0_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + cid0_r copy() volatile + { + return *this; + } + CONSTEXPR uint32_t get_CID0() const + { + uint32_t value = static_cast(CID0); + return value; + } + uint32_t get_CID0() const volatile + { + uint32_t value = static_cast(CID0); + return value; + } + CONSTEXPR cid0_r &set_CID0(uint32_t value) + { + CID0 = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// cid1_r - Component ID byte 1. +struct cid1_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + uint32_t CID1; // Byte 1 of Component ID (Lower 8 bits valid) + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR cid1_r() : CID1(static_cast(0xF0)) {} + CONSTEXPR cid1_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + cid1_r copy() volatile + { + return *this; + } + CONSTEXPR uint32_t get_CID1() const + { + uint32_t value = static_cast(CID1); + return value; + } + uint32_t get_CID1() const volatile + { + uint32_t value = static_cast(CID1); + return value; + } + CONSTEXPR cid1_r &set_CID1(uint32_t value) + { + CID1 = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// cid2_r - Component ID byte 2. +struct cid2_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + uint32_t CID2; // Byte 2 of Component ID (Lower 8 bits valid) + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR cid2_r() : CID2(static_cast(0x05)) {} + CONSTEXPR cid2_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + cid2_r copy() volatile + { + return *this; + } + CONSTEXPR uint32_t get_CID2() const + { + uint32_t value = static_cast(CID2); + return value; + } + uint32_t get_CID2() const volatile + { + uint32_t value = static_cast(CID2); + return value; + } + CONSTEXPR cid2_r &set_CID2(uint32_t value) + { + CID2 = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// cid3_r - Component ID byte 3. 
+struct cid3_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + uint32_t CID3; // Byte 3 of Component ID (Lower 8 bits valid) + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR cid3_r() : CID3(static_cast(0xB1)) {} + CONSTEXPR cid3_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + cid3_r copy() volatile + { + return *this; + } + CONSTEXPR uint32_t get_CID3() const + { + uint32_t value = static_cast(CID3); + return value; + } + uint32_t get_CID3() const volatile + { + uint32_t value = static_cast(CID3); + return value; + } + CONSTEXPR cid3_r &set_CID3(uint32_t value) + { + CID3 = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// pmcr_r - PMU Register control +struct pmcr_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + struct + { + uint32_t cnt_en : 1; // Enable counter + uint32_t event_cnt_rst : 1; // Reset event counter + uint32_t cycle_cnt_rst : 1; // Reset cycle counter + uint32_t mask_en : 1; // PMU can be enabled/disabled by command stream operation NPU_OP_PMU_MASK + uint32_t reserved0 : 7; + uint32_t num_event_cnt : 5; // Number of event counters + uint32_t reserved1 : 16; + }; + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR pmcr_r() : + cnt_en(static_cast(0x0)), event_cnt_rst(static_cast(0)), + cycle_cnt_rst(static_cast(0)), mask_en(static_cast(0x0)), + reserved0(static_cast(0)), num_event_cnt(static_cast(0x04)), + reserved1(static_cast(0)) + { + } + CONSTEXPR pmcr_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + pmcr_r copy() volatile + { + return *this; + } + CONSTEXPR uint32_t get_cnt_en() const + { + uint32_t value = static_cast(cnt_en); + return value; + } + uint32_t get_cnt_en() const volatile + { + uint32_t value = static_cast(cnt_en); + return value; + } + CONSTEXPR pmcr_r &set_cnt_en(uint32_t value) + { + cnt_en = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_event_cnt_rst() const + { + uint32_t value = static_cast(event_cnt_rst); + return value; + } + uint32_t get_event_cnt_rst() const volatile + { + uint32_t value = static_cast(event_cnt_rst); + return value; + } + CONSTEXPR pmcr_r &set_event_cnt_rst(uint32_t value) + { + event_cnt_rst = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_cycle_cnt_rst() const + { + uint32_t value = static_cast(cycle_cnt_rst); + return value; + } + uint32_t get_cycle_cnt_rst() const volatile + { + uint32_t value = static_cast(cycle_cnt_rst); + return value; + } + CONSTEXPR pmcr_r &set_cycle_cnt_rst(uint32_t value) + { + cycle_cnt_rst = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_mask_en() const + { + uint32_t value = static_cast(mask_en); + return value; + } + uint32_t get_mask_en() const volatile + { + uint32_t value = static_cast(mask_en); + return value; + } + CONSTEXPR pmcr_r &set_mask_en(uint32_t value) + { + mask_en = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_num_event_cnt() const + { + uint32_t value = static_cast(num_event_cnt); + return value; + 
} + uint32_t get_num_event_cnt() const volatile + { + uint32_t value = static_cast(num_event_cnt); + return value; + } + CONSTEXPR pmcr_r &set_num_event_cnt(uint32_t value) + { + num_event_cnt = ((1u << 5) - 1) & static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// pmcntenset_r - Count enable set register +struct pmcntenset_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + struct + { + uint32_t EVENT_CNT_0 : 1; // Event counter enable bit for PMEVCNTR0 + uint32_t EVENT_CNT_1 : 1; // Event counter enable bit for PMEVCNTR1 + uint32_t EVENT_CNT_2 : 1; // Event counter enable bit for PMEVCNTR2 + uint32_t EVENT_CNT_3 : 1; // Event counter enable bit for PMEVCNTR3 + uint32_t reserved0 : 27; + uint32_t CYCLE_CNT : 1; // PMCCNTR enable bit + }; + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR pmcntenset_r() : + EVENT_CNT_0(static_cast(0)), EVENT_CNT_1(static_cast(0)), + EVENT_CNT_2(static_cast(0)), EVENT_CNT_3(static_cast(0)), + reserved0(static_cast(0)), CYCLE_CNT(static_cast(0)) + { + } + CONSTEXPR pmcntenset_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + pmcntenset_r copy() volatile + { + return *this; + } + CONSTEXPR uint32_t get_EVENT_CNT_0() const + { + uint32_t value = static_cast(EVENT_CNT_0); + return value; + } + uint32_t get_EVENT_CNT_0() const volatile + { + uint32_t value = static_cast(EVENT_CNT_0); + return value; + } + CONSTEXPR pmcntenset_r &set_EVENT_CNT_0(uint32_t value) + { + EVENT_CNT_0 = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_EVENT_CNT_1() const + { + uint32_t value = static_cast(EVENT_CNT_1); + return value; + } + uint32_t get_EVENT_CNT_1() const volatile + { + uint32_t value = static_cast(EVENT_CNT_1); + return value; + } + CONSTEXPR pmcntenset_r &set_EVENT_CNT_1(uint32_t value) + { + EVENT_CNT_1 = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_EVENT_CNT_2() const + { + uint32_t value = static_cast(EVENT_CNT_2); + return value; + } + uint32_t get_EVENT_CNT_2() const volatile + { + uint32_t value = static_cast(EVENT_CNT_2); + return value; + } + CONSTEXPR pmcntenset_r &set_EVENT_CNT_2(uint32_t value) + { + EVENT_CNT_2 = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_EVENT_CNT_3() const + { + uint32_t value = static_cast(EVENT_CNT_3); + return value; + } + uint32_t get_EVENT_CNT_3() const volatile + { + uint32_t value = static_cast(EVENT_CNT_3); + return value; + } + CONSTEXPR pmcntenset_r &set_EVENT_CNT_3(uint32_t value) + { + EVENT_CNT_3 = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_CYCLE_CNT() const + { + uint32_t value = static_cast(CYCLE_CNT); + return value; + } + uint32_t get_CYCLE_CNT() const volatile + { + uint32_t value = static_cast(CYCLE_CNT); + return value; + } + CONSTEXPR pmcntenset_r &set_CYCLE_CNT(uint32_t value) + { + CYCLE_CNT = ((1u << 1) - 1) & static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// pmcntenclr_r - Count enable clear register +struct pmcntenclr_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + struct + { + uint32_t EVENT_CNT_0 : 1; // Event counter disable bit for PMEVCNTR0 + uint32_t EVENT_CNT_1 : 1; // Event counter disable bit for PMEVCNTR1 + uint32_t EVENT_CNT_2 : 1; 
// Event counter disable bit for PMEVCNTR2 + uint32_t EVENT_CNT_3 : 1; // Event counter disable bit for PMEVCNTR3 + uint32_t reserved0 : 27; + uint32_t CYCLE_CNT : 1; // PMCCNTR disable bit + }; + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR pmcntenclr_r() : + EVENT_CNT_0(static_cast(0)), EVENT_CNT_1(static_cast(0)), + EVENT_CNT_2(static_cast(0)), EVENT_CNT_3(static_cast(0)), + reserved0(static_cast(0)), CYCLE_CNT(static_cast(0)) + { + } + CONSTEXPR pmcntenclr_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + pmcntenclr_r copy() volatile + { + return *this; + } + CONSTEXPR uint32_t get_EVENT_CNT_0() const + { + uint32_t value = static_cast(EVENT_CNT_0); + return value; + } + uint32_t get_EVENT_CNT_0() const volatile + { + uint32_t value = static_cast(EVENT_CNT_0); + return value; + } + CONSTEXPR pmcntenclr_r &set_EVENT_CNT_0(uint32_t value) + { + EVENT_CNT_0 = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_EVENT_CNT_1() const + { + uint32_t value = static_cast(EVENT_CNT_1); + return value; + } + uint32_t get_EVENT_CNT_1() const volatile + { + uint32_t value = static_cast(EVENT_CNT_1); + return value; + } + CONSTEXPR pmcntenclr_r &set_EVENT_CNT_1(uint32_t value) + { + EVENT_CNT_1 = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_EVENT_CNT_2() const + { + uint32_t value = static_cast(EVENT_CNT_2); + return value; + } + uint32_t get_EVENT_CNT_2() const volatile + { + uint32_t value = static_cast(EVENT_CNT_2); + return value; + } + CONSTEXPR pmcntenclr_r &set_EVENT_CNT_2(uint32_t value) + { + EVENT_CNT_2 = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_EVENT_CNT_3() const + { + uint32_t value = static_cast(EVENT_CNT_3); + return value; + } + uint32_t get_EVENT_CNT_3() const volatile + { + uint32_t value = static_cast(EVENT_CNT_3); + return value; + } + CONSTEXPR pmcntenclr_r &set_EVENT_CNT_3(uint32_t value) + { + EVENT_CNT_3 = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_CYCLE_CNT() const + { + uint32_t value = static_cast(CYCLE_CNT); + return value; + } + uint32_t get_CYCLE_CNT() const volatile + { + uint32_t value = static_cast(CYCLE_CNT); + return value; + } + CONSTEXPR pmcntenclr_r &set_CYCLE_CNT(uint32_t value) + { + CYCLE_CNT = ((1u << 1) - 1) & static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// pmovsset_r - Overflow flag status set register +struct pmovsset_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + struct + { + uint32_t EVENT_CNT_0_OVF : 1; // Event counter overflow set bit for PMEVCNTR0 + uint32_t EVENT_CNT_1_OVF : 1; // Event counter overflow set bit for PMEVCNTR1 + uint32_t EVENT_CNT_2_OVF : 1; // Event counter overflow set bit for PMEVCNTR2 + uint32_t EVENT_CNT_3_OVF : 1; // Event counter overflow set bit for PMEVCNTR3 + uint32_t reserved0 : 27; + uint32_t CYCLE_CNT_OVF : 1; // PMCCNTR overflow set bit + }; + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR pmovsset_r() : + EVENT_CNT_0_OVF(static_cast(0)), EVENT_CNT_1_OVF(static_cast(0)), + EVENT_CNT_2_OVF(static_cast(0)), EVENT_CNT_3_OVF(static_cast(0)), + reserved0(static_cast(0)), CYCLE_CNT_OVF(static_cast(0)) + { + } + CONSTEXPR pmovsset_r(uint32_t init) : word(init) {} + CONSTEXPR 
void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + pmovsset_r copy() volatile + { + return *this; + } + CONSTEXPR uint32_t get_EVENT_CNT_0_OVF() const + { + uint32_t value = static_cast(EVENT_CNT_0_OVF); + return value; + } + uint32_t get_EVENT_CNT_0_OVF() const volatile + { + uint32_t value = static_cast(EVENT_CNT_0_OVF); + return value; + } + CONSTEXPR pmovsset_r &set_EVENT_CNT_0_OVF(uint32_t value) + { + EVENT_CNT_0_OVF = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_EVENT_CNT_1_OVF() const + { + uint32_t value = static_cast(EVENT_CNT_1_OVF); + return value; + } + uint32_t get_EVENT_CNT_1_OVF() const volatile + { + uint32_t value = static_cast(EVENT_CNT_1_OVF); + return value; + } + CONSTEXPR pmovsset_r &set_EVENT_CNT_1_OVF(uint32_t value) + { + EVENT_CNT_1_OVF = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_EVENT_CNT_2_OVF() const + { + uint32_t value = static_cast(EVENT_CNT_2_OVF); + return value; + } + uint32_t get_EVENT_CNT_2_OVF() const volatile + { + uint32_t value = static_cast(EVENT_CNT_2_OVF); + return value; + } + CONSTEXPR pmovsset_r &set_EVENT_CNT_2_OVF(uint32_t value) + { + EVENT_CNT_2_OVF = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_EVENT_CNT_3_OVF() const + { + uint32_t value = static_cast(EVENT_CNT_3_OVF); + return value; + } + uint32_t get_EVENT_CNT_3_OVF() const volatile + { + uint32_t value = static_cast(EVENT_CNT_3_OVF); + return value; + } + CONSTEXPR pmovsset_r &set_EVENT_CNT_3_OVF(uint32_t value) + { + EVENT_CNT_3_OVF = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_CYCLE_CNT_OVF() const + { + uint32_t value = static_cast(CYCLE_CNT_OVF); + return value; + } + uint32_t get_CYCLE_CNT_OVF() const volatile + { + uint32_t value = static_cast(CYCLE_CNT_OVF); + return value; + } + CONSTEXPR pmovsset_r &set_CYCLE_CNT_OVF(uint32_t value) + { + CYCLE_CNT_OVF = ((1u << 1) - 1) & static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// pmovsclr_r - Overflow flag status clear register +struct pmovsclr_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + struct + { + uint32_t EVENT_CNT_0_OVF : 1; // Event counter overflow clear bit for PMEVCNTR0 + uint32_t EVENT_CNT_1_OVF : 1; // Event counter overflow clear bit for PMEVCNTR1 + uint32_t EVENT_CNT_2_OVF : 1; // Event counter overflow clear bit for PMEVCNTR2 + uint32_t EVENT_CNT_3_OVF : 1; // Event counter overflow clear bit for PMEVCNTR3 + uint32_t reserved0 : 27; + uint32_t CYCLE_CNT_OVF : 1; // PMCCNTR overflow clear bit + }; + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR pmovsclr_r() : + EVENT_CNT_0_OVF(static_cast(0)), EVENT_CNT_1_OVF(static_cast(0)), + EVENT_CNT_2_OVF(static_cast(0)), EVENT_CNT_3_OVF(static_cast(0)), + reserved0(static_cast(0)), CYCLE_CNT_OVF(static_cast(0)) + { + } + CONSTEXPR pmovsclr_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + pmovsclr_r copy() volatile + { + return *this; + } + CONSTEXPR uint32_t get_EVENT_CNT_0_OVF() const + { + uint32_t value = static_cast(EVENT_CNT_0_OVF); + return value; + } + 
uint32_t get_EVENT_CNT_0_OVF() const volatile + { + uint32_t value = static_cast(EVENT_CNT_0_OVF); + return value; + } + CONSTEXPR pmovsclr_r &set_EVENT_CNT_0_OVF(uint32_t value) + { + EVENT_CNT_0_OVF = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_EVENT_CNT_1_OVF() const + { + uint32_t value = static_cast(EVENT_CNT_1_OVF); + return value; + } + uint32_t get_EVENT_CNT_1_OVF() const volatile + { + uint32_t value = static_cast(EVENT_CNT_1_OVF); + return value; + } + CONSTEXPR pmovsclr_r &set_EVENT_CNT_1_OVF(uint32_t value) + { + EVENT_CNT_1_OVF = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_EVENT_CNT_2_OVF() const + { + uint32_t value = static_cast(EVENT_CNT_2_OVF); + return value; + } + uint32_t get_EVENT_CNT_2_OVF() const volatile + { + uint32_t value = static_cast(EVENT_CNT_2_OVF); + return value; + } + CONSTEXPR pmovsclr_r &set_EVENT_CNT_2_OVF(uint32_t value) + { + EVENT_CNT_2_OVF = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_EVENT_CNT_3_OVF() const + { + uint32_t value = static_cast(EVENT_CNT_3_OVF); + return value; + } + uint32_t get_EVENT_CNT_3_OVF() const volatile + { + uint32_t value = static_cast(EVENT_CNT_3_OVF); + return value; + } + CONSTEXPR pmovsclr_r &set_EVENT_CNT_3_OVF(uint32_t value) + { + EVENT_CNT_3_OVF = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_CYCLE_CNT_OVF() const + { + uint32_t value = static_cast(CYCLE_CNT_OVF); + return value; + } + uint32_t get_CYCLE_CNT_OVF() const volatile + { + uint32_t value = static_cast(CYCLE_CNT_OVF); + return value; + } + CONSTEXPR pmovsclr_r &set_CYCLE_CNT_OVF(uint32_t value) + { + CYCLE_CNT_OVF = ((1u << 1) - 1) & static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// pmintset_r - Interrupt enable set register +struct pmintset_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + struct + { + uint32_t EVENT_CNT_0_INT : 1; // Event counter overflow interrupt request enable bit for PMEVCNTR0 + uint32_t EVENT_CNT_1_INT : 1; // Event counter overflow interrupt request enable bit for PMEVCNTR1 + uint32_t EVENT_CNT_2_INT : 1; // Event counter overflow interrupt request enable bit for PMEVCNTR2 + uint32_t EVENT_CNT_3_INT : 1; // Event counter overflow interrupt request enable bit for PMEVCNTR3 + uint32_t reserved0 : 27; + uint32_t CYCLE_CNT_INT : 1; // PMCCNTR overflow interrupt request enable bit + }; + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR pmintset_r() : + EVENT_CNT_0_INT(static_cast(0)), EVENT_CNT_1_INT(static_cast(0)), + EVENT_CNT_2_INT(static_cast(0)), EVENT_CNT_3_INT(static_cast(0)), + reserved0(static_cast(0)), CYCLE_CNT_INT(static_cast(0)) + { + } + CONSTEXPR pmintset_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + pmintset_r copy() volatile + { + return *this; + } + CONSTEXPR uint32_t get_EVENT_CNT_0_INT() const + { + uint32_t value = static_cast(EVENT_CNT_0_INT); + return value; + } + uint32_t get_EVENT_CNT_0_INT() const volatile + { + uint32_t value = static_cast(EVENT_CNT_0_INT); + return value; + } + CONSTEXPR pmintset_r &set_EVENT_CNT_0_INT(uint32_t value) + { + EVENT_CNT_0_INT = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_EVENT_CNT_1_INT() const + { + 
uint32_t value = static_cast(EVENT_CNT_1_INT); + return value; + } + uint32_t get_EVENT_CNT_1_INT() const volatile + { + uint32_t value = static_cast(EVENT_CNT_1_INT); + return value; + } + CONSTEXPR pmintset_r &set_EVENT_CNT_1_INT(uint32_t value) + { + EVENT_CNT_1_INT = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_EVENT_CNT_2_INT() const + { + uint32_t value = static_cast(EVENT_CNT_2_INT); + return value; + } + uint32_t get_EVENT_CNT_2_INT() const volatile + { + uint32_t value = static_cast(EVENT_CNT_2_INT); + return value; + } + CONSTEXPR pmintset_r &set_EVENT_CNT_2_INT(uint32_t value) + { + EVENT_CNT_2_INT = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_EVENT_CNT_3_INT() const + { + uint32_t value = static_cast(EVENT_CNT_3_INT); + return value; + } + uint32_t get_EVENT_CNT_3_INT() const volatile + { + uint32_t value = static_cast(EVENT_CNT_3_INT); + return value; + } + CONSTEXPR pmintset_r &set_EVENT_CNT_3_INT(uint32_t value) + { + EVENT_CNT_3_INT = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_CYCLE_CNT_INT() const + { + uint32_t value = static_cast(CYCLE_CNT_INT); + return value; + } + uint32_t get_CYCLE_CNT_INT() const volatile + { + uint32_t value = static_cast(CYCLE_CNT_INT); + return value; + } + CONSTEXPR pmintset_r &set_CYCLE_CNT_INT(uint32_t value) + { + CYCLE_CNT_INT = ((1u << 1) - 1) & static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// pmintclr_r - Interrupt enable clear register +struct pmintclr_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + struct + { + uint32_t EVENT_CNT_0_INT : 1; // Event counter overflow interrupt request disable bit for PMEVCNTR0 + uint32_t EVENT_CNT_1_INT : 1; // Event counter overflow interrupt request disable bit for PMEVCNTR1 + uint32_t EVENT_CNT_2_INT : 1; // Event counter overflow interrupt request disable bit for PMEVCNTR2 + uint32_t EVENT_CNT_3_INT : 1; // Event counter overflow interrupt request disable bit for PMEVCNTR3 + uint32_t reserved0 : 27; + uint32_t CYCLE_CNT_INT : 1; // PMCCNTR overflow interrupt request disable bit + }; + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR pmintclr_r() : + EVENT_CNT_0_INT(static_cast(0)), EVENT_CNT_1_INT(static_cast(0)), + EVENT_CNT_2_INT(static_cast(0)), EVENT_CNT_3_INT(static_cast(0)), + reserved0(static_cast(0)), CYCLE_CNT_INT(static_cast(0)) + { + } + CONSTEXPR pmintclr_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + pmintclr_r copy() volatile + { + return *this; + } + CONSTEXPR uint32_t get_EVENT_CNT_0_INT() const + { + uint32_t value = static_cast(EVENT_CNT_0_INT); + return value; + } + uint32_t get_EVENT_CNT_0_INT() const volatile + { + uint32_t value = static_cast(EVENT_CNT_0_INT); + return value; + } + CONSTEXPR pmintclr_r &set_EVENT_CNT_0_INT(uint32_t value) + { + EVENT_CNT_0_INT = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_EVENT_CNT_1_INT() const + { + uint32_t value = static_cast(EVENT_CNT_1_INT); + return value; + } + uint32_t get_EVENT_CNT_1_INT() const volatile + { + uint32_t value = static_cast(EVENT_CNT_1_INT); + return value; + } + CONSTEXPR pmintclr_r &set_EVENT_CNT_1_INT(uint32_t value) + { + EVENT_CNT_1_INT = ((1u << 1) - 1) & static_cast(value); + 
return *this; + } + CONSTEXPR uint32_t get_EVENT_CNT_2_INT() const + { + uint32_t value = static_cast(EVENT_CNT_2_INT); + return value; + } + uint32_t get_EVENT_CNT_2_INT() const volatile + { + uint32_t value = static_cast(EVENT_CNT_2_INT); + return value; + } + CONSTEXPR pmintclr_r &set_EVENT_CNT_2_INT(uint32_t value) + { + EVENT_CNT_2_INT = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_EVENT_CNT_3_INT() const + { + uint32_t value = static_cast(EVENT_CNT_3_INT); + return value; + } + uint32_t get_EVENT_CNT_3_INT() const volatile + { + uint32_t value = static_cast(EVENT_CNT_3_INT); + return value; + } + CONSTEXPR pmintclr_r &set_EVENT_CNT_3_INT(uint32_t value) + { + EVENT_CNT_3_INT = ((1u << 1) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_CYCLE_CNT_INT() const + { + uint32_t value = static_cast(CYCLE_CNT_INT); + return value; + } + uint32_t get_CYCLE_CNT_INT() const volatile + { + uint32_t value = static_cast(CYCLE_CNT_INT); + return value; + } + CONSTEXPR pmintclr_r &set_CYCLE_CNT_INT(uint32_t value) + { + CYCLE_CNT_INT = ((1u << 1) - 1) & static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// pmccntr_lo_r - Performance monitor cycle count low register +struct pmccntr_lo_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + uint32_t CYCLE_CNT_LO; // Cycle count low + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR pmccntr_lo_r() : CYCLE_CNT_LO(static_cast(0x00000000)) {} + CONSTEXPR pmccntr_lo_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + pmccntr_lo_r copy() volatile + { + return *this; + } + CONSTEXPR uint32_t get_CYCLE_CNT_LO() const + { + uint32_t value = static_cast(CYCLE_CNT_LO); + return value; + } + uint32_t get_CYCLE_CNT_LO() const volatile + { + uint32_t value = static_cast(CYCLE_CNT_LO); + return value; + } + CONSTEXPR pmccntr_lo_r &set_CYCLE_CNT_LO(uint32_t value) + { + CYCLE_CNT_LO = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// pmccntr_hi_r - Performance monitor cycle count high register +struct pmccntr_hi_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + struct + { + uint32_t CYCLE_CNT_HI : 16; // Cycle count high + uint32_t reserved0 : 16; + }; + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR pmccntr_hi_r() : CYCLE_CNT_HI(static_cast(0x0000)), reserved0(static_cast(0)) {} + CONSTEXPR pmccntr_hi_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + pmccntr_hi_r copy() volatile + { + return *this; + } + CONSTEXPR uint32_t get_CYCLE_CNT_HI() const + { + uint32_t value = static_cast(CYCLE_CNT_HI); + return value; + } + uint32_t get_CYCLE_CNT_HI() const volatile + { + uint32_t value = static_cast(CYCLE_CNT_HI); + return value; + } + CONSTEXPR pmccntr_hi_r &set_CYCLE_CNT_HI(uint32_t value) + { + CYCLE_CNT_HI = ((1u << 16) - 1) & static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// pmccntr_cfg_r - Set start/stop event on the cycle counter +struct pmccntr_cfg_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + struct + { + 
uint32_t CYCLE_CNT_CFG_START : 10; // Cycle counter start event + uint32_t reserved0 : 6; + uint32_t CYCLE_CNT_CFG_STOP : 10; // Cycle counter stop event + uint32_t reserved1 : 6; + }; + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR pmccntr_cfg_r() : + CYCLE_CNT_CFG_START(static_cast(0x00)), reserved0(static_cast(0)), + CYCLE_CNT_CFG_STOP(static_cast(0x00)), reserved1(static_cast(0)) + { + } + CONSTEXPR pmccntr_cfg_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + pmccntr_cfg_r copy() volatile + { + return *this; + } + CONSTEXPR uint32_t get_CYCLE_CNT_CFG_START() const + { + uint32_t value = static_cast(CYCLE_CNT_CFG_START); + return value; + } + uint32_t get_CYCLE_CNT_CFG_START() const volatile + { + uint32_t value = static_cast(CYCLE_CNT_CFG_START); + return value; + } + CONSTEXPR pmccntr_cfg_r &set_CYCLE_CNT_CFG_START(uint32_t value) + { + CYCLE_CNT_CFG_START = ((1u << 10) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_CYCLE_CNT_CFG_STOP() const + { + uint32_t value = static_cast(CYCLE_CNT_CFG_STOP); + return value; + } + uint32_t get_CYCLE_CNT_CFG_STOP() const volatile + { + uint32_t value = static_cast(CYCLE_CNT_CFG_STOP); + return value; + } + CONSTEXPR pmccntr_cfg_r &set_CYCLE_CNT_CFG_STOP(uint32_t value) + { + CYCLE_CNT_CFG_STOP = ((1u << 10) - 1) & static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// pmcaxi_chan_r - Set which AXI channel to monitor for latency measurements in PMU +struct pmcaxi_chan_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + struct + { + uint32_t CH_SEL : 4; // Channel number to monitor for latency measurements (Read: 0=Cmd 1=IFM 2=Weights + // 3=Scale+Bias 4=Mem2Mem; Write: 8=OFM 9=Mem2Mem) + uint32_t reserved0 : 4; + uint32_t AXI_CNT_SEL : 2; // AXI counter to monitor for latency measurements (0=AXI0 counter0, 1=AXI0 + // counter1, 2=AXI1 counter 2, 3=AXI counter3) + uint32_t BW_CH_SEL_EN : 1; // Bandwidth channel selector enable: {0=AXI bw events measured for all channels, + // 1=AXI bw events measured for channel specified by CH_SEL + uint32_t reserved1 : 21; + }; + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR pmcaxi_chan_r() : + CH_SEL(static_cast(0x0)), reserved0(static_cast(0)), + AXI_CNT_SEL(static_cast(0x000000)), BW_CH_SEL_EN(static_cast(0x000000)), + reserved1(static_cast(0)) + { + } + CONSTEXPR pmcaxi_chan_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + pmcaxi_chan_r copy() volatile + { + return *this; + } + CONSTEXPR uint32_t get_CH_SEL() const + { + uint32_t value = static_cast(CH_SEL); + return value; + } + uint32_t get_CH_SEL() const volatile + { + uint32_t value = static_cast(CH_SEL); + return value; + } + CONSTEXPR pmcaxi_chan_r &set_CH_SEL(uint32_t value) + { + CH_SEL = ((1u << 4) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_AXI_CNT_SEL() const + { + uint32_t value = static_cast(AXI_CNT_SEL); + return value; + } + uint32_t get_AXI_CNT_SEL() const volatile + { + uint32_t value = static_cast(AXI_CNT_SEL); + return value; + } + CONSTEXPR pmcaxi_chan_r 
&set_AXI_CNT_SEL(uint32_t value) + { + AXI_CNT_SEL = ((1u << 2) - 1) & static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_BW_CH_SEL_EN() const + { + uint32_t value = static_cast(BW_CH_SEL_EN); + return value; + } + uint32_t get_BW_CH_SEL_EN() const volatile + { + uint32_t value = static_cast(BW_CH_SEL_EN); + return value; + } + CONSTEXPR pmcaxi_chan_r &set_BW_CH_SEL_EN(uint32_t value) + { + BW_CH_SEL_EN = ((1u << 1) - 1) & static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// pmevtyper0_r - Performance monitor event type register 0 +struct pmevtyper0_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + struct + { + uint32_t EV_TYPE : 10; // Event Type + uint32_t reserved0 : 22; + }; + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR pmevtyper0_r() : + EV_TYPE(static_cast(::pmu_event_type::NO_EVENT)), reserved0(static_cast(0)) + { + } + CONSTEXPR pmevtyper0_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + pmevtyper0_r copy() volatile + { + return *this; + } + CONSTEXPR ::pmu_event_type get_EV_TYPE() const + { + ::pmu_event_type value = static_cast<::pmu_event_type>(EV_TYPE); + return value; + } + ::pmu_event_type get_EV_TYPE() const volatile + { + ::pmu_event_type value = static_cast<::pmu_event_type>(EV_TYPE); + return value; + } + CONSTEXPR pmevtyper0_r &set_EV_TYPE(::pmu_event_type value) + { + EV_TYPE = ((1u << 10) - 1) & static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// pmevtyper1_r - Performance monitor event type register 1 +struct pmevtyper1_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + struct + { + uint32_t EV_TYPE : 10; // Event Type + uint32_t reserved0 : 22; + }; + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR pmevtyper1_r() : + EV_TYPE(static_cast(::pmu_event_type::NO_EVENT)), reserved0(static_cast(0)) + { + } + CONSTEXPR pmevtyper1_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + operator uint32_t() volatile + { + return word; + } + pmevtyper1_r copy() volatile + { + return *this; + } + CONSTEXPR ::pmu_event_type get_EV_TYPE() const + { + ::pmu_event_type value = static_cast<::pmu_event_type>(EV_TYPE); + return value; + } + ::pmu_event_type get_EV_TYPE() const volatile + { + ::pmu_event_type value = static_cast<::pmu_event_type>(EV_TYPE); + return value; + } + CONSTEXPR pmevtyper1_r &set_EV_TYPE(::pmu_event_type value) + { + EV_TYPE = ((1u << 10) - 1) & static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// pmevtyper2_r - Performance monitor event type register 2 +struct pmevtyper2_r +{ +#ifdef __cplusplus + private: +#endif //__cplusplus + union + { + struct + { + uint32_t EV_TYPE : 10; // Event Type + uint32_t reserved0 : 22; + }; + uint32_t word; + }; +#ifdef __cplusplus + public: + CONSTEXPR pmevtyper2_r() : + EV_TYPE(static_cast(::pmu_event_type::NO_EVENT)), reserved0(static_cast(0)) + { + } + CONSTEXPR pmevtyper2_r(uint32_t init) : word(init) {} + CONSTEXPR void operator=(uint32_t value) + { + word = value; + } + void operator=(uint32_t value) volatile + { + word = value; + } + CONSTEXPR operator uint32_t() + { + return word; + } + 
operator uint32_t() volatile
+    {
+        return word;
+    }
+    pmevtyper2_r copy() volatile
+    {
+        return *this;
+    }
+    CONSTEXPR ::pmu_event_type get_EV_TYPE() const
+    {
+        ::pmu_event_type value = static_cast<::pmu_event_type>(EV_TYPE);
+        return value;
+    }
+    ::pmu_event_type get_EV_TYPE() const volatile
+    {
+        ::pmu_event_type value = static_cast<::pmu_event_type>(EV_TYPE);
+        return value;
+    }
+    CONSTEXPR pmevtyper2_r &set_EV_TYPE(::pmu_event_type value)
+    {
+        EV_TYPE = ((1u << 10) - 1) & static_cast<uint32_t>(value);
+        return *this;
+    }
+#endif //__cplusplus
+};
+
+// pmevtyper3_r - Performance monitor event type register 3
+struct pmevtyper3_r
+{
+#ifdef __cplusplus
+  private:
+#endif //__cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t EV_TYPE : 10; // Event Type
+            uint32_t reserved0 : 22;
+        };
+        uint32_t word;
+    };
+#ifdef __cplusplus
+  public:
+    CONSTEXPR pmevtyper3_r() :
+        EV_TYPE(static_cast<uint32_t>(::pmu_event_type::NO_EVENT)), reserved0(static_cast<uint32_t>(0))
+    {
+    }
+    CONSTEXPR pmevtyper3_r(uint32_t init) : word(init) {}
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word = value;
+    }
+    void operator=(uint32_t value) volatile
+    {
+        word = value;
+    }
+    CONSTEXPR operator uint32_t()
+    {
+        return word;
+    }
+    operator uint32_t() volatile
+    {
+        return word;
+    }
+    pmevtyper3_r copy() volatile
+    {
+        return *this;
+    }
+    CONSTEXPR ::pmu_event_type get_EV_TYPE() const
+    {
+        ::pmu_event_type value = static_cast<::pmu_event_type>(EV_TYPE);
+        return value;
+    }
+    ::pmu_event_type get_EV_TYPE() const volatile
+    {
+        ::pmu_event_type value = static_cast<::pmu_event_type>(EV_TYPE);
+        return value;
+    }
+    CONSTEXPR pmevtyper3_r &set_EV_TYPE(::pmu_event_type value)
+    {
+        EV_TYPE = ((1u << 10) - 1) & static_cast<uint32_t>(value);
+        return *this;
+    }
+#endif //__cplusplus
+};
+
+struct NPU_REG
+{
+    STRUCT id_r ID;                   // 0x0
+    STRUCT status_r STATUS;           // 0x4
+    STRUCT cmd_r CMD;                 // 0x8
+    STRUCT reset_r RESET;             // 0xc
+    STRUCT qbase0_r QBASE0;           // 0x10
+    STRUCT qbase1_r QBASE1;           // 0x14
+    STRUCT qread_r QREAD;             // 0x18
+    STRUCT qconfig_r QCONFIG;         // 0x1c
+    STRUCT qsize_r QSIZE;             // 0x20
+    STRUCT prot_r PROT;               // 0x24
+    STRUCT config_r CONFIG;           // 0x28
+    STRUCT lock_r LOCK;               // 0x2c
+    uint32_t unused0[3];
+    STRUCT regioncfg_r REGIONCFG;     // 0x3c
+    STRUCT axi_limit0_r AXI_LIMIT0;   // 0x40
+    STRUCT axi_limit1_r AXI_LIMIT1;   // 0x44
+    STRUCT axi_limit2_r AXI_LIMIT2;   // 0x48
+    STRUCT axi_limit3_r AXI_LIMIT3;   // 0x4c
+    uint32_t unused1[12];
+    STRUCT basep0_r BASEP0;           // 0x80
+    STRUCT basep1_r BASEP1;           // 0x84
+    STRUCT basep2_r BASEP2;           // 0x88
+    STRUCT basep3_r BASEP3;           // 0x8c
+    STRUCT basep4_r BASEP4;           // 0x90
+    STRUCT basep5_r BASEP5;           // 0x94
+    STRUCT basep6_r BASEP6;           // 0x98
+    STRUCT basep7_r BASEP7;           // 0x9c
+    STRUCT basep8_r BASEP8;           // 0xa0
+    STRUCT basep9_r BASEP9;           // 0xa4
+    STRUCT basep10_r BASEP10;         // 0xa8
+    STRUCT basep11_r BASEP11;         // 0xac
+    STRUCT basep12_r BASEP12;         // 0xb0
+    STRUCT basep13_r BASEP13;         // 0xb4
+    STRUCT basep14_r BASEP14;         // 0xb8
+    STRUCT basep15_r BASEP15;         // 0xbc
+    uint32_t unused2[16];
+    STRUCT wd_status_r WD_STATUS;     // 0x100
+    STRUCT mac_status_r MAC_STATUS;   // 0x104
+    STRUCT ao_status_r AO_STATUS;     // 0x108
+    uint32_t unused3[1];
+    STRUCT dma_status0_r DMA_STATUS0; // 0x110
+    STRUCT dma_status1_r DMA_STATUS1; // 0x114
+    uint32_t unused4[10];
+    STRUCT clkforce_r CLKFORCE;       // 0x140
+    uint32_t DEBUG_ADDRESS;           // 0x144
+    uint32_t DEBUG_MISC;              // 0x148
+    uint32_t DEBUGCORE;               // 0x14c
+    uint32_t DEBUG_BLOCK;             // 0x150
+    uint32_t unused5[11];
+    STRUCT pmcr_r PMCR;               // 0x180
+    STRUCT pmcntenset_r PMCNTENSET;   // 0x184
+    STRUCT pmcntenclr_r PMCNTENCLR;   // 0x188
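+    // Typical PMU programming with the register structs above follows the usual Arm PMU flow:
+    // reset and enable the counters via PMCR, pick an event per PMEVTYPER, enable the counters
+    // via PMCNTENSET, then read PMEVCNTR and the 48-bit cycle count. Illustrative sketch only;
+    // "npu", NPU_BASE_ADDRESS and "some_pmu_event_type" are assumptions about the surrounding
+    // platform and are not defined in this header:
+    //
+    //   volatile NPU_REG *npu = reinterpret_cast<volatile NPU_REG *>(NPU_BASE_ADDRESS);
+    //   npu->PMCR        = pmcr_r().set_event_cnt_rst(1).set_cycle_cnt_rst(1).set_cnt_en(1);
+    //   npu->PMEVTYPER[0] = pmevtyper0_r().set_EV_TYPE(some_pmu_event_type); // event to count
+    //   npu->PMCNTENSET  = pmcntenset_r().set_EVENT_CNT_0(1).set_CYCLE_CNT(1);
+    //   // ...run work on the NPU...
+    //   uint32_t ev0   = npu->PMEVCNTR[0];
+    //   uint64_t cycles = (static_cast<uint64_t>(npu->PMCCNTR_HI.get_CYCLE_CNT_HI()) << 32) |
+    //                     npu->PMCCNTR_LO.get_CYCLE_CNT_LO();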
+ STRUCT pmovsset_r PMOVSSET; // 0x18c + STRUCT pmovsclr_r PMOVSCLR; // 0x190 + STRUCT pmintset_r PMINTSET; // 0x194 + STRUCT pmintclr_r PMINTCLR; // 0x198 + uint32_t unused6[1]; + STRUCT pmccntr_lo_r PMCCNTR_LO; // 0x1a0 + STRUCT pmccntr_hi_r PMCCNTR_HI; // 0x1a4 + STRUCT pmccntr_cfg_r PMCCNTR_CFG; // 0x1a8 + STRUCT pmcaxi_chan_r PMCAXI_CHAN; // 0x1ac + uint32_t unused7[20]; + uint32_t KERNEL_X; // 0x200 + uint32_t KERNEL_Y; // 0x204 + uint32_t KERNEL_W_M1; // 0x208 + uint32_t KERNEL_H_M1; // 0x20c + uint32_t OFM_CBLK_WIDTH_M1; // 0x210 + uint32_t OFM_CBLK_HEIGHT_M1; // 0x214 + uint32_t OFM_CBLK_DEPTH_M1; // 0x218 + uint32_t IFM_CBLK_DEPTH_M1; // 0x21c + uint32_t OFM_X; // 0x220 + uint32_t OFM_Y; // 0x224 + uint32_t OFM_Z; // 0x228 + uint32_t IFM_Z; // 0x22c + uint32_t PAD_TOP; // 0x230 + uint32_t PAD_LEFT; // 0x234 + uint32_t IFM_CBLK_WIDTH; // 0x238 + uint32_t IFM_CBLK_HEIGHT; // 0x23c + uint32_t DMA_IFM_SRC; // 0x240 + uint32_t DMA_IFM_SRC_HI; // 0x244 + uint32_t DMA_IFM_DST; // 0x248 + uint32_t DMA_OFM_SRC; // 0x24c + uint32_t DMA_OFM_DST; // 0x250 + uint32_t DMA_OFM_DST_HI; // 0x254 + uint32_t DMA_WEIGHT_SRC; // 0x258 + uint32_t DMA_WEIGHT_SRC_HI; // 0x25c + uint32_t DMA_CMD_SRC; // 0x260 + uint32_t DMA_CMD_SRC_HI; // 0x264 + uint32_t DMA_CMD_SIZE; // 0x268 + uint32_t DMA_M2M_SRC; // 0x26c + uint32_t DMA_M2M_SRC_HI; // 0x270 + uint32_t DMA_M2M_DST; // 0x274 + uint32_t DMA_M2M_DST_HI; // 0x278 + uint32_t CURRENT_QREAD; // 0x27c + uint32_t DMA_SCALE_SRC; // 0x280 + uint32_t DMA_SCALE_SRC_HI; // 0x284 + uint32_t unused8[11]; + uint32_t CURRENT_BLOCK; // 0x2b4 + uint32_t CURRENT_OP; // 0x2b8 + uint32_t CURRENT_CMD; // 0x2bc + uint32_t unused9[16]; + uint32_t PMEVCNTR[4]; // 0x300 + uint32_t unused10[28]; + STRUCT pmevtyper0_r PMEVTYPER[4]; // 0x380 + uint32_t unused11[28]; + uint32_t SHARED_BUFFER[256]; // 0x400 + uint32_t IFM_PAD_TOP; // 0x800 + uint32_t IFM_PAD_LEFT; // 0x804 + uint32_t IFM_PAD_RIGHT; // 0x808 + uint32_t IFM_PAD_BOTTOM; // 0x80c + uint32_t IFM_DEPTH_M1; // 0x810 + uint32_t IFM_PRECISION; // 0x814 + uint32_t unused12[1]; + uint32_t IFM_UPSCALE; // 0x81c + uint32_t unused13[1]; + uint32_t IFM_ZERO_POINT; // 0x824 + uint32_t IFM_WIDTH0_M1; // 0x828 + uint32_t IFM_HEIGHT0_M1; // 0x82c + uint32_t IFM_HEIGHT1_M1; // 0x830 + uint32_t IFM_IB_END; // 0x834 + uint32_t unused14[1]; + uint32_t IFM_REGION; // 0x83c + uint32_t unused15[1]; + uint32_t OFM_WIDTH_M1; // 0x844 + uint32_t OFM_HEIGHT_M1; // 0x848 + uint32_t OFM_DEPTH_M1; // 0x84c + uint32_t OFM_PRECISION; // 0x850 + uint32_t OFM_BLK_WIDTH_M1; // 0x854 + uint32_t OFM_BLK_HEIGHT_M1; // 0x858 + uint32_t OFM_BLK_DEPTH_M1; // 0x85c + uint32_t OFM_ZERO_POINT; // 0x860 + uint32_t unused16[1]; + uint32_t OFM_WIDTH0_M1; // 0x868 + uint32_t OFM_HEIGHT0_M1; // 0x86c + uint32_t OFM_HEIGHT1_M1; // 0x870 + uint32_t unused17[2]; + uint32_t OFM_REGION; // 0x87c + uint32_t KERNEL_WIDTH_M1; // 0x880 + uint32_t KERNEL_HEIGHT_M1; // 0x884 + uint32_t KERNEL_STRIDE; // 0x888 + uint32_t PARALLEL_MODE; // 0x88c + uint32_t ACC_FORMAT; // 0x890 + uint32_t ACTIVATION; // 0x894 + uint32_t ACTIVATION_MIN; // 0x898 + uint32_t ACTIVATION_MAX; // 0x89c + uint32_t WEIGHT_REGION; // 0x8a0 + uint32_t SCALE_REGION; // 0x8a4 + uint32_t unused18[3]; + uint32_t AB_START; // 0x8b4 + uint32_t unused19[1]; + uint32_t BLOCKDEP; // 0x8bc + uint32_t DMA0_SRC_REGION; // 0x8c0 + uint32_t DMA0_DST_REGION; // 0x8c4 + uint32_t DMA0_SIZE0; // 0x8c8 + uint32_t DMA0_SIZE1; // 0x8cc + uint32_t unused20[12]; + uint32_t IFM2_BROADCAST; // 0x900 + uint32_t IFM2_SCALAR; // 0x904 
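+    // Note: the IFM2_* registers below describe the second input of elementwise operations;
+    // IFM2_SCALAR presumably carries the broadcast value when IFM2_BROADCAST selects scalar mode.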
+ uint32_t unused21[3]; + uint32_t IFM2_PRECISION; // 0x914 + uint32_t unused22[3]; + uint32_t IFM2_ZERO_POINT; // 0x924 + uint32_t IFM2_WIDTH0_M1; // 0x928 + uint32_t IFM2_HEIGHT0_M1; // 0x92c + uint32_t IFM2_HEIGHT1_M1; // 0x930 + uint32_t IFM2_IB_START; // 0x934 + uint32_t unused23[1]; + uint32_t IFM2_REGION; // 0x93c + uint32_t unused24[48]; + uint32_t IFM_BASE0; // 0xa00 + uint32_t IFM_BASE0_HI; // 0xa04 + uint32_t IFM_BASE1; // 0xa08 + uint32_t IFM_BASE1_HI; // 0xa0c + uint32_t IFM_BASE2; // 0xa10 + uint32_t IFM_BASE2_HI; // 0xa14 + uint32_t IFM_BASE3; // 0xa18 + uint32_t IFM_BASE3_HI; // 0xa1c + uint32_t IFM_STRIDE_X; // 0xa20 + uint32_t IFM_STRIDE_X_HI; // 0xa24 + uint32_t IFM_STRIDE_Y; // 0xa28 + uint32_t IFM_STRIDE_Y_HI; // 0xa2c + uint32_t IFM_STRIDE_C; // 0xa30 + uint32_t IFM_STRIDE_C_HI; // 0xa34 + uint32_t unused25[2]; + uint32_t OFM_BASE0; // 0xa40 + uint32_t OFM_BASE0_HI; // 0xa44 + uint32_t OFM_BASE1; // 0xa48 + uint32_t OFM_BASE1_HI; // 0xa4c + uint32_t OFM_BASE2; // 0xa50 + uint32_t OFM_BASE2_HI; // 0xa54 + uint32_t OFM_BASE3; // 0xa58 + uint32_t OFM_BASE3_HI; // 0xa5c + uint32_t OFM_STRIDE_X; // 0xa60 + uint32_t OFM_STRIDE_X_HI; // 0xa64 + uint32_t OFM_STRIDE_Y; // 0xa68 + uint32_t OFM_STRIDE_Y_HI; // 0xa6c + uint32_t OFM_STRIDE_C; // 0xa70 + uint32_t OFM_STRIDE_C_HI; // 0xa74 + uint32_t unused26[2]; + uint32_t WEIGHT_BASE; // 0xa80 + uint32_t WEIGHT_BASE_HI; // 0xa84 + uint32_t WEIGHT_LENGTH; // 0xa88 + uint32_t unused27[1]; + uint32_t SCALE_BASE; // 0xa90 + uint32_t SCALE_BASE_HI; // 0xa94 + uint32_t SCALE_LENGTH; // 0xa98 + uint32_t unused28[1]; + uint32_t OFM_SCALE; // 0xaa0 + uint32_t OFM_SCALE_SHIFT; // 0xaa4 + uint32_t OPA_SCALE; // 0xaa8 + uint32_t OPA_SCALE_SHIFT; // 0xaac + uint32_t OPB_SCALE; // 0xab0 + uint32_t unused29[3]; + uint32_t DMA0_SRC; // 0xac0 + uint32_t DMA0_SRC_HI; // 0xac4 + uint32_t DMA0_DST; // 0xac8 + uint32_t DMA0_DST_HI; // 0xacc + uint32_t DMA0_LEN; // 0xad0 + uint32_t DMA0_LEN_HI; // 0xad4 + uint32_t DMA0_SKIP0; // 0xad8 + uint32_t DMA0_SKIP0_HI; // 0xadc + uint32_t DMA0_SKIP1; // 0xae0 + uint32_t DMA0_SKIP1_HI; // 0xae4 + uint32_t unused30[6]; + uint32_t IFM2_BASE0; // 0xb00 + uint32_t IFM2_BASE0_HI; // 0xb04 + uint32_t IFM2_BASE1; // 0xb08 + uint32_t IFM2_BASE1_HI; // 0xb0c + uint32_t IFM2_BASE2; // 0xb10 + uint32_t IFM2_BASE2_HI; // 0xb14 + uint32_t IFM2_BASE3; // 0xb18 + uint32_t IFM2_BASE3_HI; // 0xb1c + uint32_t IFM2_STRIDE_X; // 0xb20 + uint32_t IFM2_STRIDE_X_HI; // 0xb24 + uint32_t IFM2_STRIDE_Y; // 0xb28 + uint32_t IFM2_STRIDE_Y_HI; // 0xb2c + uint32_t IFM2_STRIDE_C; // 0xb30 + uint32_t IFM2_STRIDE_C_HI; // 0xb34 + uint32_t unused31[2]; + uint32_t WEIGHT1_BASE; // 0xb40 + uint32_t WEIGHT1_BASE_HI; // 0xb44 + uint32_t WEIGHT1_LENGTH; // 0xb48 + uint32_t unused32[1]; + uint32_t SCALE1_BASE; // 0xb50 + uint32_t SCALE1_BASE_HI; // 0xb54 + uint32_t SCALE1_LENGTH; // 0xb58 + uint32_t unused33[281]; + uint32_t REVISION; // 0xfc0 + uint32_t unused34[3]; + STRUCT pid4_r PID4; // 0xfd0 + STRUCT pid5_r PID5; // 0xfd4 + STRUCT pid6_r PID6; // 0xfd8 + STRUCT pid7_r PID7; // 0xfdc + STRUCT pid0_r PID0; // 0xfe0 + STRUCT pid1_r PID1; // 0xfe4 + STRUCT pid2_r PID2; // 0xfe8 + STRUCT pid3_r PID3; // 0xfec + STRUCT cid0_r CID0; // 0xff0 + STRUCT cid1_r CID1; // 0xff4 + STRUCT cid2_r CID2; // 0xff8 + STRUCT cid3_r CID3; // 0xffc +#ifdef __cplusplus + NPU_REG() + { + reset(); + } + void reset() + { + ID = 268845313; + STATUS = 8; + CMD = 12; + RESET = 0; + QBASE0 = 0; + QBASE1 = 0; + QREAD = 0; + QCONFIG = 0; + QSIZE = 0; + PROT = 0; + CONFIG = 0; + 
LOCK = 0; + REGIONCFG = 0; + AXI_LIMIT0 = 0; + AXI_LIMIT1 = 0; + AXI_LIMIT2 = 0; + AXI_LIMIT3 = 0; + BASEP0 = 0; + BASEP1 = 0; + BASEP2 = 0; + BASEP3 = 0; + BASEP4 = 0; + BASEP5 = 0; + BASEP6 = 0; + BASEP7 = 0; + BASEP8 = 0; + BASEP9 = 0; + BASEP10 = 0; + BASEP11 = 0; + BASEP12 = 0; + BASEP13 = 0; + BASEP14 = 0; + BASEP15 = 0; + REVISION = 0; + PID4 = 4; + PID5 = 0; + PID6 = 0; + PID7 = 0; + PID0 = 128; + PID1 = 181; + PID2 = 11; + PID3 = 0; + CID0 = 13; + CID1 = 240; + CID2 = 5; + CID3 = 177; + WD_STATUS = 0; + MAC_STATUS = 0; + AO_STATUS = 0; + DMA_STATUS0 = 0; + DMA_STATUS1 = 0; + CLKFORCE = 0; + DEBUG_ADDRESS = 0; + DEBUG_MISC = 0; + DEBUGCORE = 0; + DEBUG_BLOCK = 0; + KERNEL_X = 0; + KERNEL_Y = 0; + KERNEL_W_M1 = 0; + KERNEL_H_M1 = 0; + OFM_CBLK_WIDTH_M1 = 0; + OFM_CBLK_HEIGHT_M1 = 0; + OFM_CBLK_DEPTH_M1 = 0; + IFM_CBLK_DEPTH_M1 = 0; + OFM_X = 0; + OFM_Y = 0; + OFM_Z = 0; + IFM_Z = 0; + PAD_TOP = 0; + PAD_LEFT = 0; + IFM_CBLK_WIDTH = 0; + IFM_CBLK_HEIGHT = 0; + DMA_IFM_SRC = 0; + DMA_IFM_SRC_HI = 0; + DMA_IFM_DST = 0; + DMA_OFM_SRC = 0; + DMA_OFM_DST = 0; + DMA_OFM_DST_HI = 0; + DMA_WEIGHT_SRC = 0; + DMA_WEIGHT_SRC_HI = 0; + DMA_CMD_SRC = 0; + DMA_CMD_SRC_HI = 0; + DMA_CMD_SIZE = 0; + DMA_M2M_SRC = 0; + DMA_M2M_SRC_HI = 0; + DMA_M2M_DST = 0; + DMA_M2M_DST_HI = 0; + CURRENT_QREAD = 0; + DMA_SCALE_SRC = 0; + DMA_SCALE_SRC_HI = 0; + CURRENT_BLOCK = 0; + CURRENT_OP = 0; + CURRENT_CMD = 0; + IFM_PAD_TOP = 0; + IFM_PAD_LEFT = 0; + IFM_PAD_RIGHT = 0; + IFM_PAD_BOTTOM = 0; + IFM_DEPTH_M1 = 0; + IFM_PRECISION = 0; + IFM_UPSCALE = 0; + IFM_ZERO_POINT = 0; + IFM_WIDTH0_M1 = 0; + IFM_HEIGHT0_M1 = 0; + IFM_HEIGHT1_M1 = 0; + IFM_IB_END = 0; + IFM_REGION = 0; + OFM_WIDTH_M1 = 0; + OFM_HEIGHT_M1 = 0; + OFM_DEPTH_M1 = 0; + OFM_PRECISION = 0; + OFM_BLK_WIDTH_M1 = 0; + OFM_BLK_HEIGHT_M1 = 0; + OFM_BLK_DEPTH_M1 = 0; + OFM_ZERO_POINT = 0; + OFM_WIDTH0_M1 = 0; + OFM_HEIGHT0_M1 = 0; + OFM_HEIGHT1_M1 = 0; + OFM_REGION = 0; + KERNEL_WIDTH_M1 = 0; + KERNEL_HEIGHT_M1 = 0; + KERNEL_STRIDE = 0; + PARALLEL_MODE = 0; + ACC_FORMAT = 0; + ACTIVATION = 0; + ACTIVATION_MIN = 0; + ACTIVATION_MAX = 0; + WEIGHT_REGION = 0; + SCALE_REGION = 0; + AB_START = 0; + BLOCKDEP = 0; + DMA0_SRC_REGION = 0; + DMA0_DST_REGION = 0; + DMA0_SIZE0 = 0; + DMA0_SIZE1 = 0; + IFM2_BROADCAST = 0; + IFM2_SCALAR = 0; + IFM2_PRECISION = 0; + IFM2_ZERO_POINT = 0; + IFM2_WIDTH0_M1 = 0; + IFM2_HEIGHT0_M1 = 0; + IFM2_HEIGHT1_M1 = 0; + IFM2_IB_START = 0; + IFM2_REGION = 0; + IFM_BASE0 = 0; + IFM_BASE0_HI = 0; + IFM_BASE1 = 0; + IFM_BASE1_HI = 0; + IFM_BASE2 = 0; + IFM_BASE2_HI = 0; + IFM_BASE3 = 0; + IFM_BASE3_HI = 0; + IFM_STRIDE_X = 0; + IFM_STRIDE_X_HI = 0; + IFM_STRIDE_Y = 0; + IFM_STRIDE_Y_HI = 0; + IFM_STRIDE_C = 0; + IFM_STRIDE_C_HI = 0; + OFM_BASE0 = 0; + OFM_BASE0_HI = 0; + OFM_BASE1 = 0; + OFM_BASE1_HI = 0; + OFM_BASE2 = 0; + OFM_BASE2_HI = 0; + OFM_BASE3 = 0; + OFM_BASE3_HI = 0; + OFM_STRIDE_X = 0; + OFM_STRIDE_X_HI = 0; + OFM_STRIDE_Y = 0; + OFM_STRIDE_Y_HI = 0; + OFM_STRIDE_C = 0; + OFM_STRIDE_C_HI = 0; + WEIGHT_BASE = 0; + WEIGHT_BASE_HI = 0; + WEIGHT_LENGTH = 0; + SCALE_BASE = 0; + SCALE_BASE_HI = 0; + SCALE_LENGTH = 0; + OFM_SCALE = 0; + OFM_SCALE_SHIFT = 0; + OPA_SCALE = 0; + OPA_SCALE_SHIFT = 0; + OPB_SCALE = 0; + DMA0_SRC = 0; + DMA0_SRC_HI = 0; + DMA0_DST = 0; + DMA0_DST_HI = 0; + DMA0_LEN = 0; + DMA0_LEN_HI = 0; + DMA0_SKIP0 = 0; + DMA0_SKIP0_HI = 0; + DMA0_SKIP1 = 0; + DMA0_SKIP1_HI = 0; + IFM2_BASE0 = 0; + IFM2_BASE0_HI = 0; + IFM2_BASE1 = 0; + IFM2_BASE1_HI = 0; + IFM2_BASE2 = 0; + IFM2_BASE2_HI = 0; + IFM2_BASE3 = 0; + 
IFM2_BASE3_HI = 0; + IFM2_STRIDE_X = 0; + IFM2_STRIDE_X_HI = 0; + IFM2_STRIDE_Y = 0; + IFM2_STRIDE_Y_HI = 0; + IFM2_STRIDE_C = 0; + IFM2_STRIDE_C_HI = 0; + WEIGHT1_BASE = 0; + WEIGHT1_BASE_HI = 0; + WEIGHT1_LENGTH = 0; + SCALE1_BASE = 0; + SCALE1_BASE_HI = 0; + SCALE1_LENGTH = 0; + PMCR = 8192; + PMCNTENSET = 0; + PMCNTENCLR = 0; + PMOVSSET = 0; + PMOVSCLR = 0; + PMINTSET = 0; + PMINTCLR = 0; + PMCCNTR_LO = 0; + PMCCNTR_HI = 0; + PMCCNTR_CFG = 0; + PMCAXI_CHAN = 0; + for (size_t i = 0; i < (sizeof(PMEVCNTR) / sizeof(PMEVCNTR[0])); ++i) + PMEVCNTR[i] = 0; + for (size_t i = 0; i < (sizeof(PMEVTYPER) / sizeof(PMEVTYPER[0])); ++i) + PMEVTYPER[i] = 0; + for (size_t i = 0; i < (sizeof(SHARED_BUFFER) / sizeof(SHARED_BUFFER[0])); ++i) + SHARED_BUFFER[i] = 0; + } + uint32_t &operator[](const int addr_offset) + { + return reinterpret_cast(this)[addr_offset / 4]; + } + enum class access_type_t : bool + { + RO, + RW + }; + access_type_t get_access_type(uint32_t offset) + { + switch (offset) + { + case 0: + return access_type_t::RO; + case 4: + return access_type_t::RO; + case 8: + return access_type_t::RW; + case 12: + return access_type_t::RW; + case 16: + return access_type_t::RW; + case 20: + return access_type_t::RW; + case 24: + return access_type_t::RO; + case 28: + return access_type_t::RW; + case 32: + return access_type_t::RW; + case 36: + return access_type_t::RO; + case 40: + return access_type_t::RO; + case 44: + return access_type_t::RW; + case 60: + return access_type_t::RW; + case 64: + return access_type_t::RW; + case 68: + return access_type_t::RW; + case 72: + return access_type_t::RW; + case 76: + return access_type_t::RW; + case 128: + return access_type_t::RW; + case 132: + return access_type_t::RW; + case 136: + return access_type_t::RW; + case 140: + return access_type_t::RW; + case 144: + return access_type_t::RW; + case 148: + return access_type_t::RW; + case 152: + return access_type_t::RW; + case 156: + return access_type_t::RW; + case 160: + return access_type_t::RW; + case 164: + return access_type_t::RW; + case 168: + return access_type_t::RW; + case 172: + return access_type_t::RW; + case 176: + return access_type_t::RW; + case 180: + return access_type_t::RW; + case 184: + return access_type_t::RW; + case 188: + return access_type_t::RW; + case 4032: + return access_type_t::RO; + case 4048: + return access_type_t::RO; + case 4052: + return access_type_t::RO; + case 4056: + return access_type_t::RO; + case 4060: + return access_type_t::RO; + case 4064: + return access_type_t::RO; + case 4068: + return access_type_t::RO; + case 4072: + return access_type_t::RO; + case 4076: + return access_type_t::RO; + case 4080: + return access_type_t::RO; + case 4084: + return access_type_t::RO; + case 4088: + return access_type_t::RO; + case 4092: + return access_type_t::RO; + case 256: + return access_type_t::RO; + case 260: + return access_type_t::RO; + case 264: + return access_type_t::RO; + case 272: + return access_type_t::RO; + case 276: + return access_type_t::RO; + case 320: + return access_type_t::RW; + case 324: + return access_type_t::RW; + case 328: + return access_type_t::RW; + case 332: + return access_type_t::RW; + case 336: + return access_type_t::RW; + case 512: + return access_type_t::RO; + case 516: + return access_type_t::RO; + case 520: + return access_type_t::RO; + case 524: + return access_type_t::RO; + case 528: + return access_type_t::RO; + case 532: + return access_type_t::RO; + case 536: + return access_type_t::RO; + case 540: + return access_type_t::RO; + 
case 544: + return access_type_t::RO; + case 548: + return access_type_t::RO; + case 552: + return access_type_t::RO; + case 556: + return access_type_t::RO; + case 560: + return access_type_t::RO; + case 564: + return access_type_t::RO; + case 568: + return access_type_t::RO; + case 572: + return access_type_t::RO; + case 576: + return access_type_t::RO; + case 580: + return access_type_t::RO; + case 584: + return access_type_t::RO; + case 588: + return access_type_t::RO; + case 592: + return access_type_t::RO; + case 596: + return access_type_t::RO; + case 600: + return access_type_t::RO; + case 604: + return access_type_t::RO; + case 608: + return access_type_t::RO; + case 612: + return access_type_t::RO; + case 616: + return access_type_t::RO; + case 620: + return access_type_t::RO; + case 624: + return access_type_t::RO; + case 628: + return access_type_t::RO; + case 632: + return access_type_t::RO; + case 636: + return access_type_t::RO; + case 640: + return access_type_t::RO; + case 644: + return access_type_t::RO; + case 692: + return access_type_t::RO; + case 696: + return access_type_t::RO; + case 700: + return access_type_t::RO; + case 2048: + return access_type_t::RW; + case 2052: + return access_type_t::RW; + case 2056: + return access_type_t::RW; + case 2060: + return access_type_t::RW; + case 2064: + return access_type_t::RW; + case 2068: + return access_type_t::RW; + case 2076: + return access_type_t::RW; + case 2084: + return access_type_t::RW; + case 2088: + return access_type_t::RW; + case 2092: + return access_type_t::RW; + case 2096: + return access_type_t::RW; + case 2100: + return access_type_t::RW; + case 2108: + return access_type_t::RW; + case 2116: + return access_type_t::RW; + case 2120: + return access_type_t::RW; + case 2124: + return access_type_t::RW; + case 2128: + return access_type_t::RW; + case 2132: + return access_type_t::RW; + case 2136: + return access_type_t::RW; + case 2140: + return access_type_t::RW; + case 2144: + return access_type_t::RW; + case 2152: + return access_type_t::RW; + case 2156: + return access_type_t::RW; + case 2160: + return access_type_t::RW; + case 2172: + return access_type_t::RW; + case 2176: + return access_type_t::RW; + case 2180: + return access_type_t::RW; + case 2184: + return access_type_t::RW; + case 2188: + return access_type_t::RW; + case 2192: + return access_type_t::RW; + case 2196: + return access_type_t::RW; + case 2200: + return access_type_t::RW; + case 2204: + return access_type_t::RW; + case 2208: + return access_type_t::RW; + case 2212: + return access_type_t::RW; + case 2228: + return access_type_t::RW; + case 2236: + return access_type_t::RW; + case 2240: + return access_type_t::RW; + case 2244: + return access_type_t::RW; + case 2248: + return access_type_t::RW; + case 2252: + return access_type_t::RW; + case 2304: + return access_type_t::RW; + case 2308: + return access_type_t::RW; + case 2324: + return access_type_t::RW; + case 2340: + return access_type_t::RW; + case 2344: + return access_type_t::RW; + case 2348: + return access_type_t::RW; + case 2352: + return access_type_t::RW; + case 2356: + return access_type_t::RW; + case 2364: + return access_type_t::RW; + case 2560: + return access_type_t::RW; + case 2564: + return access_type_t::RW; + case 2568: + return access_type_t::RW; + case 2572: + return access_type_t::RW; + case 2576: + return access_type_t::RW; + case 2580: + return access_type_t::RW; + case 2584: + return access_type_t::RW; + case 2588: + return access_type_t::RW; + case 2592: + 
return access_type_t::RW; + case 2596: + return access_type_t::RW; + case 2600: + return access_type_t::RW; + case 2604: + return access_type_t::RW; + case 2608: + return access_type_t::RW; + case 2612: + return access_type_t::RW; + case 2624: + return access_type_t::RW; + case 2628: + return access_type_t::RW; + case 2632: + return access_type_t::RW; + case 2636: + return access_type_t::RW; + case 2640: + return access_type_t::RW; + case 2644: + return access_type_t::RW; + case 2648: + return access_type_t::RW; + case 2652: + return access_type_t::RW; + case 2656: + return access_type_t::RW; + case 2660: + return access_type_t::RW; + case 2664: + return access_type_t::RW; + case 2668: + return access_type_t::RW; + case 2672: + return access_type_t::RW; + case 2676: + return access_type_t::RW; + case 2688: + return access_type_t::RW; + case 2692: + return access_type_t::RW; + case 2696: + return access_type_t::RW; + case 2704: + return access_type_t::RW; + case 2708: + return access_type_t::RW; + case 2712: + return access_type_t::RW; + case 2720: + return access_type_t::RW; + case 2724: + return access_type_t::RW; + case 2728: + return access_type_t::RW; + case 2732: + return access_type_t::RW; + case 2736: + return access_type_t::RW; + case 2752: + return access_type_t::RW; + case 2756: + return access_type_t::RW; + case 2760: + return access_type_t::RW; + case 2764: + return access_type_t::RW; + case 2768: + return access_type_t::RW; + case 2772: + return access_type_t::RW; + case 2776: + return access_type_t::RW; + case 2780: + return access_type_t::RW; + case 2784: + return access_type_t::RW; + case 2788: + return access_type_t::RW; + case 2816: + return access_type_t::RW; + case 2820: + return access_type_t::RW; + case 2824: + return access_type_t::RW; + case 2828: + return access_type_t::RW; + case 2832: + return access_type_t::RW; + case 2836: + return access_type_t::RW; + case 2840: + return access_type_t::RW; + case 2844: + return access_type_t::RW; + case 2848: + return access_type_t::RW; + case 2852: + return access_type_t::RW; + case 2856: + return access_type_t::RW; + case 2860: + return access_type_t::RW; + case 2864: + return access_type_t::RW; + case 2868: + return access_type_t::RW; + case 2880: + return access_type_t::RW; + case 2884: + return access_type_t::RW; + case 2888: + return access_type_t::RW; + case 2896: + return access_type_t::RW; + case 2900: + return access_type_t::RW; + case 2904: + return access_type_t::RW; + case 384: + return access_type_t::RW; + case 388: + return access_type_t::RW; + case 392: + return access_type_t::RW; + case 396: + return access_type_t::RW; + case 400: + return access_type_t::RW; + case 404: + return access_type_t::RW; + case 408: + return access_type_t::RW; + case 416: + return access_type_t::RW; + case 420: + return access_type_t::RW; + case 424: + return access_type_t::RW; + case 428: + return access_type_t::RW; + case 768: + return access_type_t::RW; + case 772: + return access_type_t::RW; + case 776: + return access_type_t::RW; + case 780: + return access_type_t::RW; + case 896: + return access_type_t::RW; + case 900: + return access_type_t::RW; + case 904: + return access_type_t::RW; + case 908: + return access_type_t::RW; + case 1024: + return access_type_t::RW; + case 1028: + return access_type_t::RW; + case 1032: + return access_type_t::RW; + case 1036: + return access_type_t::RW; + case 1040: + return access_type_t::RW; + case 1044: + return access_type_t::RW; + case 1048: + return access_type_t::RW; + case 1052: + return 
access_type_t::RW; + case 1056: + return access_type_t::RW; + case 1060: + return access_type_t::RW; + case 1064: + return access_type_t::RW; + case 1068: + return access_type_t::RW; + case 1072: + return access_type_t::RW; + case 1076: + return access_type_t::RW; + case 1080: + return access_type_t::RW; + case 1084: + return access_type_t::RW; + case 1088: + return access_type_t::RW; + case 1092: + return access_type_t::RW; + case 1096: + return access_type_t::RW; + case 1100: + return access_type_t::RW; + case 1104: + return access_type_t::RW; + case 1108: + return access_type_t::RW; + case 1112: + return access_type_t::RW; + case 1116: + return access_type_t::RW; + case 1120: + return access_type_t::RW; + case 1124: + return access_type_t::RW; + case 1128: + return access_type_t::RW; + case 1132: + return access_type_t::RW; + case 1136: + return access_type_t::RW; + case 1140: + return access_type_t::RW; + case 1144: + return access_type_t::RW; + case 1148: + return access_type_t::RW; + case 1152: + return access_type_t::RW; + case 1156: + return access_type_t::RW; + case 1160: + return access_type_t::RW; + case 1164: + return access_type_t::RW; + case 1168: + return access_type_t::RW; + case 1172: + return access_type_t::RW; + case 1176: + return access_type_t::RW; + case 1180: + return access_type_t::RW; + case 1184: + return access_type_t::RW; + case 1188: + return access_type_t::RW; + case 1192: + return access_type_t::RW; + case 1196: + return access_type_t::RW; + case 1200: + return access_type_t::RW; + case 1204: + return access_type_t::RW; + case 1208: + return access_type_t::RW; + case 1212: + return access_type_t::RW; + case 1216: + return access_type_t::RW; + case 1220: + return access_type_t::RW; + case 1224: + return access_type_t::RW; + case 1228: + return access_type_t::RW; + case 1232: + return access_type_t::RW; + case 1236: + return access_type_t::RW; + case 1240: + return access_type_t::RW; + case 1244: + return access_type_t::RW; + case 1248: + return access_type_t::RW; + case 1252: + return access_type_t::RW; + case 1256: + return access_type_t::RW; + case 1260: + return access_type_t::RW; + case 1264: + return access_type_t::RW; + case 1268: + return access_type_t::RW; + case 1272: + return access_type_t::RW; + case 1276: + return access_type_t::RW; + case 1280: + return access_type_t::RW; + case 1284: + return access_type_t::RW; + case 1288: + return access_type_t::RW; + case 1292: + return access_type_t::RW; + case 1296: + return access_type_t::RW; + case 1300: + return access_type_t::RW; + case 1304: + return access_type_t::RW; + case 1308: + return access_type_t::RW; + case 1312: + return access_type_t::RW; + case 1316: + return access_type_t::RW; + case 1320: + return access_type_t::RW; + case 1324: + return access_type_t::RW; + case 1328: + return access_type_t::RW; + case 1332: + return access_type_t::RW; + case 1336: + return access_type_t::RW; + case 1340: + return access_type_t::RW; + case 1344: + return access_type_t::RW; + case 1348: + return access_type_t::RW; + case 1352: + return access_type_t::RW; + case 1356: + return access_type_t::RW; + case 1360: + return access_type_t::RW; + case 1364: + return access_type_t::RW; + case 1368: + return access_type_t::RW; + case 1372: + return access_type_t::RW; + case 1376: + return access_type_t::RW; + case 1380: + return access_type_t::RW; + case 1384: + return access_type_t::RW; + case 1388: + return access_type_t::RW; + case 1392: + return access_type_t::RW; + case 1396: + return access_type_t::RW; + case 
1400: + return access_type_t::RW; + case 1404: + return access_type_t::RW; + case 1408: + return access_type_t::RW; + case 1412: + return access_type_t::RW; + case 1416: + return access_type_t::RW; + case 1420: + return access_type_t::RW; + case 1424: + return access_type_t::RW; + case 1428: + return access_type_t::RW; + case 1432: + return access_type_t::RW; + case 1436: + return access_type_t::RW; + case 1440: + return access_type_t::RW; + case 1444: + return access_type_t::RW; + case 1448: + return access_type_t::RW; + case 1452: + return access_type_t::RW; + case 1456: + return access_type_t::RW; + case 1460: + return access_type_t::RW; + case 1464: + return access_type_t::RW; + case 1468: + return access_type_t::RW; + case 1472: + return access_type_t::RW; + case 1476: + return access_type_t::RW; + case 1480: + return access_type_t::RW; + case 1484: + return access_type_t::RW; + case 1488: + return access_type_t::RW; + case 1492: + return access_type_t::RW; + case 1496: + return access_type_t::RW; + case 1500: + return access_type_t::RW; + case 1504: + return access_type_t::RW; + case 1508: + return access_type_t::RW; + case 1512: + return access_type_t::RW; + case 1516: + return access_type_t::RW; + case 1520: + return access_type_t::RW; + case 1524: + return access_type_t::RW; + case 1528: + return access_type_t::RW; + case 1532: + return access_type_t::RW; + case 1536: + return access_type_t::RW; + case 1540: + return access_type_t::RW; + case 1544: + return access_type_t::RW; + case 1548: + return access_type_t::RW; + case 1552: + return access_type_t::RW; + case 1556: + return access_type_t::RW; + case 1560: + return access_type_t::RW; + case 1564: + return access_type_t::RW; + case 1568: + return access_type_t::RW; + case 1572: + return access_type_t::RW; + case 1576: + return access_type_t::RW; + case 1580: + return access_type_t::RW; + case 1584: + return access_type_t::RW; + case 1588: + return access_type_t::RW; + case 1592: + return access_type_t::RW; + case 1596: + return access_type_t::RW; + case 1600: + return access_type_t::RW; + case 1604: + return access_type_t::RW; + case 1608: + return access_type_t::RW; + case 1612: + return access_type_t::RW; + case 1616: + return access_type_t::RW; + case 1620: + return access_type_t::RW; + case 1624: + return access_type_t::RW; + case 1628: + return access_type_t::RW; + case 1632: + return access_type_t::RW; + case 1636: + return access_type_t::RW; + case 1640: + return access_type_t::RW; + case 1644: + return access_type_t::RW; + case 1648: + return access_type_t::RW; + case 1652: + return access_type_t::RW; + case 1656: + return access_type_t::RW; + case 1660: + return access_type_t::RW; + case 1664: + return access_type_t::RW; + case 1668: + return access_type_t::RW; + case 1672: + return access_type_t::RW; + case 1676: + return access_type_t::RW; + case 1680: + return access_type_t::RW; + case 1684: + return access_type_t::RW; + case 1688: + return access_type_t::RW; + case 1692: + return access_type_t::RW; + case 1696: + return access_type_t::RW; + case 1700: + return access_type_t::RW; + case 1704: + return access_type_t::RW; + case 1708: + return access_type_t::RW; + case 1712: + return access_type_t::RW; + case 1716: + return access_type_t::RW; + case 1720: + return access_type_t::RW; + case 1724: + return access_type_t::RW; + case 1728: + return access_type_t::RW; + case 1732: + return access_type_t::RW; + case 1736: + return access_type_t::RW; + case 1740: + return access_type_t::RW; + case 1744: + return 
access_type_t::RW; + case 1748: + return access_type_t::RW; + case 1752: + return access_type_t::RW; + case 1756: + return access_type_t::RW; + case 1760: + return access_type_t::RW; + case 1764: + return access_type_t::RW; + case 1768: + return access_type_t::RW; + case 1772: + return access_type_t::RW; + case 1776: + return access_type_t::RW; + case 1780: + return access_type_t::RW; + case 1784: + return access_type_t::RW; + case 1788: + return access_type_t::RW; + case 1792: + return access_type_t::RW; + case 1796: + return access_type_t::RW; + case 1800: + return access_type_t::RW; + case 1804: + return access_type_t::RW; + case 1808: + return access_type_t::RW; + case 1812: + return access_type_t::RW; + case 1816: + return access_type_t::RW; + case 1820: + return access_type_t::RW; + case 1824: + return access_type_t::RW; + case 1828: + return access_type_t::RW; + case 1832: + return access_type_t::RW; + case 1836: + return access_type_t::RW; + case 1840: + return access_type_t::RW; + case 1844: + return access_type_t::RW; + case 1848: + return access_type_t::RW; + case 1852: + return access_type_t::RW; + case 1856: + return access_type_t::RW; + case 1860: + return access_type_t::RW; + case 1864: + return access_type_t::RW; + case 1868: + return access_type_t::RW; + case 1872: + return access_type_t::RW; + case 1876: + return access_type_t::RW; + case 1880: + return access_type_t::RW; + case 1884: + return access_type_t::RW; + case 1888: + return access_type_t::RW; + case 1892: + return access_type_t::RW; + case 1896: + return access_type_t::RW; + case 1900: + return access_type_t::RW; + case 1904: + return access_type_t::RW; + case 1908: + return access_type_t::RW; + case 1912: + return access_type_t::RW; + case 1916: + return access_type_t::RW; + case 1920: + return access_type_t::RW; + case 1924: + return access_type_t::RW; + case 1928: + return access_type_t::RW; + case 1932: + return access_type_t::RW; + case 1936: + return access_type_t::RW; + case 1940: + return access_type_t::RW; + case 1944: + return access_type_t::RW; + case 1948: + return access_type_t::RW; + case 1952: + return access_type_t::RW; + case 1956: + return access_type_t::RW; + case 1960: + return access_type_t::RW; + case 1964: + return access_type_t::RW; + case 1968: + return access_type_t::RW; + case 1972: + return access_type_t::RW; + case 1976: + return access_type_t::RW; + case 1980: + return access_type_t::RW; + case 1984: + return access_type_t::RW; + case 1988: + return access_type_t::RW; + case 1992: + return access_type_t::RW; + case 1996: + return access_type_t::RW; + case 2000: + return access_type_t::RW; + case 2004: + return access_type_t::RW; + case 2008: + return access_type_t::RW; + case 2012: + return access_type_t::RW; + case 2016: + return access_type_t::RW; + case 2020: + return access_type_t::RW; + case 2024: + return access_type_t::RW; + case 2028: + return access_type_t::RW; + case 2032: + return access_type_t::RW; + case 2036: + return access_type_t::RW; + case 2040: + return access_type_t::RW; + case 2044: + return access_type_t::RW; + default: + throw std::runtime_error("invalid register address"); + } + } +#endif //__cplusplus +}; + +// Data structure for commands without payload +struct command_no_payload_t +{ + uint32_t cmd_code : 10; + uint32_t must_be_zero0 : 6; // 0 + uint32_t param : 16; +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return must_be_zero0 == 0; + } + CONSTEXPR void init() + { + must_be_zero0 = 0; + } + CONSTEXPR ::cmd0 get_cmd_code() const + { + return 
static_cast<::cmd0>(cmd_code); + } + CONSTEXPR command_no_payload_t &set_cmd_code(::cmd0 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_param() const + { + return static_cast(param); + } + CONSTEXPR command_no_payload_t &set_param(uint32_t value) + { + param = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Data structure for commands with payload +struct command_with_payload_t +{ + uint32_t cmd_code : 10; + uint32_t must_be_zero : 4; // 0 + uint32_t payload_size : 2; // Min:1 Max:2 + uint32_t param : 16; + uint32_t data : 32; +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return must_be_zero == 0 && payload_size >= 1 && payload_size <= 2; + } + CONSTEXPR void init() + { + must_be_zero = 0; + payload_size = 1; + } + CONSTEXPR ::cmd1 get_cmd_code() const + { + return static_cast<::cmd1>(cmd_code); + } + CONSTEXPR command_with_payload_t &set_cmd_code(::cmd1 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_data() const + { + return static_cast(data); + } + CONSTEXPR command_with_payload_t &set_data(uint32_t value) + { + data = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_param() const + { + return static_cast(param); + } + CONSTEXPR command_with_payload_t &set_param(uint32_t value) + { + param = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_payload_size() const + { + return static_cast(payload_size); + } + CONSTEXPR command_with_payload_t &set_payload_size(uint32_t value) + { + payload_size = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Move to stopped state once all commands to this point are done. Raise IRQ to the host and logically OR the mask into +// the status register upper 16 bits (see the status register) +struct npu_op_stop_t +{ + uint32_t cmd_code : 10; // NPU_OP_STOP + uint32_t must_be_zero0 : 6; // 0 + uint32_t mask : 16; +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd0::NPU_OP_STOP) && must_be_zero0 == 0; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd0::NPU_OP_STOP); + must_be_zero0 = 0; + } + CONSTEXPR ::cmd0 get_cmd_code() const + { + return static_cast<::cmd0>(cmd_code); + } + CONSTEXPR npu_op_stop_t &set_cmd_code(::cmd0 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_mask() const + { + return static_cast(mask); + } + CONSTEXPR npu_op_stop_t &set_mask(uint32_t value) + { + mask = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Raise IRQ to the host and logically OR the mask into the status register upper 16 bits (see the status register) +struct npu_op_irq_t +{ + uint32_t cmd_code : 10; // NPU_OP_IRQ + uint32_t must_be_zero0 : 6; // 0 + uint32_t mask : 16; +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd0::NPU_OP_IRQ) && must_be_zero0 == 0; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd0::NPU_OP_IRQ); + must_be_zero0 = 0; + } + CONSTEXPR ::cmd0 get_cmd_code() const + { + return static_cast<::cmd0>(cmd_code); + } + CONSTEXPR npu_op_irq_t &set_cmd_code(::cmd0 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_mask() const + { + return static_cast(mask); + } + CONSTEXPR npu_op_irq_t &set_mask(uint32_t value) + { + mask = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Start stripe with full convolution or deconvolution +struct npu_op_conv_t +{ + uint32_t cmd_code 
: 10; // NPU_OP_CONV + uint32_t must_be_zero0 : 6; // 0 + uint32_t reserved0 : 16; +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd0::NPU_OP_CONV) && must_be_zero0 == 0; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd0::NPU_OP_CONV); + must_be_zero0 = 0; + } + CONSTEXPR ::cmd0 get_cmd_code() const + { + return static_cast<::cmd0>(cmd_code); + } + CONSTEXPR npu_op_conv_t &set_cmd_code(::cmd0 value) + { + cmd_code = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Start stripe width depth-wise convolution or deconvolution operation +struct npu_op_depthwise_t +{ + uint32_t cmd_code : 10; // NPU_OP_DEPTHWISE + uint32_t must_be_zero0 : 6; // 0 + uint32_t reserved0 : 16; +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd0::NPU_OP_DEPTHWISE) && must_be_zero0 == 0; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd0::NPU_OP_DEPTHWISE); + must_be_zero0 = 0; + } + CONSTEXPR ::cmd0 get_cmd_code() const + { + return static_cast<::cmd0>(cmd_code); + } + CONSTEXPR npu_op_depthwise_t &set_cmd_code(::cmd0 value) + { + cmd_code = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Start stripe with pooling operation +struct npu_op_pool_t +{ + uint32_t cmd_code : 10; // NPU_OP_POOL + uint32_t must_be_zero0 : 6; // 0 + uint32_t mode : 16; +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd0::NPU_OP_POOL) && must_be_zero0 == 0; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd0::NPU_OP_POOL); + must_be_zero0 = 0; + } + CONSTEXPR ::cmd0 get_cmd_code() const + { + return static_cast<::cmd0>(cmd_code); + } + CONSTEXPR npu_op_pool_t &set_cmd_code(::cmd0 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR ::pooling_mode get_mode() const + { + return static_cast<::pooling_mode>(mode); + } + CONSTEXPR npu_op_pool_t &set_mode(::pooling_mode value) + { + mode = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Start stripe with pointwise operation +struct npu_op_elementwise_t +{ + uint32_t cmd_code : 10; // NPU_OP_ELEMENTWISE + uint32_t must_be_zero0 : 6; // 0 + uint32_t mode : 16; +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd0::NPU_OP_ELEMENTWISE) && must_be_zero0 == 0; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd0::NPU_OP_ELEMENTWISE); + must_be_zero0 = 0; + } + CONSTEXPR ::cmd0 get_cmd_code() const + { + return static_cast<::cmd0>(cmd_code); + } + CONSTEXPR npu_op_elementwise_t &set_cmd_code(::cmd0 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR ::elementwise_mode get_mode() const + { + return static_cast<::elementwise_mode>(mode); + } + CONSTEXPR npu_op_elementwise_t &set_mode(::elementwise_mode value) + { + mode = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Queue new DMA for the given channel with the given mode. Mode bit 0 specifies the source address type 0=external, +// 1=internal Mode bit 1 specifies the destination address type 0=external, 1=internal In Ethos-U55 there is only one +// user channel so channel=0. 
If the channel is fully in use then the command blocks until a new DMA can start +struct npu_op_dma_start_t +{ + uint32_t cmd_code : 10; // NPU_OP_DMA_START + uint32_t must_be_zero0 : 6; // 0 + uint32_t channel_mode : 16; +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd0::NPU_OP_DMA_START) && must_be_zero0 == 0; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd0::NPU_OP_DMA_START); + must_be_zero0 = 0; + } + CONSTEXPR uint32_t get_channel_mode() const + { + return static_cast(channel_mode); + } + CONSTEXPR npu_op_dma_start_t &set_channel_mode(uint32_t value) + { + channel_mode = static_cast(value); + return *this; + } + CONSTEXPR ::cmd0 get_cmd_code() const + { + return static_cast<::cmd0>(cmd_code); + } + CONSTEXPR npu_op_dma_start_t &set_cmd_code(::cmd0 value) + { + cmd_code = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Wait for the DMA channel to have k or fewer active descriptors outstanding. In Ethos-U55 there is only one user +// channel so channel=0. In Ethos-U55 there is only one descriptor per channel so k=0 and the command waits for the +// single DMA to be complete. +struct npu_op_dma_wait_t +{ + uint32_t cmd_code : 10; // NPU_OP_DMA_WAIT + uint32_t must_be_zero0 : 6; // 0 + uint32_t reserved0 : 16; +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd0::NPU_OP_DMA_WAIT) && must_be_zero0 == 0; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd0::NPU_OP_DMA_WAIT); + must_be_zero0 = 0; + } + CONSTEXPR ::cmd0 get_cmd_code() const + { + return static_cast<::cmd0>(cmd_code); + } + CONSTEXPR npu_op_dma_wait_t &set_cmd_code(::cmd0 value) + { + cmd_code = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Wait for n or fewer kernel operations to be remaining (not complete) before starting the next command. A kernel +// operation is Conv, Depthwise, Pool, VectorProd Elementwise. This command is typically placed before an +// NPU_OP_DMA_START command to prevent the DMA from starting until a previous kernel operation reading the memory has +// completed. +struct npu_op_kernel_wait_t +{ + uint32_t cmd_code : 10; // NPU_OP_KERNEL_WAIT + uint32_t must_be_zero0 : 6; // 0 + uint32_t param : 16; +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd0::NPU_OP_KERNEL_WAIT) && must_be_zero0 == 0; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd0::NPU_OP_KERNEL_WAIT); + must_be_zero0 = 0; + } + CONSTEXPR ::cmd0 get_cmd_code() const + { + return static_cast<::cmd0>(cmd_code); + } + CONSTEXPR npu_op_kernel_wait_t &set_cmd_code(::cmd0 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_param() const + { + return static_cast(param); + } + CONSTEXPR npu_op_kernel_wait_t &set_param(uint32_t value) + { + param = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Enable or disable PMU counting (debug feature only). 
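+// ---------------------------------------------------------------------------
+// Illustrative sketch (not part of the generated interface): a minimal example
+// of how the wait/start commands defined above are typically ordered, following
+// the NPU_OP_KERNEL_WAIT comment. The helper name and the zero parameter values
+// are assumptions for illustration only.
+#ifdef __cplusplus
+static inline void example_sequence_dma_after_kernel(npu_op_kernel_wait_t &kwait,
+                                                     npu_op_dma_start_t &dstart,
+                                                     npu_op_dma_wait_t &dwait)
+{
+    kwait.init();               // NPU_OP_KERNEL_WAIT: wait until no kernel ops remain
+    kwait.set_param(0);         //   that might still be reading the DMA source
+    dstart.init();              // NPU_OP_DMA_START on the single Ethos-U55 user channel
+    dstart.set_channel_mode(0); //   mode bits 0/1 both 0 -> external source and destination
+    dwait.init();               // NPU_OP_DMA_WAIT: block until that transfer has completed
+}
+#endif //__cplusplus
+// ---------------------------------------------------------------------------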
+struct npu_op_pmu_mask_t +{ + uint32_t cmd_code : 10; // NPU_OP_PMU_MASK + uint32_t must_be_zero0 : 6; // 0 + uint32_t param : 16; +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd0::NPU_OP_PMU_MASK) && must_be_zero0 == 0; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd0::NPU_OP_PMU_MASK); + must_be_zero0 = 0; + } + CONSTEXPR ::cmd0 get_cmd_code() const + { + return static_cast<::cmd0>(cmd_code); + } + CONSTEXPR npu_op_pmu_mask_t &set_cmd_code(::cmd0 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_param() const + { + return static_cast(param); + } + CONSTEXPR npu_op_pmu_mask_t &set_param(uint32_t value) + { + param = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// IFM top pad +struct npu_set_ifm_pad_top_t +{ + uint32_t cmd_code : 10; // NPU_SET_IFM_PAD_TOP + uint32_t must_be_zero0 : 6; // 0 + uint32_t param : 16; +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd0::NPU_SET_IFM_PAD_TOP) && must_be_zero0 == 0; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd0::NPU_SET_IFM_PAD_TOP); + must_be_zero0 = 0; + } + CONSTEXPR ::cmd0 get_cmd_code() const + { + return static_cast<::cmd0>(cmd_code); + } + CONSTEXPR npu_set_ifm_pad_top_t &set_cmd_code(::cmd0 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_param() const + { + return static_cast(param); + } + CONSTEXPR npu_set_ifm_pad_top_t &set_param(uint32_t value) + { + param = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// IFM left pad +struct npu_set_ifm_pad_left_t +{ + uint32_t cmd_code : 10; // NPU_SET_IFM_PAD_LEFT + uint32_t must_be_zero0 : 6; // 0 + uint32_t param : 16; +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd0::NPU_SET_IFM_PAD_LEFT) && must_be_zero0 == 0; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd0::NPU_SET_IFM_PAD_LEFT); + must_be_zero0 = 0; + } + CONSTEXPR ::cmd0 get_cmd_code() const + { + return static_cast<::cmd0>(cmd_code); + } + CONSTEXPR npu_set_ifm_pad_left_t &set_cmd_code(::cmd0 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_param() const + { + return static_cast(param); + } + CONSTEXPR npu_set_ifm_pad_left_t &set_param(uint32_t value) + { + param = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// IFM right pad +struct npu_set_ifm_pad_right_t +{ + uint32_t cmd_code : 10; // NPU_SET_IFM_PAD_RIGHT + uint32_t must_be_zero0 : 6; // 0 + uint32_t param : 16; +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd0::NPU_SET_IFM_PAD_RIGHT) && must_be_zero0 == 0; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd0::NPU_SET_IFM_PAD_RIGHT); + must_be_zero0 = 0; + } + CONSTEXPR ::cmd0 get_cmd_code() const + { + return static_cast<::cmd0>(cmd_code); + } + CONSTEXPR npu_set_ifm_pad_right_t &set_cmd_code(::cmd0 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_param() const + { + return static_cast(param); + } + CONSTEXPR npu_set_ifm_pad_right_t &set_param(uint32_t value) + { + param = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// IFM bottom pad +struct npu_set_ifm_pad_bottom_t +{ + uint32_t cmd_code : 10; // NPU_SET_IFM_PAD_BOTTOM + uint32_t must_be_zero0 : 6; // 0 + uint32_t param : 16; +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == 
static_cast(cmd0::NPU_SET_IFM_PAD_BOTTOM) && must_be_zero0 == 0; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd0::NPU_SET_IFM_PAD_BOTTOM); + must_be_zero0 = 0; + } + CONSTEXPR ::cmd0 get_cmd_code() const + { + return static_cast<::cmd0>(cmd_code); + } + CONSTEXPR npu_set_ifm_pad_bottom_t &set_cmd_code(::cmd0 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_param() const + { + return static_cast(param); + } + CONSTEXPR npu_set_ifm_pad_bottom_t &set_param(uint32_t value) + { + param = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Number of input channels - 1 +struct npu_set_ifm_depth_m1_t +{ + uint32_t cmd_code : 10; // NPU_SET_IFM_DEPTH_M1 + uint32_t must_be_zero0 : 6; // 0 + uint32_t param : 16; +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd0::NPU_SET_IFM_DEPTH_M1) && must_be_zero0 == 0; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd0::NPU_SET_IFM_DEPTH_M1); + must_be_zero0 = 0; + } + CONSTEXPR ::cmd0 get_cmd_code() const + { + return static_cast<::cmd0>(cmd_code); + } + CONSTEXPR npu_set_ifm_depth_m1_t &set_cmd_code(::cmd0 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_param() const + { + return static_cast(param); + } + CONSTEXPR npu_set_ifm_depth_m1_t &set_param(uint32_t value) + { + param = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Set IFM precision +struct npu_set_ifm_precision_t +{ + uint32_t cmd_code : 10; // NPU_SET_IFM_PRECISION + uint32_t must_be_zero0 : 6; // 0 + uint32_t precision : 4; + uint32_t reserved0 : 2; + uint32_t format : 2; + uint32_t scale_mode : 2; + uint32_t reserved1 : 4; + uint32_t round_mode : 2; +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd0::NPU_SET_IFM_PRECISION) && must_be_zero0 == 0; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd0::NPU_SET_IFM_PRECISION); + must_be_zero0 = 0; + } + CONSTEXPR ::cmd0 get_cmd_code() const + { + return static_cast<::cmd0>(cmd_code); + } + CONSTEXPR npu_set_ifm_precision_t &set_cmd_code(::cmd0 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR ::data_format get_format() const + { + return static_cast<::data_format>(format); + } + CONSTEXPR npu_set_ifm_precision_t &set_format(::data_format value) + { + format = static_cast(value); + return *this; + } + CONSTEXPR ::ifm_precision get_precision() const + { + return static_cast<::ifm_precision>(precision); + } + CONSTEXPR npu_set_ifm_precision_t &set_precision(::ifm_precision value) + { + precision = static_cast(value); + return *this; + } + CONSTEXPR ::rounding get_round_mode() const + { + return static_cast<::rounding>(round_mode); + } + CONSTEXPR npu_set_ifm_precision_t &set_round_mode(::rounding value) + { + round_mode = static_cast(value); + return *this; + } + CONSTEXPR ::ifm_scale_mode get_scale_mode() const + { + return static_cast<::ifm_scale_mode>(scale_mode); + } + CONSTEXPR npu_set_ifm_precision_t &set_scale_mode(::ifm_scale_mode value) + { + scale_mode = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// b[1:0] = upscale mode (0=none, 1=2x2 nearest, 2=2x2 transpose) +struct npu_set_ifm_upscale_t +{ + uint32_t cmd_code : 10; // NPU_SET_IFM_UPSCALE + uint32_t must_be_zero0 : 6; // 0 + uint32_t mode : 2; + uint32_t reserved0 : 14; +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd0::NPU_SET_IFM_UPSCALE) && must_be_zero0 == 
0; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd0::NPU_SET_IFM_UPSCALE); + must_be_zero0 = 0; + } + CONSTEXPR ::cmd0 get_cmd_code() const + { + return static_cast<::cmd0>(cmd_code); + } + CONSTEXPR npu_set_ifm_upscale_t &set_cmd_code(::cmd0 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR ::resampling_mode get_mode() const + { + return static_cast<::resampling_mode>(mode); + } + CONSTEXPR npu_set_ifm_upscale_t &set_mode(::resampling_mode value) + { + mode = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Zero point offset (so value that 0 is encoded as) +struct npu_set_ifm_zero_point_t +{ + uint32_t cmd_code : 10; // NPU_SET_IFM_ZERO_POINT + uint32_t must_be_zero0 : 6; // 0 + uint32_t param : 16; +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd0::NPU_SET_IFM_ZERO_POINT) && must_be_zero0 == 0; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd0::NPU_SET_IFM_ZERO_POINT); + must_be_zero0 = 0; + } + CONSTEXPR ::cmd0 get_cmd_code() const + { + return static_cast<::cmd0>(cmd_code); + } + CONSTEXPR npu_set_ifm_zero_point_t &set_cmd_code(::cmd0 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_param() const + { + return static_cast(param); + } + CONSTEXPR npu_set_ifm_zero_point_t &set_param(uint32_t value) + { + param = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// IFM Tile 0 and tile 2 (width-1) +struct npu_set_ifm_width0_m1_t +{ + uint32_t cmd_code : 10; // NPU_SET_IFM_WIDTH0_M1 + uint32_t must_be_zero0 : 6; // 0 + uint32_t param : 16; +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd0::NPU_SET_IFM_WIDTH0_M1) && must_be_zero0 == 0; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd0::NPU_SET_IFM_WIDTH0_M1); + must_be_zero0 = 0; + } + CONSTEXPR ::cmd0 get_cmd_code() const + { + return static_cast<::cmd0>(cmd_code); + } + CONSTEXPR npu_set_ifm_width0_m1_t &set_cmd_code(::cmd0 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_param() const + { + return static_cast(param); + } + CONSTEXPR npu_set_ifm_width0_m1_t &set_param(uint32_t value) + { + param = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// IFM Tile 0 (height-1) +struct npu_set_ifm_height0_m1_t +{ + uint32_t cmd_code : 10; // NPU_SET_IFM_HEIGHT0_M1 + uint32_t must_be_zero0 : 6; // 0 + uint32_t param : 16; +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd0::NPU_SET_IFM_HEIGHT0_M1) && must_be_zero0 == 0; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd0::NPU_SET_IFM_HEIGHT0_M1); + must_be_zero0 = 0; + } + CONSTEXPR ::cmd0 get_cmd_code() const + { + return static_cast<::cmd0>(cmd_code); + } + CONSTEXPR npu_set_ifm_height0_m1_t &set_cmd_code(::cmd0 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_param() const + { + return static_cast(param); + } + CONSTEXPR npu_set_ifm_height0_m1_t &set_param(uint32_t value) + { + param = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// IFM Tile 1 (height-1) +struct npu_set_ifm_height1_m1_t +{ + uint32_t cmd_code : 10; // NPU_SET_IFM_HEIGHT1_M1 + uint32_t must_be_zero0 : 6; // 0 + uint32_t param : 16; +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd0::NPU_SET_IFM_HEIGHT1_M1) && must_be_zero0 == 0; + } + CONSTEXPR void init() + { + cmd_code = 
static_cast(cmd0::NPU_SET_IFM_HEIGHT1_M1); + must_be_zero0 = 0; + } + CONSTEXPR ::cmd0 get_cmd_code() const + { + return static_cast<::cmd0>(cmd_code); + } + CONSTEXPR npu_set_ifm_height1_m1_t &set_cmd_code(::cmd0 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_param() const + { + return static_cast(param); + } + CONSTEXPR npu_set_ifm_height1_m1_t &set_param(uint32_t value) + { + param = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// End of IB0,IB1 buffers in the SHRAM in KB units. Multiple of 2. +struct npu_set_ifm_ib_end_t +{ + uint32_t cmd_code : 10; // NPU_SET_IFM_IB_END + uint32_t must_be_zero0 : 6; // 0 + uint32_t param : 16; +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd0::NPU_SET_IFM_IB_END) && must_be_zero0 == 0; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd0::NPU_SET_IFM_IB_END); + must_be_zero0 = 0; + } + CONSTEXPR ::cmd0 get_cmd_code() const + { + return static_cast<::cmd0>(cmd_code); + } + CONSTEXPR npu_set_ifm_ib_end_t &set_cmd_code(::cmd0 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_param() const + { + return static_cast(param); + } + CONSTEXPR npu_set_ifm_ib_end_t &set_param(uint32_t value) + { + param = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Index n for IFM access: BasePointer[n] is added to all IFM offsets +struct npu_set_ifm_region_t +{ + uint32_t cmd_code : 10; // NPU_SET_IFM_REGION + uint32_t must_be_zero0 : 6; // 0 + uint32_t param : 16; +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd0::NPU_SET_IFM_REGION) && must_be_zero0 == 0; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd0::NPU_SET_IFM_REGION); + must_be_zero0 = 0; + } + CONSTEXPR ::cmd0 get_cmd_code() const + { + return static_cast<::cmd0>(cmd_code); + } + CONSTEXPR npu_set_ifm_region_t &set_cmd_code(::cmd0 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_param() const + { + return static_cast(param); + } + CONSTEXPR npu_set_ifm_region_t &set_param(uint32_t value) + { + param = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Output feature map width -1 (for the stripe to process) +struct npu_set_ofm_width_m1_t +{ + uint32_t cmd_code : 10; // NPU_SET_OFM_WIDTH_M1 + uint32_t must_be_zero0 : 6; // 0 + uint32_t param : 16; +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd0::NPU_SET_OFM_WIDTH_M1) && must_be_zero0 == 0; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd0::NPU_SET_OFM_WIDTH_M1); + must_be_zero0 = 0; + } + CONSTEXPR ::cmd0 get_cmd_code() const + { + return static_cast<::cmd0>(cmd_code); + } + CONSTEXPR npu_set_ofm_width_m1_t &set_cmd_code(::cmd0 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_param() const + { + return static_cast(param); + } + CONSTEXPR npu_set_ofm_width_m1_t &set_param(uint32_t value) + { + param = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Output feature map height -1 (for the stripe to process) +struct npu_set_ofm_height_m1_t +{ + uint32_t cmd_code : 10; // NPU_SET_OFM_HEIGHT_M1 + uint32_t must_be_zero0 : 6; // 0 + uint32_t param : 16; +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd0::NPU_SET_OFM_HEIGHT_M1) && must_be_zero0 == 0; + } + CONSTEXPR void init() + { + cmd_code = 
static_cast(cmd0::NPU_SET_OFM_HEIGHT_M1); + must_be_zero0 = 0; + } + CONSTEXPR ::cmd0 get_cmd_code() const + { + return static_cast<::cmd0>(cmd_code); + } + CONSTEXPR npu_set_ofm_height_m1_t &set_cmd_code(::cmd0 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_param() const + { + return static_cast(param); + } + CONSTEXPR npu_set_ofm_height_m1_t &set_param(uint32_t value) + { + param = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Output feature map depth -1 (for the stripe to process) +struct npu_set_ofm_depth_m1_t +{ + uint32_t cmd_code : 10; // NPU_SET_OFM_DEPTH_M1 + uint32_t must_be_zero0 : 6; // 0 + uint32_t param : 16; +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd0::NPU_SET_OFM_DEPTH_M1) && must_be_zero0 == 0; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd0::NPU_SET_OFM_DEPTH_M1); + must_be_zero0 = 0; + } + CONSTEXPR ::cmd0 get_cmd_code() const + { + return static_cast<::cmd0>(cmd_code); + } + CONSTEXPR npu_set_ofm_depth_m1_t &set_cmd_code(::cmd0 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_param() const + { + return static_cast(param); + } + CONSTEXPR npu_set_ofm_depth_m1_t &set_param(uint32_t value) + { + param = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Set OFM precision +struct npu_set_ofm_precision_t +{ + uint32_t cmd_code : 10; // NPU_SET_OFM_PRECISION + uint32_t must_be_zero0 : 6; // 0 + uint32_t precision : 3; + uint32_t reserved0 : 3; + uint32_t format : 2; + uint32_t scaling : 1; // 0=Per channel scale/bias 1=Global scale (SET_OFM_SCALE), no bias + uint32_t reserved1 : 5; + uint32_t rounding : 2; // 0=TFL rounding 1=truncate towards zero 2=natural rounding +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd0::NPU_SET_OFM_PRECISION) && must_be_zero0 == 0; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd0::NPU_SET_OFM_PRECISION); + must_be_zero0 = 0; + } + CONSTEXPR ::cmd0 get_cmd_code() const + { + return static_cast<::cmd0>(cmd_code); + } + CONSTEXPR npu_set_ofm_precision_t &set_cmd_code(::cmd0 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR ::data_format get_format() const + { + return static_cast<::data_format>(format); + } + CONSTEXPR npu_set_ofm_precision_t &set_format(::data_format value) + { + format = static_cast(value); + return *this; + } + CONSTEXPR ::ofm_precision get_precision() const + { + return static_cast<::ofm_precision>(precision); + } + CONSTEXPR npu_set_ofm_precision_t &set_precision(::ofm_precision value) + { + precision = static_cast(value); + return *this; + } + CONSTEXPR ::rounding get_rounding() const + { + return static_cast<::rounding>(rounding); + } + CONSTEXPR npu_set_ofm_precision_t &set_rounding(::rounding value) + { + rounding = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_scaling() const + { + return static_cast(scaling); + } + CONSTEXPR npu_set_ofm_precision_t &set_scaling(uint32_t value) + { + scaling = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// TSU block width - 1 (provided sufficient data remaining) +struct npu_set_ofm_blk_width_m1_t +{ + uint32_t cmd_code : 10; // NPU_SET_OFM_BLK_WIDTH_M1 + uint32_t must_be_zero0 : 6; // 0 + uint32_t param : 16; +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd0::NPU_SET_OFM_BLK_WIDTH_M1) && must_be_zero0 == 0; + } + CONSTEXPR void init() 
+ { + cmd_code = static_cast(cmd0::NPU_SET_OFM_BLK_WIDTH_M1); + must_be_zero0 = 0; + } + CONSTEXPR ::cmd0 get_cmd_code() const + { + return static_cast<::cmd0>(cmd_code); + } + CONSTEXPR npu_set_ofm_blk_width_m1_t &set_cmd_code(::cmd0 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_param() const + { + return static_cast(param); + } + CONSTEXPR npu_set_ofm_blk_width_m1_t &set_param(uint32_t value) + { + param = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// TSU block height -1 (provided sufficient data remaining) +struct npu_set_ofm_blk_height_m1_t +{ + uint32_t cmd_code : 10; // NPU_SET_OFM_BLK_HEIGHT_M1 + uint32_t must_be_zero0 : 6; // 0 + uint32_t param : 16; +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd0::NPU_SET_OFM_BLK_HEIGHT_M1) && must_be_zero0 == 0; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd0::NPU_SET_OFM_BLK_HEIGHT_M1); + must_be_zero0 = 0; + } + CONSTEXPR ::cmd0 get_cmd_code() const + { + return static_cast<::cmd0>(cmd_code); + } + CONSTEXPR npu_set_ofm_blk_height_m1_t &set_cmd_code(::cmd0 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_param() const + { + return static_cast(param); + } + CONSTEXPR npu_set_ofm_blk_height_m1_t &set_param(uint32_t value) + { + param = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// TSU block depth -1 (provided sufficient data remaining) +struct npu_set_ofm_blk_depth_m1_t +{ + uint32_t cmd_code : 10; // NPU_SET_OFM_BLK_DEPTH_M1 + uint32_t must_be_zero0 : 6; // 0 + uint32_t param : 16; +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd0::NPU_SET_OFM_BLK_DEPTH_M1) && must_be_zero0 == 0; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd0::NPU_SET_OFM_BLK_DEPTH_M1); + must_be_zero0 = 0; + } + CONSTEXPR ::cmd0 get_cmd_code() const + { + return static_cast<::cmd0>(cmd_code); + } + CONSTEXPR npu_set_ofm_blk_depth_m1_t &set_cmd_code(::cmd0 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_param() const + { + return static_cast(param); + } + CONSTEXPR npu_set_ofm_blk_depth_m1_t &set_param(uint32_t value) + { + param = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Zero point offset (so value that 0 is encoded as) +struct npu_set_ofm_zero_point_t +{ + uint32_t cmd_code : 10; // NPU_SET_OFM_ZERO_POINT + uint32_t must_be_zero0 : 6; // 0 + uint32_t param : 16; +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd0::NPU_SET_OFM_ZERO_POINT) && must_be_zero0 == 0; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd0::NPU_SET_OFM_ZERO_POINT); + must_be_zero0 = 0; + } + CONSTEXPR ::cmd0 get_cmd_code() const + { + return static_cast<::cmd0>(cmd_code); + } + CONSTEXPR npu_set_ofm_zero_point_t &set_cmd_code(::cmd0 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_param() const + { + return static_cast(param); + } + CONSTEXPR npu_set_ofm_zero_point_t &set_param(uint32_t value) + { + param = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// OFM Tile 0 and tile 2 (width-1) +struct npu_set_ofm_width0_m1_t +{ + uint32_t cmd_code : 10; // NPU_SET_OFM_WIDTH0_M1 + uint32_t must_be_zero0 : 6; // 0 + uint32_t param : 16; +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd0::NPU_SET_OFM_WIDTH0_M1) && must_be_zero0 == 0; + } + 
CONSTEXPR void init() + { + cmd_code = static_cast(cmd0::NPU_SET_OFM_WIDTH0_M1); + must_be_zero0 = 0; + } + CONSTEXPR ::cmd0 get_cmd_code() const + { + return static_cast<::cmd0>(cmd_code); + } + CONSTEXPR npu_set_ofm_width0_m1_t &set_cmd_code(::cmd0 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_param() const + { + return static_cast(param); + } + CONSTEXPR npu_set_ofm_width0_m1_t &set_param(uint32_t value) + { + param = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// OFM Tile 0 (height-1) +struct npu_set_ofm_height0_m1_t +{ + uint32_t cmd_code : 10; // NPU_SET_OFM_HEIGHT0_M1 + uint32_t must_be_zero0 : 6; // 0 + uint32_t param : 16; +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd0::NPU_SET_OFM_HEIGHT0_M1) && must_be_zero0 == 0; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd0::NPU_SET_OFM_HEIGHT0_M1); + must_be_zero0 = 0; + } + CONSTEXPR ::cmd0 get_cmd_code() const + { + return static_cast<::cmd0>(cmd_code); + } + CONSTEXPR npu_set_ofm_height0_m1_t &set_cmd_code(::cmd0 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_param() const + { + return static_cast(param); + } + CONSTEXPR npu_set_ofm_height0_m1_t &set_param(uint32_t value) + { + param = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// OFM Tile 1 (height-1) +struct npu_set_ofm_height1_m1_t +{ + uint32_t cmd_code : 10; // NPU_SET_OFM_HEIGHT1_M1 + uint32_t must_be_zero0 : 6; // 0 + uint32_t param : 16; +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd0::NPU_SET_OFM_HEIGHT1_M1) && must_be_zero0 == 0; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd0::NPU_SET_OFM_HEIGHT1_M1); + must_be_zero0 = 0; + } + CONSTEXPR ::cmd0 get_cmd_code() const + { + return static_cast<::cmd0>(cmd_code); + } + CONSTEXPR npu_set_ofm_height1_m1_t &set_cmd_code(::cmd0 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_param() const + { + return static_cast(param); + } + CONSTEXPR npu_set_ofm_height1_m1_t &set_param(uint32_t value) + { + param = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Index n for OFM access: BasePointer[n] is added to all OFM offsets +struct npu_set_ofm_region_t +{ + uint32_t cmd_code : 10; // NPU_SET_OFM_REGION + uint32_t must_be_zero0 : 6; // 0 + uint32_t param : 16; +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd0::NPU_SET_OFM_REGION) && must_be_zero0 == 0; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd0::NPU_SET_OFM_REGION); + must_be_zero0 = 0; + } + CONSTEXPR ::cmd0 get_cmd_code() const + { + return static_cast<::cmd0>(cmd_code); + } + CONSTEXPR npu_set_ofm_region_t &set_cmd_code(::cmd0 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_param() const + { + return static_cast(param); + } + CONSTEXPR npu_set_ofm_region_t &set_param(uint32_t value) + { + param = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Set kernel width - 1 +struct npu_set_kernel_width_m1_t +{ + uint32_t cmd_code : 10; // NPU_SET_KERNEL_WIDTH_M1 + uint32_t must_be_zero0 : 6; // 0 + uint32_t param : 16; +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd0::NPU_SET_KERNEL_WIDTH_M1) && must_be_zero0 == 0; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd0::NPU_SET_KERNEL_WIDTH_M1); + must_be_zero0 = 
0; + } + CONSTEXPR ::cmd0 get_cmd_code() const + { + return static_cast<::cmd0>(cmd_code); + } + CONSTEXPR npu_set_kernel_width_m1_t &set_cmd_code(::cmd0 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_param() const + { + return static_cast(param); + } + CONSTEXPR npu_set_kernel_width_m1_t &set_param(uint32_t value) + { + param = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Set kernel height - 1 +struct npu_set_kernel_height_m1_t +{ + uint32_t cmd_code : 10; // NPU_SET_KERNEL_HEIGHT_M1 + uint32_t must_be_zero0 : 6; // 0 + uint32_t param : 16; +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd0::NPU_SET_KERNEL_HEIGHT_M1) && must_be_zero0 == 0; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd0::NPU_SET_KERNEL_HEIGHT_M1); + must_be_zero0 = 0; + } + CONSTEXPR ::cmd0 get_cmd_code() const + { + return static_cast<::cmd0>(cmd_code); + } + CONSTEXPR npu_set_kernel_height_m1_t &set_cmd_code(::cmd0 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_param() const + { + return static_cast(param); + } + CONSTEXPR npu_set_kernel_height_m1_t &set_param(uint32_t value) + { + param = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Kernel stride b0=(X stride-1)&1, b1=(Y stride-1)&1, b2=weight order (0=depth, 1=kernel) b3 = kernel_x_dilation - 1 +// (0=no x dilation, 1=x dilation of x2) b4 = kernel_y_dilation -1 (0=no y dilation, 1=y dilation of x2) b5 = kernel +// decomposition size (0 for kernel_split_size=8, 1 for kernel_split_size=4) b[8:6] = (X stride-1)>>1 b[11:9] = (Y +// stride-1)>>1 +struct npu_set_kernel_stride_t +{ + uint32_t cmd_code : 10; // NPU_SET_KERNEL_STRIDE + uint32_t must_be_zero0 : 6; // 0 + uint32_t param : 16; +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd0::NPU_SET_KERNEL_STRIDE) && must_be_zero0 == 0; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd0::NPU_SET_KERNEL_STRIDE); + must_be_zero0 = 0; + } + CONSTEXPR ::cmd0 get_cmd_code() const + { + return static_cast<::cmd0>(cmd_code); + } + CONSTEXPR npu_set_kernel_stride_t &set_cmd_code(::cmd0 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_param() const + { + return static_cast(param); + } + CONSTEXPR npu_set_kernel_stride_t &set_param(uint32_t value) + { + param = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// 0=1-core, 1=2-core depth (this command is Ethos-U65 only and UNPREDICTABLE for Ethos-U55) +struct npu_set_parallel_mode_t +{ + uint32_t cmd_code : 10; // NPU_SET_PARALLEL_MODE + uint32_t must_be_zero0 : 6; // 0 + uint32_t param : 16; +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd0::NPU_SET_PARALLEL_MODE) && must_be_zero0 == 0; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd0::NPU_SET_PARALLEL_MODE); + must_be_zero0 = 0; + } + CONSTEXPR ::cmd0 get_cmd_code() const + { + return static_cast<::cmd0>(cmd_code); + } + CONSTEXPR npu_set_parallel_mode_t &set_cmd_code(::cmd0 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_param() const + { + return static_cast(param); + } + CONSTEXPR npu_set_parallel_mode_t &set_param(uint32_t value) + { + param = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Set accumulator format +struct npu_set_acc_format_t +{ + uint32_t cmd_code : 10; // NPU_SET_ACC_FORMAT + uint32_t 
must_be_zero0 : 6; // 0 + uint32_t param : 16; +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd0::NPU_SET_ACC_FORMAT) && must_be_zero0 == 0; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd0::NPU_SET_ACC_FORMAT); + must_be_zero0 = 0; + } + CONSTEXPR ::cmd0 get_cmd_code() const + { + return static_cast<::cmd0>(cmd_code); + } + CONSTEXPR npu_set_acc_format_t &set_cmd_code(::cmd0 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR ::acc_format get_param() const + { + return static_cast<::acc_format>(param); + } + CONSTEXPR npu_set_acc_format_t &set_param(::acc_format value) + { + param = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Set activation +struct npu_set_activation_t +{ + uint32_t cmd_code : 10; // NPU_SET_ACTIVATION + uint32_t must_be_zero0 : 6; // 0 + uint32_t type : 12; + uint32_t act_clip_range : 4; +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd0::NPU_SET_ACTIVATION) && must_be_zero0 == 0; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd0::NPU_SET_ACTIVATION); + must_be_zero0 = 0; + } + CONSTEXPR ::clip_range get_act_clip_range() const + { + return static_cast<::clip_range>(act_clip_range); + } + CONSTEXPR npu_set_activation_t &set_act_clip_range(::clip_range value) + { + act_clip_range = static_cast(value); + return *this; + } + CONSTEXPR ::cmd0 get_cmd_code() const + { + return static_cast<::cmd0>(cmd_code); + } + CONSTEXPR npu_set_activation_t &set_cmd_code(::cmd0 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR ::activation get_type() const + { + return static_cast<::activation>(type); + } + CONSTEXPR npu_set_activation_t &set_type(::activation value) + { + type = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Lower bound clip for OFM activations – range is the OFM type range +struct npu_set_activation_min_t +{ + uint32_t cmd_code : 10; // NPU_SET_ACTIVATION_MIN + uint32_t must_be_zero0 : 6; // 0 + uint32_t param : 16; +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd0::NPU_SET_ACTIVATION_MIN) && must_be_zero0 == 0; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd0::NPU_SET_ACTIVATION_MIN); + must_be_zero0 = 0; + } + CONSTEXPR ::cmd0 get_cmd_code() const + { + return static_cast<::cmd0>(cmd_code); + } + CONSTEXPR npu_set_activation_min_t &set_cmd_code(::cmd0 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_param() const + { + return static_cast(param); + } + CONSTEXPR npu_set_activation_min_t &set_param(uint32_t value) + { + param = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Upper bound clip for OFM activations – range is the OFM type range +struct npu_set_activation_max_t +{ + uint32_t cmd_code : 10; // NPU_SET_ACTIVATION_MAX + uint32_t must_be_zero0 : 6; // 0 + uint32_t param : 16; +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd0::NPU_SET_ACTIVATION_MAX) && must_be_zero0 == 0; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd0::NPU_SET_ACTIVATION_MAX); + must_be_zero0 = 0; + } + CONSTEXPR ::cmd0 get_cmd_code() const + { + return static_cast<::cmd0>(cmd_code); + } + CONSTEXPR npu_set_activation_max_t &set_cmd_code(::cmd0 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_param() const + { + return static_cast(param); + } + CONSTEXPR 
npu_set_activation_max_t &set_param(uint32_t value) + { + param = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Index n for weight access: BasePointer[n] is added to all Weight stream offsets +struct npu_set_weight_region_t +{ + uint32_t cmd_code : 10; // NPU_SET_WEIGHT_REGION + uint32_t must_be_zero0 : 6; // 0 + uint32_t param : 16; +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd0::NPU_SET_WEIGHT_REGION) && must_be_zero0 == 0; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd0::NPU_SET_WEIGHT_REGION); + must_be_zero0 = 0; + } + CONSTEXPR ::cmd0 get_cmd_code() const + { + return static_cast<::cmd0>(cmd_code); + } + CONSTEXPR npu_set_weight_region_t &set_cmd_code(::cmd0 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_param() const + { + return static_cast(param); + } + CONSTEXPR npu_set_weight_region_t &set_param(uint32_t value) + { + param = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Index n for weight access: BasePointer[n] is added to all scale stream offsets +struct npu_set_scale_region_t +{ + uint32_t cmd_code : 10; // NPU_SET_SCALE_REGION + uint32_t must_be_zero0 : 6; // 0 + uint32_t param : 16; +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd0::NPU_SET_SCALE_REGION) && must_be_zero0 == 0; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd0::NPU_SET_SCALE_REGION); + must_be_zero0 = 0; + } + CONSTEXPR ::cmd0 get_cmd_code() const + { + return static_cast<::cmd0>(cmd_code); + } + CONSTEXPR npu_set_scale_region_t &set_cmd_code(::cmd0 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_param() const + { + return static_cast(param); + } + CONSTEXPR npu_set_scale_region_t &set_param(uint32_t value) + { + param = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Start of ACC0,ACC1 buffers in the SHRAM in KB units. Multiple of 4.) 
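+// ---------------------------------------------------------------------------
+// Illustrative sketch (not part of the generated interface): programming an OFM
+// clamp with the two commands above. The bounds are interpreted in the OFM type
+// range and occupy the 16-bit param field; the helper name is an assumption.
+#ifdef __cplusplus
+static inline void example_set_ofm_clamp(npu_set_activation_min_t &amin,
+                                         npu_set_activation_max_t &amax,
+                                         uint32_t lo, uint32_t hi)
+{
+    amin.init();        // NPU_SET_ACTIVATION_MIN: lower clip bound for OFM activations
+    amin.set_param(lo);
+    amax.init();        // NPU_SET_ACTIVATION_MAX: upper clip bound for OFM activations
+    amax.set_param(hi);
+}
+#endif //__cplusplus
+// ---------------------------------------------------------------------------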
+struct npu_set_ab_start_t +{ + uint32_t cmd_code : 10; // NPU_SET_AB_START + uint32_t must_be_zero0 : 6; // 0 + uint32_t param : 16; +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd0::NPU_SET_AB_START) && must_be_zero0 == 0; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd0::NPU_SET_AB_START); + must_be_zero0 = 0; + } + CONSTEXPR ::cmd0 get_cmd_code() const + { + return static_cast<::cmd0>(cmd_code); + } + CONSTEXPR npu_set_ab_start_t &set_cmd_code(::cmd0 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_param() const + { + return static_cast(param); + } + CONSTEXPR npu_set_ab_start_t &set_param(uint32_t value) + { + param = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Set block number of blocks dependency between kernel operations +struct npu_set_blockdep_t +{ + uint32_t cmd_code : 10; // NPU_SET_BLOCKDEP + uint32_t must_be_zero0 : 6; // 0 + uint32_t param : 16; +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd0::NPU_SET_BLOCKDEP) && must_be_zero0 == 0; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd0::NPU_SET_BLOCKDEP); + must_be_zero0 = 0; + } + CONSTEXPR ::cmd0 get_cmd_code() const + { + return static_cast<::cmd0>(cmd_code); + } + CONSTEXPR npu_set_blockdep_t &set_cmd_code(::cmd0 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_param() const + { + return static_cast(param); + } + CONSTEXPR npu_set_blockdep_t &set_param(uint32_t value) + { + param = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// DMA0 SRC region bitmap +struct npu_set_dma0_src_region_t +{ + uint32_t cmd_code : 10; // NPU_SET_DMA0_SRC_REGION + uint32_t must_be_zero0 : 6; // 0 + uint32_t region : 8; // If Bit[8]=0, Bit[7:0]=Region number in the range [0, 8) of SRC offset. If Bit[8]=1, + // Bit[7:0]=Core number (0 or 1) to read. + uint32_t internal : 1; // Must be 0 (external) + uint32_t stride_mode : 2; // stride mode 0/1/2=1D/2D/3D + uint32_t reserved0 : 5; +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd0::NPU_SET_DMA0_SRC_REGION) && must_be_zero0 == 0; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd0::NPU_SET_DMA0_SRC_REGION); + must_be_zero0 = 0; + } + CONSTEXPR ::cmd0 get_cmd_code() const + { + return static_cast<::cmd0>(cmd_code); + } + CONSTEXPR npu_set_dma0_src_region_t &set_cmd_code(::cmd0 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_internal() const + { + return static_cast(internal); + } + CONSTEXPR npu_set_dma0_src_region_t &set_internal(uint32_t value) + { + internal = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_region() const + { + return static_cast(region); + } + CONSTEXPR npu_set_dma0_src_region_t &set_region(uint32_t value) + { + region = static_cast(value); + return *this; + } + CONSTEXPR ::stride_mode get_stride_mode() const + { + return static_cast<::stride_mode>(stride_mode); + } + CONSTEXPR npu_set_dma0_src_region_t &set_stride_mode(::stride_mode value) + { + stride_mode = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// DMA0 DST region bitmap +struct npu_set_dma0_dst_region_t +{ + uint32_t cmd_code : 10; // NPU_SET_DMA0_DST_REGION + uint32_t must_be_zero0 : 6; // 0 + uint32_t region : 8; // If Bit[8]=0, Bit[7:0]=Region number in the range [0, 8) of DST offset. 
If Bit[8]=1,
+                           // Bit[7:0]=Core mask to write to (bit k set for core k=0,1).
+    uint32_t internal : 1;    // Select external/internal=0/1
+    uint32_t stride_mode : 2; // stride mode 0/1/2=1D/2D/3D
+    uint32_t reserved0 : 5;
+#ifdef __cplusplus
+    CONSTEXPR bool valid() const
+    {
+        return cmd_code == static_cast<uint32_t>(cmd0::NPU_SET_DMA0_DST_REGION) && must_be_zero0 == 0;
+    }
+    CONSTEXPR void init()
+    {
+        cmd_code      = static_cast<uint32_t>(cmd0::NPU_SET_DMA0_DST_REGION);
+        must_be_zero0 = 0;
+    }
+    CONSTEXPR ::cmd0 get_cmd_code() const
+    {
+        return static_cast<::cmd0>(cmd_code);
+    }
+    CONSTEXPR npu_set_dma0_dst_region_t &set_cmd_code(::cmd0 value)
+    {
+        cmd_code = static_cast<uint32_t>(value);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_internal() const
+    {
+        return static_cast<uint32_t>(internal);
+    }
+    CONSTEXPR npu_set_dma0_dst_region_t &set_internal(uint32_t value)
+    {
+        internal = static_cast<uint32_t>(value);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_region() const
+    {
+        return static_cast<uint32_t>(region);
+    }
+    CONSTEXPR npu_set_dma0_dst_region_t &set_region(uint32_t value)
+    {
+        region = static_cast<uint32_t>(value);
+        return *this;
+    }
+    CONSTEXPR ::stride_mode get_stride_mode() const
+    {
+        return static_cast<::stride_mode>(stride_mode);
+    }
+    CONSTEXPR npu_set_dma0_dst_region_t &set_stride_mode(::stride_mode value)
+    {
+        stride_mode = static_cast<uint32_t>(value);
+        return *this;
+    }
+#endif //__cplusplus
+};
+
+// Inner size for 2D/3D mode.
+struct npu_set_dma0_size0_t
+{
+    uint32_t cmd_code : 10;     // NPU_SET_DMA0_SIZE0
+    uint32_t must_be_zero0 : 6; // 0
+    uint32_t param : 16;
+#ifdef __cplusplus
+    CONSTEXPR bool valid() const
+    {
+        return cmd_code == static_cast<uint32_t>(cmd0::NPU_SET_DMA0_SIZE0) && must_be_zero0 == 0;
+    }
+    CONSTEXPR void init()
+    {
+        cmd_code      = static_cast<uint32_t>(cmd0::NPU_SET_DMA0_SIZE0);
+        must_be_zero0 = 0;
+    }
+    CONSTEXPR ::cmd0 get_cmd_code() const
+    {
+        return static_cast<::cmd0>(cmd_code);
+    }
+    CONSTEXPR npu_set_dma0_size0_t &set_cmd_code(::cmd0 value)
+    {
+        cmd_code = static_cast<uint32_t>(value);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_param() const
+    {
+        return static_cast<uint32_t>(param);
+    }
+    CONSTEXPR npu_set_dma0_size0_t &set_param(uint32_t value)
+    {
+        param = static_cast<uint32_t>(value);
+        return *this;
+    }
+#endif //__cplusplus
+};
+
+// Outer size for 3D mode.
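+// ---------------------------------------------------------------------------
+// Illustrative sketch (not part of the generated interface): describing a 2D
+// DMA transfer with the region and inner-size commands above. The helper name
+// is an assumption, and the raw value 1 passed for the stride mode relies on
+// the field comment "stride mode 0/1/2=1D/2D/3D".
+#ifdef __cplusplus
+static inline void example_describe_2d_dma(npu_set_dma0_src_region_t &src,
+                                           npu_set_dma0_dst_region_t &dst,
+                                           npu_set_dma0_size0_t &size0,
+                                           uint32_t src_region,
+                                           uint32_t dst_region,
+                                           uint32_t inner_size)
+{
+    src.init();                                         // NPU_SET_DMA0_SRC_REGION
+    src.set_region(src_region);                         // base-pointer region of the source
+    src.set_internal(0);                                // source must be external (0)
+    src.set_stride_mode(static_cast<::stride_mode>(1)); // 2D per the field comment
+    dst.init();                                         // NPU_SET_DMA0_DST_REGION
+    dst.set_region(dst_region);
+    dst.set_internal(0);                                // external destination in this sketch
+    dst.set_stride_mode(static_cast<::stride_mode>(1));
+    size0.init();                                       // NPU_SET_DMA0_SIZE0: inner size for 2D/3D
+    size0.set_param(inner_size);
+}
+#endif //__cplusplus
+// ---------------------------------------------------------------------------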
+struct npu_set_dma0_size1_t +{ + uint32_t cmd_code : 10; // NPU_SET_DMA0_SIZE1 + uint32_t must_be_zero0 : 6; // 0 + uint32_t param : 16; +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd0::NPU_SET_DMA0_SIZE1) && must_be_zero0 == 0; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd0::NPU_SET_DMA0_SIZE1); + must_be_zero0 = 0; + } + CONSTEXPR ::cmd0 get_cmd_code() const + { + return static_cast<::cmd0>(cmd_code); + } + CONSTEXPR npu_set_dma0_size1_t &set_cmd_code(::cmd0 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_param() const + { + return static_cast(param); + } + CONSTEXPR npu_set_dma0_size1_t &set_param(uint32_t value) + { + param = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Set IFM2 Broadcast mode +struct npu_set_ifm2_broadcast_t +{ + uint32_t cmd_code : 10; // NPU_SET_IFM2_BROADCAST + uint32_t must_be_zero0 : 6; // 0 + uint32_t broadcast_height : 1; + uint32_t broadcast_width : 1; + uint32_t broadcast_depth : 1; + uint32_t reserved0 : 3; + uint32_t operand_order : 1; + uint32_t broadcast_scalar : 1; + uint32_t reserved1 : 8; +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd0::NPU_SET_IFM2_BROADCAST) && must_be_zero0 == 0; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd0::NPU_SET_IFM2_BROADCAST); + must_be_zero0 = 0; + } + CONSTEXPR uint32_t get_broadcast_depth() const + { + return static_cast(broadcast_depth); + } + CONSTEXPR npu_set_ifm2_broadcast_t &set_broadcast_depth(uint32_t value) + { + broadcast_depth = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_broadcast_height() const + { + return static_cast(broadcast_height); + } + CONSTEXPR npu_set_ifm2_broadcast_t &set_broadcast_height(uint32_t value) + { + broadcast_height = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_broadcast_scalar() const + { + return static_cast(broadcast_scalar); + } + CONSTEXPR npu_set_ifm2_broadcast_t &set_broadcast_scalar(uint32_t value) + { + broadcast_scalar = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_broadcast_width() const + { + return static_cast(broadcast_width); + } + CONSTEXPR npu_set_ifm2_broadcast_t &set_broadcast_width(uint32_t value) + { + broadcast_width = static_cast(value); + return *this; + } + CONSTEXPR ::cmd0 get_cmd_code() const + { + return static_cast<::cmd0>(cmd_code); + } + CONSTEXPR npu_set_ifm2_broadcast_t &set_cmd_code(::cmd0 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_operand_order() const + { + return static_cast(operand_order); + } + CONSTEXPR npu_set_ifm2_broadcast_t &set_operand_order(uint32_t value) + { + operand_order = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// IFM2 scalar value at range IFM_PRECISION +struct npu_set_ifm2_scalar_t +{ + uint32_t cmd_code : 10; // NPU_SET_IFM2_SCALAR + uint32_t must_be_zero0 : 6; // 0 + uint32_t param : 16; +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd0::NPU_SET_IFM2_SCALAR) && must_be_zero0 == 0; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd0::NPU_SET_IFM2_SCALAR); + must_be_zero0 = 0; + } + CONSTEXPR ::cmd0 get_cmd_code() const + { + return static_cast<::cmd0>(cmd_code); + } + CONSTEXPR npu_set_ifm2_scalar_t &set_cmd_code(::cmd0 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_param() const + { + return static_cast(param); + } + 
CONSTEXPR npu_set_ifm2_scalar_t &set_param(uint32_t value) + { + param = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Set activation +struct npu_set_ifm2_precision_t +{ + uint32_t cmd_code : 10; // NPU_SET_IFM2_PRECISION + uint32_t must_be_zero0 : 6; // 0 + uint32_t precision : 4; + uint32_t reserved0 : 2; + uint32_t format : 2; + uint32_t reserved1 : 8; +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd0::NPU_SET_IFM2_PRECISION) && must_be_zero0 == 0; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd0::NPU_SET_IFM2_PRECISION); + must_be_zero0 = 0; + } + CONSTEXPR ::cmd0 get_cmd_code() const + { + return static_cast<::cmd0>(cmd_code); + } + CONSTEXPR npu_set_ifm2_precision_t &set_cmd_code(::cmd0 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR ::data_format get_format() const + { + return static_cast<::data_format>(format); + } + CONSTEXPR npu_set_ifm2_precision_t &set_format(::data_format value) + { + format = static_cast(value); + return *this; + } + CONSTEXPR ::ifm_precision get_precision() const + { + return static_cast<::ifm_precision>(precision); + } + CONSTEXPR npu_set_ifm2_precision_t &set_precision(::ifm_precision value) + { + precision = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Zero point offset (so value that 0 is encoded as) at range IFM_PRECISION +struct npu_set_ifm2_zero_point_t +{ + uint32_t cmd_code : 10; // NPU_SET_IFM2_ZERO_POINT + uint32_t must_be_zero0 : 6; // 0 + uint32_t param : 16; +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd0::NPU_SET_IFM2_ZERO_POINT) && must_be_zero0 == 0; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd0::NPU_SET_IFM2_ZERO_POINT); + must_be_zero0 = 0; + } + CONSTEXPR ::cmd0 get_cmd_code() const + { + return static_cast<::cmd0>(cmd_code); + } + CONSTEXPR npu_set_ifm2_zero_point_t &set_cmd_code(::cmd0 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_param() const + { + return static_cast(param); + } + CONSTEXPR npu_set_ifm2_zero_point_t &set_param(uint32_t value) + { + param = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// IFM2 Tile 0 and tile 2 (width-1) +struct npu_set_ifm2_width0_m1_t +{ + uint32_t cmd_code : 10; // NPU_SET_IFM2_WIDTH0_M1 + uint32_t must_be_zero0 : 6; // 0 + uint32_t param : 16; +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd0::NPU_SET_IFM2_WIDTH0_M1) && must_be_zero0 == 0; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd0::NPU_SET_IFM2_WIDTH0_M1); + must_be_zero0 = 0; + } + CONSTEXPR ::cmd0 get_cmd_code() const + { + return static_cast<::cmd0>(cmd_code); + } + CONSTEXPR npu_set_ifm2_width0_m1_t &set_cmd_code(::cmd0 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_param() const + { + return static_cast(param); + } + CONSTEXPR npu_set_ifm2_width0_m1_t &set_param(uint32_t value) + { + param = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// IFM2 Tile 0 (height-1) +struct npu_set_ifm2_height0_m1_t +{ + uint32_t cmd_code : 10; // NPU_SET_IFM2_HEIGHT0_M1 + uint32_t must_be_zero0 : 6; // 0 + uint32_t param : 16; +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd0::NPU_SET_IFM2_HEIGHT0_M1) && must_be_zero0 == 0; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd0::NPU_SET_IFM2_HEIGHT0_M1); + must_be_zero0 = 0; + } 
+ CONSTEXPR ::cmd0 get_cmd_code() const + { + return static_cast<::cmd0>(cmd_code); + } + CONSTEXPR npu_set_ifm2_height0_m1_t &set_cmd_code(::cmd0 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_param() const + { + return static_cast(param); + } + CONSTEXPR npu_set_ifm2_height0_m1_t &set_param(uint32_t value) + { + param = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// IFM2 Tile 1 (height-1) +struct npu_set_ifm2_height1_m1_t +{ + uint32_t cmd_code : 10; // NPU_SET_IFM2_HEIGHT1_M1 + uint32_t must_be_zero0 : 6; // 0 + uint32_t param : 16; +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd0::NPU_SET_IFM2_HEIGHT1_M1) && must_be_zero0 == 0; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd0::NPU_SET_IFM2_HEIGHT1_M1); + must_be_zero0 = 0; + } + CONSTEXPR ::cmd0 get_cmd_code() const + { + return static_cast<::cmd0>(cmd_code); + } + CONSTEXPR npu_set_ifm2_height1_m1_t &set_cmd_code(::cmd0 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_param() const + { + return static_cast(param); + } + CONSTEXPR npu_set_ifm2_height1_m1_t &set_param(uint32_t value) + { + param = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Start of IB0, IB1 buffers for IFM2 in SHRAM. In KB units, multiple of 2. +struct npu_set_ifm2_ib_start_t +{ + uint32_t cmd_code : 10; // NPU_SET_IFM2_IB_START + uint32_t must_be_zero0 : 6; // 0 + uint32_t param : 16; +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd0::NPU_SET_IFM2_IB_START) && must_be_zero0 == 0; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd0::NPU_SET_IFM2_IB_START); + must_be_zero0 = 0; + } + CONSTEXPR ::cmd0 get_cmd_code() const + { + return static_cast<::cmd0>(cmd_code); + } + CONSTEXPR npu_set_ifm2_ib_start_t &set_cmd_code(::cmd0 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_param() const + { + return static_cast(param); + } + CONSTEXPR npu_set_ifm2_ib_start_t &set_param(uint32_t value) + { + param = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Index n for IFM2 access: Region[n] is added to all IFM2 addresses +struct npu_set_ifm2_region_t +{ + uint32_t cmd_code : 10; // NPU_SET_IFM2_REGION + uint32_t must_be_zero0 : 6; // 0 + uint32_t param : 16; +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd0::NPU_SET_IFM2_REGION) && must_be_zero0 == 0; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd0::NPU_SET_IFM2_REGION); + must_be_zero0 = 0; + } + CONSTEXPR ::cmd0 get_cmd_code() const + { + return static_cast<::cmd0>(cmd_code); + } + CONSTEXPR npu_set_ifm2_region_t &set_cmd_code(::cmd0 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_param() const + { + return static_cast(param); + } + CONSTEXPR npu_set_ifm2_region_t &set_param(uint32_t value) + { + param = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Set IFM base address (top left tile) +struct npu_set_ifm_base0_t +{ + uint32_t cmd_code : 10; // NPU_SET_IFM_BASE0 + uint32_t must_be_zero : 4; // 0 + uint32_t payload_size : 2; // Min:1 Max:2 + uint32_t reserved0 : 16; + uint32_t data : 32; // IFM base address (top left tile) +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd1::NPU_SET_IFM_BASE0) && must_be_zero == 0 && payload_size >= 1 && + payload_size <= 2; + } + 
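// Sketch of how the cmd1 commands starting here differ from the 16-bit parameter
// commands above: NPU_SET_IFM_BASE0 carries a separate 32-bit data word, so the
// struct is two words long and init() sets payload_size to 1. Appending both
// words to a command-stream buffer could look like the following; the direct
// memcpy of the bit-field layout is an assumption of this sketch, not the
// driver's own serialiser.
#ifdef __cplusplus
#include <cstdint>
#include <cstring>
#include <vector>

inline void emit_ifm_base0(std::vector<uint32_t> &stream, uint32_t ifm_address)
{
    npu_set_ifm_base0_t cmd = {};
    cmd.init();                // cmd_code = NPU_SET_IFM_BASE0, payload_size = 1
    cmd.set_data(ifm_address); // IFM base address (top left tile)

    uint32_t words[2];
    static_assert(sizeof(cmd) == sizeof(words), "command word + data word");
    std::memcpy(words, &cmd, sizeof(words));
    stream.push_back(words[0]); // command code and payload_size
    stream.push_back(words[1]); // 32-bit address payload
}
#endif //__cplusplus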
CONSTEXPR void init() + { + cmd_code = static_cast(cmd1::NPU_SET_IFM_BASE0); + must_be_zero = 0; + payload_size = 1; + } + CONSTEXPR ::cmd1 get_cmd_code() const + { + return static_cast<::cmd1>(cmd_code); + } + CONSTEXPR npu_set_ifm_base0_t &set_cmd_code(::cmd1 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_data() const + { + return static_cast(data); + } + CONSTEXPR npu_set_ifm_base0_t &set_data(uint32_t value) + { + data = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_payload_size() const + { + return static_cast(payload_size); + } + CONSTEXPR npu_set_ifm_base0_t &set_payload_size(uint32_t value) + { + payload_size = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Set IFM base address (top right tile) +struct npu_set_ifm_base1_t +{ + uint32_t cmd_code : 10; // NPU_SET_IFM_BASE1 + uint32_t must_be_zero : 4; // 0 + uint32_t payload_size : 2; // Min:1 Max:2 + uint32_t reserved0 : 16; + uint32_t data : 32; // IFM base address (top right tile) +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd1::NPU_SET_IFM_BASE1) && must_be_zero == 0 && payload_size >= 1 && + payload_size <= 2; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd1::NPU_SET_IFM_BASE1); + must_be_zero = 0; + payload_size = 1; + } + CONSTEXPR ::cmd1 get_cmd_code() const + { + return static_cast<::cmd1>(cmd_code); + } + CONSTEXPR npu_set_ifm_base1_t &set_cmd_code(::cmd1 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_data() const + { + return static_cast(data); + } + CONSTEXPR npu_set_ifm_base1_t &set_data(uint32_t value) + { + data = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_payload_size() const + { + return static_cast(payload_size); + } + CONSTEXPR npu_set_ifm_base1_t &set_payload_size(uint32_t value) + { + payload_size = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Set IFM base address (bottom left tile) +struct npu_set_ifm_base2_t +{ + uint32_t cmd_code : 10; // NPU_SET_IFM_BASE2 + uint32_t must_be_zero : 4; // 0 + uint32_t payload_size : 2; // Min:1 Max:2 + uint32_t reserved0 : 16; + uint32_t data : 32; // IFM base address (bottom left tile) +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd1::NPU_SET_IFM_BASE2) && must_be_zero == 0 && payload_size >= 1 && + payload_size <= 2; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd1::NPU_SET_IFM_BASE2); + must_be_zero = 0; + payload_size = 1; + } + CONSTEXPR ::cmd1 get_cmd_code() const + { + return static_cast<::cmd1>(cmd_code); + } + CONSTEXPR npu_set_ifm_base2_t &set_cmd_code(::cmd1 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_data() const + { + return static_cast(data); + } + CONSTEXPR npu_set_ifm_base2_t &set_data(uint32_t value) + { + data = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_payload_size() const + { + return static_cast(payload_size); + } + CONSTEXPR npu_set_ifm_base2_t &set_payload_size(uint32_t value) + { + payload_size = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Set IFM base address (bottom right tile) +struct npu_set_ifm_base3_t +{ + uint32_t cmd_code : 10; // NPU_SET_IFM_BASE3 + uint32_t must_be_zero : 4; // 0 + uint32_t payload_size : 2; // Min:1 Max:2 + uint32_t reserved0 : 16; + uint32_t data : 32; // IFM base address (bottom right tile) +#ifdef __cplusplus + CONSTEXPR bool valid() const + { 
+ return cmd_code == static_cast(cmd1::NPU_SET_IFM_BASE3) && must_be_zero == 0 && payload_size >= 1 && + payload_size <= 2; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd1::NPU_SET_IFM_BASE3); + must_be_zero = 0; + payload_size = 1; + } + CONSTEXPR ::cmd1 get_cmd_code() const + { + return static_cast<::cmd1>(cmd_code); + } + CONSTEXPR npu_set_ifm_base3_t &set_cmd_code(::cmd1 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_data() const + { + return static_cast(data); + } + CONSTEXPR npu_set_ifm_base3_t &set_data(uint32_t value) + { + data = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_payload_size() const + { + return static_cast(payload_size); + } + CONSTEXPR npu_set_ifm_base3_t &set_payload_size(uint32_t value) + { + payload_size = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Set IFM byte stride between horizontal values +struct npu_set_ifm_stride_x_t +{ + uint32_t cmd_code : 10; // NPU_SET_IFM_STRIDE_X + uint32_t must_be_zero : 4; // 0 + uint32_t payload_size : 2; // Min:1 Max:2 + uint32_t reserved0 : 16; + uint32_t data : 32; // IFM byte stride between horizontal values +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd1::NPU_SET_IFM_STRIDE_X) && must_be_zero == 0 && + payload_size >= 1 && payload_size <= 2; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd1::NPU_SET_IFM_STRIDE_X); + must_be_zero = 0; + payload_size = 1; + } + CONSTEXPR ::cmd1 get_cmd_code() const + { + return static_cast<::cmd1>(cmd_code); + } + CONSTEXPR npu_set_ifm_stride_x_t &set_cmd_code(::cmd1 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_data() const + { + return static_cast(data); + } + CONSTEXPR npu_set_ifm_stride_x_t &set_data(uint32_t value) + { + data = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_payload_size() const + { + return static_cast(payload_size); + } + CONSTEXPR npu_set_ifm_stride_x_t &set_payload_size(uint32_t value) + { + payload_size = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Set IFM byte stride between vertical values +struct npu_set_ifm_stride_y_t +{ + uint32_t cmd_code : 10; // NPU_SET_IFM_STRIDE_Y + uint32_t must_be_zero : 4; // 0 + uint32_t payload_size : 2; // Min:1 Max:2 + uint32_t reserved0 : 16; + uint32_t data : 32; // IFM byte stride between vertical values +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd1::NPU_SET_IFM_STRIDE_Y) && must_be_zero == 0 && + payload_size >= 1 && payload_size <= 2; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd1::NPU_SET_IFM_STRIDE_Y); + must_be_zero = 0; + payload_size = 1; + } + CONSTEXPR ::cmd1 get_cmd_code() const + { + return static_cast<::cmd1>(cmd_code); + } + CONSTEXPR npu_set_ifm_stride_y_t &set_cmd_code(::cmd1 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_data() const + { + return static_cast(data); + } + CONSTEXPR npu_set_ifm_stride_y_t &set_data(uint32_t value) + { + data = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_payload_size() const + { + return static_cast(payload_size); + } + CONSTEXPR npu_set_ifm_stride_y_t &set_payload_size(uint32_t value) + { + payload_size = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Set IFM byte stride between channel blocks (of 16 bytes each block) +struct npu_set_ifm_stride_c_t +{ + uint32_t cmd_code : 10; // 
NPU_SET_IFM_STRIDE_C + uint32_t must_be_zero : 4; // 0 + uint32_t payload_size : 2; // Min:1 Max:2 + uint32_t reserved0 : 16; + uint32_t data : 32; // IFM byte stride between channel blocks (of 16 bytes each block) +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd1::NPU_SET_IFM_STRIDE_C) && must_be_zero == 0 && + payload_size >= 1 && payload_size <= 2; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd1::NPU_SET_IFM_STRIDE_C); + must_be_zero = 0; + payload_size = 1; + } + CONSTEXPR ::cmd1 get_cmd_code() const + { + return static_cast<::cmd1>(cmd_code); + } + CONSTEXPR npu_set_ifm_stride_c_t &set_cmd_code(::cmd1 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_data() const + { + return static_cast(data); + } + CONSTEXPR npu_set_ifm_stride_c_t &set_data(uint32_t value) + { + data = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_payload_size() const + { + return static_cast(payload_size); + } + CONSTEXPR npu_set_ifm_stride_c_t &set_payload_size(uint32_t value) + { + payload_size = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Set OFM base address (top left tile) +struct npu_set_ofm_base0_t +{ + uint32_t cmd_code : 10; // NPU_SET_OFM_BASE0 + uint32_t must_be_zero : 4; // 0 + uint32_t payload_size : 2; // Min:1 Max:2 + uint32_t reserved0 : 16; + uint32_t data : 32; // OFM base address (top left tile) +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd1::NPU_SET_OFM_BASE0) && must_be_zero == 0 && payload_size >= 1 && + payload_size <= 2; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd1::NPU_SET_OFM_BASE0); + must_be_zero = 0; + payload_size = 1; + } + CONSTEXPR ::cmd1 get_cmd_code() const + { + return static_cast<::cmd1>(cmd_code); + } + CONSTEXPR npu_set_ofm_base0_t &set_cmd_code(::cmd1 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_data() const + { + return static_cast(data); + } + CONSTEXPR npu_set_ofm_base0_t &set_data(uint32_t value) + { + data = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_payload_size() const + { + return static_cast(payload_size); + } + CONSTEXPR npu_set_ofm_base0_t &set_payload_size(uint32_t value) + { + payload_size = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Set OFM base address (top right tile) +struct npu_set_ofm_base1_t +{ + uint32_t cmd_code : 10; // NPU_SET_OFM_BASE1 + uint32_t must_be_zero : 4; // 0 + uint32_t payload_size : 2; // Min:1 Max:2 + uint32_t reserved0 : 16; + uint32_t data : 32; // OFM base address (top right tile) +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd1::NPU_SET_OFM_BASE1) && must_be_zero == 0 && payload_size >= 1 && + payload_size <= 2; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd1::NPU_SET_OFM_BASE1); + must_be_zero = 0; + payload_size = 1; + } + CONSTEXPR ::cmd1 get_cmd_code() const + { + return static_cast<::cmd1>(cmd_code); + } + CONSTEXPR npu_set_ofm_base1_t &set_cmd_code(::cmd1 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_data() const + { + return static_cast(data); + } + CONSTEXPR npu_set_ofm_base1_t &set_data(uint32_t value) + { + data = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_payload_size() const + { + return static_cast(payload_size); + } + CONSTEXPR npu_set_ofm_base1_t &set_payload_size(uint32_t value) + { + payload_size = 
static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Set OFM base address (bottom left tile) +struct npu_set_ofm_base2_t +{ + uint32_t cmd_code : 10; // NPU_SET_OFM_BASE2 + uint32_t must_be_zero : 4; // 0 + uint32_t payload_size : 2; // Min:1 Max:2 + uint32_t reserved0 : 16; + uint32_t data : 32; // OFM base address (bottom left tile) +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd1::NPU_SET_OFM_BASE2) && must_be_zero == 0 && payload_size >= 1 && + payload_size <= 2; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd1::NPU_SET_OFM_BASE2); + must_be_zero = 0; + payload_size = 1; + } + CONSTEXPR ::cmd1 get_cmd_code() const + { + return static_cast<::cmd1>(cmd_code); + } + CONSTEXPR npu_set_ofm_base2_t &set_cmd_code(::cmd1 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_data() const + { + return static_cast(data); + } + CONSTEXPR npu_set_ofm_base2_t &set_data(uint32_t value) + { + data = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_payload_size() const + { + return static_cast(payload_size); + } + CONSTEXPR npu_set_ofm_base2_t &set_payload_size(uint32_t value) + { + payload_size = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Set OFM base address (bottom right tile) +struct npu_set_ofm_base3_t +{ + uint32_t cmd_code : 10; // NPU_SET_OFM_BASE3 + uint32_t must_be_zero : 4; // 0 + uint32_t payload_size : 2; // Min:1 Max:2 + uint32_t reserved0 : 16; + uint32_t data : 32; // OFM base address (bottom right tile) +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd1::NPU_SET_OFM_BASE3) && must_be_zero == 0 && payload_size >= 1 && + payload_size <= 2; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd1::NPU_SET_OFM_BASE3); + must_be_zero = 0; + payload_size = 1; + } + CONSTEXPR ::cmd1 get_cmd_code() const + { + return static_cast<::cmd1>(cmd_code); + } + CONSTEXPR npu_set_ofm_base3_t &set_cmd_code(::cmd1 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_data() const + { + return static_cast(data); + } + CONSTEXPR npu_set_ofm_base3_t &set_data(uint32_t value) + { + data = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_payload_size() const + { + return static_cast(payload_size); + } + CONSTEXPR npu_set_ofm_base3_t &set_payload_size(uint32_t value) + { + payload_size = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Set OFM byte stride between horizontal values +struct npu_set_ofm_stride_x_t +{ + uint32_t cmd_code : 10; // NPU_SET_OFM_STRIDE_X + uint32_t must_be_zero : 4; // 0 + uint32_t payload_size : 2; // Min:1 Max:2 + uint32_t reserved0 : 16; + uint32_t data : 32; // OFM byte stride between horizontal values +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd1::NPU_SET_OFM_STRIDE_X) && must_be_zero == 0 && + payload_size >= 1 && payload_size <= 2; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd1::NPU_SET_OFM_STRIDE_X); + must_be_zero = 0; + payload_size = 1; + } + CONSTEXPR ::cmd1 get_cmd_code() const + { + return static_cast<::cmd1>(cmd_code); + } + CONSTEXPR npu_set_ofm_stride_x_t &set_cmd_code(::cmd1 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_data() const + { + return static_cast(data); + } + CONSTEXPR npu_set_ofm_stride_x_t &set_data(uint32_t value) + { + data = static_cast(value); + return *this; + } + CONSTEXPR 
uint32_t get_payload_size() const + { + return static_cast(payload_size); + } + CONSTEXPR npu_set_ofm_stride_x_t &set_payload_size(uint32_t value) + { + payload_size = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Set OFM byte stride between vertical values +struct npu_set_ofm_stride_y_t +{ + uint32_t cmd_code : 10; // NPU_SET_OFM_STRIDE_Y + uint32_t must_be_zero : 4; // 0 + uint32_t payload_size : 2; // Min:1 Max:2 + uint32_t reserved0 : 16; + uint32_t data : 32; // OFM byte stride between vertical values +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd1::NPU_SET_OFM_STRIDE_Y) && must_be_zero == 0 && + payload_size >= 1 && payload_size <= 2; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd1::NPU_SET_OFM_STRIDE_Y); + must_be_zero = 0; + payload_size = 1; + } + CONSTEXPR ::cmd1 get_cmd_code() const + { + return static_cast<::cmd1>(cmd_code); + } + CONSTEXPR npu_set_ofm_stride_y_t &set_cmd_code(::cmd1 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_data() const + { + return static_cast(data); + } + CONSTEXPR npu_set_ofm_stride_y_t &set_data(uint32_t value) + { + data = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_payload_size() const + { + return static_cast(payload_size); + } + CONSTEXPR npu_set_ofm_stride_y_t &set_payload_size(uint32_t value) + { + payload_size = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Set OFM byte stride between channel blocks (of 16 bytes each block) +struct npu_set_ofm_stride_c_t +{ + uint32_t cmd_code : 10; // NPU_SET_OFM_STRIDE_C + uint32_t must_be_zero : 4; // 0 + uint32_t payload_size : 2; // Min:1 Max:2 + uint32_t reserved0 : 16; + uint32_t data : 32; // OFM byte stride between channel blocks (of 16 bytes each block) +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd1::NPU_SET_OFM_STRIDE_C) && must_be_zero == 0 && + payload_size >= 1 && payload_size <= 2; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd1::NPU_SET_OFM_STRIDE_C); + must_be_zero = 0; + payload_size = 1; + } + CONSTEXPR ::cmd1 get_cmd_code() const + { + return static_cast<::cmd1>(cmd_code); + } + CONSTEXPR npu_set_ofm_stride_c_t &set_cmd_code(::cmd1 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_data() const + { + return static_cast(data); + } + CONSTEXPR npu_set_ofm_stride_c_t &set_data(uint32_t value) + { + data = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_payload_size() const + { + return static_cast(payload_size); + } + CONSTEXPR npu_set_ofm_stride_c_t &set_payload_size(uint32_t value) + { + payload_size = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Set Weight stream input base address +struct npu_set_weight_base_t +{ + uint32_t cmd_code : 10; // NPU_SET_WEIGHT_BASE + uint32_t must_be_zero : 4; // 0 + uint32_t payload_size : 2; // Min:1 Max:2 + uint32_t reserved0 : 16; + uint32_t data : 32; // Weight stream input base address +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd1::NPU_SET_WEIGHT_BASE) && must_be_zero == 0 && payload_size >= 1 && + payload_size <= 2; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd1::NPU_SET_WEIGHT_BASE); + must_be_zero = 0; + payload_size = 1; + } + CONSTEXPR ::cmd1 get_cmd_code() const + { + return static_cast<::cmd1>(cmd_code); + } + CONSTEXPR npu_set_weight_base_t &set_cmd_code(::cmd1 value) + { + 
cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_data() const + { + return static_cast(data); + } + CONSTEXPR npu_set_weight_base_t &set_data(uint32_t value) + { + data = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_payload_size() const + { + return static_cast(payload_size); + } + CONSTEXPR npu_set_weight_base_t &set_payload_size(uint32_t value) + { + payload_size = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Set Weight stream length +struct npu_set_weight_length_t +{ + uint32_t cmd_code : 10; // NPU_SET_WEIGHT_LENGTH + uint32_t must_be_zero : 4; // 0 + uint32_t payload_size : 2; // Min:1 Max:2 + uint32_t reserved0 : 16; + uint32_t data : 32; // Weight stream length +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd1::NPU_SET_WEIGHT_LENGTH) && must_be_zero == 0 && + payload_size >= 1 && payload_size <= 2; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd1::NPU_SET_WEIGHT_LENGTH); + must_be_zero = 0; + payload_size = 1; + } + CONSTEXPR ::cmd1 get_cmd_code() const + { + return static_cast<::cmd1>(cmd_code); + } + CONSTEXPR npu_set_weight_length_t &set_cmd_code(::cmd1 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_data() const + { + return static_cast(data); + } + CONSTEXPR npu_set_weight_length_t &set_data(uint32_t value) + { + data = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_payload_size() const + { + return static_cast(payload_size); + } + CONSTEXPR npu_set_weight_length_t &set_payload_size(uint32_t value) + { + payload_size = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Set Scale and bias stream input base address +struct npu_set_scale_base_t +{ + uint32_t cmd_code : 10; // NPU_SET_SCALE_BASE + uint32_t must_be_zero : 4; // 0 + uint32_t payload_size : 2; // Min:1 Max:2 + uint32_t reserved0 : 16; + uint32_t data : 32; // Scale and bias stream input base address +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd1::NPU_SET_SCALE_BASE) && must_be_zero == 0 && payload_size >= 1 && + payload_size <= 2; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd1::NPU_SET_SCALE_BASE); + must_be_zero = 0; + payload_size = 1; + } + CONSTEXPR ::cmd1 get_cmd_code() const + { + return static_cast<::cmd1>(cmd_code); + } + CONSTEXPR npu_set_scale_base_t &set_cmd_code(::cmd1 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_data() const + { + return static_cast(data); + } + CONSTEXPR npu_set_scale_base_t &set_data(uint32_t value) + { + data = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_payload_size() const + { + return static_cast(payload_size); + } + CONSTEXPR npu_set_scale_base_t &set_payload_size(uint32_t value) + { + payload_size = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Set Scale and bias stream input length +struct npu_set_scale_length_t +{ + uint32_t cmd_code : 10; // NPU_SET_SCALE_LENGTH + uint32_t must_be_zero : 4; // 0 + uint32_t payload_size : 2; // Min:1 Max:2 + uint32_t reserved0 : 16; + uint32_t data : 32; // Scale and bias stream input length +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd1::NPU_SET_SCALE_LENGTH) && must_be_zero == 0 && + payload_size >= 1 && payload_size <= 2; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd1::NPU_SET_SCALE_LENGTH); + must_be_zero = 0; + payload_size = 
1; + } + CONSTEXPR ::cmd1 get_cmd_code() const + { + return static_cast<::cmd1>(cmd_code); + } + CONSTEXPR npu_set_scale_length_t &set_cmd_code(::cmd1 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_data() const + { + return static_cast(data); + } + CONSTEXPR npu_set_scale_length_t &set_data(uint32_t value) + { + data = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_payload_size() const + { + return static_cast(payload_size); + } + CONSTEXPR npu_set_scale_length_t &set_payload_size(uint32_t value) + { + payload_size = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Set scale (32-bit). Used by average pool with pad=0, elementwise MUL, ADD, SUB +struct npu_set_ofm_scale_t +{ + uint32_t cmd_code : 10; // NPU_SET_OFM_SCALE + uint32_t must_be_zero : 4; // 0 + uint32_t payload_size : 2; // Min:1 Max:2 + uint32_t shift : 16; + uint32_t data : 32; // scale (32-bit). Used by average pool with pad=0, elementwise MUL, ADD, SUB +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd1::NPU_SET_OFM_SCALE) && must_be_zero == 0 && payload_size >= 1 && + payload_size <= 2; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd1::NPU_SET_OFM_SCALE); + must_be_zero = 0; + payload_size = 1; + } + CONSTEXPR ::cmd1 get_cmd_code() const + { + return static_cast<::cmd1>(cmd_code); + } + CONSTEXPR npu_set_ofm_scale_t &set_cmd_code(::cmd1 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_data() const + { + return static_cast(data); + } + CONSTEXPR npu_set_ofm_scale_t &set_data(uint32_t value) + { + data = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_payload_size() const + { + return static_cast(payload_size); + } + CONSTEXPR npu_set_ofm_scale_t &set_payload_size(uint32_t value) + { + payload_size = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_shift() const + { + return static_cast(shift); + } + CONSTEXPR npu_set_ofm_scale_t &set_shift(uint32_t value) + { + shift = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Set scale (32-bit) used for elementwise ADD/SUB OPA prescale. If IFM scale mode is 0 then shift is ignored and scale +// is 16-bit. If IFM scale mode is 1 or 2 then shift is 6-bit and scale is 32-bit +struct npu_set_opa_scale_t +{ + uint32_t cmd_code : 10; // NPU_SET_OPA_SCALE + uint32_t must_be_zero : 4; // 0 + uint32_t payload_size : 2; // Min:1 Max:2 + uint32_t shift : 16; + uint32_t + data : 32; // scale (32-bit) used for elementwise ADD/SUB OPA prescale. If IFM scale mode is 0 then shift is + // ignored and scale is 16-bit. 
If IFM scale mode is 1 or 2 then shift is 6-bit and scale is 32-bit +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd1::NPU_SET_OPA_SCALE) && must_be_zero == 0 && payload_size >= 1 && + payload_size <= 2; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd1::NPU_SET_OPA_SCALE); + must_be_zero = 0; + payload_size = 1; + } + CONSTEXPR ::cmd1 get_cmd_code() const + { + return static_cast<::cmd1>(cmd_code); + } + CONSTEXPR npu_set_opa_scale_t &set_cmd_code(::cmd1 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_data() const + { + return static_cast(data); + } + CONSTEXPR npu_set_opa_scale_t &set_data(uint32_t value) + { + data = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_payload_size() const + { + return static_cast(payload_size); + } + CONSTEXPR npu_set_opa_scale_t &set_payload_size(uint32_t value) + { + payload_size = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_shift() const + { + return static_cast(shift); + } + CONSTEXPR npu_set_opa_scale_t &set_shift(uint32_t value) + { + shift = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Set scale (16-bit) used for elementwise ADD/SUB OPB prescale. If IFM scale mode is 0 then scale is 16-bit. If IFM +// scale mode is 1 or 2 then this register is not used +struct npu_set_opb_scale_t +{ + uint32_t cmd_code : 10; // NPU_SET_OPB_SCALE + uint32_t must_be_zero : 4; // 0 + uint32_t payload_size : 2; // Min:1 Max:2 + uint32_t reserved0 : 16; + uint32_t data : 32; // scale (16-bit) used for elementwise ADD/SUB OPB prescale. If IFM scale mode is 0 then scale + // is 16-bit. If IFM scale mode is 1 or 2 then this register is not used +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd1::NPU_SET_OPB_SCALE) && must_be_zero == 0 && payload_size >= 1 && + payload_size <= 2; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd1::NPU_SET_OPB_SCALE); + must_be_zero = 0; + payload_size = 1; + } + CONSTEXPR ::cmd1 get_cmd_code() const + { + return static_cast<::cmd1>(cmd_code); + } + CONSTEXPR npu_set_opb_scale_t &set_cmd_code(::cmd1 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_data() const + { + return static_cast(data); + } + CONSTEXPR npu_set_opb_scale_t &set_data(uint32_t value) + { + data = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_payload_size() const + { + return static_cast(payload_size); + } + CONSTEXPR npu_set_opb_scale_t &set_payload_size(uint32_t value) + { + payload_size = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Set DMA source address +struct npu_set_dma0_src_t +{ + uint32_t cmd_code : 10; // NPU_SET_DMA0_SRC + uint32_t must_be_zero : 4; // 0 + uint32_t payload_size : 2; // Min:1 Max:2 + uint32_t reserved0 : 16; + uint32_t data : 32; +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd1::NPU_SET_DMA0_SRC) && must_be_zero == 0 && payload_size >= 1 && + payload_size <= 2; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd1::NPU_SET_DMA0_SRC); + must_be_zero = 0; + payload_size = 1; + } + CONSTEXPR ::cmd1 get_cmd_code() const + { + return static_cast<::cmd1>(cmd_code); + } + CONSTEXPR npu_set_dma0_src_t &set_cmd_code(::cmd1 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_data() const + { + return static_cast(data); + } + CONSTEXPR npu_set_dma0_src_t &set_data(uint32_t 
value) + { + data = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_payload_size() const + { + return static_cast(payload_size); + } + CONSTEXPR npu_set_dma0_src_t &set_payload_size(uint32_t value) + { + payload_size = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Set DMA destination address +struct npu_set_dma0_dst_t +{ + uint32_t cmd_code : 10; // NPU_SET_DMA0_DST + uint32_t must_be_zero : 4; // 0 + uint32_t payload_size : 2; // Min:1 Max:2 + uint32_t reserved0 : 16; + uint32_t data : 32; +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd1::NPU_SET_DMA0_DST) && must_be_zero == 0 && payload_size >= 1 && + payload_size <= 2; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd1::NPU_SET_DMA0_DST); + must_be_zero = 0; + payload_size = 1; + } + CONSTEXPR ::cmd1 get_cmd_code() const + { + return static_cast<::cmd1>(cmd_code); + } + CONSTEXPR npu_set_dma0_dst_t &set_cmd_code(::cmd1 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_data() const + { + return static_cast(data); + } + CONSTEXPR npu_set_dma0_dst_t &set_data(uint32_t value) + { + data = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_payload_size() const + { + return static_cast(payload_size); + } + CONSTEXPR npu_set_dma0_dst_t &set_payload_size(uint32_t value) + { + payload_size = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Set DMA length +struct npu_set_dma0_len_t +{ + uint32_t cmd_code : 10; // NPU_SET_DMA0_LEN + uint32_t must_be_zero : 4; // 0 + uint32_t payload_size : 2; // Min:1 Max:2 + uint32_t reserved0 : 16; + uint32_t data : 32; // DMA length +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd1::NPU_SET_DMA0_LEN) && must_be_zero == 0 && payload_size >= 1 && + payload_size <= 2; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd1::NPU_SET_DMA0_LEN); + must_be_zero = 0; + payload_size = 1; + } + CONSTEXPR ::cmd1 get_cmd_code() const + { + return static_cast<::cmd1>(cmd_code); + } + CONSTEXPR npu_set_dma0_len_t &set_cmd_code(::cmd1 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_data() const + { + return static_cast(data); + } + CONSTEXPR npu_set_dma0_len_t &set_data(uint32_t value) + { + data = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_payload_size() const + { + return static_cast(payload_size); + } + CONSTEXPR npu_set_dma0_len_t &set_payload_size(uint32_t value) + { + payload_size = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Set Byte distance to skip after inner size (2D/3D mode) +struct npu_set_dma0_skip0_t +{ + uint32_t cmd_code : 10; // NPU_SET_DMA0_SKIP0 + uint32_t must_be_zero : 4; // 0 + uint32_t payload_size : 2; // Min:1 Max:2 + uint32_t param : 16; + uint32_t data : 32; // Byte distance to skip after inner size (2D/3D mode) +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd1::NPU_SET_DMA0_SKIP0) && must_be_zero == 0 && payload_size >= 1 && + payload_size <= 2; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd1::NPU_SET_DMA0_SKIP0); + must_be_zero = 0; + payload_size = 1; + } + CONSTEXPR ::cmd1 get_cmd_code() const + { + return static_cast<::cmd1>(cmd_code); + } + CONSTEXPR npu_set_dma0_skip0_t &set_cmd_code(::cmd1 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_data() const + { + return static_cast(data); + } + 
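// The three DMA commands just defined (NPU_SET_DMA0_SRC / _DST / _LEN) are enough
// to describe a flat copy; DMA0_SKIP0 here and DMA0_SKIP1 below only matter in the
// 2D/3D modes. A hedged sketch of queueing such a copy, reusing one generic helper
// for the two-word cmd1 encoding (the memcpy-based serialisation and the trailing
// NPU_OP_DMA_START step are assumptions of this sketch):
#ifdef __cplusplus
#include <cstdint>
#include <cstring>
#include <vector>

template <typename CMD>
inline void emit_cmd1(std::vector<uint32_t> &stream, uint32_t data)
{
    CMD cmd = {};
    cmd.init();         // stamps the cmd1 command code, payload_size = 1
    cmd.set_data(data);
    uint32_t words[2];
    std::memcpy(words, &cmd, sizeof(words));
    stream.push_back(words[0]);
    stream.push_back(words[1]);
}

inline void queue_flat_dma(std::vector<uint32_t> &stream,
                           uint32_t src, uint32_t dst, uint32_t length)
{
    emit_cmd1<npu_set_dma0_src_t>(stream, src);    // DMA source address
    emit_cmd1<npu_set_dma0_dst_t>(stream, dst);    // DMA destination address
    emit_cmd1<npu_set_dma0_len_t>(stream, length); // DMA length
    // an npu_op_dma_start command (defined elsewhere in this header) would follow
}
#endif //__cplusplus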
CONSTEXPR npu_set_dma0_skip0_t &set_data(uint32_t value) + { + data = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_param() const + { + return static_cast(param); + } + CONSTEXPR npu_set_dma0_skip0_t &set_param(uint32_t value) + { + param = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_payload_size() const + { + return static_cast(payload_size); + } + CONSTEXPR npu_set_dma0_skip0_t &set_payload_size(uint32_t value) + { + payload_size = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Set Byte distance to skip after outer size (3D mode) +struct npu_set_dma0_skip1_t +{ + uint32_t cmd_code : 10; // NPU_SET_DMA0_SKIP1 + uint32_t must_be_zero : 4; // 0 + uint32_t payload_size : 2; // Min:1 Max:2 + uint32_t param : 16; + uint32_t data : 32; // Byte distance to skip after outer size (3D mode) +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd1::NPU_SET_DMA0_SKIP1) && must_be_zero == 0 && payload_size >= 1 && + payload_size <= 2; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd1::NPU_SET_DMA0_SKIP1); + must_be_zero = 0; + payload_size = 1; + } + CONSTEXPR ::cmd1 get_cmd_code() const + { + return static_cast<::cmd1>(cmd_code); + } + CONSTEXPR npu_set_dma0_skip1_t &set_cmd_code(::cmd1 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_data() const + { + return static_cast(data); + } + CONSTEXPR npu_set_dma0_skip1_t &set_data(uint32_t value) + { + data = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_param() const + { + return static_cast(param); + } + CONSTEXPR npu_set_dma0_skip1_t &set_param(uint32_t value) + { + param = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_payload_size() const + { + return static_cast(payload_size); + } + CONSTEXPR npu_set_dma0_skip1_t &set_payload_size(uint32_t value) + { + payload_size = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Set IFM2 tile0 offset (top left tile) from IFM_REGION start +struct npu_set_ifm2_base0_t +{ + uint32_t cmd_code : 10; // NPU_SET_IFM2_BASE0 + uint32_t must_be_zero : 4; // 0 + uint32_t payload_size : 2; // Min:1 Max:2 + uint32_t reserved0 : 16; + uint32_t data : 32; // IFM2 tile0 offset (top left tile) from IFM_REGION start +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd1::NPU_SET_IFM2_BASE0) && must_be_zero == 0 && payload_size >= 1 && + payload_size <= 2; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd1::NPU_SET_IFM2_BASE0); + must_be_zero = 0; + payload_size = 1; + } + CONSTEXPR ::cmd1 get_cmd_code() const + { + return static_cast<::cmd1>(cmd_code); + } + CONSTEXPR npu_set_ifm2_base0_t &set_cmd_code(::cmd1 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_data() const + { + return static_cast(data); + } + CONSTEXPR npu_set_ifm2_base0_t &set_data(uint32_t value) + { + data = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_payload_size() const + { + return static_cast(payload_size); + } + CONSTEXPR npu_set_ifm2_base0_t &set_payload_size(uint32_t value) + { + payload_size = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Set IFM2 tile1 offset (top right tile) from IFM_REGION start +struct npu_set_ifm2_base1_t +{ + uint32_t cmd_code : 10; // NPU_SET_IFM2_BASE1 + uint32_t must_be_zero : 4; // 0 + uint32_t payload_size : 2; // Min:1 Max:2 + uint32_t reserved0 : 16; + uint32_t data : 32; // IFM2 
tile1 offset (top right tile) from IFM_REGION start +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd1::NPU_SET_IFM2_BASE1) && must_be_zero == 0 && payload_size >= 1 && + payload_size <= 2; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd1::NPU_SET_IFM2_BASE1); + must_be_zero = 0; + payload_size = 1; + } + CONSTEXPR ::cmd1 get_cmd_code() const + { + return static_cast<::cmd1>(cmd_code); + } + CONSTEXPR npu_set_ifm2_base1_t &set_cmd_code(::cmd1 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_data() const + { + return static_cast(data); + } + CONSTEXPR npu_set_ifm2_base1_t &set_data(uint32_t value) + { + data = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_payload_size() const + { + return static_cast(payload_size); + } + CONSTEXPR npu_set_ifm2_base1_t &set_payload_size(uint32_t value) + { + payload_size = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Set IFM2 tile2 offset (bottom left tile) from IFM_REGION start +struct npu_set_ifm2_base2_t +{ + uint32_t cmd_code : 10; // NPU_SET_IFM2_BASE2 + uint32_t must_be_zero : 4; // 0 + uint32_t payload_size : 2; // Min:1 Max:2 + uint32_t reserved0 : 16; + uint32_t data : 32; // IFM2 tile2 offset (bottom left tile) from IFM_REGION start +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd1::NPU_SET_IFM2_BASE2) && must_be_zero == 0 && payload_size >= 1 && + payload_size <= 2; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd1::NPU_SET_IFM2_BASE2); + must_be_zero = 0; + payload_size = 1; + } + CONSTEXPR ::cmd1 get_cmd_code() const + { + return static_cast<::cmd1>(cmd_code); + } + CONSTEXPR npu_set_ifm2_base2_t &set_cmd_code(::cmd1 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_data() const + { + return static_cast(data); + } + CONSTEXPR npu_set_ifm2_base2_t &set_data(uint32_t value) + { + data = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_payload_size() const + { + return static_cast(payload_size); + } + CONSTEXPR npu_set_ifm2_base2_t &set_payload_size(uint32_t value) + { + payload_size = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Set IFM2 tile3 offset (bottom right tile) from IFM_REGION start +struct npu_set_ifm2_base3_t +{ + uint32_t cmd_code : 10; // NPU_SET_IFM2_BASE3 + uint32_t must_be_zero : 4; // 0 + uint32_t payload_size : 2; // Min:1 Max:2 + uint32_t reserved0 : 16; + uint32_t data : 32; // IFM2 tile3 offset (bottom right tile) from IFM_REGION start +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd1::NPU_SET_IFM2_BASE3) && must_be_zero == 0 && payload_size >= 1 && + payload_size <= 2; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd1::NPU_SET_IFM2_BASE3); + must_be_zero = 0; + payload_size = 1; + } + CONSTEXPR ::cmd1 get_cmd_code() const + { + return static_cast<::cmd1>(cmd_code); + } + CONSTEXPR npu_set_ifm2_base3_t &set_cmd_code(::cmd1 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_data() const + { + return static_cast(data); + } + CONSTEXPR npu_set_ifm2_base3_t &set_data(uint32_t value) + { + data = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_payload_size() const + { + return static_cast(payload_size); + } + CONSTEXPR npu_set_ifm2_base3_t &set_payload_size(uint32_t value) + { + payload_size = static_cast(value); + return *this; + } +#endif 
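// The field widths in these definitions imply fixed command sizes: every 16-bit
// parameter (cmd0) command packs into one 32-bit word, and every cmd1 command adds
// one 32-bit data word. A few illustrative compile-time checks, assuming the
// compiler packs the bit-fields without padding as the layout comments rely on:
#ifdef __cplusplus
static_assert(sizeof(npu_set_ifm2_zero_point_t) == 4, "cmd0: one command word");
static_assert(sizeof(npu_set_ifm2_base0_t) == 8, "cmd1: command word + data word");
static_assert(sizeof(npu_set_dma0_skip0_t) == 8, "cmd1: command word + data word");
#endif //__cplusplus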
//__cplusplus +}; + +// Set IFM2 byte stride between horizontal values +struct npu_set_ifm2_stride_x_t +{ + uint32_t cmd_code : 10; // NPU_SET_IFM2_STRIDE_X + uint32_t must_be_zero : 4; // 0 + uint32_t payload_size : 2; // Min:1 Max:2 + uint32_t reserved0 : 16; + uint32_t data : 32; // IFM2 byte stride between horizontal values +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd1::NPU_SET_IFM2_STRIDE_X) && must_be_zero == 0 && + payload_size >= 1 && payload_size <= 2; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd1::NPU_SET_IFM2_STRIDE_X); + must_be_zero = 0; + payload_size = 1; + } + CONSTEXPR ::cmd1 get_cmd_code() const + { + return static_cast<::cmd1>(cmd_code); + } + CONSTEXPR npu_set_ifm2_stride_x_t &set_cmd_code(::cmd1 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_data() const + { + return static_cast(data); + } + CONSTEXPR npu_set_ifm2_stride_x_t &set_data(uint32_t value) + { + data = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_payload_size() const + { + return static_cast(payload_size); + } + CONSTEXPR npu_set_ifm2_stride_x_t &set_payload_size(uint32_t value) + { + payload_size = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Set IFM2 byte stride between vertical values +struct npu_set_ifm2_stride_y_t +{ + uint32_t cmd_code : 10; // NPU_SET_IFM2_STRIDE_Y + uint32_t must_be_zero : 4; // 0 + uint32_t payload_size : 2; // Min:1 Max:2 + uint32_t reserved0 : 16; + uint32_t data : 32; // IFM2 byte stride between vertical values +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd1::NPU_SET_IFM2_STRIDE_Y) && must_be_zero == 0 && + payload_size >= 1 && payload_size <= 2; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd1::NPU_SET_IFM2_STRIDE_Y); + must_be_zero = 0; + payload_size = 1; + } + CONSTEXPR ::cmd1 get_cmd_code() const + { + return static_cast<::cmd1>(cmd_code); + } + CONSTEXPR npu_set_ifm2_stride_y_t &set_cmd_code(::cmd1 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_data() const + { + return static_cast(data); + } + CONSTEXPR npu_set_ifm2_stride_y_t &set_data(uint32_t value) + { + data = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_payload_size() const + { + return static_cast(payload_size); + } + CONSTEXPR npu_set_ifm2_stride_y_t &set_payload_size(uint32_t value) + { + payload_size = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Set IFM2 byte stride between channel blocks (of 16 bytes each block) +struct npu_set_ifm2_stride_c_t +{ + uint32_t cmd_code : 10; // NPU_SET_IFM2_STRIDE_C + uint32_t must_be_zero : 4; // 0 + uint32_t payload_size : 2; // Min:1 Max:2 + uint32_t reserved0 : 16; + uint32_t data : 32; // IFM2 byte stride between channel blocks (of 16 bytes each block) +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd1::NPU_SET_IFM2_STRIDE_C) && must_be_zero == 0 && + payload_size >= 1 && payload_size <= 2; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd1::NPU_SET_IFM2_STRIDE_C); + must_be_zero = 0; + payload_size = 1; + } + CONSTEXPR ::cmd1 get_cmd_code() const + { + return static_cast<::cmd1>(cmd_code); + } + CONSTEXPR npu_set_ifm2_stride_c_t &set_cmd_code(::cmd1 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_data() const + { + return static_cast(data); + } + CONSTEXPR npu_set_ifm2_stride_c_t 
&set_data(uint32_t value) + { + data = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_payload_size() const + { + return static_cast(payload_size); + } + CONSTEXPR npu_set_ifm2_stride_c_t &set_payload_size(uint32_t value) + { + payload_size = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Set Weight stream byte offset in WEIGHT_REGION +struct npu_set_weight1_base_t +{ + uint32_t cmd_code : 10; // NPU_SET_WEIGHT1_BASE + uint32_t must_be_zero : 4; // 0 + uint32_t payload_size : 2; // Min:1 Max:2 + uint32_t param : 16; + uint32_t data : 32; // Weight stream byte offset in WEIGHT_REGION +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd1::NPU_SET_WEIGHT1_BASE) && must_be_zero == 0 && + payload_size >= 1 && payload_size <= 2; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd1::NPU_SET_WEIGHT1_BASE); + must_be_zero = 0; + payload_size = 1; + } + CONSTEXPR ::cmd1 get_cmd_code() const + { + return static_cast<::cmd1>(cmd_code); + } + CONSTEXPR npu_set_weight1_base_t &set_cmd_code(::cmd1 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_data() const + { + return static_cast(data); + } + CONSTEXPR npu_set_weight1_base_t &set_data(uint32_t value) + { + data = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_param() const + { + return static_cast(param); + } + CONSTEXPR npu_set_weight1_base_t &set_param(uint32_t value) + { + param = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_payload_size() const + { + return static_cast(payload_size); + } + CONSTEXPR npu_set_weight1_base_t &set_payload_size(uint32_t value) + { + payload_size = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Set Weight stream byte length (unsigned 32 bits) +struct npu_set_weight1_length_t +{ + uint32_t cmd_code : 10; // NPU_SET_WEIGHT1_LENGTH + uint32_t must_be_zero : 4; // 0 + uint32_t payload_size : 2; // Min:1 Max:2 + uint32_t reserved0 : 16; + uint32_t data : 32; // Weight stream byte length (unsigned 32 bits) +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd1::NPU_SET_WEIGHT1_LENGTH) && must_be_zero == 0 && + payload_size >= 1 && payload_size <= 2; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd1::NPU_SET_WEIGHT1_LENGTH); + must_be_zero = 0; + payload_size = 1; + } + CONSTEXPR ::cmd1 get_cmd_code() const + { + return static_cast<::cmd1>(cmd_code); + } + CONSTEXPR npu_set_weight1_length_t &set_cmd_code(::cmd1 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_data() const + { + return static_cast(data); + } + CONSTEXPR npu_set_weight1_length_t &set_data(uint32_t value) + { + data = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_payload_size() const + { + return static_cast(payload_size); + } + CONSTEXPR npu_set_weight1_length_t &set_payload_size(uint32_t value) + { + payload_size = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Set Scale and bias stream input byte offset from SCALE_REGION +struct npu_set_scale1_base_t +{ + uint32_t cmd_code : 10; // NPU_SET_SCALE1_BASE + uint32_t must_be_zero : 4; // 0 + uint32_t payload_size : 2; // Min:1 Max:2 + uint32_t param : 16; + uint32_t data : 32; // Scale and bias stream input byte offset from SCALE_REGION +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd1::NPU_SET_SCALE1_BASE) && must_be_zero == 0 && payload_size >= 1 && 
+ payload_size <= 2; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd1::NPU_SET_SCALE1_BASE); + must_be_zero = 0; + payload_size = 1; + } + CONSTEXPR ::cmd1 get_cmd_code() const + { + return static_cast<::cmd1>(cmd_code); + } + CONSTEXPR npu_set_scale1_base_t &set_cmd_code(::cmd1 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_data() const + { + return static_cast(data); + } + CONSTEXPR npu_set_scale1_base_t &set_data(uint32_t value) + { + data = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_param() const + { + return static_cast(param); + } + CONSTEXPR npu_set_scale1_base_t &set_param(uint32_t value) + { + param = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_payload_size() const + { + return static_cast(payload_size); + } + CONSTEXPR npu_set_scale1_base_t &set_payload_size(uint32_t value) + { + payload_size = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +// Set Scale and bias stream input byte length (unsigned 20 bits) +struct npu_set_scale1_length_t +{ + uint32_t cmd_code : 10; // NPU_SET_SCALE1_LENGTH + uint32_t must_be_zero : 4; // 0 + uint32_t payload_size : 2; // Min:1 Max:2 + uint32_t reserved0 : 16; + uint32_t data : 32; // Scale and bias stream input byte length (unsigned 20 bits) +#ifdef __cplusplus + CONSTEXPR bool valid() const + { + return cmd_code == static_cast(cmd1::NPU_SET_SCALE1_LENGTH) && must_be_zero == 0 && + payload_size >= 1 && payload_size <= 2; + } + CONSTEXPR void init() + { + cmd_code = static_cast(cmd1::NPU_SET_SCALE1_LENGTH); + must_be_zero = 0; + payload_size = 1; + } + CONSTEXPR ::cmd1 get_cmd_code() const + { + return static_cast<::cmd1>(cmd_code); + } + CONSTEXPR npu_set_scale1_length_t &set_cmd_code(::cmd1 value) + { + cmd_code = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_data() const + { + return static_cast(data); + } + CONSTEXPR npu_set_scale1_length_t &set_data(uint32_t value) + { + data = static_cast(value); + return *this; + } + CONSTEXPR uint32_t get_payload_size() const + { + return static_cast(payload_size); + } + CONSTEXPR npu_set_scale1_length_t &set_payload_size(uint32_t value) + { + payload_size = static_cast(value); + return *this; + } +#endif //__cplusplus +}; + +#define NPU_DATA_STRUCTS \ + NPU_STRUCT(command_no_payload) \ + NPU_STRUCT(command_with_payload) \ + NPU_STRUCT(npu_op_stop) \ + NPU_STRUCT(npu_op_irq) \ + NPU_STRUCT(npu_op_conv) \ + NPU_STRUCT(npu_op_depthwise) \ + NPU_STRUCT(npu_op_pool) \ + NPU_STRUCT(npu_op_elementwise) \ + NPU_STRUCT(npu_op_dma_start) \ + NPU_STRUCT(npu_op_dma_wait) \ + NPU_STRUCT(npu_op_kernel_wait) \ + NPU_STRUCT(npu_op_pmu_mask) \ + NPU_STRUCT(npu_set_ifm_pad_top) \ + NPU_STRUCT(npu_set_ifm_pad_left) \ + NPU_STRUCT(npu_set_ifm_pad_right) \ + NPU_STRUCT(npu_set_ifm_pad_bottom) \ + NPU_STRUCT(npu_set_ifm_depth_m1) \ + NPU_STRUCT(npu_set_ifm_precision) \ + NPU_STRUCT(npu_set_ifm_upscale) \ + NPU_STRUCT(npu_set_ifm_zero_point) \ + NPU_STRUCT(npu_set_ifm_width0_m1) \ + NPU_STRUCT(npu_set_ifm_height0_m1) \ + NPU_STRUCT(npu_set_ifm_height1_m1) \ + NPU_STRUCT(npu_set_ifm_ib_end) \ + NPU_STRUCT(npu_set_ifm_region) \ + NPU_STRUCT(npu_set_ofm_width_m1) \ + NPU_STRUCT(npu_set_ofm_height_m1) \ + NPU_STRUCT(npu_set_ofm_depth_m1) \ + NPU_STRUCT(npu_set_ofm_precision) \ + NPU_STRUCT(npu_set_ofm_blk_width_m1) \ + NPU_STRUCT(npu_set_ofm_blk_height_m1) \ + NPU_STRUCT(npu_set_ofm_blk_depth_m1) \ + NPU_STRUCT(npu_set_ofm_zero_point) \ + NPU_STRUCT(npu_set_ofm_width0_m1) \ + 
NPU_STRUCT(npu_set_ofm_height0_m1) \ + NPU_STRUCT(npu_set_ofm_height1_m1) \ + NPU_STRUCT(npu_set_ofm_region) \ + NPU_STRUCT(npu_set_kernel_width_m1) \ + NPU_STRUCT(npu_set_kernel_height_m1) \ + NPU_STRUCT(npu_set_kernel_stride) \ + NPU_STRUCT(npu_set_parallel_mode) \ + NPU_STRUCT(npu_set_acc_format) \ + NPU_STRUCT(npu_set_activation) \ + NPU_STRUCT(npu_set_activation_min) \ + NPU_STRUCT(npu_set_activation_max) \ + NPU_STRUCT(npu_set_weight_region) \ + NPU_STRUCT(npu_set_scale_region) \ + NPU_STRUCT(npu_set_ab_start) \ + NPU_STRUCT(npu_set_blockdep) \ + NPU_STRUCT(npu_set_dma0_src_region) \ + NPU_STRUCT(npu_set_dma0_dst_region) \ + NPU_STRUCT(npu_set_dma0_size0) \ + NPU_STRUCT(npu_set_dma0_size1) \ + NPU_STRUCT(npu_set_ifm2_broadcast) \ + NPU_STRUCT(npu_set_ifm2_scalar) \ + NPU_STRUCT(npu_set_ifm2_precision) \ + NPU_STRUCT(npu_set_ifm2_zero_point) \ + NPU_STRUCT(npu_set_ifm2_width0_m1) \ + NPU_STRUCT(npu_set_ifm2_height0_m1) \ + NPU_STRUCT(npu_set_ifm2_height1_m1) \ + NPU_STRUCT(npu_set_ifm2_ib_start) \ + NPU_STRUCT(npu_set_ifm2_region) \ + NPU_STRUCT(npu_set_ifm_base0) \ + NPU_STRUCT(npu_set_ifm_base1) \ + NPU_STRUCT(npu_set_ifm_base2) \ + NPU_STRUCT(npu_set_ifm_base3) \ + NPU_STRUCT(npu_set_ifm_stride_x) \ + NPU_STRUCT(npu_set_ifm_stride_y) \ + NPU_STRUCT(npu_set_ifm_stride_c) \ + NPU_STRUCT(npu_set_ofm_base0) \ + NPU_STRUCT(npu_set_ofm_base1) \ + NPU_STRUCT(npu_set_ofm_base2) \ + NPU_STRUCT(npu_set_ofm_base3) \ + NPU_STRUCT(npu_set_ofm_stride_x) \ + NPU_STRUCT(npu_set_ofm_stride_y) \ + NPU_STRUCT(npu_set_ofm_stride_c) \ + NPU_STRUCT(npu_set_weight_base) \ + NPU_STRUCT(npu_set_weight_length) \ + NPU_STRUCT(npu_set_scale_base) \ + NPU_STRUCT(npu_set_scale_length) \ + NPU_STRUCT(npu_set_ofm_scale) \ + NPU_STRUCT(npu_set_opa_scale) \ + NPU_STRUCT(npu_set_opb_scale) \ + NPU_STRUCT(npu_set_dma0_src) \ + NPU_STRUCT(npu_set_dma0_dst) \ + NPU_STRUCT(npu_set_dma0_len) \ + NPU_STRUCT(npu_set_dma0_skip0) \ + NPU_STRUCT(npu_set_dma0_skip1) \ + NPU_STRUCT(npu_set_ifm2_base0) \ + NPU_STRUCT(npu_set_ifm2_base1) \ + NPU_STRUCT(npu_set_ifm2_base2) \ + NPU_STRUCT(npu_set_ifm2_base3) \ + NPU_STRUCT(npu_set_ifm2_stride_x) \ + NPU_STRUCT(npu_set_ifm2_stride_y) \ + NPU_STRUCT(npu_set_ifm2_stride_c) \ + NPU_STRUCT(npu_set_weight1_base) \ + NPU_STRUCT(npu_set_weight1_length) \ + NPU_STRUCT(npu_set_scale1_base) \ + NPU_STRUCT(npu_set_scale1_length) +#define NPU_OP_STRUCTS \ + NPU_OP_(stop) \ + NPU_OP_(irq) \ + NPU_OP_(conv) \ + NPU_OP_(depthwise) \ + NPU_OP_(pool) \ + NPU_OP_(elementwise) \ + NPU_OP_(dma_start) \ + NPU_OP_(dma_wait) \ + NPU_OP_(kernel_wait) \ + NPU_OP_(pmu_mask) +#define NPU_SET_STRUCTS \ + NPU_SET_(ifm_pad_top) \ + NPU_SET_(ifm_pad_left) \ + NPU_SET_(ifm_pad_right) \ + NPU_SET_(ifm_pad_bottom) \ + NPU_SET_(ifm_depth_m1) \ + NPU_SET_(ifm_precision) \ + NPU_SET_(ifm_upscale) \ + NPU_SET_(ifm_zero_point) \ + NPU_SET_(ifm_width0_m1) \ + NPU_SET_(ifm_height0_m1) \ + NPU_SET_(ifm_height1_m1) \ + NPU_SET_(ifm_ib_end) \ + NPU_SET_(ifm_region) \ + NPU_SET_(ofm_width_m1) \ + NPU_SET_(ofm_height_m1) \ + NPU_SET_(ofm_depth_m1) \ + NPU_SET_(ofm_precision) \ + NPU_SET_(ofm_blk_width_m1) \ + NPU_SET_(ofm_blk_height_m1) \ + NPU_SET_(ofm_blk_depth_m1) \ + NPU_SET_(ofm_zero_point) \ + NPU_SET_(ofm_width0_m1) \ + NPU_SET_(ofm_height0_m1) \ + NPU_SET_(ofm_height1_m1) \ + NPU_SET_(ofm_region) \ + NPU_SET_(kernel_width_m1) \ + NPU_SET_(kernel_height_m1) \ + NPU_SET_(kernel_stride) \ + NPU_SET_(parallel_mode) \ + NPU_SET_(acc_format) \ + NPU_SET_(activation) \ + NPU_SET_(activation_min) \ + NPU_SET_(activation_max) \ 
+ NPU_SET_(weight_region) \ + NPU_SET_(scale_region) \ + NPU_SET_(ab_start) \ + NPU_SET_(blockdep) \ + NPU_SET_(dma0_src_region) \ + NPU_SET_(dma0_dst_region) \ + NPU_SET_(dma0_size0) \ + NPU_SET_(dma0_size1) \ + NPU_SET_(ifm2_broadcast) \ + NPU_SET_(ifm2_scalar) \ + NPU_SET_(ifm2_precision) \ + NPU_SET_(ifm2_zero_point) \ + NPU_SET_(ifm2_width0_m1) \ + NPU_SET_(ifm2_height0_m1) \ + NPU_SET_(ifm2_height1_m1) \ + NPU_SET_(ifm2_ib_start) \ + NPU_SET_(ifm2_region) \ + NPU_SET_(ifm_base0) \ + NPU_SET_(ifm_base1) \ + NPU_SET_(ifm_base2) \ + NPU_SET_(ifm_base3) \ + NPU_SET_(ifm_stride_x) \ + NPU_SET_(ifm_stride_y) \ + NPU_SET_(ifm_stride_c) \ + NPU_SET_(ofm_base0) \ + NPU_SET_(ofm_base1) \ + NPU_SET_(ofm_base2) \ + NPU_SET_(ofm_base3) \ + NPU_SET_(ofm_stride_x) \ + NPU_SET_(ofm_stride_y) \ + NPU_SET_(ofm_stride_c) \ + NPU_SET_(weight_base) \ + NPU_SET_(weight_length) \ + NPU_SET_(scale_base) \ + NPU_SET_(scale_length) \ + NPU_SET_(ofm_scale) \ + NPU_SET_(opa_scale) \ + NPU_SET_(opb_scale) \ + NPU_SET_(dma0_src) \ + NPU_SET_(dma0_dst) \ + NPU_SET_(dma0_len) \ + NPU_SET_(dma0_skip0) \ + NPU_SET_(dma0_skip1) \ + NPU_SET_(ifm2_base0) \ + NPU_SET_(ifm2_base1) \ + NPU_SET_(ifm2_base2) \ + NPU_SET_(ifm2_base3) \ + NPU_SET_(ifm2_stride_x) \ + NPU_SET_(ifm2_stride_y) \ + NPU_SET_(ifm2_stride_c) \ + NPU_SET_(weight1_base) \ + NPU_SET_(weight1_length) \ + NPU_SET_(scale1_base) \ + NPU_SET_(scale1_length) +#define COMMAND_STRUCTS \ + COMMAND_(no_payload) \ + COMMAND_(with_payload) + +#define EXPAND_ACC_FORMAT(FUNC, SEP) \ + FUNC(acc_format, INT_32BIT) SEP FUNC(acc_format, INT_40BIT) SEP FUNC(acc_format, FP_S5_10) + +#define EXPAND_ACTIVATION(FUNC, SEP) \ + FUNC(activation, NONE) \ + SEP FUNC(activation, TANH) SEP FUNC(activation, SIGMOID) SEP FUNC(activation, LUT_START) \ + SEP FUNC(activation, LUT_END) + +#define EXPAND_AXI_MEM_ENCODING_TYPE(FUNC, SEP) \ + FUNC(axi_mem_encoding_type, DEVICE_NON_BUFFERABLE) \ + SEP FUNC(axi_mem_encoding_type, DEVICE_BUFFERABLE) \ + SEP FUNC(axi_mem_encoding_type, NORMAL_NON_CACHEABLE_NON_BUFFERABLE) \ + SEP FUNC(axi_mem_encoding_type, NORMAL_NON_CACHEABLE_BUFFERABLE) \ + SEP FUNC(axi_mem_encoding_type, WRITE_THROUGH_NO_ALLOCATE) \ + SEP FUNC(axi_mem_encoding_type, WRITE_THROUGH_READ_ALLOCATE) \ + SEP FUNC(axi_mem_encoding_type, WRITE_THROUGH_WRITE_ALLOCATE) \ + SEP FUNC(axi_mem_encoding_type, WRITE_THROUGH_READ_AND_WRITE_ALLOCATE) \ + SEP FUNC(axi_mem_encoding_type, WRITE_BACK_NO_ALLOCATE) \ + SEP FUNC(axi_mem_encoding_type, WRITE_BACK_READ_ALLOCATE) \ + SEP FUNC(axi_mem_encoding_type, WRITE_BACK_WRITE_ALLOCATE) \ + SEP FUNC(axi_mem_encoding_type, WRITE_BACK_READ_AND_WRITE_ALLOCATE) \ + SEP FUNC(axi_mem_encoding_type, RESERVED_12) \ + SEP FUNC(axi_mem_encoding_type, RESERVED_13) \ + SEP FUNC(axi_mem_encoding_type, RESERVED_14) \ + SEP FUNC(axi_mem_encoding_type, RESERVED_15) + +#define EXPAND_CLIP_RANGE(FUNC, SEP) \ + FUNC(clip_range, OFM_PRECISION) \ + SEP FUNC(clip_range, FORCE_UINT8) SEP FUNC(clip_range, FORCE_INT8) SEP FUNC(clip_range, FORCE_INT16) + +#define EXPAND_CMD0(FUNC, SEP) \ + FUNC(cmd0, NPU_OP_STOP) \ + SEP FUNC(cmd0, NPU_OP_IRQ) SEP FUNC(cmd0, NPU_OP_CONV) SEP FUNC(cmd0, NPU_OP_DEPTHWISE) SEP FUNC( \ + cmd0, NPU_OP_POOL) SEP FUNC(cmd0, NPU_OP_ELEMENTWISE) SEP FUNC(cmd0, NPU_OP_DMA_START) \ + SEP FUNC(cmd0, NPU_OP_DMA_WAIT) SEP FUNC(cmd0, NPU_OP_KERNEL_WAIT) SEP FUNC(cmd0, NPU_OP_PMU_MASK) SEP FUNC( \ + cmd0, NPU_SET_IFM_PAD_TOP) SEP FUNC(cmd0, NPU_SET_IFM_PAD_LEFT) SEP FUNC(cmd0, NPU_SET_IFM_PAD_RIGHT) \ + SEP FUNC(cmd0, NPU_SET_IFM_PAD_BOTTOM) SEP FUNC(cmd0, 
NPU_SET_IFM_DEPTH_M1) SEP FUNC( \ + cmd0, NPU_SET_IFM_PRECISION) SEP FUNC(cmd0, NPU_SET_IFM_UPSCALE) \ + SEP FUNC(cmd0, NPU_SET_IFM_ZERO_POINT) SEP FUNC(cmd0, NPU_SET_IFM_WIDTH0_M1) SEP FUNC( \ + cmd0, NPU_SET_IFM_HEIGHT0_M1) SEP FUNC(cmd0, NPU_SET_IFM_HEIGHT1_M1) SEP FUNC(cmd0, \ + NPU_SET_IFM_IB_END) \ + SEP FUNC(cmd0, NPU_SET_IFM_REGION) SEP FUNC(cmd0, NPU_SET_OFM_WIDTH_M1) SEP FUNC( \ + cmd0, NPU_SET_OFM_HEIGHT_M1) SEP FUNC(cmd0, NPU_SET_OFM_DEPTH_M1) \ + SEP FUNC(cmd0, NPU_SET_OFM_PRECISION) SEP FUNC(cmd0, NPU_SET_OFM_BLK_WIDTH_M1) SEP FUNC( \ + cmd0, NPU_SET_OFM_BLK_HEIGHT_M1) SEP FUNC(cmd0, NPU_SET_OFM_BLK_DEPTH_M1) \ + SEP FUNC(cmd0, NPU_SET_OFM_ZERO_POINT) SEP FUNC(cmd0, NPU_SET_OFM_WIDTH0_M1) SEP FUNC( \ + cmd0, NPU_SET_OFM_HEIGHT0_M1) SEP FUNC(cmd0, NPU_SET_OFM_HEIGHT1_M1) \ + SEP FUNC(cmd0, NPU_SET_OFM_REGION) SEP FUNC(cmd0, NPU_SET_KERNEL_WIDTH_M1) SEP FUNC( \ + cmd0, NPU_SET_KERNEL_HEIGHT_M1) SEP FUNC(cmd0, NPU_SET_KERNEL_STRIDE) \ + SEP FUNC(cmd0, NPU_SET_PARALLEL_MODE) SEP FUNC(cmd0, NPU_SET_ACC_FORMAT) SEP FUNC( \ + cmd0, NPU_SET_ACTIVATION) SEP FUNC(cmd0, NPU_SET_ACTIVATION_MIN) \ + SEP FUNC(cmd0, NPU_SET_ACTIVATION_MAX) SEP FUNC(cmd0, NPU_SET_WEIGHT_REGION) \ + SEP FUNC(cmd0, NPU_SET_SCALE_REGION) SEP FUNC(cmd0, NPU_SET_AB_START) \ + SEP FUNC(cmd0, \ + NPU_SET_BLOCKDEP) SEP FUNC(cmd0, NPU_SET_DMA0_SRC_REGION) \ + SEP FUNC(cmd0, NPU_SET_DMA0_DST_REGION) SEP FUNC( \ + cmd0, NPU_SET_DMA0_SIZE0) SEP FUNC(cmd0, NPU_SET_DMA0_SIZE1) \ + SEP FUNC(cmd0, NPU_SET_IFM2_BROADCAST) \ + SEP FUNC(cmd0, NPU_SET_IFM2_SCALAR) \ + SEP FUNC(cmd0, NPU_SET_IFM2_PRECISION) SEP FUNC( \ + cmd0, NPU_SET_IFM2_ZERO_POINT) \ + SEP FUNC(cmd0, NPU_SET_IFM2_WIDTH0_M1) SEP FUNC( \ + cmd0, NPU_SET_IFM2_HEIGHT0_M1) \ + SEP FUNC(cmd0, NPU_SET_IFM2_HEIGHT1_M1) \ + SEP FUNC(cmd0, NPU_SET_IFM2_IB_START) \ + SEP FUNC(cmd0, NPU_SET_IFM2_REGION) + +#define EXPAND_CMD1(FUNC, SEP) \ + FUNC(cmd1, NPU_SET_IFM_BASE0) \ + SEP FUNC(cmd1, NPU_SET_IFM_BASE1) SEP FUNC(cmd1, NPU_SET_IFM_BASE2) SEP FUNC(cmd1, NPU_SET_IFM_BASE3) \ + SEP FUNC(cmd1, NPU_SET_IFM_STRIDE_X) SEP FUNC(cmd1, NPU_SET_IFM_STRIDE_Y) SEP FUNC(cmd1, NPU_SET_IFM_STRIDE_C) \ + SEP FUNC(cmd1, NPU_SET_OFM_BASE0) SEP FUNC(cmd1, NPU_SET_OFM_BASE1) SEP FUNC(cmd1, NPU_SET_OFM_BASE2) \ + SEP FUNC(cmd1, NPU_SET_OFM_BASE3) SEP FUNC(cmd1, NPU_SET_OFM_STRIDE_X) \ + SEP FUNC(cmd1, NPU_SET_OFM_STRIDE_Y) SEP FUNC(cmd1, NPU_SET_OFM_STRIDE_C) \ + SEP FUNC(cmd1, NPU_SET_WEIGHT_BASE) SEP FUNC(cmd1, NPU_SET_WEIGHT_LENGTH) \ + SEP FUNC(cmd1, NPU_SET_SCALE_BASE) SEP FUNC(cmd1, NPU_SET_SCALE_LENGTH) \ + SEP FUNC(cmd1, NPU_SET_OFM_SCALE) SEP FUNC(cmd1, NPU_SET_OPA_SCALE) \ + SEP FUNC(cmd1, NPU_SET_OPB_SCALE) SEP FUNC(cmd1, NPU_SET_DMA0_SRC) \ + SEP FUNC(cmd1, NPU_SET_DMA0_DST) SEP FUNC(cmd1, NPU_SET_DMA0_LEN) SEP FUNC( \ + cmd1, NPU_SET_DMA0_SKIP0) SEP FUNC(cmd1, NPU_SET_DMA0_SKIP1) \ + SEP FUNC(cmd1, NPU_SET_IFM2_BASE0) SEP FUNC(cmd1, NPU_SET_IFM2_BASE1) \ + SEP FUNC(cmd1, NPU_SET_IFM2_BASE2) SEP FUNC(cmd1, NPU_SET_IFM2_BASE3) \ + SEP FUNC(cmd1, NPU_SET_IFM2_STRIDE_X) \ + SEP FUNC(cmd1, NPU_SET_IFM2_STRIDE_Y) \ + SEP FUNC(cmd1, NPU_SET_IFM2_STRIDE_C) \ + SEP FUNC(cmd1, NPU_SET_WEIGHT1_BASE) \ + SEP FUNC(cmd1, NPU_SET_WEIGHT1_LENGTH) \ + SEP FUNC(cmd1, NPU_SET_SCALE1_BASE) \ + SEP FUNC(cmd1, NPU_SET_SCALE1_LENGTH) + +#define EXPAND_DATA_FORMAT(FUNC, SEP) FUNC(data_format, NHWC) SEP FUNC(data_format, NHCWB16) + +#define EXPAND_ELEMENTWISE_MODE(FUNC, SEP) \ + FUNC(elementwise_mode, MUL) \ + SEP FUNC(elementwise_mode, ADD) SEP FUNC(elementwise_mode, SUB) SEP FUNC(elementwise_mode, 
MIN) \ + SEP FUNC(elementwise_mode, MAX) SEP FUNC(elementwise_mode, LRELU) SEP FUNC(elementwise_mode, ABS) \ + SEP FUNC(elementwise_mode, CLZ) SEP FUNC(elementwise_mode, SHR) SEP FUNC(elementwise_mode, SHL) + +#define EXPAND_IFM_PRECISION(FUNC, SEP) \ + FUNC(ifm_precision, U8) \ + SEP FUNC(ifm_precision, S8) SEP FUNC(ifm_precision, U16) SEP FUNC(ifm_precision, S16) SEP FUNC(ifm_precision, S32) + +#define EXPAND_IFM_SCALE_MODE(FUNC, SEP) \ + FUNC(ifm_scale_mode, SCALE_16BIT) \ + SEP FUNC(ifm_scale_mode, SCALE_OPA_32BIT) SEP FUNC(ifm_scale_mode, SCALE_OPB_32BIT) + +#define EXPAND_MACS_PER_CC(FUNC, SEP) \ + FUNC(macs_per_cc, MACS_PER_CC_IS_5) \ + SEP FUNC(macs_per_cc, MACS_PER_CC_IS_6) SEP FUNC(macs_per_cc, MACS_PER_CC_IS_7) \ + SEP FUNC(macs_per_cc, MACS_PER_CC_IS_8) + +#define EXPAND_MEMORY_TYPE(FUNC, SEP) \ + FUNC(memory_type, AXI0_OUTSTANDING_COUNTER0) \ + SEP FUNC(memory_type, AXI0_OUTSTANDING_COUNTER1) SEP FUNC(memory_type, AXI1_OUTSTANDING_COUNTER2) \ + SEP FUNC(memory_type, AXI1_OUTSTANDING_COUNTER3) + +#define EXPAND_OFM_PRECISION(FUNC, SEP) \ + FUNC(ofm_precision, U8) \ + SEP FUNC(ofm_precision, S8) SEP FUNC(ofm_precision, U16) SEP FUNC(ofm_precision, S16) SEP FUNC(ofm_precision, S32) + +#define EXPAND_PMU_EVENT_TYPE(FUNC, SEP) \ + FUNC(pmu_event_type, NO_EVENT) \ + SEP FUNC(pmu_event_type, CYCLE) SEP FUNC(pmu_event_type, NPU_IDLE) SEP FUNC( \ + pmu_event_type, CC_STALLED_ON_BLOCKDEP) SEP FUNC(pmu_event_type, \ + CC_STALLED_ON_SHRAM_RECONFIG) SEP FUNC(pmu_event_type, \ + NPU_ACTIVE) \ + SEP FUNC(pmu_event_type, MAC_ACTIVE) SEP FUNC(pmu_event_type, MAC_ACTIVE_8BIT) SEP FUNC( \ + pmu_event_type, MAC_ACTIVE_16BIT) SEP FUNC(pmu_event_type, MAC_DPU_ACTIVE) SEP FUNC(pmu_event_type, \ + MAC_STALLED_BY_WD_ACC) \ + SEP FUNC(pmu_event_type, MAC_STALLED_BY_WD) SEP FUNC(pmu_event_type, MAC_STALLED_BY_ACC) SEP FUNC( \ + pmu_event_type, MAC_STALLED_BY_IB) SEP FUNC(pmu_event_type, \ + MAC_ACTIVE_32BIT) SEP FUNC(pmu_event_type, \ + MAC_STALLED_BY_INT_W) \ + SEP FUNC(pmu_event_type, MAC_STALLED_BY_INT_ACC) SEP FUNC(pmu_event_type, AO_ACTIVE) SEP FUNC( \ + pmu_event_type, AO_ACTIVE_8BIT) SEP FUNC(pmu_event_type, \ + AO_ACTIVE_16BIT) SEP FUNC(pmu_event_type, \ + AO_STALLED_BY_OFMP_OB) \ + SEP FUNC(pmu_event_type, AO_STALLED_BY_OFMP) SEP FUNC(pmu_event_type, AO_STALLED_BY_OB) SEP FUNC( \ + pmu_event_type, \ + AO_STALLED_BY_ACC_IB) SEP FUNC(pmu_event_type, \ + AO_STALLED_BY_ACC) SEP FUNC(pmu_event_type, \ + AO_STALLED_BY_IB) SEP FUNC(pmu_event_type, \ + WD_ACTIVE) SEP \ + FUNC(pmu_event_type, WD_STALLED) SEP FUNC(pmu_event_type, WD_STALLED_BY_WS) SEP FUNC( \ + pmu_event_type, \ + WD_STALLED_BY_WD_BUF) SEP \ + FUNC(pmu_event_type, WD_PARSE_ACTIVE) SEP FUNC(pmu_event_type, WD_PARSE_STALLED) SEP FUNC( \ + pmu_event_type, \ + WD_PARSE_STALLED_IN) SEP FUNC(pmu_event_type, \ + WD_PARSE_STALLED_OUT) SEP \ + FUNC(pmu_event_type, WD_TRANS_WS) SEP FUNC(pmu_event_type, WD_TRANS_WB) SEP FUNC( \ + pmu_event_type, \ + WD_TRANS_DW0) SEP FUNC(pmu_event_type, \ + WD_TRANS_DW1) SEP FUNC(pmu_event_type, \ + AXI0_RD_TRANS_ACCEPTED) SEP \ + FUNC(pmu_event_type, AXI0_RD_TRANS_COMPLETED) SEP FUNC( \ + pmu_event_type, \ + AXI0_RD_DATA_BEAT_RECEIVED) SEP FUNC(pmu_event_type, AXI0_RD_TRAN_REQ_STALLED) \ + SEP FUNC(pmu_event_type, \ + AXI0_WR_TRANS_ACCEPTED) SEP FUNC(pmu_event_type, \ + AXI0_WR_TRANS_COMPLETED_M) \ + SEP FUNC(pmu_event_type, AXI0_WR_TRANS_COMPLETED_S) SEP FUNC( \ + pmu_event_type, \ + AXI0_WR_DATA_BEAT_WRITTEN) \ + SEP FUNC(pmu_event_type, AXI0_WR_TRAN_REQ_STALLED) SEP FUNC( \ + pmu_event_type, \ + 
AXI0_WR_DATA_BEAT_STALLED) SEP \ + FUNC(pmu_event_type, AXI0_ENABLED_CYCLES) SEP FUNC( \ + pmu_event_type, \ + AXI0_RD_STALL_LIMIT) SEP FUNC(pmu_event_type, \ + AXI0_WR_STALL_LIMIT) SEP \ + FUNC(pmu_event_type, AXI1_RD_TRANS_ACCEPTED) SEP FUNC( \ + pmu_event_type, \ + AXI1_RD_TRANS_COMPLETED) SEP FUNC(pmu_event_type, \ + AXI1_RD_DATA_BEAT_RECEIVED) SEP \ + FUNC(pmu_event_type, AXI1_RD_TRAN_REQ_STALLED) SEP FUNC( \ + pmu_event_type, \ + AXI1_WR_TRANS_ACCEPTED) SEP \ + FUNC(pmu_event_type, AXI1_WR_TRANS_COMPLETED_M) SEP FUNC( \ + pmu_event_type, \ + AXI1_WR_TRANS_COMPLETED_S) SEP \ + FUNC(pmu_event_type, AXI1_WR_DATA_BEAT_WRITTEN) SEP FUNC( \ + pmu_event_type, \ + AXI1_WR_TRAN_REQ_STALLED) SEP \ + FUNC(pmu_event_type, AXI1_WR_DATA_BEAT_STALLED) SEP FUNC( \ + pmu_event_type, \ + AXI1_ENABLED_CYCLES) SEP FUNC(pmu_event_type, \ + AXI1_RD_STALL_LIMIT) SEP \ + FUNC(pmu_event_type, AXI1_WR_STALL_LIMIT) SEP FUNC( \ + pmu_event_type, \ + AXI_LATENCY_ANY) SEP FUNC(pmu_event_type, \ + AXI_LATENCY_32) SEP \ + FUNC(pmu_event_type, AXI_LATENCY_64) SEP FUNC( \ + pmu_event_type, \ + AXI_LATENCY_128) SEP \ + FUNC(pmu_event_type, \ + AXI_LATENCY_256) SEP \ + FUNC(pmu_event_type, \ + AXI_LATENCY_512) SEP \ + FUNC(pmu_event_type, \ + AXI_LATENCY_1024) SEP \ + FUNC(pmu_event_type, \ + ECC_DMA) SEP \ + FUNC( \ + pmu_event_type, \ + ECC_SB0) SEP \ + FUNC( \ + pmu_event_type, \ + ECC_SB1) + +#define EXPAND_POOLING_MODE(FUNC, SEP) \ + FUNC(pooling_mode, MAX) SEP FUNC(pooling_mode, AVERAGE) SEP FUNC(pooling_mode, REDUCE_SUM) + +#define EXPAND_PRIVILEGE_LEVEL(FUNC, SEP) FUNC(privilege_level, USER) SEP FUNC(privilege_level, PRIVILEGED) + +#define EXPAND_RESAMPLING_MODE(FUNC, SEP) \ + FUNC(resampling_mode, NONE) SEP FUNC(resampling_mode, NEAREST) SEP FUNC(resampling_mode, TRANSPOSE) + +#define EXPAND_ROUNDING(FUNC, SEP) FUNC(rounding, TFL) SEP FUNC(rounding, TRUNCATE) SEP FUNC(rounding, NATURAL) + +#define EXPAND_SECURITY_LEVEL(FUNC, SEP) FUNC(security_level, SECURE) SEP FUNC(security_level, NON_SECURE) + +#define EXPAND_SHRAM_SIZE(FUNC, SEP) \ + FUNC(shram_size, SHRAM_96KB) \ + SEP FUNC(shram_size, SHRAM_48KB) SEP FUNC(shram_size, SHRAM_24KB) SEP FUNC(shram_size, SHRAM_16KB) + +#define EXPAND_STATE(FUNC, SEP) FUNC(state, STOPPED) SEP FUNC(state, RUNNING) + +#define EXPAND_STRIDE_MODE(FUNC, SEP) \ + FUNC(stride_mode, STRIDE_MODE_1D) SEP FUNC(stride_mode, STRIDE_MODE_2D) SEP FUNC(stride_mode, STRIDE_MODE_3D) +#endif /* ETHOSU55_INTERFACE_H */ diff --git a/edgeimpulse/edge-impulse-sdk/porting/ethos-core-driver/src/ethosu_common.h b/edgeimpulse/edge-impulse-sdk/porting/ethos-core-driver/src/ethosu_common.h new file mode 100644 index 0000000..0402411 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/ethos-core-driver/src/ethosu_common.h @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2019-2021 Arm Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#ifndef ETHOSU_COMMON_H
+#define ETHOSU_COMMON_H
+
+/******************************************************************************
+ * Includes
+ ******************************************************************************/
+
+#include "ethosu55_interface.h"
+
+#include <stdio.h>  /* fprintf()/fflush() used by the LOG_* macros */
+#include <stdlib.h> /* exit() used by the fatal LOG_* macros */
+
+/******************************************************************************
+ * Defines
+ ******************************************************************************/
+
+// Log severity levels
+#define ETHOSU_LOG_EMERG 0
+#define ETHOSU_LOG_ALERT 1
+#define ETHOSU_LOG_CRIT 2
+#define ETHOSU_LOG_ERR 3
+#define ETHOSU_LOG_WARN 4
+#define ETHOSU_LOG_NOTICE 5
+#define ETHOSU_LOG_INFO 6
+#define ETHOSU_LOG_DEBUG 7
+
+// Define default log severity
+#ifndef ETHOSU_LOG_SEVERITY
+#define ETHOSU_LOG_SEVERITY ETHOSU_LOG_DEBUG
+#endif
+
+#if ETHOSU_LOG_SEVERITY >= ETHOSU_LOG_EMERG
+#define LOG_EMERG(format, ...) \
+    fprintf(stderr, format, ##__VA_ARGS__); \
+    fflush(stderr); \
+    exit(-1)
+#else
+#define LOG_EMERG(format, ...)
+#endif
+
+#if ETHOSU_LOG_SEVERITY >= ETHOSU_LOG_ALERT
+#define LOG_ALERT(format, ...) \
+    fprintf(stderr, format, ##__VA_ARGS__); \
+    fflush(stderr); \
+    exit(-1)
+#else
+#define LOG_ALERT(format, ...)
+#endif
+
+#if ETHOSU_LOG_SEVERITY >= ETHOSU_LOG_CRIT
+#define LOG_CRIT(format, ...) \
+    fprintf(stderr, format, ##__VA_ARGS__); \
+    fflush(stderr); \
+    exit(-1)
+#else
+#define LOG_CRIT(format, ...)
+#endif
+
+#if ETHOSU_LOG_SEVERITY >= ETHOSU_LOG_ERR
+#define LOG_ERR(format, ...) \
+    fprintf(stderr, format, ##__VA_ARGS__); \
+    fflush(stderr)
+#else
+#define LOG_ERR(format, ...)
+#endif
+
+#if ETHOSU_LOG_SEVERITY >= ETHOSU_LOG_WARN
+#define LOG_WARN(format, ...) fprintf(stdout, format, ##__VA_ARGS__)
+#else
+#define LOG_WARN(format, ...)
+#endif
+
+#if ETHOSU_LOG_SEVERITY >= ETHOSU_LOG_NOTICE
+#define LOG_NOTICE(format, ...) fprintf(stdout, format, ##__VA_ARGS__)
+#else
+#define LOG_NOTICE(format, ...)
+#endif
+
+#if ETHOSU_LOG_SEVERITY >= ETHOSU_LOG_INFO
+#define LOG_INFO(format, ...) fprintf(stdout, format, ##__VA_ARGS__)
+#else
+#define LOG_INFO(format, ...)
+#endif
+
+#if ETHOSU_LOG_SEVERITY >= ETHOSU_LOG_DEBUG
+#define LOG_DEBUG(format, ...) fprintf(stdout, format, ##__VA_ARGS__)
+#else
+#define LOG_DEBUG(format, ...)
+#endif
+
+#define UNUSED(x) ((void)x)
+
+#define VER_STR(X) VNUM_STR(X)
+#define VNUM_STR(X) #X
+
+#define MASK_0_31_BITS (0xFFFFFFFF)
+#define MASK_32_47_BITS (0xFFFF00000000)
+
+/******************************************************************************
+ * Inline functions
+ ******************************************************************************/
+
+static const __attribute__((section("npu_driver_version"))) char driver_version_str[] = VER_STR(
+    ETHOSU_DRIVER_VERSION_MAJOR) "." VER_STR(ETHOSU_DRIVER_VERSION_MINOR) "." VER_STR(ETHOSU_DRIVER_VERSION_PATCH);
+
+static const __attribute__((section("npu_driver_arch_version"))) char driver_arch_version_str[] =
+    VER_STR(NNX_ARCH_VERSION_MAJOR) "." VER_STR(NNX_ARCH_VERSION_MINOR) "." VER_STR(NNX_ARCH_VERSION_PATCH);
+
+#endif // ETHOSU_COMMON_H
diff --git a/edgeimpulse/edge-impulse-sdk/porting/ethos-core-driver/src/ethosu_config.h b/edgeimpulse/edge-impulse-sdk/porting/ethos-core-driver/src/ethosu_config.h
new file mode 100644
index 0000000..a822e93
--- /dev/null
+++ b/edgeimpulse/edge-impulse-sdk/porting/ethos-core-driver/src/ethosu_config.h
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2019-2021 Arm Limited. All rights reserved.
+ * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ETHOSU_CONFIG_H +#define ETHOSU_CONFIG_H + +/* Set default values if not manually overriden */ + +#ifndef NPU_QCONFIG +#define NPU_QCONFIG 2 +#endif + +#ifndef NPU_REGIONCFG_0 +#define NPU_REGIONCFG_0 3 +#endif + +#ifndef NPU_REGIONCFG_1 +#define NPU_REGIONCFG_1 0 +#endif + +#ifndef NPU_REGIONCFG_2 +#define NPU_REGIONCFG_2 1 +#endif + +#ifndef NPU_REGIONCFG_3 +#define NPU_REGIONCFG_3 1 +#endif + +#ifndef NPU_REGIONCFG_4 +#define NPU_REGIONCFG_4 1 +#endif + +#ifndef NPU_REGIONCFG_5 +#define NPU_REGIONCFG_5 1 +#endif + +#ifndef NPU_REGIONCFG_6 +#define NPU_REGIONCFG_6 1 +#endif + +#ifndef NPU_REGIONCFG_7 +#define NPU_REGIONCFG_7 1 +#endif + +#ifndef AXI_LIMIT0_MAX_BEATS_BYTES +#define AXI_LIMIT0_MAX_BEATS_BYTES 0x0 +#endif +#ifndef AXI_LIMIT0_MEM_TYPE +#define AXI_LIMIT0_MEM_TYPE 0x0 +#endif +#ifndef AXI_LIMIT0_MAX_OUTSTANDING_READS +#define AXI_LIMIT0_MAX_OUTSTANDING_READS 32 +#endif +#ifndef AXI_LIMIT0_MAX_OUTSTANDING_WRITES +#define AXI_LIMIT0_MAX_OUTSTANDING_WRITES 16 +#endif + +#ifndef AXI_LIMIT1_MAX_BEATS_BYTES +#define AXI_LIMIT1_MAX_BEATS_BYTES 0x0 +#endif +#ifndef AXI_LIMIT1_MEM_TYPE +#define AXI_LIMIT1_MEM_TYPE 0x0 +#endif +#ifndef AXI_LIMIT1_MAX_OUTSTANDING_READS +#define AXI_LIMIT1_MAX_OUTSTANDING_READS 32 +#endif +#ifndef AXI_LIMIT1_MAX_OUTSTANDING_WRITES +#define AXI_LIMIT1_MAX_OUTSTANDING_WRITES 16 +#endif + +#ifndef AXI_LIMIT2_MAX_BEATS_BYTES +#define AXI_LIMIT2_MAX_BEATS_BYTES 0x0 +#endif +#ifndef AXI_LIMIT2_MEM_TYPE +#define AXI_LIMIT2_MEM_TYPE 0x0 +#endif +#ifndef AXI_LIMIT2_MAX_OUTSTANDING_READS +#define AXI_LIMIT2_MAX_OUTSTANDING_READS 32 +#endif +#ifndef AXI_LIMIT2_MAX_OUTSTANDING_WRITES +#define AXI_LIMIT2_MAX_OUTSTANDING_WRITES 16 +#endif +#ifndef AXI_LIMIT3_MAX_BEATS_BYTES +#define AXI_LIMIT3_MAX_BEATS_BYTES 0x0 +#endif +#ifndef AXI_LIMIT3_MEM_TYPE +#define AXI_LIMIT3_MEM_TYPE 0x0 +#endif +#ifndef AXI_LIMIT3_MAX_OUTSTANDING_READS +#define AXI_LIMIT3_MAX_OUTSTANDING_READS 32 +#endif +#ifndef AXI_LIMIT3_MAX_OUTSTANDING_WRITES +#define AXI_LIMIT3_MAX_OUTSTANDING_WRITES 16 +#endif + +/* + * Address offset between the CPU and the NPU. The offset is + * applied to the QBASE and BASEP registers. + */ +#ifndef BASE_POINTER_OFFSET +#define BASE_POINTER_OFFSET 0 +#endif + +#endif /* #ifndef ETHOSU_CONFIG_H */ diff --git a/edgeimpulse/edge-impulse-sdk/porting/ethos-core-driver/src/ethosu_device.c b/edgeimpulse/edge-impulse-sdk/porting/ethos-core-driver/src/ethosu_device.c new file mode 100644 index 0000000..8c17337 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/ethos-core-driver/src/ethosu_device.c @@ -0,0 +1,716 @@ +/* + * Copyright (c) 2019-2021 Arm Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#if EI_ETHOS + +#include "ethosu_device.h" +#include "ethosu_common.h" +#include "ethosu_config.h" + +#include +#include +#include + +#define BASEP_OFFSET 4 +#define REG_OFFSET 4 +#define BYTES_1KB 1024 + +#define ADDRESS_BITS 48 +#define ADDRESS_MASK ((1ull << ADDRESS_BITS) - 1) + +#if defined(ARM_NPU_STUB) +static uint32_t stream_length = 0; +#endif + +enum ethosu_error_codes ethosu_dev_init(struct ethosu_device *dev, + const void *base_address, + uint32_t secure_enable, + uint32_t privilege_enable) +{ +#if !defined(ARM_NPU_STUB) + dev->base_address = (volatile uintptr_t)base_address; + dev->secure = secure_enable; + dev->privileged = privilege_enable; + + ethosu_save_pmu_config(dev); +#else + UNUSED(dev); + UNUSED(base_address); +#endif + return ETHOSU_SUCCESS; +} + +enum ethosu_error_codes ethosu_get_id(struct ethosu_device *dev, struct ethosu_id *id) +{ + struct id_r _id; + +#if !defined(ARM_NPU_STUB) + _id.word = ethosu_read_reg(dev, NPU_REG_ID); +#else + UNUSED(dev); + + _id.word = 0; + _id.arch_patch_rev = NNX_ARCH_VERSION_PATCH; + _id.arch_minor_rev = NNX_ARCH_VERSION_MINOR; + _id.arch_major_rev = NNX_ARCH_VERSION_MAJOR; +#endif + + id->version_status = _id.version_status; + id->version_minor = _id.version_minor; + id->version_major = _id.version_major; + id->product_major = _id.product_major; + id->arch_patch_rev = _id.arch_patch_rev; + id->arch_minor_rev = _id.arch_minor_rev; + id->arch_major_rev = _id.arch_major_rev; + + return ETHOSU_SUCCESS; +} + +enum ethosu_error_codes ethosu_get_config(struct ethosu_device *dev, struct ethosu_config *config) +{ + struct config_r cfg = {.word = 0}; + +#if !defined(ARM_NPU_STUB) + cfg.word = ethosu_read_reg(dev, NPU_REG_CONFIG); +#else + UNUSED(dev); +#endif + + config->macs_per_cc = cfg.macs_per_cc; + config->cmd_stream_version = cfg.cmd_stream_version; + config->shram_size = cfg.shram_size; + config->custom_dma = cfg.custom_dma; + + return ETHOSU_SUCCESS; +} + +// Added by Edge Impulse +// Test for memory in DTCM. If so, use global address +uint64_t alias_memory_if_needed(uint64_t addr) { +#if EI_ALIF_ADDR_TRANSLATION + if ((addr & 0xFF000000) == 0x20000000) { +#if EI_CONFIG_ETHOS_U55_128 // means HE core + addr = 0x60800000 | ( addr & 0x007FFFFF ); +#else // assume HP core + addr = 0x50800000 | ( addr & 0x007FFFFF ); +#endif + } +#endif + return addr; +} + +enum ethosu_error_codes ethosu_run_command_stream(struct ethosu_device *dev, + const uint8_t *cmd_stream_ptr, + uint32_t cms_length, + const uint64_t *base_addr, + int num_base_addr) +{ + enum ethosu_error_codes ret_code = ETHOSU_SUCCESS; + +#if !defined(ARM_NPU_STUB) + assert(num_base_addr <= ETHOSU_DRIVER_BASEP_INDEXES); + + uint64_t qbase = (uintptr_t)cmd_stream_ptr + BASE_POINTER_OFFSET; + + // Added by Edge Impulse + // Test for memory in DTCM. 
If so, use global address + qbase = alias_memory_if_needed(qbase); + + assert(qbase <= ADDRESS_MASK); + LOG_DEBUG("QBASE=0x%016llx, QSIZE=%u, base_pointer_offset=0x%08x\n", qbase, cms_length, BASE_POINTER_OFFSET); + ethosu_write_reg(dev, NPU_REG_QBASE0, qbase & 0xffffffff); + ethosu_write_reg(dev, NPU_REG_QBASE1, qbase >> 32); + ethosu_write_reg(dev, NPU_REG_QSIZE, cms_length); + + for (int i = 0; i < num_base_addr; i++) + { + uint64_t addr = base_addr[i] + BASE_POINTER_OFFSET; + assert(addr <= ADDRESS_MASK); + LOG_DEBUG("BASEP%d=0x%016llx\n", i, addr); + + // Added by Edge Impulse + // Test for memory in DTCM. If so, use global address + addr = alias_memory_if_needed(addr); + + ethosu_write_reg(dev, NPU_REG_BASEP0 + (2 * i) * BASEP_OFFSET, addr & 0xffffffff); + ethosu_write_reg(dev, NPU_REG_BASEP0 + (2 * i + 1) * BASEP_OFFSET, addr >> 32); + } + + ret_code = ethosu_set_command_run(dev); +#else + // NPU stubbed + UNUSED(dev); + stream_length = cms_length; + UNUSED(cmd_stream_ptr); + UNUSED(base_addr); + assert(num_base_addr < ETHOSU_DRIVER_BASEP_INDEXES); +#if defined(NDEBUG) + UNUSED(num_base_addr); +#endif +#endif + + return ret_code; +} + +enum ethosu_error_codes ethosu_is_irq_raised(struct ethosu_device *dev, uint8_t *irq_raised) +{ +#if !defined(ARM_NPU_STUB) + struct status_r status; + status.word = ethosu_read_reg(dev, NPU_REG_STATUS); + if (status.irq_raised == 1) + { + *irq_raised = 1; + } + else + { + *irq_raised = 0; + } +#else + UNUSED(dev); + *irq_raised = 1; +#endif + return ETHOSU_SUCCESS; +} + +enum ethosu_error_codes ethosu_clear_irq_status(struct ethosu_device *dev) +{ +#if !defined(ARM_NPU_STUB) + struct cmd_r oldcmd; + oldcmd.word = ethosu_read_reg(dev, NPU_REG_CMD); + struct cmd_r cmd; + + cmd.word = 0; + cmd.clear_irq = 1; + cmd.clock_q_enable = oldcmd.clock_q_enable; + cmd.power_q_enable = oldcmd.power_q_enable; + ethosu_write_reg(dev, NPU_REG_CMD, cmd.word); + LOG_DEBUG("CMD=0x%08x\n", cmd.word); +#else + UNUSED(dev); +#endif + return ETHOSU_SUCCESS; +} + +enum ethosu_error_codes ethosu_soft_reset(struct ethosu_device *dev) +{ + enum ethosu_error_codes return_code = ETHOSU_SUCCESS; +#if !defined(ARM_NPU_STUB) + struct reset_r reset; + struct prot_r prot; + + reset.word = 0; + reset.pending_CPL = dev->privileged ? PRIVILEGE_LEVEL_PRIVILEGED : PRIVILEGE_LEVEL_USER; + reset.pending_CSL = dev->secure ? SECURITY_LEVEL_SECURE : SECURITY_LEVEL_NON_SECURE; + + // Reset and set security level + LOG_INFO("Soft reset NPU\n"); + ethosu_write_reg(dev, NPU_REG_RESET, reset.word); + + // Wait for reset to complete + return_code = ethosu_wait_for_reset(dev); + if (return_code != ETHOSU_SUCCESS) + { + LOG_ERR("Soft reset timed out\n"); + return return_code; + } + + // Verify that NPU has switched security state and privilege level + prot.word = ethosu_read_reg(dev, NPU_REG_PROT); + if (prot.active_CPL != reset.pending_CPL || prot.active_CSL != reset.pending_CSL) + { + LOG_ERR("Failed to switch security state and privilege level\n"); + // Register access not permitted + return ETHOSU_GENERIC_FAILURE; + } + + // Save the prot register + dev->proto = ethosu_read_reg(dev, NPU_REG_PROT); + + // Soft reset will clear the PMU configuration and counters. The shadow PMU counters + // are cleared by saving the PMU counters to ram, which will read back zeros. + // The PMU configuration will be restored in the invoke function after power save + // has been disabled. 
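+    // The shadow copies live in dev->pmccntr and dev->pmu_evcntr, so after the call
+    // below they hold the post-reset (zero) values; anything that needs the old
+    // counts must read them out before triggering the soft reset.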
+ ethosu_save_pmu_counters(dev); +#else + UNUSED(dev); +#endif + + return return_code; +} + +enum ethosu_error_codes ethosu_wait_for_reset(struct ethosu_device *dev) +{ +#if !defined(ARM_NPU_STUB) + struct status_r status; + + // Wait until reset status indicates that reset has been completed + for (int i = 0; i < 100000; i++) + { + status.word = ethosu_read_reg(dev, NPU_REG_STATUS); + if (0 == status.reset_status) + { + break; + } + } + + if (1 == status.reset_status) + { + return ETHOSU_GENERIC_FAILURE; + } +#else + UNUSED(dev); +#endif + + return ETHOSU_SUCCESS; +} + +enum ethosu_error_codes ethosu_read_apb_reg(struct ethosu_device *dev, + uint32_t start_address, + uint16_t num_reg, + uint32_t *reg) +{ +#if !defined(ARM_NPU_STUB) + uint32_t address = start_address; + + assert((start_address + num_reg) < ID_REGISTERS_SIZE); + + for (int i = 0; i < num_reg; i++) + { + reg[i] = ethosu_read_reg(dev, address); + address += REG_OFFSET; + } +#else + // NPU stubbed + UNUSED(dev); + UNUSED(start_address); + UNUSED(num_reg); + UNUSED(reg); +#endif + + return ETHOSU_SUCCESS; +} + +enum ethosu_error_codes ethosu_set_qconfig(struct ethosu_device *dev, enum ethosu_memory_type memory_type) +{ + if (memory_type > ETHOSU_AXI1_OUTSTANDING_COUNTER3) + { + return ETHOSU_INVALID_PARAM; + } +#if !defined(ARM_NPU_STUB) + ethosu_write_reg(dev, NPU_REG_QCONFIG, memory_type); + LOG_DEBUG("QCONFIG=0x%08x\n", memory_type); +#else + // NPU stubbed + UNUSED(dev); + UNUSED(memory_type); +#endif + return ETHOSU_SUCCESS; +} + +enum ethosu_error_codes ethosu_set_regioncfg(struct ethosu_device *dev, + uint8_t region, + enum ethosu_memory_type memory_type) +{ + if (region > 7) + { + return ETHOSU_INVALID_PARAM; + } +#if !defined(ARM_NPU_STUB) + struct regioncfg_r regioncfg; + regioncfg.word = ethosu_read_reg(dev, NPU_REG_REGIONCFG); + regioncfg.word &= ~(0x3 << (2 * region)); + regioncfg.word |= (memory_type & 0x3) << (2 * region); + ethosu_write_reg(dev, NPU_REG_REGIONCFG, regioncfg.word); + LOG_DEBUG("REGIONCFG%u=0x%08x\n", region, regioncfg.word); +#else + // NPU stubbed + UNUSED(dev); + UNUSED(region); + UNUSED(memory_type); +#endif + return ETHOSU_SUCCESS; +} + +enum ethosu_error_codes ethosu_set_axi_limit0(struct ethosu_device *dev, + enum ethosu_axi_limit_beats max_beats, + enum ethosu_axi_limit_mem_type memtype, + uint8_t max_reads, + uint8_t max_writes) +{ +#if !defined(ARM_NPU_STUB) + struct axi_limit0_r axi_limit0; + axi_limit0.word = 0; + axi_limit0.max_beats = max_beats; + axi_limit0.memtype = memtype; + axi_limit0.max_outstanding_read_m1 = max_reads - 1; + axi_limit0.max_outstanding_write_m1 = max_writes - 1; + + ethosu_write_reg(dev, NPU_REG_AXI_LIMIT0, axi_limit0.word); + LOG_DEBUG("AXI_LIMIT0=0x%08x\n", axi_limit0.word); +#else + // NPU stubbed + UNUSED(dev); + UNUSED(max_beats); + UNUSED(memtype); + UNUSED(max_reads); + UNUSED(max_writes); +#endif + + return ETHOSU_SUCCESS; +} + +enum ethosu_error_codes ethosu_set_axi_limit1(struct ethosu_device *dev, + enum ethosu_axi_limit_beats max_beats, + enum ethosu_axi_limit_mem_type memtype, + uint8_t max_reads, + uint8_t max_writes) +{ +#if !defined(ARM_NPU_STUB) + struct axi_limit1_r axi_limit1; + axi_limit1.word = 0; + axi_limit1.max_beats = max_beats; + axi_limit1.memtype = memtype; + axi_limit1.max_outstanding_read_m1 = max_reads - 1; + axi_limit1.max_outstanding_write_m1 = max_writes - 1; + + ethosu_write_reg(dev, NPU_REG_AXI_LIMIT1, axi_limit1.word); + LOG_DEBUG("AXI_LIMIT1=0x%08x\n", axi_limit1.word); +#else + // NPU stubbed + UNUSED(dev); + 
UNUSED(max_beats); + UNUSED(memtype); + UNUSED(max_reads); + UNUSED(max_writes); +#endif + + return ETHOSU_SUCCESS; +} + +enum ethosu_error_codes ethosu_set_axi_limit2(struct ethosu_device *dev, + enum ethosu_axi_limit_beats max_beats, + enum ethosu_axi_limit_mem_type memtype, + uint8_t max_reads, + uint8_t max_writes) +{ +#if !defined(ARM_NPU_STUB) + struct axi_limit2_r axi_limit2; + axi_limit2.word = 0; + axi_limit2.max_beats = max_beats; + axi_limit2.memtype = memtype; + axi_limit2.max_outstanding_read_m1 = max_reads - 1; + axi_limit2.max_outstanding_write_m1 = max_writes - 1; + + ethosu_write_reg(dev, NPU_REG_AXI_LIMIT2, axi_limit2.word); + LOG_DEBUG("AXI_LIMIT2=0x%08x\n", axi_limit2.word); +#else + // NPU stubbed + UNUSED(dev); + UNUSED(max_beats); + UNUSED(memtype); + UNUSED(max_reads); + UNUSED(max_writes); +#endif + + return ETHOSU_SUCCESS; +} + +enum ethosu_error_codes ethosu_set_axi_limit3(struct ethosu_device *dev, + enum ethosu_axi_limit_beats max_beats, + enum ethosu_axi_limit_mem_type memtype, + uint8_t max_reads, + uint8_t max_writes) +{ +#if !defined(ARM_NPU_STUB) + struct axi_limit3_r axi_limit3; + axi_limit3.word = 0; + axi_limit3.max_beats = max_beats; + axi_limit3.memtype = memtype; + axi_limit3.max_outstanding_read_m1 = max_reads - 1; + axi_limit3.max_outstanding_write_m1 = max_writes - 1; + + ethosu_write_reg(dev, NPU_REG_AXI_LIMIT3, axi_limit3.word); + LOG_DEBUG("AXI_LIMIT3=0x%08x\n", axi_limit3.word); +#else + // NPU stubbed + UNUSED(dev); + UNUSED(max_beats); + UNUSED(memtype); + UNUSED(max_reads); + UNUSED(max_writes); +#endif + + return ETHOSU_SUCCESS; +} + +enum ethosu_error_codes ethosu_get_revision(struct ethosu_device *dev, uint32_t *revision) +{ +#if !defined(ARM_NPU_STUB) + *revision = ethosu_read_reg(dev, NPU_REG_REVISION); +#else + UNUSED(dev); + *revision = 0xDEADC0DE; +#endif + return ETHOSU_SUCCESS; +} + +enum ethosu_error_codes ethosu_get_qread(struct ethosu_device *dev, uint32_t *qread) +{ +#if !defined(ARM_NPU_STUB) + *qread = ethosu_read_reg(dev, NPU_REG_QREAD); +#else + UNUSED(dev); + *qread = stream_length; +#endif + return ETHOSU_SUCCESS; +} + +enum ethosu_error_codes ethosu_get_status_mask(struct ethosu_device *dev, uint16_t *status_mask) +{ +#if !defined(ARM_NPU_STUB) + struct status_r status; + + status.word = ethosu_read_reg(dev, NPU_REG_STATUS); + *status_mask = status.word & 0xFFFF; +#else + UNUSED(dev); + *status_mask = 0x0000; +#endif + return ETHOSU_SUCCESS; +} + +enum ethosu_error_codes ethosu_get_irq_history_mask(struct ethosu_device *dev, uint16_t *irq_history_mask) +{ +#if !defined(ARM_NPU_STUB) + struct status_r status; + + status.word = ethosu_read_reg(dev, NPU_REG_STATUS); + *irq_history_mask = status.irq_history_mask; +#else + UNUSED(dev); + *irq_history_mask = 0xffff; +#endif + return ETHOSU_SUCCESS; +} + +enum ethosu_error_codes ethosu_clear_irq_history_mask(struct ethosu_device *dev, uint16_t irq_history_clear_mask) +{ +#if !defined(ARM_NPU_STUB) + struct cmd_r oldcmd; + oldcmd.word = ethosu_read_reg(dev, NPU_REG_CMD); + + struct cmd_r cmd; + cmd.word = 0; + cmd.clock_q_enable = oldcmd.clock_q_enable; + cmd.power_q_enable = oldcmd.power_q_enable; + cmd.clear_irq_history = irq_history_clear_mask; + ethosu_write_reg(dev, NPU_REG_CMD, cmd.word); + LOG_DEBUG("CMD=0x%08x\n", cmd.word); +#else + UNUSED(dev); + UNUSED(irq_history_clear_mask); +#endif + return ETHOSU_SUCCESS; +} + +enum ethosu_error_codes ethosu_set_command_run(struct ethosu_device *dev) +{ +#if !defined(ARM_NPU_STUB) + struct cmd_r oldcmd; + oldcmd.word = 
ethosu_read_reg(dev, NPU_REG_CMD); + + struct cmd_r cmd; + cmd.word = 0; + cmd.transition_to_running_state = 1; + cmd.clock_q_enable = oldcmd.clock_q_enable; + cmd.power_q_enable = oldcmd.power_q_enable; + ethosu_write_reg(dev, NPU_REG_CMD, cmd.word); + LOG_DEBUG("CMD=0x%08x\n", cmd.word); +#else + UNUSED(dev); +#endif + return ETHOSU_SUCCESS; +} + +enum ethosu_error_codes ethosu_get_shram_data(struct ethosu_device *dev, int section, uint32_t *shram_p) +{ +#if !defined(ARM_NPU_STUB) + int i = 0; + uint32_t address = NPU_REG_SHARED_BUFFER0; + ethosu_write_reg(dev, NPU_REG_DEBUG_ADDRESS, section * BYTES_1KB); + + while (address <= NPU_REG_SHARED_BUFFER255) + { + shram_p[i] = ethosu_read_reg(dev, address); + address += REG_OFFSET; + i++; + } +#else + // NPU stubbed + UNUSED(dev); + UNUSED(section); + UNUSED(shram_p); +#endif + + return ETHOSU_SUCCESS; +} + +enum ethosu_error_codes ethosu_set_clock_and_power(struct ethosu_device *dev, + enum ethosu_clock_q_request clock_q, + enum ethosu_power_q_request power_q) +{ +#if !defined(ARM_NPU_STUB) + struct cmd_r cmd; + cmd.word = 0; + cmd.clock_q_enable = clock_q; + cmd.power_q_enable = power_q; + ethosu_write_reg(dev, NPU_REG_CMD, cmd.word); + LOG_DEBUG("CMD=0x%08x\n", cmd.word); +#else + UNUSED(dev); + UNUSED(clock_q); + UNUSED(power_q); +#endif + return ETHOSU_SUCCESS; +} + +uint32_t ethosu_read_reg(struct ethosu_device *dev, uint32_t address) +{ +#if !defined(ARM_NPU_STUB) + assert(dev->base_address != 0); + assert(address % 4 == 0); + + volatile uint32_t *reg = (volatile uint32_t *)(dev->base_address + address); + return *reg; +#else + UNUSED(dev); + UNUSED(address); + + return 0; +#endif +} + +void ethosu_write_reg(struct ethosu_device *dev, uint32_t address, uint32_t value) +{ +#if !defined(ARM_NPU_STUB) + assert(dev->base_address != 0); + assert(address % 4 == 0); + + volatile uint32_t *reg = (volatile uint32_t *)(dev->base_address + address); + *reg = value; +#else + UNUSED(dev); + UNUSED(address); + UNUSED(value); +#endif +} + +void ethosu_write_reg_shadow(struct ethosu_device *dev, uint32_t address, uint32_t value, uint32_t *shadow) +{ + ethosu_write_reg(dev, address, value); + *shadow = ethosu_read_reg(dev, address); +} + +enum ethosu_error_codes ethosu_save_pmu_config(struct ethosu_device *dev) +{ +#if !defined(ARM_NPU_STUB) + // Save the PMU control register + dev->pmcr = ethosu_read_reg(dev, NPU_REG_PMCR); + + // Save IRQ control + dev->pmint = ethosu_read_reg(dev, NPU_REG_PMINTSET); + + // Save the enabled events mask + dev->pmcnten = ethosu_read_reg(dev, NPU_REG_PMCNTENSET); + + // Save start and stop event + dev->pmccntr_cfg = ethosu_read_reg(dev, NPU_REG_PMCCNTR_CFG); + + // Save the event settings and counters + for (uint32_t i = 0; i < ETHOSU_PMU_NCOUNTERS; i++) + { + dev->pmu_evtypr[i] = ethosu_read_reg(dev, NPU_REG_PMEVTYPER0 + i * sizeof(uint32_t)); + } +#else + UNUSED(dev); +#endif + + return ETHOSU_SUCCESS; +} + +enum ethosu_error_codes ethosu_restore_pmu_config(struct ethosu_device *dev) +{ +#if !defined(ARM_NPU_STUB) + // Restore PMU control register + ethosu_write_reg(dev, NPU_REG_PMCR, dev->pmcr); + + // Restore IRQ control + ethosu_write_reg(dev, NPU_REG_PMINTSET, dev->pmint); + + // Restore enabled event mask + ethosu_write_reg(dev, NPU_REG_PMCNTENSET, dev->pmcnten); + + // Restore start and stop event + ethosu_write_reg(dev, NPU_REG_PMCCNTR_CFG, dev->pmccntr_cfg); + + // Save the event settings and counters + for (uint32_t i = 0; i < ETHOSU_PMU_NCOUNTERS; i++) + { + ethosu_write_reg(dev, NPU_REG_PMEVTYPER0 + i * 
sizeof(uint32_t), dev->pmu_evtypr[i]); + } +#else + UNUSED(dev); +#endif + + return ETHOSU_SUCCESS; +} + +enum ethosu_error_codes ethosu_save_pmu_counters(struct ethosu_device *dev) +{ +#if !defined(ARM_NPU_STUB) + // Save the cycle counter + dev->pmccntr[0] = ethosu_read_reg(dev, NPU_REG_PMCCNTR_LO); + dev->pmccntr[1] = ethosu_read_reg(dev, NPU_REG_PMCCNTR_HI); + + // Save the event settings and counters + for (uint32_t i = 0; i < ETHOSU_PMU_NCOUNTERS; i++) + { + dev->pmu_evcntr[i] = ethosu_read_reg(dev, NPU_REG_PMEVCNTR0 + i * sizeof(uint32_t)); + } +#else + UNUSED(dev); +#endif + + return ETHOSU_SUCCESS; +} + +bool ethosu_status_has_error(struct ethosu_device *dev) +{ + bool status_error = false; +#if !defined(ARM_NPU_STUB) + struct status_r status; + status.word = ethosu_read_reg(dev, NPU_REG_STATUS); + status_error = ((1 == status.bus_status) || (1 == status.cmd_parse_error) || (1 == status.wd_fault) || + (1 == status.ecc_fault)); +#else + UNUSED(dev); +#endif + return status_error; +} + +#endif //EI ETHOS diff --git a/edgeimpulse/edge-impulse-sdk/porting/ethos-core-driver/src/ethosu_driver.c b/edgeimpulse/edge-impulse-sdk/porting/ethos-core-driver/src/ethosu_driver.c new file mode 100644 index 0000000..f616dfb --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/ethos-core-driver/src/ethosu_driver.c @@ -0,0 +1,1292 @@ +/* + * Copyright (c) 2019-2021 Arm Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/****************************************************************************** + * Includes + ******************************************************************************/ + +#if EI_ETHOS + +#include "ethosu_driver.h" +#include "ethosu_common.h" +#include "ethosu_config.h" +#include "ethosu_device.h" + +#include +#include +#include +#include +#include +#include +#include + +/****************************************************************************** + * Defines + ******************************************************************************/ + +#define MACS_PER_CYCLE_LOG2_MASK 0x000F +#define SHRAM_SIZE_MASK 0xFF00 +#define SHRAM_SIZE_RIGHT_SHIFT 8 +#define BYTES_IN_32_BITS 4 +#define CUSTOM_OPTION_LENGTH_32_BIT_WORD 1 +#define DRIVER_ACTION_LENGTH_32_BIT_WORD 1 +#define OPTIMIZER_CONFIG_LENGTH_32_BIT_WORD 2 +#define ETHOSU_FOURCC ('1' << 24 | 'P' << 16 | 'O' << 8 | 'C') // "Custom Operator Payload 1" +#define APB_START_ADDR_MASK 0x0FFF +#define APB_NUM_REG_BIT_SHIFT 12 +#define BYTES_1KB 1024 +#define PRODUCT_MAJOR_ETHOSU55 (4) +#define MASK_16_BYTE_ALIGN (0xF) +#define FAST_MEMORY_BASE_ADDR_INDEX 2 + +/****************************************************************************** + * Types + ******************************************************************************/ + +// Driver actions +enum DRIVER_ACTION_e +{ + RESERVED = 0, + OPTIMIZER_CONFIG = 1, + COMMAND_STREAM = 2, + READ_APB_REG = 3, + DUMP_SHRAM = 4, + NOP = 5, +}; + +// Custom data struct +struct custom_data_s +{ + union + { + // Driver action data + struct + { + // Driver action command (valid values in DRIVER_ACTION_e) + uint8_t driver_action_command; + + // reserved + uint8_t reserved; + + // Driver action data + union + { + // DA_CMD_OPT_CFG + struct + { + uint16_t rel_nbr : 4; + uint16_t patch_nbr : 4; + uint16_t opt_cfg_reserved : 8; + }; + + // DA_CMD_CMSTRM + struct + { + uint16_t length; + }; + + // DA_CMD_READAPB + struct + { + uint16_t start_address : 12; + uint16_t nbr_reg_minus1 : 4; + }; + + uint16_t driver_action_data; + }; + }; + + uint32_t word; + }; +}; + +// optimizer config struct +struct opt_cfg_s +{ + struct custom_data_s da_data; + union + { + struct + { + uint32_t macs_per_cc : 4; + uint32_t cmd_stream_version : 4; + uint32_t shram_size : 8; + uint32_t reserved0 : 11; + uint32_t custom_dma : 1; + uint32_t product : 4; + }; + uint32_t npu_cfg; + }; + union + { + struct + { + uint32_t version_status : 4; + uint32_t version_minor : 4; + uint32_t version_major : 4; + uint32_t product_major : 4; + uint32_t arch_patch_rev : 4; + uint32_t arch_minor_rev : 8; + uint32_t arch_major_rev : 4; + }; + uint32_t ethosu_id; + }; +}; + +/****************************************************************************** + * Functions + ******************************************************************************/ + +struct ethosu_driver ethosu_drv = { + .dev = {.base_address = NULL, .proto = 0, .pmccntr = {0}, .pmu_evcntr = {0, 0, 0, 0}, .pmu_evtypr = {0, 0, 0, 0}}, + .abort_inference = false, + .status_error = false, + .dev_power_always_on = false}; + +// Registered drivers linked list HEAD +static struct ethosu_driver *registered_drivers = NULL; + +/* + * Following section handles the minimal sempahore and mutex implementation in case of baremetal applications. + * Weak symbols will be overwritten by RTOS definitions and implement true thread-safety. 
(Done in application layer) + */ + +// Baremetal sempahore implementation +struct ethosu_semaphore_t +{ + int count; +}; + +// Minimal needed declaration to allow baremetal functionality. +static void *ethosu_mutex; +static void *ethosu_semaphore; + +void *__attribute__((weak)) ethosu_mutex_create(void) +{ + return NULL; +} + +void __attribute__((weak)) ethosu_mutex_lock(void *mutex) +{ + UNUSED(mutex); +} + +void __attribute__((weak)) ethosu_mutex_unlock(void *mutex) +{ + UNUSED(mutex); +} + +// Baremetal implementation of creating a semaphore +void *__attribute__((weak)) ethosu_semaphore_create(void) +{ + struct ethosu_semaphore_t *sem = malloc(sizeof(*sem)); + sem->count = 1; + return sem; +} + +// Baremetal simulation of waiting/sleeping for and then taking a semaphore using intrisics +void __attribute__((weak)) ethosu_semaphore_take(void *sem) +{ + struct ethosu_semaphore_t *s = sem; + while (s->count <= 0) + { + __WFE(); + } + s->count--; +} + +// Baremetal simulation of giving a semaphore and waking up processes using intrinsics +void __attribute__((weak)) ethosu_semaphore_give(void *sem) +{ + struct ethosu_semaphore_t *s = sem; + s->count++; + __SEV(); +} +// <--- End of semaphore and mutex implementations + +static int ethosu_soft_reset_and_restore(struct ethosu_driver *drv); + +void __attribute__((weak)) ethosu_irq_handler(struct ethosu_driver *drv) +{ + uint8_t irq_raised = 0; + + LOG_DEBUG("Interrupt. status=0x%08x, qread=%d\n", + ethosu_read_reg(&drv->dev, NPU_REG_STATUS), + ethosu_read_reg(&drv->dev, NPU_REG_QREAD)); + + // Verify that interrupt has been raised + (void)ethosu_is_irq_raised(&drv->dev, &irq_raised); + assert(irq_raised == 1); + drv->irq_triggered = true; + + // Clear interrupt + (void)ethosu_clear_irq_status(&drv->dev); + + // Verify that interrupt has been successfully cleared + (void)ethosu_is_irq_raised(&drv->dev, &irq_raised); + assert(irq_raised == 0); + + if (ethosu_status_has_error(&drv->dev)) + { + ethosu_soft_reset_and_restore(drv); + drv->status_error = true; + } + + ethosu_semaphore_give(drv->semaphore); +} + +static inline void wait_for_irq(struct ethosu_driver *drv) +{ + while (1) + { + if (drv->irq_triggered || drv->abort_inference) + { + drv->irq_triggered = false; + break; + } + + ethosu_semaphore_take(drv->semaphore); + } +} + +void __attribute__((weak)) ethosu_inference_begin(struct ethosu_driver *drv, const void *inference_data) +{ + (void)inference_data; + (void)drv; +} + +void __attribute__((weak)) ethosu_inference_end(struct ethosu_driver *drv, const void *inference_data) +{ + (void)inference_data; + (void)drv; +} + +static int handle_optimizer_config(struct ethosu_driver *drv, struct opt_cfg_s *opt_cfg_p); +static int handle_command_stream(struct ethosu_driver *drv, + const uint8_t *cmd_stream, + const int cms_length, + const uint64_t *base_addr, + const size_t *base_addr_size, + const int num_base_addr); +static int read_apb_reg(struct ethosu_driver *drv, uint16_t); +static int dump_shram(struct ethosu_driver *drv); +static void dump_npu_register(struct ethosu_driver *drv, int npu_reg, int npu_reg_end); +static void dump_command_stream(const uint32_t *cmd_stream, const int cms_length, int qread); +static void npu_axi_init(struct ethosu_driver *drv); +static struct ethosu_driver *ethosu_find_and_reserve_driver(void); + +int ethosu_init(struct ethosu_driver *drv, + const void *base_address, + const void *fast_memory, + const size_t fast_memory_size, + uint32_t secure_enable, + uint32_t privilege_enable) +{ + int return_code = 0; + + 
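+    // Bring-up sequence: lazily create the global mutex/semaphore, register this
+    // driver instance, record the optional fast-memory (SRAM) window, initialise
+    // the device struct, disable clock/power gating, then soft reset the NPU and
+    // wait for the reset to complete.
+    //
+    // Typical call from a BSP (sketch only; ETHOSU_BASE_ADDRESS is a placeholder
+    // for the board-specific NPU register base):
+    //   static struct ethosu_driver drv;
+    //   ethosu_init(&drv, (void *)ETHOSU_BASE_ADDRESS, NULL, 0, 1, 1);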
LOG_INFO("%s. base_address=%p, fast_memory=%p, fast_memory_size=%zu, secure=%" PRIu32 ", privileged=%" PRIu32 "\n", + __FUNCTION__, + base_address, + fast_memory, + fast_memory_size, + secure_enable, + privilege_enable); + + if (!ethosu_mutex) + { + ethosu_mutex = ethosu_mutex_create(); + } + + if (!ethosu_semaphore) + { + ethosu_semaphore = ethosu_semaphore_create(); + } + + ethosu_register_driver(drv); + + drv->fast_memory = (uint32_t)fast_memory; + drv->fast_memory_size = fast_memory_size; + drv->irq_triggered = false; + drv->semaphore = ethosu_semaphore_create(); + + if (ETHOSU_SUCCESS != ethosu_dev_init(&drv->dev, base_address, secure_enable, privilege_enable)) + { + LOG_ERR("Failed in ethosu_dev_init"); + return -1; + } + + if (ETHOSU_SUCCESS != + set_clock_and_power_request(drv, ETHOSU_INFERENCE_REQUEST, ETHOSU_CLOCK_Q_DISABLE, ETHOSU_POWER_Q_DISABLE)) + { + LOG_ERR("Failed to disable clock-q & power-q for Ethos-U\n"); + return -1; + } + + if (ETHOSU_SUCCESS != ethosu_soft_reset(&drv->dev)) + { + return -1; + } + + if (ETHOSU_SUCCESS != ethosu_wait_for_reset(&drv->dev)) + { + LOG_ERR("Failed reset of Ethos-U\n"); + return -1; + } + + drv->status_error = false; + + return return_code; +} + +int ethosu_get_version(struct ethosu_driver *drv, struct ethosu_version *version) +{ + int return_code = 0; + + if (NULL != version) + { + struct ethosu_id id; + struct ethosu_config cfg; + (void)ethosu_get_id(&drv->dev, &id); + (void)ethosu_get_config(&drv->dev, &cfg); + + version->id.version_status = id.version_status; + version->id.version_minor = id.version_minor; + version->id.version_major = id.version_major; + version->id.product_major = id.product_major; + version->id.arch_patch_rev = id.arch_patch_rev; + version->id.arch_minor_rev = id.arch_minor_rev; + version->id.arch_major_rev = id.arch_major_rev; + version->id.driver_patch_rev = ETHOSU_DRIVER_VERSION_PATCH; + version->id.driver_minor_rev = ETHOSU_DRIVER_VERSION_MINOR; + version->id.driver_major_rev = ETHOSU_DRIVER_VERSION_MAJOR; + version->cfg.macs_per_cc = cfg.macs_per_cc; + version->cfg.cmd_stream_version = cfg.cmd_stream_version; + version->cfg.shram_size = cfg.shram_size; + version->cfg.custom_dma = cfg.custom_dma; + } + else + { + return_code = -1; + } + + return return_code; +} + +int ethosu_invoke(struct ethosu_driver *drv, + const void *custom_data_ptr, + const int custom_data_size, + const uint64_t *base_addr, + const size_t *base_addr_size, + const int num_base_addr) +{ + const struct custom_data_s *data_ptr = custom_data_ptr; + const struct custom_data_s *data_end = custom_data_ptr + custom_data_size; + int return_code = 0; + + LOG_INFO("%s\n", __FUNCTION__); + + // First word in custom_data_ptr should contain "Custom Operator Payload 1" + if (data_ptr->word != ETHOSU_FOURCC) + { + LOG_ERR("Custom Operator Payload: %" PRIu32 " is not correct, expected %x\n", data_ptr->word, ETHOSU_FOURCC); + return -1; + } + + // Custom data length must be a multiple of 32 bits + if ((custom_data_size % BYTES_IN_32_BITS) != 0) + { + LOG_ERR("ethosu_invoke ERROR custom_data_size=0x%x not a multiple of 4\n", custom_data_size); + return -1; + } + + ++data_ptr; + + // Adjust base address to fast memory area + if (drv->fast_memory != 0 && num_base_addr >= FAST_MEMORY_BASE_ADDR_INDEX) + { + uint64_t *fast_memory = (uint64_t *)&base_addr[FAST_MEMORY_BASE_ADDR_INDEX]; + + if (base_addr_size != NULL && base_addr_size[FAST_MEMORY_BASE_ADDR_INDEX] > drv->fast_memory_size) + { + LOG_ERR("Fast memory area too small. 
fast_memory_size=%u, base_addr_size=%u\n", + drv->fast_memory_size, + base_addr_size[FAST_MEMORY_BASE_ADDR_INDEX]); + return -1; + } + + *fast_memory = drv->fast_memory; + } + + if (!drv->dev_power_always_on) + { + // Only soft reset if securty state or privilege level needs changing + if (drv->dev.proto != ethosu_read_reg(&drv->dev, NPU_REG_PROT)) + { + if (ETHOSU_SUCCESS != ethosu_soft_reset(&drv->dev)) + { + return -1; + } + } + + drv->status_error = false; + set_clock_and_power_request(drv, ETHOSU_INFERENCE_REQUEST, ETHOSU_CLOCK_Q_ENABLE, ETHOSU_POWER_Q_DISABLE); + ethosu_restore_pmu_config(&drv->dev); + npu_axi_init(drv); + } + + drv->status_error = false; + + ethosu_inference_begin(drv, custom_data_ptr); + while (data_ptr < data_end) + { + int ret = 0; + switch (data_ptr->driver_action_command) + { + case OPTIMIZER_CONFIG: + LOG_INFO("ethosu_invoke OPTIMIZER_CONFIG\n"); + struct opt_cfg_s *opt_cfg_p = (struct opt_cfg_s *)data_ptr; + + ret = handle_optimizer_config(drv, opt_cfg_p); + data_ptr += DRIVER_ACTION_LENGTH_32_BIT_WORD + OPTIMIZER_CONFIG_LENGTH_32_BIT_WORD; + break; + case COMMAND_STREAM: + LOG_INFO("ethosu_invoke COMMAND_STREAM\n"); + void *command_stream = (uint8_t *)(data_ptr) + sizeof(struct custom_data_s); + int cms_length = (data_ptr->reserved << 16) | data_ptr->length; + + drv->abort_inference = false; + // It is safe to clear this flag without atomic, because npu is not running. + drv->irq_triggered = false; + + ret = handle_command_stream(drv, command_stream, cms_length, base_addr, base_addr_size, num_base_addr); + + if (return_code == -1 && drv->abort_inference) + { + uint32_t qread = 0; + ethosu_get_qread(&drv->dev, &qread); + LOG_ERR("NPU timeout\n"); + dump_command_stream(command_stream, cms_length, qread); + dump_npu_register(drv, 0x200, 0x2BF); + dump_npu_register(drv, 0x800, 0xB3F); + dump_shram(drv); + } + + data_ptr += DRIVER_ACTION_LENGTH_32_BIT_WORD + cms_length; + break; + case READ_APB_REG: + LOG_INFO("ethosu_invoke READ_APB_REG\n"); + ret = read_apb_reg(drv, data_ptr->driver_action_data); + data_ptr += DRIVER_ACTION_LENGTH_32_BIT_WORD; + break; + case DUMP_SHRAM: + LOG_INFO("ethosu_invoke DUMP_SHRAM\n"); + ret = dump_shram(drv); + data_ptr += DRIVER_ACTION_LENGTH_32_BIT_WORD; + break; + case NOP: + LOG_INFO("ethosu_invoke NOP\n"); + data_ptr += DRIVER_ACTION_LENGTH_32_BIT_WORD; + break; + default: + LOG_ERR("ethosu_invoke UNSUPPORTED driver_action_command %d \n", data_ptr->driver_action_command); + ret = -1; + break; + } + if (ret != 0) + { + return_code = -1; + break; + } + } + ethosu_inference_end(drv, custom_data_ptr); + + if (!drv->status_error && !drv->dev_power_always_on) + { + ethosu_save_pmu_counters(&drv->dev); + set_clock_and_power_request(drv, ETHOSU_INFERENCE_REQUEST, ETHOSU_CLOCK_Q_ENABLE, ETHOSU_POWER_Q_ENABLE); + } + + return return_code; +} + +void ethosu_abort(struct ethosu_driver *drv) +{ + drv->abort_inference = true; +} + +void ethosu_set_power_mode(struct ethosu_driver *drv, bool always_on) +{ + drv->dev_power_always_on = always_on; + + if (always_on) + { + npu_axi_init(drv); + } +} + +int ethosu_register_driver(struct ethosu_driver *drv) +{ + // Safeguard check for if driver is already registered + struct ethosu_driver *cur = registered_drivers; + while (cur != NULL) + { + if (cur == drv) + { + LOG_ERR("%s: NPU driver at address %p is already registered.\n", __FUNCTION__, drv); + return -1; + } + cur = cur->next; + } + + drv->next = registered_drivers; + // Designate new registered driver HEAD + registered_drivers = drv; + + 
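+    // The registered-driver list is a simple singly linked list with the newest
+    // driver at the head; ethosu_reserve_driver() walks it under ethosu_mutex to
+    // hand out an unreserved instance.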
LOG_INFO("%s: New NPU driver at address %p is registered.\n", __FUNCTION__, drv); + return 0; +} + +int ethosu_deregister_driver(struct ethosu_driver *drv) +{ + struct ethosu_driver *cur = registered_drivers; + struct ethosu_driver **prev = ®istered_drivers; + + while (cur != NULL) + { + if (cur == drv) + { + *prev = cur->next; + LOG_INFO("%s: NPU driver at address %p is deregistered.\n", __FUNCTION__, drv); + return 0; + } + + prev = &cur->next; + cur = cur->next; + } + + LOG_ERR("%s: NPU driver at address %p does not match a registered driver and therefore may not be deregistered.\n", + __FUNCTION__, + drv); + + return -1; +} + +struct ethosu_driver *ethosu_reserve_driver(void) +{ + struct ethosu_driver *drv = NULL; + + do + { + ethosu_mutex_lock(ethosu_mutex); + drv = ethosu_find_and_reserve_driver(); + ethosu_mutex_unlock(ethosu_mutex); + + if (drv != NULL) + { + break; + } + + LOG_INFO("%s - Waiting for driver \n", __FUNCTION__); + ethosu_semaphore_take(ethosu_semaphore); + + } while (1); + + return drv; +} + +static struct ethosu_driver *ethosu_find_and_reserve_driver(void) +{ + struct ethosu_driver *drv = registered_drivers; + + while (drv != NULL) + { + if (!drv->reserved) + { + drv->reserved = true; + LOG_INFO("%s - Driver %p reserved.\n", __FUNCTION__, drv); + return drv; + } + drv = drv->next; + } + + LOG_INFO("%s: No available drivers.\n", __FUNCTION__); + + return NULL; +} + +void ethosu_release_driver(struct ethosu_driver *drv) +{ + ethosu_mutex_lock(ethosu_mutex); + if (drv != NULL && drv->reserved) + { + drv->reserved = false; + LOG_INFO("%s - Driver %p released\n", __FUNCTION__, drv); + ethosu_semaphore_give(ethosu_semaphore); + } + ethosu_mutex_unlock(ethosu_mutex); +} + +static int ethosu_soft_reset_and_restore(struct ethosu_driver *drv) +{ + + if (ETHOSU_SUCCESS != ethosu_soft_reset(&drv->dev)) + { + return -1; + } + + set_clock_and_power_request(drv, ETHOSU_INFERENCE_REQUEST, ETHOSU_CLOCK_Q_ENABLE, ETHOSU_POWER_Q_DISABLE); + + npu_axi_init(drv); + ethosu_restore_pmu_config(&drv->dev); + + return 0; +} + +enum ethosu_error_codes set_clock_and_power_request(struct ethosu_driver *drv, + enum ethosu_request_clients client, + enum ethosu_clock_q_request clock_request, + enum ethosu_power_q_request power_request) +{ + // Set clock request bit for client + if (clock_request == ETHOSU_CLOCK_Q_DISABLE) + { + drv->clock_request |= (1 << client); + } + else + { + drv->clock_request &= ~(1 << client); + } + // Get current clock request (ENABLE if both PMU and INFERENCE asks for clock request, else DISABLE) + clock_request = drv->clock_request == 0 ? ETHOSU_CLOCK_Q_ENABLE : ETHOSU_CLOCK_Q_DISABLE; + + // Set power request bit for client + if (power_request == ETHOSU_POWER_Q_DISABLE) + { + drv->power_request |= (1 << client); + } + else + { + drv->power_request &= ~(1 << client); + } + // Get current power request (ENABLE if both PMU and INFERENCE asks for power request, else DISABLE) + power_request = drv->power_request == 0 ? 
ETHOSU_POWER_Q_ENABLE : ETHOSU_POWER_Q_DISABLE; + + // Set clock and power + enum ethosu_error_codes ret = ethosu_set_clock_and_power(&drv->dev, clock_request, power_request); + + return ret; +} + +static int handle_optimizer_config(struct ethosu_driver *drv, struct opt_cfg_s *opt_cfg_p) +{ + struct ethosu_config cfg; + struct ethosu_id id; + int return_code = 0; + + LOG_INFO("handle_optimizer_config:\n"); + LOG_INFO("Optimizer release nbr: %d patch: %d\n", opt_cfg_p->da_data.rel_nbr, opt_cfg_p->da_data.patch_nbr); + LOG_INFO("Optimizer config cmd_stream_version: %d macs_per_cc: %d shram_size: %d custom_dma: %d\n", + opt_cfg_p->cmd_stream_version, + opt_cfg_p->macs_per_cc, + opt_cfg_p->shram_size, + opt_cfg_p->custom_dma); + LOG_INFO("Optimizer config Ethos-U version: %d.%d.%d\n", + opt_cfg_p->arch_major_rev, + opt_cfg_p->arch_minor_rev, + opt_cfg_p->arch_patch_rev); + + (void)ethosu_get_config(&drv->dev, &cfg); + (void)ethosu_get_id(&drv->dev, &id); + LOG_INFO("Ethos-U config cmd_stream_version: %" PRIu32 " macs_per_cc: %" PRIu32 " shram_size: %" PRIu32 + " custom_dma: %" PRIu32 "\n", + cfg.cmd_stream_version, + cfg.macs_per_cc, + cfg.shram_size, + cfg.custom_dma); + LOG_INFO("Ethos-U version: %" PRIu32 ".%" PRIu32 ".%" PRIu32 "\n", + id.arch_major_rev, + id.arch_minor_rev, + id.arch_patch_rev); + + if ((cfg.macs_per_cc != opt_cfg_p->macs_per_cc) || (cfg.shram_size != opt_cfg_p->shram_size) || + (cfg.cmd_stream_version != opt_cfg_p->cmd_stream_version) || (!cfg.custom_dma && opt_cfg_p->custom_dma)) + { + if (cfg.macs_per_cc != opt_cfg_p->macs_per_cc) + { + LOG_ERR("NPU config mismatch: npu.macs_per_cc=%" PRIu32 " optimizer.macs_per_cc=%d\n", + cfg.macs_per_cc, + opt_cfg_p->macs_per_cc); + } + if (cfg.shram_size != opt_cfg_p->shram_size) + { + LOG_ERR("NPU config mismatch: npu.shram_size=%" PRIu32 " optimizer.shram_size=%d\n", + cfg.shram_size, + opt_cfg_p->shram_size); + } + if (cfg.cmd_stream_version != opt_cfg_p->cmd_stream_version) + { + LOG_ERR("NPU config mismatch: npu.cmd_stream_version=%" PRIu32 " optimizer.cmd_stream_version=%d\n", + cfg.cmd_stream_version, + opt_cfg_p->cmd_stream_version); + } + if (!cfg.custom_dma && opt_cfg_p->custom_dma) + { + LOG_ERR("NPU config mismatch: npu.custom_dma=%" PRIu32 " optimize.custom_dma=%d\n", + cfg.custom_dma, + opt_cfg_p->custom_dma); + } + LOG_ERR("Did you choose the correct target core? 
This model was compiled for a different Ethos configuration\n"); + return_code = -1; + } + + if ((id.arch_major_rev != opt_cfg_p->arch_major_rev) || (id.arch_minor_rev < opt_cfg_p->arch_minor_rev)) + { + LOG_ERR("NPU arch mismatch: npu.arch=%" PRIu32 ".%" PRIu32 ".%" PRIu32 " optimizer.arch=%d.%d.%d\n", + id.arch_major_rev, + id.arch_minor_rev, + id.arch_patch_rev, + opt_cfg_p->arch_major_rev, + opt_cfg_p->arch_minor_rev, + opt_cfg_p->arch_patch_rev); + return_code = -1; + } + +#if !defined(LOG_ENABLED) + UNUSED(opt_cfg_p); +#endif + return return_code; +} + +static void npu_axi_init(struct ethosu_driver *drv) +{ + ethosu_set_qconfig(&drv->dev, NPU_QCONFIG); + + ethosu_set_regioncfg(&drv->dev, 0, NPU_REGIONCFG_0); + ethosu_set_regioncfg(&drv->dev, 1, NPU_REGIONCFG_1); + ethosu_set_regioncfg(&drv->dev, 2, NPU_REGIONCFG_2); + ethosu_set_regioncfg(&drv->dev, 3, NPU_REGIONCFG_3); + ethosu_set_regioncfg(&drv->dev, 4, NPU_REGIONCFG_4); + ethosu_set_regioncfg(&drv->dev, 5, NPU_REGIONCFG_5); + ethosu_set_regioncfg(&drv->dev, 6, NPU_REGIONCFG_6); + ethosu_set_regioncfg(&drv->dev, 7, NPU_REGIONCFG_7); + + (void)ethosu_set_axi_limit0(&drv->dev, + AXI_LIMIT0_MAX_BEATS_BYTES, + AXI_LIMIT0_MEM_TYPE, + AXI_LIMIT0_MAX_OUTSTANDING_READS, + AXI_LIMIT0_MAX_OUTSTANDING_WRITES); + (void)ethosu_set_axi_limit1(&drv->dev, + AXI_LIMIT1_MAX_BEATS_BYTES, + AXI_LIMIT1_MEM_TYPE, + AXI_LIMIT1_MAX_OUTSTANDING_READS, + AXI_LIMIT1_MAX_OUTSTANDING_WRITES); + (void)ethosu_set_axi_limit2(&drv->dev, + AXI_LIMIT2_MAX_BEATS_BYTES, + AXI_LIMIT2_MEM_TYPE, + AXI_LIMIT2_MAX_OUTSTANDING_READS, + AXI_LIMIT2_MAX_OUTSTANDING_WRITES); + (void)ethosu_set_axi_limit3(&drv->dev, + AXI_LIMIT3_MAX_BEATS_BYTES, + AXI_LIMIT3_MEM_TYPE, + AXI_LIMIT3_MAX_OUTSTANDING_READS, + AXI_LIMIT3_MAX_OUTSTANDING_WRITES); +} + +/* Default implementation to flush the data cache. Override if available on the targeted device. + * Passing NULL as p argument expects the whole cache to be flushed. + */ +void __attribute__((weak)) ethosu_flush_dcache(uint32_t *p, size_t bytes) +{ + (void)p; + (void)bytes; +} + +/* Default implementation to invalidate the data cache. Override if available on the targeted device. + * Passing NULL as p argument expects the whole cache to be flushed. + */ +void __attribute__((weak)) ethosu_invalidate_dcache(uint32_t *p, size_t bytes) +{ + (void)p; + (void)bytes; +} + +static int handle_command_stream(struct ethosu_driver *drv, + const uint8_t *cmd_stream, + const int cms_length, + const uint64_t *base_addr, + const size_t *base_addr_size, + const int num_base_addr) +{ + uint32_t qread = 0; + uint32_t cms_bytes = cms_length * BYTES_IN_32_BITS; + ptrdiff_t cmd_stream_ptr = (ptrdiff_t)cmd_stream; + + LOG_INFO("handle_command_stream: cmd_stream=%p, cms_length %d\n", cmd_stream, cms_length); + + if (0 != ((ptrdiff_t)cmd_stream & MASK_16_BYTE_ALIGN)) + { + LOG_ERR("Error: Command stream addr %p not aligned to 16 bytes\n", cmd_stream); + return -1; + } + + bool base_addr_invalid = false; + for (int i = 0; i < num_base_addr; i++) + { + if (0 != (base_addr[i] & MASK_16_BYTE_ALIGN)) + { + LOG_ERR("Error: Base addr %d: 0x%llx not aligned to 16 bytes\n", i, base_addr[i]); + base_addr_invalid = true; + } + } + + if (base_addr_invalid) + { + return -1; + } + + /* Flush the cache if available on our CPU. + * The upcasting to uin32_t* is ok since the pointer never is dereferenced. + * The base_addr_size is null if invoking from prior to invoke_V2, in that case + * the whole cache is being flushed. 
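+ * On cache-enabled cores (e.g. Cortex-M55) the weak ethosu_flush_dcache() /
+ * ethosu_invalidate_dcache() hooks should be overridden by the port, for instance
+ * with the CMSIS SCB_CleanDCache_by_Addr()/SCB_InvalidateDCache_by_Addr() helpers,
+ * so the NPU sees a coherent command stream and tensor data.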
+ */ + + if (base_addr_size != NULL) + { + ethosu_flush_dcache((uint32_t *)cmd_stream_ptr, cms_bytes); + for (int i = 0; i < num_base_addr; i++) + { + ethosu_flush_dcache((uint32_t *)(uintptr_t)base_addr[i], base_addr_size[i]); + } + } + else + { + ethosu_flush_dcache(NULL, 0); + } + + if (ETHOSU_SUCCESS != ethosu_run_command_stream(&drv->dev, cmd_stream, cms_bytes, base_addr, num_base_addr)) + { + return -1; + } + + wait_for_irq(drv); + + if (drv->status_error) + { + return -1; + } + + if (base_addr_size != NULL) + { + for (int i = 0; i < num_base_addr; i++) + { + ethosu_invalidate_dcache((uint32_t *)(uintptr_t)base_addr[i], base_addr_size[i]); + } + } + else + { + ethosu_invalidate_dcache(NULL, 0); + } + + (void)ethosu_get_qread(&drv->dev, &qread); + if (qread != cms_bytes) + { + LOG_WARN( + "Failure: IRQ received but qread (%" PRIu32 ") not at end of stream (%" PRIu32 ").\n", qread, cms_bytes); + return -1; + } + + return 0; +} + +static int read_apb_reg(struct ethosu_driver *drv, uint16_t da_data) +{ + uint32_t *reg_p; + uint32_t start_address = (uint32_t)(da_data & APB_START_ADDR_MASK); + uint16_t num_reg = (da_data >> APB_NUM_REG_BIT_SHIFT) + 1; + + reg_p = (uint32_t *)malloc(num_reg * sizeof(uint32_t)); + if (reg_p == NULL) + { + LOG_INFO("read_apb_reg, Error! memory not allocated."); + return -1; + } + + if (ETHOSU_SUCCESS == ethosu_read_apb_reg(&drv->dev, start_address, num_reg, reg_p)) + { + for (int i = 0; i < num_reg; i++) + { + LOG_INFO( + "NPU_REG ADDR 0x%04" PRIu32 " = 0x%08" PRIu32 "\n", (start_address + (i * BYTES_IN_32_BITS)), reg_p[i]); + } + } + else + { + free(reg_p); + return -1; + } + + free(reg_p); + return 0; +} + +static int dump_shram(struct ethosu_driver *drv) +{ + struct ethosu_config cfg; + uint32_t *shram_p; + (void)ethosu_get_config(&drv->dev, &cfg); + + LOG_INFO("dump_shram size = %" PRIu32 " KB\n", cfg.shram_size); + + shram_p = (uint32_t *)malloc(BYTES_1KB); + if (shram_p == NULL) + { + LOG_ERR("read_shram, Error! 
memory not allocated."); + return -1; + } + + for (uint32_t i = 0; i < cfg.shram_size; i++) + { + ethosu_get_shram_data(&drv->dev, i, (uint32_t *)shram_p); + // Output 1KB of SHRAM + LOG_INFO("***SHRAM SECTION %" PRIu32 "***\n", i); + for (int j = 0; j < (BYTES_1KB / BYTES_IN_32_BITS); j++) + { + LOG_INFO("[0x%04" PRIx32 "] %" PRIx32 "\n", (i * 1024 + j * 4), shram_p[j]); + } + } + free(shram_p); + + return 0; +} + +typedef struct +{ + int number; + const char *name; +} name_lookup_t; + +static const name_lookup_t npu_reg_name_tbl[] = { + {0x200, "KERNEL_X"}, + {0x204, "KERNEL_Y"}, + {0x208, "KERNEL_W_M1"}, + {0x20C, "KERNEL_H_M1"}, + {0x210, "OFM_CBLK_WIDTH_M1"}, + {0x214, "OFM_CBLK_HEIGHT_M1"}, + {0x218, "OFM_CBLK_DEPTH_M1"}, + {0x21c, "IFM_CBLK_DEPTH_M1"}, + {0x220, "OFM_X"}, + {0x224, "OFM_Y"}, + {0x228, "OFM_Z"}, + {0x22C, "IFM_Z"}, + {0x230, "PAD_TOP"}, + {0x234, "PAD_LEFT"}, + {0x238, "IFM_CBLK_WIDTH"}, + {0x23C, "IFM_CBLK_HEIGHT"}, + {0x240, "DMA_IFM_SRC"}, + {0x244, "DMA_IFM_SRC_HI"}, + {0x248, "DMA_IFM_DST"}, + {0x24c, "DMA_OFM_SRC"}, + {0x250, "DMA_OFM_DST"}, + {0x254, "DMA_OFM_DST_HI"}, + {0x258, "DMA_WEIGHT_SRC"}, + {0x25c, "DMA_WEIGHT_SRC_HI"}, + {0x260, "DMA_CMD_SRC"}, + {0x264, "DMA_CMD_SRC_HI"}, + {0x268, "DMA_CMD_SIZE"}, + {0x26c, "DMA_M2M_SRC"}, + {0x270, "DMA_M2M_SRC_HI"}, + {0x274, "DMA_M2M_DST"}, + {0x278, "DMA_M2M_DST_HI"}, + {0x27c, "CURRENT_QREAD"}, + {0x280, "DMA_SCALE_SRC"}, + {0x284, "DMA_SCALE_SRC_HI"}, + {0x2BC, "CURRENT_CMD"}, + {0x800, "IFM_PAD_TOP"}, + {0x804, "IFM_PAD_LEFT"}, + {0x808, "IFM_PAD_RIGHT"}, + {0x80C, "IFM_PAD_BOTTOM"}, + {0x810, "IFM_DEPTH_M1"}, + {0x814, "IFM_PRECISION"}, + {0x81C, "IFM_UPSCALE"}, + {0x824, "IFM_ZERO_POINT"}, + {0x828, "IFM_WIDTH0_M1"}, + {0x82C, "IFM_HEIGHT0_M1"}, + {0x830, "IFM_HEIGHT1_M1"}, + {0x834, "IFM_IB_END"}, + {0x83C, "IFM_REGION"}, + {0x844, "OFM_WIDTH_M1"}, + {0x848, "OFM_HEIGHT_M1"}, + {0x84C, "OFM_DEPTH_M1"}, + {0x850, "OFM_PRECISION"}, + {0x854, "OFM_BLK_WIDTH_M1"}, + {0x858, "OFM_BLK_HEIGHT_M1"}, + {0x85C, "OFM_BLK_DEPTH_M1"}, + {0x860, "OFM_ZERO_POINT"}, + {0x868, "OFM_WIDTH0_M1"}, + {0x86C, "OFM_HEIGHT0_M1"}, + {0x870, "OFM_HEIGHT1_M1"}, + {0x87C, "OFM_REGION"}, + {0x880, "KERNEL_WIDTH_M1"}, + {0x884, "KERNEL_HEIGHT_M1"}, + {0x888, "KERNEL_STRIDE"}, + {0x88C, "PARALLEL_MODE"}, + {0x890, "ACC_FORMAT"}, + {0x894, "ACTIVATION"}, + {0x898, "ACTIVATION_MIN"}, + {0x89C, "ACTIVATION_MAX"}, + {0x8A0, "WEIGHT_REGION"}, + {0x8A4, "SCALE_REGION"}, + {0x8B4, "AB_START"}, + {0x8BC, "BLOCKDEP"}, + {0x8C0, "DMA0_SRC_REGION"}, + {0x8C4, "DMA0_DST_REGION"}, + {0x8C8, "DMA0_SIZE0"}, + {0x8CC, "DMA0_SIZE1"}, + {0x900, "IFM2_BROADCAST"}, + {0x904, "IFM2_SCALAR"}, + {0x924, "IFM2_ZERO_POINT"}, + {0x928, "IFM2_WIDTH0_M1"}, + {0x92C, "IFM2_HEIGHT0_M1"}, + {0x930, "IFM2_HEIGHT1_M1"}, + {0x934, "IFM2_IB_START"}, + {0x93C, "IFM2_REGION"}, + {0xA00, "IFM_BASE0"}, + {0xA04, "IFM_BASE0_HI"}, + {0xA08, "IFM_BASE1"}, + {0xA0C, "IFM_BASE1_HI"}, + {0xA10, "IFM_BASE2"}, + {0xA14, "IFM_BASE2_HI"}, + {0xA18, "IFM_BASE3"}, + {0xA1C, "IFM_BASE3_HI"}, + {0xA20, "IFM_STRIDE_X"}, + {0xA24, "IFM_STRIDE_X_HI"}, + {0xA28, "IFM_STRIDE_Y"}, + {0xA2C, "IFM_STRIDE_Y_HI"}, + {0xA30, "IFM_STRIDE_C"}, + {0xA34, "IFM_STRIDE_C_HI"}, + {0xA40, "OFM_BASE0"}, + {0xA44, "OFM_BASE0_HI"}, + {0xA48, "OFM_BASE1"}, + {0xA4C, "OFM_BASE1_HI"}, + {0xA50, "OFM_BASE2"}, + {0xA54, "OFM_BASE2_HI"}, + {0xA58, "OFM_BASE3"}, + {0xA5C, "OFM_BASE3_HI"}, + {0xA60, "OFM_STRIDE_X"}, + {0xA64, "OFM_STRIDE_X_HI"}, + {0xA68, "OFM_STRIDE_Y"}, + {0xA6C, "OFM_STRIDE_Y_HI"}, + {0xA70, 
"OFM_STRIDE_C"}, + {0xA74, "OFM_STRIDE_C_HI"}, + {0xA80, "WEIGHT_BASE"}, + {0xA84, "WEIGHT_BASE_HI"}, + {0xA88, "WEIGHT_LENGTH"}, + {0xA8C, "WEIGHT_LENGTH_HI"}, + {0xA90, "SCALE_BASE"}, + {0xA94, "SCALE_BASE_HI"}, + {0xA98, "SCALE_LENGTH"}, + {0xAA0, "OFM_SCALE"}, + {0xAA4, "OFM_SCALE_SHIFT"}, + {0xAA8, "OPA_SCALE "}, + {0xAB0, "OPB_SCALE"}, + {0xAC0, "DMA0_SRC"}, + {0xAC4, "DMA0_SRC_HI"}, + {0xAC8, "DMA0_DST"}, + {0xACC, "DMA0_DST_HI"}, + {0xAD0, "DMA0_LEN"}, + {0xAD4, "DMA0_LEN_HI"}, + {0xAD8, "DMA0_SKIP0"}, + {0xADC, "DMA0_SKIP0_HI"}, + {0xAE0, "DMA0_SKIP1"}, + {0xAE4, "DMA0_SKIP1_HI"}, + {0xB00, "IFM2_BASE0"}, + {0xB04, "IFM2_BASE0_HI"}, + {0xB08, "IFM2_BASE1"}, + {0xB0C, "IFM2_BASE1_HI"}, + {0xB10, "IFM2_BASE2"}, + {0xB14, "IFM2_BASE2_HI"}, + {0xB18, "IFM2_BASE3"}, + {0xB1C, "IFM2_BASE3_HI"}, + {0xB20, "IFM2_STRIDE_X"}, + {0xB24, "IFM2_STRIDE_X_HI"}, + {0xB28, "IFM2_STRIDE_Y"}, + {0xB2C, "IFM2_STRIDE_Y_HI"}, + {0xB30, "IFM2_STRIDE_C"}, + {0xB34, "IFM2_STRIDE_C_HI"}, + {0xB40, "WEIGHT1_BASE"}, + {0xB44, "WEIGHT1_BASE_HI"}, + {0xB48, "WEIGHT1_LENGTH"}, + {0xB4C, "WEIGHT1_LENGTH_HI"}, + {0xB50, "SCALE1_BASE"}, + {0xB54, "SCALE1_BASE_HI"}, + {0xB58, "SCALE1_LENGTH"}, +}; + +static const char *lookup_name(const name_lookup_t *lookup_table, int lookup_table_count, int find) +{ + int n; + for (n = 0; n < lookup_table_count; n++) + { + if (lookup_table[n].number == find) + { + return lookup_table[n].name; + } + } + // Not found + return 0; +} + +static void dump_npu_register(struct ethosu_driver *drv, int npu_reg, int npu_reg_end) +{ + unsigned int reg_val; + const char *reg_name; + int npu_reg_name_tbl_count = sizeof(npu_reg_name_tbl) / sizeof(npu_reg_name_tbl[0]); + + LOG_INFO("dump_register %X - %X\n", npu_reg, npu_reg_end); + for (; npu_reg <= npu_reg_end; npu_reg += sizeof(int)) + { + reg_val = ethosu_read_reg(&drv->dev, npu_reg); + reg_name = lookup_name(npu_reg_name_tbl, npu_reg_name_tbl_count, npu_reg); + LOG_INFO("[0x%.4X] 0x%.8X\t%s\n", npu_reg, reg_val, (reg_name) ? 
reg_name : ""); + } +} + +static const name_lookup_t cmd0_name_tbl[] = { + {0x000, "NPU_OP_STOP"}, + {0x001, "NPU_OP_IRQ"}, + {0x002, "NPU_OP_CONV"}, + {0x003, "NPU_OP_DEPTHWISE"}, + {0x004, "NPU_OP_VECTOR_PROD"}, + {0x005, "NPU_OP_POOL"}, + {0x006, "NPU_OP_ELEMENTWISE"}, + {0x010, "NPU_OP_DMA_START"}, + {0x011, "NPU_OP_DMA_WAIT"}, + {0x012, "NPU_OP_KERNEL_WAIT"}, + {0x100, "NPU_SET_IFM_PAD_TOP"}, + {0x101, "NPU_SET_IFM_PAD_LEFT"}, + {0x102, "NPU_SET_IFM_PAD_RIGHT"}, + {0x103, "NPU_SET_IFM_PAD_BOTTOM"}, + {0x104, "NPU_SET_IFM_DEPTH_M1"}, + {0x105, "NPU_SET_IFM_PRECISION"}, + {0x107, "NPU_SET_IFM_UPSCALE"}, + {0x109, "NPU_SET_IFM_ZERO_POINT"}, + {0x10A, "NPU_SET_IFM_WIDTH0_M1"}, + {0x10B, "NPU_SET_IFM_HEIGHT0_M1"}, + {0x10C, "NPU_SET_IFM_HEIGHT1_M1"}, + {0x10D, "NPU_SET_IFM_IB_END"}, + {0x10F, "NPU_SET_IFM_REGION"}, + {0x110, "NPU_SET_OFM_BATCH_SIZE_M1"}, + {0x111, "NPU_SET_OFM_WIDTH_M1"}, + {0x112, "NPU_SET_OFM_HEIGHT_M1"}, + {0x113, "NPU_SET_OFM_DEPTH_M1"}, + {0x114, "NPU_SET_OFM_PRECISION"}, + {0x115, "NPU_SET_OFM_BLK_WIDTH_M1"}, + {0x116, "NPU_SET_OFM_BLK_HEIGHT_M1"}, + {0x117, "NPU_SET_OFM_BLK_DEPTH_M1"}, + {0x118, "NPU_SET_OFM_ZERO_POINT"}, + {0x11A, "NPU_SET_OFM_WIDTH0_M1"}, + {0x11B, "NPU_SET_OFM_HEIGHT0_M1"}, + {0x11C, "NPU_SET_OFM_HEIGHT1_M1"}, + {0x11F, "NPU_SET_OFM_REGION"}, + {0x120, "NPU_SET_KERNEL_WIDTH_M1"}, + {0x121, "NPU_SET_KERNEL_HEIGHT_M1"}, + {0x122, "NPU_SET_KERNEL_STRIDE"}, + {0x124, "NPU_SET_ACC_FORMAT"}, + {0x125, "NPU_SET_ACTIVATION"}, + {0x126, "NPU_SET_ACTIVATION_MIN"}, + {0x127, "NPU_SET_ACTIVATION_MAX"}, + {0x128, "NPU_SET_WEIGHT_REGION"}, + {0x129, "NPU_SET_SCALE_REGION"}, + {0x12D, "NPU_SET_AB_START"}, + {0x12F, "NPU_SET_BLOCKDEP"}, + {0x130, "NPU_SET_DMA0_SRC_REGION"}, + {0x131, "NPU_SET_DMA0_DST_REGION"}, + {0x180, "NPU_SET_IFM2_BROADCAST"}, + {0x181, "NPU_SET_IFM2_SCALAR"}, + {0x185, "NPU_SET_IFM2_PRECISION"}, + {0x189, "NPU_SET_IFM2_ZERO_POINT"}, + {0x18A, "NPU_SET_IFM2_WIDTH0_M1"}, + {0x18B, "NPU_SET_IFM2_HEIGHT0_M1"}, + {0x18C, "NPU_SET_IFM2_HEIGHT1_M1"}, + {0x18D, "NPU_SET_IFM2_IB_START"}, + {0x18F, "NPU_SET_IFM2_REGION"}, +}; + +static const name_lookup_t cmd1_name_tbl[] = { + {0x000, "NPU_SET_IFM_BASE0"}, {0x001, "NPU_SET_IFM_BASE1"}, {0x002, "NPU_SET_IFM_BASE2"}, + {0x003, "NPU_SET_IFM_BASE3"}, {0x004, "NPU_SET_IFM_STRIDE_X"}, {0x005, "NPU_SET_IFM_STRIDE_Y"}, + {0x006, "NPU_SET_IFM_STRIDE_C"}, {0x007, "NPU_SET_IFM_STRIDE_N"}, {0x010, "NPU_SET_OFM_BASE0"}, + {0x011, "NPU_SET_OFM_BASE1"}, {0x012, "NPU_SET_OFM_BASE2"}, {0x013, "NPU_SET_OFM_BASE3"}, + {0x014, "NPU_SET_OFM_STRIDE_X"}, {0x015, "NPU_SET_OFM_STRIDE_Y"}, {0x016, "NPU_SET_OFM_STRIDE_C"}, + {0x017, "NPU_SET_OFM_STRIDE_N"}, {0x020, "NPU_SET_WEIGHT_BASE"}, {0x021, "NPU_SET_WEIGHT_LENGTH"}, + {0x022, "NPU_SET_SCALE_BASE"}, {0x023, "NPU_SET_SCALE_LENGTH"}, {0x024, "NPU_SET_OFM_SCALE"}, + {0x025, "NPU_SET_OPA_SCALE"}, {0x026, "NPU_SET_OPB_SCALE"}, {0x030, "NPU_SET_DMA0_SRC"}, + {0x031, "NPU_SET_DMA0_DST"}, {0x032, "NPU_SET_DMA0_LEN"}, {0x080, "NPU_SET_IFM2_BASE0"}, + {0x081, "NPU_SET_IFM2_BASE1"}, {0x082, "NPU_SET_IFM2_BASE2"}, {0x083, "NPU_SET_IFM2_BASE3"}, + {0x084, "NPU_SET_IFM2_STRIDE_X"}, {0x085, "NPU_SET_IFM2_STRIDE_Y"}, {0x086, "NPU_SET_IFM2_STRIDE_C"}, +}; + +static void dump_command_stream(const uint32_t *cmd_stream, const int cms_length, int qread) +{ + int n; + int offset; + uint32_t cmd_val; + const uint8_t *cmd_ptr; + const char *cmd_name; + int cmd0_name_tbl_count = sizeof(cmd0_name_tbl) / sizeof(cmd0_name_tbl[0]); + int cmd1_name_tbl_count = sizeof(cmd1_name_tbl) / 
sizeof(cmd1_name_tbl[0]); + + LOG_INFO("dump_command_stream cmd_stream = 0x%8p cms_length = %d\n", cmd_stream, cms_length); + for (n = 0; n < cms_length; n++) + { + // Offset + offset = n * sizeof(int); + LOG_INFO("[%.4d] ", offset); + // Command + cmd_ptr = (const uint8_t *)&cmd_stream[n]; + LOG_INFO("0x%.2X 0x%.2X 0x%.2X 0x%.2X ", cmd_ptr[0], cmd_ptr[1], cmd_ptr[2], cmd_ptr[3]); + // Command name and payload + if (cmd_stream[n] & 0x4000) + { + cmd_name = lookup_name(cmd1_name_tbl, cmd1_name_tbl_count, cmd_stream[n] & 0x3FF); + n++; + cmd_val = cmd_stream[n]; + cmd_ptr = (const uint8_t *)&cmd_stream[n]; + LOG_INFO("0x%.2X 0x%.2X 0x%.2X 0x%.2X ", cmd_ptr[0], cmd_ptr[1], cmd_ptr[2], cmd_ptr[3]); + } + else + { + cmd_val = cmd_stream[n] >> 16; + cmd_name = lookup_name(cmd0_name_tbl, cmd0_name_tbl_count, cmd_stream[n] & 0x3FF); + } + if (cmd_name) + { + LOG_INFO("\t%s 0x%.8" PRIX32, cmd_name, cmd_val); + } + if (offset == qread) + { + LOG_INFO(" <<== QREAD\n"); + } + else + { + LOG_INFO("\n"); + } + } +} + +#endif //EI ETHOS diff --git a/edgeimpulse/edge-impulse-sdk/porting/ethos-core-driver/src/ethosu_pmu.c b/edgeimpulse/edge-impulse-sdk/porting/ethos-core-driver/src/ethosu_pmu.c new file mode 100644 index 0000000..818b20d --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/ethos-core-driver/src/ethosu_pmu.c @@ -0,0 +1,314 @@ +/* + * Copyright (c) 2019-2021 Arm Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/***************************************************************************** + * Includes + *****************************************************************************/ +#if EI_ETHOS + +#include "ethosu55_interface.h" +#include "ethosu_common.h" +#include "ethosu_driver.h" +#include "pmu_ethosu.h" + +#include +#include +#include + +/***************************************************************************** + * Defines + *****************************************************************************/ + +#define COMMA , +#define SEMICOLON ; + +#define EVTYPE(A, name) \ + case PMU_EVENT_TYPE_##name: \ + return ETHOSU_PMU_##name + +#define EVID(A, name) (PMU_EVENT_TYPE_##name) + +#define NPU_REG_PMEVCNTR(x) (NPU_REG_PMEVCNTR0 + ((x) * sizeof(uint32_t))) +#define NPU_REG_PMEVTYPER(x) (NPU_REG_PMEVTYPER0 + ((x) * sizeof(uint32_t))) + +/***************************************************************************** + * Variables + *****************************************************************************/ + +static const enum pmu_event_type eventbyid[] = {EXPAND_PMU_EVENT_TYPE(EVID, COMMA)}; + +/***************************************************************************** + * Static functions + *****************************************************************************/ + +static enum ethosu_pmu_event_type pmu_event_type(uint32_t id) +{ + switch (id) + { + EXPAND_PMU_EVENT_TYPE(EVTYPE, SEMICOLON); + default: + LOG_ERR("Unknown PMU event id: 0x%" PRIx32 "\n", id); + } + + return ETHOSU_PMU_SENTINEL; +} + +static uint32_t pmu_event_value(enum ethosu_pmu_event_type event) +{ + int a = event; + if ((a < ETHOSU_PMU_SENTINEL) && (a >= ETHOSU_PMU_NO_EVENT)) + { + return eventbyid[event]; + } + else + { + return (uint32_t)(-1); + } +} + +/***************************************************************************** + * Functions + *****************************************************************************/ + +void ETHOSU_PMU_Enable(struct ethosu_driver *drv) +{ + LOG_DEBUG("%s:\n", __FUNCTION__); + struct pmcr_r pmcr; + pmcr.word = drv->dev.pmcr; + pmcr.cnt_en = 1; + set_clock_and_power_request(drv, ETHOSU_PMU_REQUEST, ETHOSU_CLOCK_Q_DISABLE, ETHOSU_POWER_Q_DISABLE); + ethosu_write_reg_shadow(&drv->dev, NPU_REG_PMCR, pmcr.word, &drv->dev.pmcr); +} + +void ETHOSU_PMU_Disable(struct ethosu_driver *drv) +{ + LOG_DEBUG("%s:\n", __FUNCTION__); + struct pmcr_r pmcr; + pmcr.word = drv->dev.pmcr; + pmcr.cnt_en = 0; + set_clock_and_power_request(drv, ETHOSU_PMU_REQUEST, ETHOSU_CLOCK_Q_ENABLE, ETHOSU_POWER_Q_ENABLE); + ethosu_write_reg_shadow(&drv->dev, NPU_REG_PMCR, pmcr.word, &drv->dev.pmcr); +} + +void ETHOSU_PMU_Set_EVTYPER(struct ethosu_driver *drv, uint32_t num, enum ethosu_pmu_event_type type) +{ + assert(num < ETHOSU_PMU_NCOUNTERS); + uint32_t val = pmu_event_value(type); + LOG_DEBUG("%s: num=%u, type=%d, val=%u\n", __FUNCTION__, num, type, val); + ethosu_write_reg_shadow(&drv->dev, NPU_REG_PMEVTYPER(num), val, &drv->dev.pmu_evtypr[num]); +} + +enum ethosu_pmu_event_type ETHOSU_PMU_Get_EVTYPER(struct ethosu_driver *drv, uint32_t num) +{ + assert(num < ETHOSU_PMU_NCOUNTERS); + uint32_t val = drv->dev.pmu_evtypr[num]; + enum ethosu_pmu_event_type type = pmu_event_type(val); + LOG_DEBUG("%s: num=%u, type=%d, val=%u\n", __FUNCTION__, num, type, val); + return type; +} + +void ETHOSU_PMU_CYCCNT_Reset(struct ethosu_driver *drv) +{ + LOG_DEBUG("%s:\n", __FUNCTION__); + struct pmcr_r pmcr; + pmcr.word = drv->dev.pmcr; + pmcr.cycle_cnt_rst = 1; + ethosu_write_reg_shadow(&drv->dev, NPU_REG_PMCR, 
pmcr.word, &drv->dev.pmcr); + drv->dev.pmccntr[0] = 0; + drv->dev.pmccntr[1] = 0; +} + +void ETHOSU_PMU_EVCNTR_ALL_Reset(struct ethosu_driver *drv) +{ + LOG_DEBUG("%s:\n", __FUNCTION__); + struct pmcr_r pmcr; + pmcr.word = drv->dev.pmcr; + pmcr.event_cnt_rst = 1; + ethosu_write_reg_shadow(&drv->dev, NPU_REG_PMCR, pmcr.word, &drv->dev.pmcr); + + for (uint32_t i = 0; i < ETHOSU_PMU_NCOUNTERS; i++) + { + drv->dev.pmu_evcntr[i] = 0; + } +} + +void ETHOSU_PMU_CNTR_Enable(struct ethosu_driver *drv, uint32_t mask) +{ + LOG_DEBUG("%s: mask=0x%08x\n", __FUNCTION__, mask); + ethosu_write_reg_shadow(&drv->dev, NPU_REG_PMCNTENSET, mask, &drv->dev.pmcnten); +} + +void ETHOSU_PMU_CNTR_Disable(struct ethosu_driver *drv, uint32_t mask) +{ + LOG_DEBUG("%s: mask=0x%08x\n", __FUNCTION__, mask); + ethosu_write_reg_shadow(&drv->dev, NPU_REG_PMCNTENCLR, mask, &drv->dev.pmcnten); +} + +uint32_t ETHOSU_PMU_CNTR_Status(struct ethosu_driver *drv) +{ + LOG_DEBUG("%s: mask=0x%08x\n", __FUNCTION__, drv->dev.pmcnten); + return drv->dev.pmcnten; +} + +uint64_t ETHOSU_PMU_Get_CCNTR(struct ethosu_driver *drv) +{ + uint32_t val_lo = ethosu_read_reg(&drv->dev, NPU_REG_PMCCNTR_LO); + uint32_t val_hi = ethosu_read_reg(&drv->dev, NPU_REG_PMCCNTR_HI); + uint64_t val = ((uint64_t)val_hi << 32) | val_lo; + uint64_t shadow = ((uint64_t)drv->dev.pmccntr[1] << 32) | drv->dev.pmccntr[0]; + + LOG_DEBUG("%s: val=%" PRIu64 ", shadow=%" PRIu64 "\n", __FUNCTION__, val, shadow); + + // Return the shadow variable in case the NPU was powered off and lost the cycle count + if (shadow > val) + { + return shadow; + } + + // Update the shadow variable + drv->dev.pmccntr[0] = val_lo; + drv->dev.pmccntr[1] = val_hi; + + return val; +} + +void ETHOSU_PMU_Set_CCNTR(struct ethosu_driver *drv, uint64_t val) +{ + uint32_t active = ETHOSU_PMU_CNTR_Status(drv) & ETHOSU_PMU_CCNT_Msk; + + LOG_DEBUG("%s: val=%llu\n", __FUNCTION__, val); + + if (active) + { + ETHOSU_PMU_CNTR_Disable(drv, ETHOSU_PMU_CCNT_Msk); + } + + ethosu_write_reg_shadow(&drv->dev, NPU_REG_PMCCNTR_LO, val & MASK_0_31_BITS, &drv->dev.pmccntr[0]); + ethosu_write_reg_shadow(&drv->dev, NPU_REG_PMCCNTR_HI, (val & MASK_32_47_BITS) >> 32, &drv->dev.pmccntr[1]); + + if (active) + { + ETHOSU_PMU_CNTR_Enable(drv, ETHOSU_PMU_CCNT_Msk); + } +} + +uint32_t ETHOSU_PMU_Get_EVCNTR(struct ethosu_driver *drv, uint32_t num) +{ + assert(num < ETHOSU_PMU_NCOUNTERS); + uint32_t val = ethosu_read_reg(&drv->dev, NPU_REG_PMEVCNTR(num)); + LOG_DEBUG("%s: num=%u, val=%u, shadow=%u\n", __FUNCTION__, num, val, drv->dev.pmu_evcntr[num]); + + // Return the shadow variable in case the NPU was powered off and lost the event count + if (drv->dev.pmu_evcntr[num] > val) + { + return drv->dev.pmu_evcntr[num]; + } + + // Update the shadow variable + drv->dev.pmu_evcntr[num] = val; + + return val; +} + +void ETHOSU_PMU_Set_EVCNTR(struct ethosu_driver *drv, uint32_t num, uint32_t val) +{ + assert(num < ETHOSU_PMU_NCOUNTERS); + LOG_DEBUG("%s: num=%u, val=%u\n", __FUNCTION__, num, val); + ethosu_write_reg(&drv->dev, NPU_REG_PMEVCNTR(num), val); +} + +uint32_t ETHOSU_PMU_Get_CNTR_OVS(struct ethosu_driver *drv) +{ + LOG_DEBUG("%s:\n", __FUNCTION__); + return ethosu_read_reg(&drv->dev, NPU_REG_PMOVSSET); +} + +void ETHOSU_PMU_Set_CNTR_OVS(struct ethosu_driver *drv, uint32_t mask) +{ + LOG_DEBUG("%s:\n", __FUNCTION__); + ethosu_write_reg(&drv->dev, NPU_REG_PMOVSCLR, mask); +} + +void ETHOSU_PMU_Set_CNTR_IRQ_Enable(struct ethosu_driver *drv, uint32_t mask) +{ + LOG_DEBUG("%s: mask=0x%08x\n", __FUNCTION__, mask); + 
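    /* Setting bits in NPU_REG_PMINTSET enables the PMU counter interrupt for the counters selected by mask; drv->dev.pmint mirrors the enable state. */ + 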
ethosu_write_reg_shadow(&drv->dev, NPU_REG_PMINTSET, mask, &drv->dev.pmint); +} + +void ETHOSU_PMU_Set_CNTR_IRQ_Disable(struct ethosu_driver *drv, uint32_t mask) +{ + LOG_DEBUG("%s: mask=0x%08x\n", __FUNCTION__, mask); + ethosu_write_reg_shadow(&drv->dev, NPU_REG_PMINTCLR, mask, &drv->dev.pmint); +} + +uint32_t ETHOSU_PMU_Get_IRQ_Enable(struct ethosu_driver *drv) +{ + LOG_DEBUG("%s: mask=0x%08x\n", __FUNCTION__, drv->dev.pmint); + return drv->dev.pmint; +} + +void ETHOSU_PMU_CNTR_Increment(struct ethosu_driver *drv, uint32_t mask) +{ + LOG_DEBUG("%s:\n", __FUNCTION__); + uint32_t cntrs_active = ETHOSU_PMU_CNTR_Status(drv); + + // Disable counters + ETHOSU_PMU_CNTR_Disable(drv, mask); + + // Increment cycle counter + if (mask & ETHOSU_PMU_CCNT_Msk) + { + uint64_t val = ETHOSU_PMU_Get_CCNTR(drv) + 1; + ethosu_write_reg_shadow(&drv->dev, NPU_REG_PMCCNTR_LO, val & MASK_0_31_BITS, &drv->dev.pmccntr[0]); + ethosu_write_reg_shadow(&drv->dev, NPU_REG_PMCCNTR_HI, (val & MASK_32_47_BITS) >> 32, &drv->dev.pmccntr[1]); + } + + for (int i = 0; i < ETHOSU_PMU_NCOUNTERS; i++) + { + if (mask & (1 << i)) + { + uint32_t val = ETHOSU_PMU_Get_EVCNTR(drv, i); + ethosu_write_reg_shadow(&drv->dev, NPU_REG_PMEVCNTR(i), val + 1, &drv->dev.pmu_evcntr[i]); + } + } + + // Reenable the active counters + ETHOSU_PMU_CNTR_Enable(drv, cntrs_active); +} + +void ETHOSU_PMU_PMCCNTR_CFG_Set_Start_Event(struct ethosu_driver *drv, enum ethosu_pmu_event_type start_event) +{ + LOG_DEBUG("%s: start_event=%u\n", __FUNCTION__, start_event); + uint32_t val = pmu_event_value(start_event); + struct pmccntr_cfg_r cfg; + cfg.word = drv->dev.pmccntr_cfg; + cfg.CYCLE_CNT_CFG_START = val; + ethosu_write_reg_shadow(&drv->dev, NPU_REG_PMCCNTR_CFG, cfg.word, &drv->dev.pmccntr_cfg); +} + +void ETHOSU_PMU_PMCCNTR_CFG_Set_Stop_Event(struct ethosu_driver *drv, enum ethosu_pmu_event_type stop_event) +{ + LOG_DEBUG("%s: stop_event=%u\n", __FUNCTION__, stop_event); + uint32_t val = pmu_event_value(stop_event); + struct pmccntr_cfg_r cfg; + cfg.word = drv->dev.pmccntr_cfg; + cfg.CYCLE_CNT_CFG_STOP = val; + ethosu_write_reg_shadow(&drv->dev, NPU_REG_PMCCNTR_CFG, cfg.word, &drv->dev.pmccntr_cfg); +} + +#endif //EI ETHOS diff --git a/edgeimpulse/edge-impulse-sdk/porting/himax/debug_log.cpp b/edgeimpulse/edge-impulse-sdk/porting/himax/debug_log.cpp new file mode 100644 index 0000000..9022698 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/himax/debug_log.cpp @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "../ei_classifier_porting.h" +#if EI_PORTING_HIMAX == 1 + +#include "edge-impulse-sdk/tensorflow/lite/micro/debug_log.h" +#include +#include + +// Redirect TFLite DebugLog to ei_printf +#if defined(__cplusplus) && EI_C_LINKAGE == 1 +extern "C" +#endif // defined(__cplusplus) && EI_C_LINKAGE == 1 +void DebugLog(const char* s) { + ei_printf("%s", s); +} + +#endif // EI_PORTING_HIMAX == 1 diff --git a/edgeimpulse/edge-impulse-sdk/porting/himax/ei_classifier_porting.cpp b/edgeimpulse/edge-impulse-sdk/porting/himax/ei_classifier_porting.cpp new file mode 100644 index 0000000..1b62b56 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/himax/ei_classifier_porting.cpp @@ -0,0 +1,168 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "../ei_classifier_porting.h" +#if EI_PORTING_HIMAX == 1 + +/* Include ----------------------------------------------------------------- */ +#include +#include +#include +#include "hx_drv_tflm.h" +#include + + +/* Constants ---------------------------------------------------------------- */ +#define HIMAX_TIMER_CLK_FREQ_HZ 400000000 +#define HIMAX_TIMER_TICK_1SEC (HIMAX_TIMER_CLK_FREQ_HZ/1) +#define HIMAX_TIMER_TICK_1MSEC (HIMAX_TIMER_TICK_1SEC/1000) + +extern "C" void print_out(const char *format, va_list args); + +/* Private variables -------------------------------------------------------- */ +static uint64_t system_time_ms = 0; +static uint32_t prev_tick_us = 0; + +__attribute__((weak)) EI_IMPULSE_ERROR ei_run_impulse_check_canceled() { + return EI_IMPULSE_OK; +} + +/** + * Cancelable sleep, can be triggered with signal from other thread + */ +__attribute__((weak)) EI_IMPULSE_ERROR ei_sleep(int32_t time_ms) { + uint64_t end_delay, cur_time = 0; + + end_delay = (uint64_t)time_ms + ei_read_timer_ms(); + + do { + cur_time = ei_read_timer_ms(); + } while (cur_time < end_delay); + + return EI_IMPULSE_OK; +} + +// Should be called at least once every ~10.7 seconds +uint64_t ei_read_timer_ms() +{ + uint32_t tick_us, diff_tick_us, elapsed_time_ms; + + // handles 32-bit overflows + hx_drv_tick_get(&tick_us); + diff_tick_us = (uint32_t)(tick_us - prev_tick_us); + + // integer number of ms elapsed + elapsed_time_ms = diff_tick_us / HIMAX_TIMER_TICK_1MSEC; + + // update system time and previous tick reference + if (elapsed_time_ms > 0) { + system_time_ms += elapsed_time_ms; + + // use the remainder of ms elapsed + // handles 32-bit overflows + prev_tick_us = (uint32_t)(tick_us - (diff_tick_us % HIMAX_TIMER_TICK_1MSEC)); + } + + return system_time_ms; +} + +uint64_t ei_read_timer_us() +{ + return ei_read_timer_ms() * 1000; +} + +void ei_serial_set_baudrate(int baudrate) +{ + hx_drv_uart_initial((HX_DRV_UART_BAUDRATE_E)baudrate); +} + +void ei_putchar(char c) +{ + /* Send char to serial output */ + hx_drv_uart_print("%c", c); +} + +__attribute__((weak)) void ei_printf(const char *format, ...) 
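/* Forwards the format string and argument list to the Himax BSP print_out() handler declared above. */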
{ + va_list args; + va_start(args, format); + print_out(format, args); + va_end(args); +} + +__attribute__((weak)) void ei_printf_float(float f) { + float n = f; + + static double PRECISION = 0.00001; + static int MAX_NUMBER_STRING_SIZE = 32; + + char s[MAX_NUMBER_STRING_SIZE]; + + if (n == 0.0) { + ei_printf("0.00000"); + } else { + int digit, m; //, m1; + char *c = s; + int neg = (n < 0); + if (neg) { + n = -n; + } + // calculate magnitude + m = log10(n); + if (neg) { + *(c++) = '-'; + } + if (m < 1.0) { + m = 0; + } + // convert the number + while (n > PRECISION || m >= 0) { + double weight = pow(10.0, m); + if (weight > 0 && !isinf(weight)) { + digit = floor(n / weight); + n -= (digit * weight); + *(c++) = '0' + digit; + } + if (m == 0 && n > 0) { + *(c++) = '.'; + } + m--; + } + *(c) = '\0'; + ei_printf("%s", s); + } +} + +__attribute__((weak)) void *ei_malloc(size_t size) { + return malloc(size); +} + +__attribute__((weak)) void *ei_calloc(size_t nitems, size_t size) { + return calloc(nitems, size); +} + +__attribute__((weak)) void ei_free(void *ptr) { + free(ptr); +} + +#if defined(__cplusplus) && EI_C_LINKAGE == 1 +extern "C" +#endif +__attribute__((weak)) void DebugLog(const char* s) { + ei_printf("%s", s); +} + +#endif // #if EI_PORTING_HIMAX == 1 diff --git a/edgeimpulse/edge-impulse-sdk/porting/iar/debug_log.cpp b/edgeimpulse/edge-impulse-sdk/porting/iar/debug_log.cpp new file mode 100644 index 0000000..ae378e4 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/iar/debug_log.cpp @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "../ei_classifier_porting.h" +#if EI_PORTING_IAR == 1 + +#include "edge-impulse-sdk/tensorflow/lite/micro/debug_log.h" +#include +#include + +// Redirect TFLite DebugLog to ei_printf +#if defined(__cplusplus) && EI_C_LINKAGE == 1 +extern "C" +#endif // defined(__cplusplus) && EI_C_LINKAGE == 1 +void DebugLog(const char* s) { + ei_printf("%s", s); +} + +#endif // EI_PORTING_IAR == 1 diff --git a/edgeimpulse/edge-impulse-sdk/porting/iar/ei_classifier_porting.cpp b/edgeimpulse/edge-impulse-sdk/porting/iar/ei_classifier_porting.cpp new file mode 100644 index 0000000..3d450f0 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/iar/ei_classifier_porting.cpp @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "../ei_classifier_porting.h" +#if EI_PORTING_IAR == 1 + +#include +#include +#include + +#include "main.h" +#include "stm32f4xx_hal.h" + + +__attribute__((weak)) EI_IMPULSE_ERROR ei_run_impulse_check_canceled() { + return EI_IMPULSE_OK; +} + +/** + * Cancelable sleep, can be triggered with signal from other thread + */ +__attribute__((weak)) EI_IMPULSE_ERROR ei_sleep(int32_t time_ms) { + + HAL_Delay(time_ms); + + return EI_IMPULSE_OK; +} + +uint64_t ei_read_timer_ms() { + + return HAL_GetTick(); +} + +uint64_t ei_read_timer_us() { + + return HAL_GetTick() * 1000; +} + +__attribute__((weak)) void ei_printf(const char *format, ...) { + + va_list myargs; + va_start(myargs, format); + vprintf(format, myargs); + va_end(myargs); +} + +__attribute__((weak)) void ei_printf_float(float f) { + ei_printf("%f", f); +} + +__attribute__((weak)) void ei_putchar(char data) +{ + putchar(data); +} + +__attribute__((weak)) void *ei_malloc(size_t size) { + return malloc(size); +} + +__attribute__((weak)) void *ei_calloc(size_t nitems, size_t size) { + return calloc(nitems, size); +} + +__attribute__((weak)) void ei_free(void *ptr) { + free(ptr); +} + +#if defined(__cplusplus) && EI_C_LINKAGE == 1 +extern "C" +#endif +__attribute__((weak)) void DebugLog(const char* s) { + ei_printf("%s", s); +} + +#endif // EI_PORTING_IAR == 1 diff --git a/edgeimpulse/edge-impulse-sdk/porting/infineon-psoc62/debug_log.cpp b/edgeimpulse/edge-impulse-sdk/porting/infineon-psoc62/debug_log.cpp new file mode 100644 index 0000000..47dfb2f --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/infineon-psoc62/debug_log.cpp @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "../ei_classifier_porting.h" +#if EI_PORTING_INFINEONPSOC62 == 1 + +#include "edge-impulse-sdk/tensorflow/lite/micro/debug_log.h" +#include +#include + +// Redirect TFLite DebugLog to ei_printf +#if defined(__cplusplus) && EI_C_LINKAGE == 1 +extern "C" +#endif // defined(__cplusplus) && EI_C_LINKAGE == 1 +void DebugLog(const char* s) { + ei_printf("%s", s); +} + +#endif // EI_PORTING_INFINEONPSOC62 == 1 diff --git a/edgeimpulse/edge-impulse-sdk/porting/infineon-psoc62/ei_classifier_porting.cpp b/edgeimpulse/edge-impulse-sdk/porting/infineon-psoc62/ei_classifier_porting.cpp new file mode 100644 index 0000000..23e5b27 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/infineon-psoc62/ei_classifier_porting.cpp @@ -0,0 +1,158 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "../ei_classifier_porting.h" +#if EI_PORTING_INFINEONPSOC62 == 1 + +#include +#include +#include +#include "unistd.h" +#include "cyhal.h" +#ifdef FREERTOS_ENABLED +#include +#include +#include +#else /* bare-metal */ +#include "cyhal_lptimer.h" + +static bool timer_init = false; +static volatile uint64_t tick = 0; + +static void systick_isr(void) +{ + tick++; +} +#endif + +__attribute__((weak)) EI_IMPULSE_ERROR ei_run_impulse_check_canceled() { + return EI_IMPULSE_OK; +} + +#ifdef FREERTOS_ENABLED +__attribute__((weak)) EI_IMPULSE_ERROR ei_sleep(int32_t time_ms) { + vTaskDelay(time_ms / portTICK_PERIOD_MS); + + return EI_IMPULSE_OK; +} + +__attribute__((weak)) uint64_t ei_read_timer_ms() { + + return xTaskGetTickCount(); +} + +__attribute__((weak)) uint64_t ei_read_timer_us() { + + return xTaskGetTickCount()*1000; +} +#else /* Bare-metal */ +__attribute__((weak)) EI_IMPULSE_ERROR ei_sleep(int32_t time_ms) { + cyhal_system_delay_ms(time_ms); + return EI_IMPULSE_OK; +} + +uint64_t ei_read_timer_ms() { + if(timer_init == false) { + cyhal_clock_t clock; + uint32_t freq; + + // get IMO clock frequency + cyhal_clock_reserve(&clock, &CYHAL_CLOCK_IMO); + freq = cyhal_clock_get_frequency(&clock); + cyhal_clock_free(&clock); + + // set SysTick to 1 ms + Cy_SysTick_Init(CY_SYSTICK_CLOCK_SOURCE_CLK_IMO, (freq / 1000) - 1); + Cy_SysTick_SetCallback(0, systick_isr); + timer_init = true; + return 0; + } + return tick; +} + +uint64_t ei_read_timer_us() { + return ei_read_timer_ms() * 1000; +} +#endif /* FREERTOS_ENABLED */ + +void ei_putchar(char c) +{ + putchar(c); +} + +__attribute__((weak)) char ei_getchar(void) +{ + return getchar(); +} + +__attribute__((weak)) void ei_printf(const char *format, ...) 
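/* Formats into a fixed 256-byte stack buffer with vsnprintf(), so longer messages are truncated before being written out with printf(). */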
{ + + char buffer[256]; + va_list myargs; + va_start(myargs, format); + vsnprintf(buffer, 256, format, myargs); + va_end(myargs); + + printf("%s", buffer); +} + +__attribute__((weak)) void ei_printf_float(float f) { + ei_printf("%f", f); +} + +#ifdef FREERTOS_ENABLED +__attribute__((weak)) void *ei_malloc(size_t size) { + return pvPortMalloc(size); +} + +__attribute__((weak)) void *ei_calloc(size_t nitems, size_t size) { + void *mem = NULL; + + /* Infineon port of FreeRTOS does not support pvPortCalloc */ + mem = pvPortMalloc(nitems * size); + if (mem) { + /* zero the memory */ + memset(mem, 0, nitems * size); + } + return mem; +} + +__attribute__((weak)) void ei_free(void *ptr) { + vPortFree(ptr); +} +#else +__attribute__((weak)) void *ei_malloc(size_t size) { + return malloc(size); +} + +__attribute__((weak)) void *ei_calloc(size_t nitems, size_t size) { + return calloc(nitems, size); +} + +__attribute__((weak)) void ei_free(void *ptr) { + free(ptr); +} +#endif + +#if defined(__cplusplus) && EI_C_LINKAGE == 1 +extern "C" +#endif +__attribute__((weak)) void DebugLog(const char* s) { + ei_printf("%s", s); +} + +#endif // EI_PORTING_INFINEONPSOC62 diff --git a/edgeimpulse/edge-impulse-sdk/porting/mbed/debug_log.cpp b/edgeimpulse/edge-impulse-sdk/porting/mbed/debug_log.cpp new file mode 100644 index 0000000..a7ac637 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/mbed/debug_log.cpp @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "../ei_classifier_porting.h" +#if EI_PORTING_MBED == 1 + +#include "edge-impulse-sdk/tensorflow/lite/micro/debug_log.h" +#include +#include + +// On mbed platforms, we set up a serial port and write to it for debug logging. +#if defined(__cplusplus) && EI_C_LINKAGE == 1 +extern "C" +#endif // defined(__cplusplus) && EI_C_LINKAGE == 1 +void DebugLog(const char* s) { + ei_printf("%s", s); +} + +#endif // EI_PORTING_MBED == 1 diff --git a/edgeimpulse/edge-impulse-sdk/porting/mbed/ei_classifier_porting.cpp b/edgeimpulse/edge-impulse-sdk/porting/mbed/ei_classifier_porting.cpp new file mode 100644 index 0000000..02a1431 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/mbed/ei_classifier_porting.cpp @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "../ei_classifier_porting.h" +#if EI_PORTING_MBED == 1 + +#include "mbed.h" +#include +#include +#include "us_ticker_api.h" + +#define EI_WEAK_FN __attribute__((weak)) + +EI_WEAK_FN EI_IMPULSE_ERROR ei_run_impulse_check_canceled() { + return EI_IMPULSE_OK; +} + +/** + * Cancelable sleep, can be triggered with signal from other thread + */ +EI_WEAK_FN EI_IMPULSE_ERROR ei_sleep(int32_t time_ms) { +#if MBED_VERSION >= MBED_ENCODE_VERSION((5), (11), (0)) + rtos::ThisThread::sleep_for(time_ms); +#else + wait_ms(time_ms); +#endif // MBED_VERSION >= MBED_ENCODE_VERSION((5), (11), (0)) + return EI_IMPULSE_OK; +} + +uint64_t ei_read_timer_ms() { +#if DEVICE_USTICKER + return us_ticker_read() / 1000L; +#elif DEVICE_LPTICKER + return ei_read_timer_us() / 1000L; +#else + #error "Target does not have DEVICE_LPTICKER nor DEVICE_USTICKER" +#endif +} + +uint64_t ei_read_timer_us() { +#if DEVICE_USTICKER + return us_ticker_read(); +#elif DEVICE_LPTICKER + const ticker_info_t *info = lp_ticker_get_info(); + uint32_t n_ticks = lp_ticker_read(); + return (uint64_t)n_ticks * (1000000UL / info->frequency); +#else + #error "Target does not have DEVICE_LPTICKER nor DEVICE_USTICKER" +#endif +} + +__attribute__((weak)) void ei_printf(const char *format, ...) { + va_list myargs; + va_start(myargs, format); + vprintf(format, myargs); + va_end(myargs); +} + +__attribute__((weak)) void ei_printf_float(float f) { + ei_printf("%f", f); +} + +__attribute__((weak)) void *ei_malloc(size_t size) { + return malloc(size); +} + +__attribute__((weak)) void *ei_calloc(size_t nitems, size_t size) { + return calloc(nitems, size); +} + +__attribute__((weak)) void ei_free(void *ptr) { + free(ptr); +} + +#if defined(__cplusplus) && EI_C_LINKAGE == 1 +extern "C" +#endif +__attribute__((weak)) void DebugLog(const char* s) { + ei_printf("%s", s); +} + +#endif // EI_PORTING_MBED == 1 diff --git a/edgeimpulse/edge-impulse-sdk/porting/mingw32/debug_log.cpp b/edgeimpulse/edge-impulse-sdk/porting/mingw32/debug_log.cpp new file mode 100644 index 0000000..3b57943 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/mingw32/debug_log.cpp @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "../ei_classifier_porting.h" +#if EI_PORTING_MINGW32 == 1 + +#include "edge-impulse-sdk/tensorflow/lite/micro/debug_log.h" +#include +#include + +// Redirect TFLite DebugLog to ei_printf +#if defined(__cplusplus) && EI_C_LINKAGE == 1 +extern "C" +#endif // defined(__cplusplus) && EI_C_LINKAGE == 1 +void DebugLog(const char* s) { + ei_printf("%s", s); +} + +#endif // EI_PORTING_MINGW32 == 1 diff --git a/edgeimpulse/edge-impulse-sdk/porting/mingw32/ei_classifier_porting.cpp b/edgeimpulse/edge-impulse-sdk/porting/mingw32/ei_classifier_porting.cpp new file mode 100644 index 0000000..6223e17 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/mingw32/ei_classifier_porting.cpp @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "../ei_classifier_porting.h" +#if EI_PORTING_MINGW32 == 1 + +#include +#include +#include +#include +#include +#include +#include +#include + +EI_IMPULSE_ERROR ei_run_impulse_check_canceled() { + return EI_IMPULSE_OK; +} + +/** + * Cancelable sleep, can be triggered with signal from other thread + */ +EI_IMPULSE_ERROR ei_sleep(int32_t time_ms) { + usleep(time_ms * 1000); + return EI_IMPULSE_OK; +} + +uint64_t ei_read_timer_ms() { + auto now = std::chrono::system_clock::now(); + auto duration = now.time_since_epoch(); + auto millis = std::chrono::duration_cast(duration).count(); + return static_cast(millis); +} + +uint64_t ei_read_timer_us() { + auto now = std::chrono::system_clock::now(); + auto duration = now.time_since_epoch(); + auto micros = std::chrono::duration_cast(duration).count(); + return static_cast(micros); +} + +void ei_printf(const char *format, ...) { + va_list myargs; + va_start(myargs, format); + vprintf(format, myargs); + va_end(myargs); +} + +void ei_printf_float(float f) { + ei_printf("%f", f); +} + +void *ei_malloc(size_t size) { + return malloc(size); +} + +void *ei_calloc(size_t nitems, size_t size) { + return calloc(nitems, size); +} + +void ei_free(void *ptr) { + free(ptr); +} + +#endif // EI_PORTING_MINGW32 == 1 diff --git a/edgeimpulse/edge-impulse-sdk/porting/particle/debug_log.cpp b/edgeimpulse/edge-impulse-sdk/porting/particle/debug_log.cpp new file mode 100644 index 0000000..58b697b --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/particle/debug_log.cpp @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. 
See the License for the specific language + * governing permissions and limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "../ei_classifier_porting.h" +#if EI_PORTING_PARTICLE == 1 + +#include "edge-impulse-sdk/tensorflow/lite/micro/debug_log.h" +#include +#include + +// On mbed platforms, we set up a serial port and write to it for debug logging. +#if defined(__cplusplus) && EI_C_LINKAGE == 1 +extern "C" +#endif // defined(__cplusplus) && EI_C_LINKAGE == 1 +void DebugLog(const char* s) { + ei_printf("%s", s); +} + +#endif // EI_PORTING_PARTICLE diff --git a/edgeimpulse/edge-impulse-sdk/porting/particle/ei_classifier_porting.cpp b/edgeimpulse/edge-impulse-sdk/porting/particle/ei_classifier_porting.cpp new file mode 100644 index 0000000..b280d33 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/particle/ei_classifier_porting.cpp @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2023 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "../ei_classifier_porting.h" +#if EI_PORTING_PARTICLE == 1 + +#include +#include +#include + +#define EI_WEAK_FN __attribute__((weak)) + +EI_WEAK_FN EI_IMPULSE_ERROR ei_run_impulse_check_canceled() { + return EI_IMPULSE_OK; +} + +EI_WEAK_FN EI_IMPULSE_ERROR ei_sleep(int32_t time_ms) { + delay(time_ms); + return EI_IMPULSE_OK; +} + +uint64_t ei_read_timer_ms() { + return millis(); +} + +uint64_t ei_read_timer_us() { + return micros(); +} + +void ei_serial_set_baudrate(int baudrate) +{ + +} + +EI_WEAK_FN void ei_putchar(char c) +{ + Serial.write(c); +} + +EI_WEAK_FN char ei_getchar() +{ + char ch = 0; + if (Serial.available() > 0) { + ch = Serial.read(); + } + return ch; +} + +/** + * Printf function uses vsnprintf and output using Arduino Serial + */ +__attribute__((weak)) void ei_printf(const char *format, ...) { + static char print_buf[1024] = { 0 }; + + va_list args; + va_start(args, format); + int r = vsnprintf(print_buf, sizeof(print_buf), format, args); + va_end(args); + + if (r > 0) { + Serial.write(print_buf); + } +} + +__attribute__((weak)) void ei_printf_float(float f) { + Serial.print(f, 6); +} + +__attribute__((weak)) void *ei_malloc(size_t size) { + return malloc(size); +} + +__attribute__((weak)) void *ei_calloc(size_t nitems, size_t size) { + return calloc(nitems, size); +} + +__attribute__((weak)) void ei_free(void *ptr) { + free(ptr); +} + +#if defined(__cplusplus) && EI_C_LINKAGE == 1 +extern "C" +#endif +__attribute__((weak)) void DebugLog(const char* s) { + ei_printf("%s", s); +} + +#endif // EI_PORTING_PARTICLE == 1 diff --git a/edgeimpulse/edge-impulse-sdk/porting/posix/debug_log.cpp b/edgeimpulse/edge-impulse-sdk/porting/posix/debug_log.cpp new file mode 100644 index 0000000..6f7164a --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/posix/debug_log.cpp @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "../ei_classifier_porting.h" +#if EI_PORTING_POSIX == 1 + +#include +#include + +// Redirect TFLite DebugLog to ei_printf +#if defined(__cplusplus) && EI_C_LINKAGE == 1 +extern "C" +#endif // defined(__cplusplus) && EI_C_LINKAGE == 1 +void DebugLog(const char* s) { + ei_printf("%s", s); +} + +#endif // EI_PORTING_POSIX == 1 diff --git a/edgeimpulse/edge-impulse-sdk/porting/posix/ei_classifier_porting.cpp b/edgeimpulse/edge-impulse-sdk/porting/posix/ei_classifier_porting.cpp new file mode 100644 index 0000000..cf1d5ef --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/posix/ei_classifier_porting.cpp @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "edge-impulse-sdk/porting/ei_classifier_porting.h" +#if EI_PORTING_POSIX == 1 + +#include +#include +#include +#include +#include +#include +#include + +__attribute__((weak)) EI_IMPULSE_ERROR ei_run_impulse_check_canceled() { + return EI_IMPULSE_OK; +} + +/** + * Cancelable sleep, can be triggered with signal from other thread + */ +__attribute__((weak)) EI_IMPULSE_ERROR ei_sleep(int32_t time_ms) { + usleep(time_ms * 1000); + return EI_IMPULSE_OK; +} + +uint64_t ei_read_timer_ms() { + return ei_read_timer_us() / 1000; +} + +uint64_t ei_read_timer_us() { + uint64_t us; // Milliseconds + uint64_t s; // Seconds + struct timespec spec; + + clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &spec); + + s = spec.tv_sec; + us = round(spec.tv_nsec / 1.0e3); // Convert nanoseconds to micros + if (us > 999999) { + s++; + us = 0; + } + + return (s * 1000000) + us; +} + +__attribute__((weak)) void ei_printf(const char *format, ...) 
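/* Declared weak so an application can override it; forwards the va_list directly to vprintf() on stdout. */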
{ + va_list myargs; + va_start(myargs, format); + vprintf(format, myargs); + va_end(myargs); +} + +__attribute__((weak)) void ei_printf_float(float f) { + ei_printf("%f", f); +} + +__attribute__((weak)) void *ei_malloc(size_t size) { + return malloc(size); +} + +__attribute__((weak)) void *ei_calloc(size_t nitems, size_t size) { + return calloc(nitems, size); +} + +__attribute__((weak)) void ei_free(void *ptr) { + free(ptr); +} + +#if defined(__cplusplus) && EI_C_LINKAGE == 1 +extern "C" +#endif +__attribute__((weak)) void DebugLog(const char* s) { + ei_printf("%s", s); +} + +#endif // EI_PORTING_POSIX == 1 diff --git a/edgeimpulse/edge-impulse-sdk/porting/raspberry/ei_classifier_porting.cpp b/edgeimpulse/edge-impulse-sdk/porting/raspberry/ei_classifier_porting.cpp new file mode 100644 index 0000000..8b699fb --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/raspberry/ei_classifier_porting.cpp @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "../ei_classifier_porting.h" +#if EI_PORTING_RASPBERRY == 1 + +#include "pico/stdlib.h" +#include +#include +#include +#include + +#ifdef FREERTOS_ENABLED +// Include FreeRTOS for delay +#include +#include +#endif + +#define EI_WEAK_FN __attribute__((weak)) + +EI_WEAK_FN EI_IMPULSE_ERROR ei_run_impulse_check_canceled() { + return EI_IMPULSE_OK; +} + +EI_WEAK_FN EI_IMPULSE_ERROR ei_sleep(int32_t time_ms) { +#ifdef FREERTOS_ENABLED + vTaskDelay(time_ms / portTICK_PERIOD_MS); +#else + sleep_ms(time_ms); +#endif + return EI_IMPULSE_OK; +} + +uint64_t ei_read_timer_ms() { + return to_ms_since_boot(get_absolute_time()); +} + +uint64_t ei_read_timer_us() { + return to_us_since_boot(get_absolute_time()); +} + +void ei_putchar(char c) +{ + /* Send char to serial output */ + ei_printf("%c", c); +} + +/** + * Printf function uses vsnprintf and output using USB Serial + */ +__attribute__((weak)) void ei_printf(const char *format, ...) 
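/* Formats into a static 1 kB buffer (not reentrant); note that the formatted result is then passed to printf() as its format string, so a literal '%' in the output would be re-interpreted. */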
{ + static char print_buf[1024] = { 0 }; + + va_list args; + va_start(args, format); + int r = vsnprintf(print_buf, sizeof(print_buf), format, args); + va_end(args); + + if (r > 0) { + printf(print_buf); + } +} + +__attribute__((weak)) void ei_printf_float(float f) { + printf("%f", f); +} + +__attribute__((weak)) void *ei_malloc(size_t size) { +#ifdef FREERTOS_ENABLED + return pvPortMalloc(size); +#else + return malloc(size); +#endif +} + +#ifdef FREERTOS_ENABLED +void *pvPortCalloc(size_t sNb, size_t sSize) +{ + void *vPtr = NULL; + if (sSize > 0) { + vPtr = pvPortMalloc(sSize * sNb); // Call FreeRTOS or other standard API + if(vPtr) + memset(vPtr, 0, (sSize * sNb)); // Must required + } + return vPtr; +} +#endif + +__attribute__((weak)) void *ei_calloc(size_t nitems, size_t size) { +#ifdef FREERTOS_ENABLED + return pvPortCalloc(nitems, size); +#else + return calloc(nitems, size); +#endif +} + +__attribute__((weak)) void ei_free(void *ptr) { +#ifdef FREERTOS_ENABLED + vPortFree(ptr); +#else + free(ptr); +#endif +} + +#if defined(__cplusplus) && EI_C_LINKAGE == 1 +extern "C" +#endif +__attribute__((weak)) void DebugLog(const char* s) { + ei_printf("%s", s); +} + +#endif // EI_PORTING_RASPBERRY == 1 diff --git a/edgeimpulse/edge-impulse-sdk/porting/renesas-ra/debug_log.cpp b/edgeimpulse/edge-impulse-sdk/porting/renesas-ra/debug_log.cpp new file mode 100644 index 0000000..47c6847 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/renesas-ra/debug_log.cpp @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "../ei_classifier_porting.h" +#if ((EI_PORTING_RENESASRA65 == 1) || (EI_PORTING_RENESASRA8D1 == 1)) + +#include "edge-impulse-sdk/tensorflow/lite/micro/debug_log.h" +#include +#include + +// Redirect TFLite DebugLog to ei_printf +#if defined(__cplusplus) && EI_C_LINKAGE == 1 +extern "C" +#endif // defined(__cplusplus) && EI_C_LINKAGE == 1 +void DebugLog(const char* s) { + ei_printf("%s", s); +} + +#endif // EI_PORTING_RENESASRA65 == 1 diff --git a/edgeimpulse/edge-impulse-sdk/porting/renesas-ra/ei_classifier_porting.cpp b/edgeimpulse/edge-impulse-sdk/porting/renesas-ra/ei_classifier_porting.cpp new file mode 100644 index 0000000..e3f88aa --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/renesas-ra/ei_classifier_porting.cpp @@ -0,0 +1,318 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + */ + +/* Includes */ +#include "../ei_classifier_porting.h" + +#if ((EI_PORTING_RENESASRA65 == 1) || (EI_PORTING_RENESASRA8D1 == 1)) + +#include +#include +#include +#include "unistd.h" +#include "peripheral/uart_ep.h" +#include + +extern "C" uint32_t timer_get_ms(void); +extern "C" uint32_t timer_get_us(void); + +__attribute__((weak)) EI_IMPULSE_ERROR ei_run_impulse_check_canceled() { + return EI_IMPULSE_OK; +} + +/** + * Cancelable sleep, can be triggered with signal from other thread + */ +__attribute__((weak)) EI_IMPULSE_ERROR ei_sleep(int32_t time_ms) { + + uint64_t start_time = ei_read_timer_ms(); + + while(start_time + time_ms > ei_read_timer_ms()){}; + + return EI_IMPULSE_OK; +} + +uint64_t ei_read_timer_ms() { + + return timer_get_ms(); +} + +uint64_t ei_read_timer_us() { + + return timer_get_us(); +} + +__attribute__((weak)) char ei_getchar() +{ + // dummy implementation + char ch = 0; + return ch; +} + +__attribute__((weak)) void ei_printf(const char *format, ...) { + + char buffer[1024] = {0}; + int length; + va_list myargs; + va_start(myargs, format); + length = vsnprintf(buffer, sizeof(buffer), format, myargs); + va_end(myargs); + + if (length > 0) { + uart_print_user_msg((uint8_t *)buffer, length); + } +} + +__attribute__((weak)) void ei_printf_float(float f) { + float n = f; + + static double PRECISION = 0.00001; + static int MAX_NUMBER_STRING_SIZE = 32; + + char s[MAX_NUMBER_STRING_SIZE]; + + if (n == 0.0) { + strcpy(s, "0"); + } + else { + int digit, m; + char *c = s; + int neg = (n < 0); + if (neg) { + n = -n; + } + // calculate magnitude + m = log10(n); + if (neg) { + *(c++) = '-'; + } + if (m < 1.0) { + m = 0; + } + // convert the number + while (n > PRECISION || m >= 0) { + double weight = pow(10.0, m); + if (weight > 0 && !isinf(weight)) { + digit = floor(n / weight); + n -= (digit * weight); + *(c++) = '0' + digit; + } + if (m == 0 && n > 0) { + *(c++) = '.'; + } + m--; + } + *(c) = '\0'; + } + + ei_printf("%s", s); +} + +/** + * + * @param c + */ +void ei_putchar(char c) +{ + uart_putc(c); +} + +__attribute__((weak)) void *ei_malloc(size_t size) { + return malloc(size); +} + +__attribute__((weak)) void *ei_calloc(size_t nitems, size_t size) { + return calloc(nitems, size); +} + +__attribute__((weak)) void ei_free(void *ptr) { + free(ptr); +} + +#if defined(__cplusplus) && EI_C_LINKAGE == 1 +extern "C" +#endif +__attribute__((weak)) void DebugLog(const char* s) { + ei_printf("%s", s); +} + +#elif EI_PORTING_RENESASRA8D1_FREERTOS == 1 + +#include +#include +#include +#include "unistd.h" +#include "peripheral/uart.h" +#include "peripheral/usb/usb.h" +#include + +#include "FreeRTOS.h" +#include "task.h" +#include "stream_buffer.h" +#include "common_data.h" + +extern "C" uint32_t timer_get_ms(void); +extern "C" uint32_t timer_get_us(void); + +__attribute__((weak)) EI_IMPULSE_ERROR ei_run_impulse_check_canceled() { + return EI_IMPULSE_OK; +} + +/** + * Cancelable sleep, can be triggered with signal from other thread + */ +/** + * Cancelable sleep, can be triggered with signal from other thread + */ +__attribute__((weak)) EI_IMPULSE_ERROR ei_sleep(int32_t time_ms) { + + vTaskDelay(time_ms / portTICK_PERIOD_MS); + + return EI_IMPULSE_OK; +} + +uint64_t ei_read_timer_ms() { + + return timer_get_ms(); +} + +uint64_t ei_read_timer_us() { + + return timer_get_us(); +} + +__attribute__((weak)) char ei_getchar() +{ + // dummy implementation + char ch = 0; + return ch; +} + +#include + +__attribute__((weak)) void ei_printf(const char 
*format, ...) { + + char buffer[1024] = {0}; + int length; + va_list myargs; + va_start(myargs, format); + length = vsnprintf(buffer, sizeof(buffer), format, myargs); + va_end(myargs); + + if (length > 0) { + //uart_print_user_msg((uint8_t *)buffer, length); + //xStreamBufferSend(g_uart_buffer, buffer, length, 0); + //uart_print_to_console((uint8_t *)buffer, length); + comms_send((uint8_t *)buffer, length, 1000); + } +} + +__attribute__((weak)) void ei_printf_float(float f) { + float n = f; + + static double PRECISION = 0.00001; + static int MAX_NUMBER_STRING_SIZE = 32; + + char s[MAX_NUMBER_STRING_SIZE]; + + if (n == 0.0) { + strcpy(s, "0"); + } + else { + int digit, m; + char *c = s; + int neg = (n < 0); + if (neg) { + n = -n; + } + // calculate magnitude + m = log10(n); + if (neg) { + *(c++) = '-'; + } + if (m < 1.0) { + m = 0; + } + // convert the number + while (n > PRECISION || m >= 0) { + double weight = pow(10.0, m); + if (weight > 0 && !isinf(weight)) { + digit = floor(n / weight); + n -= (digit * weight); + *(c++) = '0' + digit; + } + if (m == 0 && n > 0) { + *(c++) = '.'; + } + m--; + } + *(c) = '\0'; + } + + ei_printf("%s", s); +} + +/** + * + * @param c + */ +void ei_putchar(char c) +{ + //uart_putc(c); + ei_printf("%c", c); +} + +__attribute__((weak)) void *ei_malloc(size_t size) { + return pvPortMalloc(size); +} + +__attribute__((weak)) void *ei_calloc(size_t nitems, size_t size) { + return pvPortCalloc(nitems, size); +} + +__attribute__((weak)) void ei_free(void *ptr) { + vPortFree(ptr); +} + +#if defined(__cplusplus) && EI_C_LINKAGE == 1 +extern "C" +#endif +__attribute__((weak)) void DebugLog(const char* s) { + ei_printf("%s", s); +} + +void * operator new( size_t size ) +{ + return pvPortMalloc( size ); +} + +void * operator new[]( size_t size ) +{ + return pvPortMalloc(size); +} + +void operator delete( void * ptr ) +{ + vPortFree ( ptr ); +} + +void operator delete[]( void * ptr ) +{ + vPortFree ( ptr ); +} + +#endif // EI_PORTING_RENESASRA65 == 1 diff --git a/edgeimpulse/edge-impulse-sdk/porting/seeed-vision-ai/debug_log.cpp b/edgeimpulse/edge-impulse-sdk/porting/seeed-vision-ai/debug_log.cpp new file mode 100644 index 0000000..9022698 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/seeed-vision-ai/debug_log.cpp @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "../ei_classifier_porting.h" +#if EI_PORTING_HIMAX == 1 + +#include "edge-impulse-sdk/tensorflow/lite/micro/debug_log.h" +#include +#include + +// Redirect TFLite DebugLog to ei_printf +#if defined(__cplusplus) && EI_C_LINKAGE == 1 +extern "C" +#endif // defined(__cplusplus) && EI_C_LINKAGE == 1 +void DebugLog(const char* s) { + ei_printf("%s", s); +} + +#endif // EI_PORTING_HIMAX == 1 diff --git a/edgeimpulse/edge-impulse-sdk/porting/seeed-vision-ai/ei_classifier_porting.cpp b/edgeimpulse/edge-impulse-sdk/porting/seeed-vision-ai/ei_classifier_porting.cpp new file mode 100644 index 0000000..39145da --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/seeed-vision-ai/ei_classifier_porting.cpp @@ -0,0 +1,160 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "../ei_classifier_porting.h" +#if EI_PORTING_SEEED_VISION_AI == 1 + +/* Include ----------------------------------------------------------------- */ +#include +#include +#include +// #include "hx_drv_tflm.h" +#include "hx_drv_timer.h" +#include + +#include "embARC_debug.h" + + +/* Constants ---------------------------------------------------------------- */ +#define HIMAX_TIMER_CLK_FREQ_HZ 400000000 +#define HIMAX_TIMER_TICK_1SEC (HIMAX_TIMER_CLK_FREQ_HZ/1) +#define HIMAX_TIMER_TICK_1MSEC (HIMAX_TIMER_TICK_1SEC/1000) + +/* Private variables -------------------------------------------------------- */ + +__attribute__((weak)) EI_IMPULSE_ERROR ei_run_impulse_check_canceled() { + return EI_IMPULSE_OK; +} + +/** + * Cancelable sleep, can be triggered with signal from other thread + */ +__attribute__((weak)) EI_IMPULSE_ERROR ei_sleep(int32_t time_ms) { + + board_delay_ms(time_ms); + + return EI_IMPULSE_OK; +} + +// Should be called at least once every ~10.7 seconds +uint64_t ei_read_timer_ms() +{ + static uint64_t system_time_ms = 0; + static uint64_t prev_tick_us = 0; + uint64_t tick_us; + int64_t diff_tick_us, elapsed_time_ms; + + tick_us = board_get_cur_us(); + diff_tick_us = tick_us - prev_tick_us; + elapsed_time_ms = diff_tick_us / 1000; + + // update system time and previous tick reference + if (elapsed_time_ms > 0) { + system_time_ms += elapsed_time_ms; + prev_tick_us = tick_us; + } + + return system_time_ms; +} + +uint64_t ei_read_timer_us() +{ + return board_get_cur_us(); +} + +void ei_serial_set_baudrate(int baudrate) +{ + // hx_drv_uart_initial((HX_DRV_UART_BAUDRATE_E)baudrate); +} + +void ei_putchar(char c) +{ + /* Send char to serial output */ + ei_printf("%c", c); +} + +__attribute__((weak)) void ei_printf(const char *format, ...) 
{ + va_list args; + va_start(args, format); + // print_out(format, args); + xvprintf(format, args); + va_end(args); +} + +__attribute__((weak)) void ei_printf_float(float f) { + float n = f; + + static double PRECISION = 0.00001; + static int MAX_NUMBER_STRING_SIZE = 32; + + char s[MAX_NUMBER_STRING_SIZE]; + + if (n == 0.0) { + ei_printf("0.00000"); + } else { + int digit, m; //, m1; + char *c = s; + int neg = (n < 0); + if (neg) { + n = -n; + } + // calculate magnitude + m = log10(n); + if (neg) { + *(c++) = '-'; + } + if (m < 1.0) { + m = 0; + } + // convert the number + while (n > PRECISION || m >= 0) { + double weight = pow(10.0, m); + if (weight > 0 && !isinf(weight)) { + digit = floor(n / weight); + n -= (digit * weight); + *(c++) = '0' + digit; + } + if (m == 0 && n > 0) { + *(c++) = '.'; + } + m--; + } + *(c) = '\0'; + ei_printf("%s", s); + } +} + +__attribute__((weak)) void *ei_malloc(size_t size) { + return malloc(size); +} + +__attribute__((weak)) void *ei_calloc(size_t nitems, size_t size) { + return calloc(nitems, size); +} + +__attribute__((weak)) void ei_free(void *ptr) { + free(ptr); +} + +#if defined(__cplusplus) && EI_C_LINKAGE == 1 +extern "C" +#endif +__attribute__((weak)) void DebugLog(const char* s) { + ei_printf("%s", s); +} + +#endif // #if EI_PORTING_SEEED_VISION_AI == 1 diff --git a/edgeimpulse/edge-impulse-sdk/porting/silabs/debug_log.cpp b/edgeimpulse/edge-impulse-sdk/porting/silabs/debug_log.cpp new file mode 100644 index 0000000..1cc01e3 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/silabs/debug_log.cpp @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "../ei_classifier_porting.h" +#if EI_PORTING_SILABS == 1 + +#include "edge-impulse-sdk/tensorflow/lite/micro/debug_log.h" +#include +#include + +// Redirect TFLite DebugLog to ei_printf +#if defined(__cplusplus) && EI_C_LINKAGE == 1 +extern "C" +#endif // defined(__cplusplus) && EI_C_LINKAGE == 1 +void DebugLog(const char* s) { + ei_printf("%s", s); +} + +#endif // EI_PORTING_SILABS == 1 diff --git a/edgeimpulse/edge-impulse-sdk/porting/silabs/ei_classifier_porting.cpp b/edgeimpulse/edge-impulse-sdk/porting/silabs/ei_classifier_porting.cpp new file mode 100644 index 0000000..42903a1 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/silabs/ei_classifier_porting.cpp @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. 
See the License for the specific language + * governing permissions and limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "../ei_classifier_porting.h" +#if EI_PORTING_SILABS == 1 + +/* Include ----------------------------------------------------------------- */ +#include +#include +#include +#include "sl_sleeptimer.h" +#include "sl_stdio.h" + +__attribute__((weak)) EI_IMPULSE_ERROR ei_run_impulse_check_canceled() { + return EI_IMPULSE_OK; +} + +/** + * Cancelable sleep, can be triggered with signal from other thread + */ +__attribute__((weak)) EI_IMPULSE_ERROR ei_sleep(int32_t time_ms) { + sl_sleeptimer_delay_millisecond(time_ms); + return EI_IMPULSE_OK; +} + +uint64_t ei_read_timer_ms() +{ + return (uint32_t)sl_sleeptimer_tick_to_ms(sl_sleeptimer_get_tick_count()); +} + +uint64_t ei_read_timer_us() +{ + return ei_read_timer_ms() * 1000; +} + +void ei_serial_set_baudrate(int baudrate) +{ +} + +void ei_putchar(char c) +{ + sl_putchar(c); +} + +__attribute__((weak)) char ei_getchar() +{ + char ch = 0; + + if(sl_getchar(&ch) == SL_STATUS_OK) { + return ch; + } + else { + return 0; + } +} + +__attribute__((weak)) void ei_printf(const char *format, ...) { + va_list myargs; + va_start(myargs, format); + vprintf(format, myargs); + va_end(myargs); +} + +__attribute__((weak)) void ei_printf_float(float f) { + ei_printf("%f", f); +} + +__attribute__((weak)) void *ei_malloc(size_t size) { + return malloc(size); +} + +__attribute__((weak)) void *ei_calloc(size_t nitems, size_t size) { + return calloc(nitems, size); +} + +__attribute__((weak)) void ei_free(void *ptr) { + free(ptr); +} + +__attribute__((weak)) void ei_putc(char c) +{ + sl_putchar(c); +} + +#if defined(__cplusplus) && EI_C_LINKAGE == 1 +extern "C" +#endif +__attribute__((weak)) void DebugLog(const char* s) { + ei_printf("%s", s); +} + +#endif // EI_PORTING_SILABS == 1 diff --git a/edgeimpulse/edge-impulse-sdk/porting/sony/debug_log.cpp b/edgeimpulse/edge-impulse-sdk/porting/sony/debug_log.cpp new file mode 100644 index 0000000..51cc138 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/sony/debug_log.cpp @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "../ei_classifier_porting.h" +#if EI_PORTING_SONY_SPRESENSE == 1 + +#include "edge-impulse-sdk/tensorflow/lite/micro/debug_log.h" +#include +#include + +// Redirect TFLite DebugLog to ei_printf +#if defined(__cplusplus) && EI_C_LINKAGE == 1 +extern "C" +#endif // defined(__cplusplus) && EI_C_LINKAGE == 1 +void DebugLog(const char* s) { + ei_printf("%s", s); +} + +#endif // EI_PORTING_SONY_SPRESENSE == 1 diff --git a/edgeimpulse/edge-impulse-sdk/porting/sony/ei_classifier_porting.cpp b/edgeimpulse/edge-impulse-sdk/porting/sony/ei_classifier_porting.cpp new file mode 100644 index 0000000..fe0f5be --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/sony/ei_classifier_porting.cpp @@ -0,0 +1,119 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "../ei_classifier_porting.h" +#if EI_PORTING_SONY_SPRESENSE == 1 + +#include +#include +#include + +extern "C" void spresense_time_cb(uint32_t *sec, uint32_t *nano); +extern "C" void spresense_putchar(char cChar); +extern "C" char spresense_getchar(void); + +__attribute__((weak)) EI_IMPULSE_ERROR ei_run_impulse_check_canceled() { + return EI_IMPULSE_OK; +} + +/** + * Cancelable sleep, can be triggered with signal from other thread + */ +__attribute__((weak)) EI_IMPULSE_ERROR ei_sleep(int32_t time_ms) { + + uint64_t end_ms = ei_read_timer_ms() + time_ms; + + while(end_ms > ei_read_timer_ms()){}; + + return EI_IMPULSE_OK; +} + +uint64_t ei_read_timer_ms() { + + uint64_t time_ms; + uint32_t seconds, nano_seconds; + + spresense_time_cb(&seconds, &nano_seconds); + + time_ms = (seconds * 1000) + (nano_seconds / 1000000); + return time_ms; +} + +uint64_t ei_read_timer_us() { + + uint64_t time_us; + uint32_t seconds, nano_seconds; + + spresense_time_cb(&seconds, &nano_seconds); + + time_us = (seconds * 1000000) + (nano_seconds / 1000); + return time_us; +} + +__attribute__((weak)) void ei_printf(const char *format, ...) 
{ + + char buffer[256]; + int length; + va_list myargs; + va_start(myargs, format); + length = vsprintf(buffer, format, myargs); + va_end(myargs); + + for(int i = 0; i < length; i++) { + spresense_putchar(buffer[i]); + } +} + +__attribute__((weak)) void ei_printf_float(float f) { + ei_printf("%f", f); +} + +/** + * @brief Write single character to serial output + * + * @param[in] cChar The character + */ +__attribute__((weak)) void ei_putchar(char cChar) +{ + spresense_putchar(cChar); +} + +__attribute__((weak)) char ei_getchar(void) +{ + return spresense_getchar(); +} + +__attribute__((weak)) void *ei_malloc(size_t size) { + return malloc(size); +} + +__attribute__((weak)) void *ei_calloc(size_t nitems, size_t size) { + return calloc(nitems, size); +} + +__attribute__((weak)) void ei_free(void *ptr) { + free(ptr); +} + +#if defined(__cplusplus) && EI_C_LINKAGE == 1 +extern "C" +#endif +__attribute__((weak)) void DebugLog(const char* s) { + ei_printf("%s", s); +} + +#endif // EI_PORTING_SONY_SPRESENSE == 1 diff --git a/edgeimpulse/edge-impulse-sdk/porting/stm32-cubeai/debug_log.cpp b/edgeimpulse/edge-impulse-sdk/porting/stm32-cubeai/debug_log.cpp new file mode 100644 index 0000000..35408b7 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/stm32-cubeai/debug_log.cpp @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "../ei_classifier_porting.h" +#if EI_PORTING_STM32_CUBEAI == 1 + +#include "edge-impulse-sdk/tensorflow/lite/micro/debug_log.h" +#include +#include + +// Redirect TFLite DebugLog to ei_printf +#if defined(__cplusplus) && EI_C_LINKAGE == 1 +extern "C" +#endif // defined(__cplusplus) && EI_C_LINKAGE == 1 +void DebugLog(const char* s) { + ei_printf("%s", s); +} + +#endif // EI_PORTING_STM32_CUBEAI == 1 diff --git a/edgeimpulse/edge-impulse-sdk/porting/stm32-cubeai/ei_classifier_porting.cpp b/edgeimpulse/edge-impulse-sdk/porting/stm32-cubeai/ei_classifier_porting.cpp new file mode 100644 index 0000000..c626f30 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/stm32-cubeai/ei_classifier_porting.cpp @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "../ei_classifier_porting.h" +#if EI_PORTING_STM32_CUBEAI == 1 + +#include "main.h" +#include +#include +#include +#include +#include + +__attribute__((weak)) EI_IMPULSE_ERROR ei_run_impulse_check_canceled() { + return EI_IMPULSE_OK; +} + +__attribute__((weak)) EI_IMPULSE_ERROR ei_sleep(int32_t time_ms) { + HAL_Delay(time_ms); + return EI_IMPULSE_OK; +} + +uint64_t ei_read_timer_ms() { + return HAL_GetTick(); +} + +uint64_t ei_read_timer_us() { + return HAL_GetTick() * 1000; +} + +__attribute__((weak)) void ei_printf(const char *format, ...) { + va_list myargs; + va_start(myargs, format); + vprintf(format, myargs); + va_end(myargs); +} + +__attribute__((weak)) void ei_printf_float(float f) { + float n = f; + + static double PRECISION = 0.00001; + static int MAX_NUMBER_STRING_SIZE = 32; + + char s[MAX_NUMBER_STRING_SIZE]; + + if (n == 0.0) { + strcpy(s, "0"); + } + else { + int digit, m; + char *c = s; + int neg = (n < 0); + if (neg) { + n = -n; + } + // calculate magnitude + m = log10(n); + if (neg) { + *(c++) = '-'; + } + if (m < 1.0) { + m = 0; + } + // convert the number + while (n > PRECISION || m >= 0) { + double weight = pow(10.0, m); + if (weight > 0 && !isinf(weight)) { + digit = floor(n / weight); + n -= (digit * weight); + *(c++) = '0' + digit; + } + if (m == 0 && n > 0) { + *(c++) = '.'; + } + m--; + } + *(c) = '\0'; + } + + + ei_printf("%s", s); +} + +__attribute__((weak)) void *ei_malloc(size_t size) { + return malloc(size); +} + +__attribute__((weak)) void *ei_calloc(size_t nitems, size_t size) { + return calloc(nitems, size); +} + +__attribute__((weak)) void ei_free(void *ptr) { + free(ptr); +} + +#if defined(__cplusplus) && EI_C_LINKAGE == 1 +extern "C" +#endif +__attribute__((weak)) void DebugLog(const char* s) { + ei_printf("%s", s); +} + +#endif // EI_PORTING_STM32_CUBEAI diff --git a/edgeimpulse/edge-impulse-sdk/porting/synaptics/debug_log.cpp b/edgeimpulse/edge-impulse-sdk/porting/synaptics/debug_log.cpp new file mode 100644 index 0000000..381c82b --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/synaptics/debug_log.cpp @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "../ei_classifier_porting.h" +#if EI_PORTING_SYNAPTICS == 1 + +#include "edge-impulse-sdk/tensorflow/lite/micro/debug_log.h" +#include +#include + +// Redirect TFLite DebugLog to ei_printf +#if defined(__cplusplus) && EI_C_LINKAGE == 1 +extern "C" +#endif // defined(__cplusplus) && EI_C_LINKAGE == 1 +void DebugLog(const char* s) { + ei_printf("%s", s); +} + +#endif // EI_PORTING_SYNAPTICS == 1 diff --git a/edgeimpulse/edge-impulse-sdk/porting/synaptics/ei_classifier_porting.cpp b/edgeimpulse/edge-impulse-sdk/porting/synaptics/ei_classifier_porting.cpp new file mode 100644 index 0000000..bc5ac26 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/synaptics/ei_classifier_porting.cpp @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "../ei_classifier_porting.h" +#if EI_PORTING_SYNAPTICS == 1 + +#include +#include +#include +#include + +#include "mcu.h" +#include "uart_drv.h" + +extern "C" void *os_Malloc(unsigned long); +extern "C" int os_Free(void *); +extern "C" uint64_t get_time_ms(void); +extern void print_out(const char *format, va_list args); + +__attribute__((weak)) EI_IMPULSE_ERROR ei_run_impulse_check_canceled() { + return EI_IMPULSE_OK; +} + +/** + * Cancelable sleep, can be triggered with signal from other thread + */ +__attribute__((weak)) EI_IMPULSE_ERROR ei_sleep(int32_t time_ms) { + + os_TaskSleep(time_ms); + + return EI_IMPULSE_OK; +} + +uint64_t ei_read_timer_ms() { + + return get_time_ms(); +} + +uint64_t ei_read_timer_us() { + + return get_time_ms() * 1000; +} + +__attribute__((weak)) void ei_printf(const char *format, ...) 
{ + va_list args; + va_start(args, format); + print_out(format, args); + va_end(args); +} + + +__attribute__((weak)) void ei_putchar(char c) { + uart_putchar(c); +} + +__attribute__((weak)) void ei_printf_float(float f) { + float n = f; + + static double PRECISION = 0.00001; + static int MAX_NUMBER_STRING_SIZE = 32; + + char s[MAX_NUMBER_STRING_SIZE]; + + if (n == 0.0) { + ei_printf("0.00000"); + } else { + int digit, m; //, m1; + char *c = s; + int neg = (n < 0); + if (neg) { + n = -n; + } + // calculate magnitude + m = log10(n); + if (neg) { + *(c++) = '-'; + } + if (m < 1.0) { + m = 0; + } + // convert the number + while (n > PRECISION || m >= 0) { + double weight = pow(10.0, m); + if (weight > 0 && !isinf(weight)) { + digit = floor(n / weight); + n -= (digit * weight); + *(c++) = '0' + digit; + } + if (m == 0 && n > 0) { + *(c++) = '.'; + } + m--; + } + *(c) = '\0'; + ei_printf("%s", s); + } +} + +__attribute__((weak)) void *ei_malloc(size_t size) { + return os_Malloc(size); +} + +__attribute__((weak)) void *ei_calloc(size_t nitems, size_t size) { + return os_Malloc(nitems * size); +} + +__attribute__((weak)) void ei_free(void *ptr) { + os_Free(ptr); +} + +#if defined(__cplusplus) && EI_C_LINKAGE == 1 +extern "C" +#endif +__attribute__((weak)) void DebugLog(const char* s) { + ei_printf("%s", s); +} + +#endif // EI_PORTING_SYNAPTICS == 1 diff --git a/edgeimpulse/edge-impulse-sdk/porting/ti/debug_log.cpp b/edgeimpulse/edge-impulse-sdk/porting/ti/debug_log.cpp new file mode 100644 index 0000000..4f8ee4a --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/ti/debug_log.cpp @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "../ei_classifier_porting.h" +#if EI_PORTING_TI == 1 + +#include "edge-impulse-sdk/tensorflow/lite/micro/debug_log.h" +#include +#include + +// Redirect TFLite DebugLog to ei_printf +#if defined(__cplusplus) && EI_C_LINKAGE == 1 +extern "C" +#endif // defined(__cplusplus) && EI_C_LINKAGE == 1 +void DebugLog(const char* s) { + ei_printf("%s", s); +} + +#endif // EI_PORTING_SONY_SPRESENSE == 1 diff --git a/edgeimpulse/edge-impulse-sdk/porting/ti/ei_classifier_porting.cpp b/edgeimpulse/edge-impulse-sdk/porting/ti/ei_classifier_porting.cpp new file mode 100644 index 0000000..b0e38f4 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/ti/ei_classifier_porting.cpp @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. 
See the License for the specific language + * governing permissions and limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "../ei_classifier_porting.h" +#if EI_PORTING_TI == 1 + +#include + +#include +#include +#include +#include "unistd.h" + +extern "C" void Serial_Out(char *string, int length); +extern "C" uint64_t Timer_getMs(void); + +__attribute__((weak)) EI_IMPULSE_ERROR ei_run_impulse_check_canceled() { + return EI_IMPULSE_OK; +} + +/** + * Cancelable sleep, can be triggered with signal from other thread + */ +__attribute__((weak)) EI_IMPULSE_ERROR ei_sleep(int32_t time_ms) { + + usleep(time_ms * 1000); + + return EI_IMPULSE_OK; +} + +uint64_t ei_read_timer_ms() { + + return Timer_getMs(); +} + +uint64_t ei_read_timer_us() { + + /* TI board hangs when trying to call callback function each micro second */ + return Timer_getMs() * 1000; +} + +__attribute__((weak)) void ei_printf(const char *format, ...) { + + char buffer[256]; + int length; + va_list myargs; + va_start(myargs, format); + length = vsnprintf(buffer, 256, format, myargs); + va_end(myargs); + + Serial_Out(buffer, length); +} + +__attribute__((weak)) void ei_printf_float(float f) { + ei_printf("%f", f); +} + +__attribute__((weak)) void ei_putchar(char data) +{ + Serial_Out(&data, 1); +} + +__attribute__((weak)) void *ei_malloc(size_t size) { + return malloc(size); +} + +__attribute__((weak)) void *ei_calloc(size_t nitems, size_t size) { + return calloc(nitems, size); +} + +__attribute__((weak)) void ei_free(void *ptr) { + free(ptr); +} + +#if defined(__cplusplus) && EI_C_LINKAGE == 1 +extern "C" +#endif +__attribute__((weak)) void DebugLog(const char* s) { + ei_printf("%s", s); +} + +#endif // EI_PORTING_TI == 1 diff --git a/edgeimpulse/edge-impulse-sdk/porting/zephyr/debug_log.cpp b/edgeimpulse/edge-impulse-sdk/porting/zephyr/debug_log.cpp new file mode 100644 index 0000000..53e4c46 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/zephyr/debug_log.cpp @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "../ei_classifier_porting.h" +#if EI_PORTING_ZEPHYR == 1 + +#include "edge-impulse-sdk/tensorflow/lite/micro/debug_log.h" +#include +#include + +// Route back to `ei_printf` +#if defined(__cplusplus) && EI_C_LINKAGE == 1 +extern "C" +#endif // defined(__cplusplus) && EI_C_LINKAGE == 1 +void DebugLog(const char* s) { + ei_printf("%s", s); +} + +#endif // #if EI_PORTING_ZEPHYR == 1 diff --git a/edgeimpulse/edge-impulse-sdk/porting/zephyr/ei_classifier_porting.cpp b/edgeimpulse/edge-impulse-sdk/porting/zephyr/ei_classifier_porting.cpp new file mode 100644 index 0000000..0ba58b8 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/porting/zephyr/ei_classifier_porting.cpp @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2022 EdgeImpulse Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an "AS + * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language + * governing permissions and limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "../ei_classifier_porting.h" +#if EI_PORTING_ZEPHYR == 1 + +#include +// Zpehyr 3.1.x and newer uses different include scheme +#if (KERNEL_VERSION_MAJOR > 3) || ((KERNEL_VERSION_MAJOR == 3) && (KERNEL_VERSION_MINOR >= 1)) +#include +#include +#else +#include +#include +#endif +#include +#include + +extern const struct device *uart; + +#define EI_WEAK_FN __attribute__((weak)) + +EI_WEAK_FN EI_IMPULSE_ERROR ei_run_impulse_check_canceled() { + return EI_IMPULSE_OK; +} + +EI_WEAK_FN EI_IMPULSE_ERROR ei_sleep(int32_t time_ms) { + k_msleep(time_ms); + return EI_IMPULSE_OK; +} + +uint64_t ei_read_timer_ms() { + return k_uptime_get(); +} + +uint64_t ei_read_timer_us() { + return k_uptime_get() * 1000; +} + +EI_WEAK_FN char ei_getchar() +{ + uint8_t rcv_char = 0; + if(uart_fifo_read(uart, &rcv_char, 1) == 1) { + return rcv_char; + } + else { + return 0; + } +} + +/** + * Printf function uses vsnprintf and output using Arduino Serial + */ +__attribute__((weak)) void ei_printf(const char *format, ...) { + static char print_buf[1024] = { 0 }; + + va_list args; + va_start(args, format); + int r = vsnprintf(print_buf, sizeof(print_buf), format, args); + va_end(args); + + if(r > 0) { + printf("%s", print_buf); + } +} + +__attribute__((weak)) void ei_printf_float(float f) { + printf("%f", f); +} + +__attribute__((weak)) void *ei_malloc(size_t size) { + return malloc(size); +} + +__attribute__((weak)) void *ei_calloc(size_t nitems, size_t size) { + return calloc(nitems, size); +} + +__attribute__((weak)) void ei_free(void *ptr) { + free(ptr); +} + +#if defined(__cplusplus) && EI_C_LINKAGE == 1 +extern "C" +#endif +__attribute__((weak)) void DebugLog(const char* s) { + printf("%s", s); +} + +#endif // #if EI_PORTING_ZEPHYR == 1 diff --git a/edgeimpulse/edge-impulse-sdk/sources.txt b/edgeimpulse/edge-impulse-sdk/sources.txt new file mode 100644 index 0000000..6e67469 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/sources.txt @@ -0,0 +1,7 @@ +tensorflow and third_party folders based on: https://github.com/tensorflow/tensorflow/#c903b4607821a03c36c17b0befa2535c7dd0e066 +TensorFlow source was prepared using `make -f tensorflow/lite/micro/tools/make/Makefile generate_projects` +The folders were taken from `tensorflow/lite/micro/tools/make/gen/osx_x86_64/prj/hello_world/make` +These files and directories were then deleted: + - `tensorflow/lite/micro/debug_log.cc` + - `tensorflow/lite/micro/examples/hello_world` +CMSIS-DSP based on: https://github.com/ARM-software/CMSIS_5/tree/4d378e81968c6bec5441a42885b24db7cf189bca diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/LICENSE b/edgeimpulse/edge-impulse-sdk/tensorflow/LICENSE new file mode 100644 index 0000000..d645695 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 
1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/builtin_op_data.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/builtin_op_data.h new file mode 100644 index 0000000..b512ba7 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/builtin_op_data.h @@ -0,0 +1,22 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +// Compatibility shim for new location of interface definitions. + +#ifndef TENSORFLOW_LITE_BUILTIN_OP_DATA_H_ +#define TENSORFLOW_LITE_BUILTIN_OP_DATA_H_ + +#include "edge-impulse-sdk/tensorflow/lite/core/c/builtin_op_data.h" + +#endif // TENSORFLOW_LITE_BUILTIN_OP_DATA_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/builtin_ops.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/builtin_ops.h new file mode 100644 index 0000000..3370730 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/builtin_ops.h @@ -0,0 +1,194 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_BUILTIN_OPS_H_ +#define TENSORFLOW_LITE_BUILTIN_OPS_H_ + +// DO NOT EDIT MANUALLY: This file is automatically generated by +// `schema/builtin_ops_header/generator.cc`. + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +// The enum for builtin operators. +// Note: CUSTOM, DELEGATE, and PLACEHOLDER_FOR_GREATER_OP_CODES are 3 special +// ops which are not real built-in ops. 
+typedef enum { + kTfLiteBuiltinAdd = 0, + kTfLiteBuiltinAveragePool2d = 1, + kTfLiteBuiltinConcatenation = 2, + kTfLiteBuiltinConv2d = 3, + kTfLiteBuiltinDepthwiseConv2d = 4, + kTfLiteBuiltinDepthToSpace = 5, + kTfLiteBuiltinDequantize = 6, + kTfLiteBuiltinEmbeddingLookup = 7, + kTfLiteBuiltinFloor = 8, + kTfLiteBuiltinFullyConnected = 9, + kTfLiteBuiltinHashtableLookup = 10, + kTfLiteBuiltinL2Normalization = 11, + kTfLiteBuiltinL2Pool2d = 12, + kTfLiteBuiltinLocalResponseNormalization = 13, + kTfLiteBuiltinLogistic = 14, + kTfLiteBuiltinLshProjection = 15, + kTfLiteBuiltinLstm = 16, + kTfLiteBuiltinMaxPool2d = 17, + kTfLiteBuiltinMul = 18, + kTfLiteBuiltinRelu = 19, + kTfLiteBuiltinReluN1To1 = 20, + kTfLiteBuiltinRelu6 = 21, + kTfLiteBuiltinReshape = 22, + kTfLiteBuiltinResizeBilinear = 23, + kTfLiteBuiltinRnn = 24, + kTfLiteBuiltinSoftmax = 25, + kTfLiteBuiltinSpaceToDepth = 26, + kTfLiteBuiltinSvdf = 27, + kTfLiteBuiltinTanh = 28, + kTfLiteBuiltinConcatEmbeddings = 29, + kTfLiteBuiltinSkipGram = 30, + kTfLiteBuiltinCall = 31, + kTfLiteBuiltinCustom = 32, + kTfLiteBuiltinEmbeddingLookupSparse = 33, + kTfLiteBuiltinPad = 34, + kTfLiteBuiltinUnidirectionalSequenceRnn = 35, + kTfLiteBuiltinGather = 36, + kTfLiteBuiltinBatchToSpaceNd = 37, + kTfLiteBuiltinSpaceToBatchNd = 38, + kTfLiteBuiltinTranspose = 39, + kTfLiteBuiltinMean = 40, + kTfLiteBuiltinSub = 41, + kTfLiteBuiltinDiv = 42, + kTfLiteBuiltinSqueeze = 43, + kTfLiteBuiltinUnidirectionalSequenceLstm = 44, + kTfLiteBuiltinStridedSlice = 45, + kTfLiteBuiltinBidirectionalSequenceRnn = 46, + kTfLiteBuiltinExp = 47, + kTfLiteBuiltinTopkV2 = 48, + kTfLiteBuiltinSplit = 49, + kTfLiteBuiltinLogSoftmax = 50, + kTfLiteBuiltinDelegate = 51, + kTfLiteBuiltinBidirectionalSequenceLstm = 52, + kTfLiteBuiltinCast = 53, + kTfLiteBuiltinPrelu = 54, + kTfLiteBuiltinMaximum = 55, + kTfLiteBuiltinArgMax = 56, + kTfLiteBuiltinMinimum = 57, + kTfLiteBuiltinLess = 58, + kTfLiteBuiltinNeg = 59, + kTfLiteBuiltinPadv2 = 60, + kTfLiteBuiltinGreater = 61, + kTfLiteBuiltinGreaterEqual = 62, + kTfLiteBuiltinLessEqual = 63, + kTfLiteBuiltinSelect = 64, + kTfLiteBuiltinSlice = 65, + kTfLiteBuiltinSin = 66, + kTfLiteBuiltinTransposeConv = 67, + kTfLiteBuiltinSparseToDense = 68, + kTfLiteBuiltinTile = 69, + kTfLiteBuiltinExpandDims = 70, + kTfLiteBuiltinEqual = 71, + kTfLiteBuiltinNotEqual = 72, + kTfLiteBuiltinLog = 73, + kTfLiteBuiltinSum = 74, + kTfLiteBuiltinSqrt = 75, + kTfLiteBuiltinRsqrt = 76, + kTfLiteBuiltinShape = 77, + kTfLiteBuiltinPow = 78, + kTfLiteBuiltinArgMin = 79, + kTfLiteBuiltinFakeQuant = 80, + kTfLiteBuiltinReduceProd = 81, + kTfLiteBuiltinReduceMax = 82, + kTfLiteBuiltinPack = 83, + kTfLiteBuiltinLogicalOr = 84, + kTfLiteBuiltinOneHot = 85, + kTfLiteBuiltinLogicalAnd = 86, + kTfLiteBuiltinLogicalNot = 87, + kTfLiteBuiltinUnpack = 88, + kTfLiteBuiltinReduceMin = 89, + kTfLiteBuiltinFloorDiv = 90, + kTfLiteBuiltinReduceAny = 91, + kTfLiteBuiltinSquare = 92, + kTfLiteBuiltinZerosLike = 93, + kTfLiteBuiltinFill = 94, + kTfLiteBuiltinFloorMod = 95, + kTfLiteBuiltinRange = 96, + kTfLiteBuiltinResizeNearestNeighbor = 97, + kTfLiteBuiltinLeakyRelu = 98, + kTfLiteBuiltinSquaredDifference = 99, + kTfLiteBuiltinMirrorPad = 100, + kTfLiteBuiltinAbs = 101, + kTfLiteBuiltinSplitV = 102, + kTfLiteBuiltinUnique = 103, + kTfLiteBuiltinCeil = 104, + kTfLiteBuiltinReverseV2 = 105, + kTfLiteBuiltinAddN = 106, + kTfLiteBuiltinGatherNd = 107, + kTfLiteBuiltinCos = 108, + kTfLiteBuiltinWhere = 109, + kTfLiteBuiltinRank = 110, + kTfLiteBuiltinElu = 111, + 
kTfLiteBuiltinReverseSequence = 112, + kTfLiteBuiltinMatrixDiag = 113, + kTfLiteBuiltinQuantize = 114, + kTfLiteBuiltinMatrixSetDiag = 115, + kTfLiteBuiltinRound = 116, + kTfLiteBuiltinHardSwish = 117, + kTfLiteBuiltinIf = 118, + kTfLiteBuiltinWhile = 119, + kTfLiteBuiltinNonMaxSuppressionV4 = 120, + kTfLiteBuiltinNonMaxSuppressionV5 = 121, + kTfLiteBuiltinScatterNd = 122, + kTfLiteBuiltinSelectV2 = 123, + kTfLiteBuiltinDensify = 124, + kTfLiteBuiltinSegmentSum = 125, + kTfLiteBuiltinBatchMatmul = 126, + kTfLiteBuiltinPlaceholderForGreaterOpCodes = 127, + kTfLiteBuiltinCumsum = 128, + kTfLiteBuiltinCallOnce = 129, + kTfLiteBuiltinBroadcastTo = 130, + kTfLiteBuiltinRfft2d = 131, + kTfLiteBuiltinConv3d = 132, + kTfLiteBuiltinImag = 133, + kTfLiteBuiltinReal = 134, + kTfLiteBuiltinComplexAbs = 135, + kTfLiteBuiltinHashtable = 136, + kTfLiteBuiltinHashtableFind = 137, + kTfLiteBuiltinHashtableImport = 138, + kTfLiteBuiltinHashtableSize = 139, + kTfLiteBuiltinReduceAll = 140, + kTfLiteBuiltinConv3dTranspose = 141, + kTfLiteBuiltinVarHandle = 142, + kTfLiteBuiltinReadVariable = 143, + kTfLiteBuiltinAssignVariable = 144, + kTfLiteBuiltinBroadcastArgs = 145, + kTfLiteBuiltinRandomStandardNormal = 146, + kTfLiteBuiltinBucketize = 147, + kTfLiteBuiltinRandomUniform = 148, + kTfLiteBuiltinMultinomial = 149, + kTfLiteBuiltinGelu = 150, + kTfLiteBuiltinDynamicUpdateSlice = 151, + kTfLiteBuiltinRelu0To1 = 152, + kTfLiteBuiltinUnsortedSegmentProd = 153, + kTfLiteBuiltinUnsortedSegmentMax = 154, + kTfLiteBuiltinUnsortedSegmentSum = 155, + kTfLiteBuiltinAtan2 = 156, + kTfLiteBuiltinUnsortedSegmentMin = 157, + kTfLiteBuiltinSign = 158, +} TfLiteBuiltinOperator; + +#ifdef __cplusplus +} // extern "C" +#endif // __cplusplus +#endif // TENSORFLOW_LITE_BUILTIN_OPS_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h new file mode 100644 index 0000000..f1e511a --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h @@ -0,0 +1,22 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_C_BUILTIN_OP_DATA_H_ +#define TENSORFLOW_LITE_C_BUILTIN_OP_DATA_H_ + +/// For documentation, see +/// third_party/tensorflow/lite/core/c/builtin_op_data.h. +#include "edge-impulse-sdk/tensorflow/lite/core/c/builtin_op_data.h" // IWYU pragma: export + +#endif // TENSORFLOW_LITE_C_BUILTIN_OP_DATA_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/c/c_api_types.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/c/c_api_types.h new file mode 100644 index 0000000..4d3fab2 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/c/c_api_types.h @@ -0,0 +1,26 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// This file declares types used by the pure C inference API defined in c_api.h, +// some of which are also used in the C++ and C kernel and interpreter APIs. + +#ifndef TENSORFLOW_LITE_C_C_API_TYPES_H_ +#define TENSORFLOW_LITE_C_C_API_TYPES_H_ + +/// For documentation, see +/// third_party/tensorflow/lite/core/c/c_api_types.h. +#include "edge-impulse-sdk/tensorflow/lite/core/c/c_api_types.h" // IWYU pragma: export + +#endif // TENSORFLOW_LITE_C_C_API_TYPES_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/c/common.c b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/c/common.c new file mode 100644 index 0000000..9efcd3a --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/c/common.c @@ -0,0 +1,17 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +// Dummy file for backwards compatibility. +// See core/api/common.cc + diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/c/common.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/c/common.h new file mode 100644 index 0000000..00c3768 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/c/common.h @@ -0,0 +1,43 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// This file defines common C types and APIs for implementing operations, +// delegates and other constructs in TensorFlow Lite. The actual operations and +// delegates can be defined using C++, but the interface between the interpreter +// and the operations are C. 
+// +// Summary of abstractions +// TF_LITE_ENSURE - Self-sufficient error checking +// TfLiteStatus - Status reporting +// TfLiteIntArray - stores tensor shapes (dims), +// TfLiteContext - allows an op to access the tensors +// TfLiteTensor - tensor (a multidimensional array) +// TfLiteNode - a single node or operation +// TfLiteRegistration - the implementation of a conceptual operation. +// TfLiteDelegate - allows delegation of nodes to alternative backends. +// +// Some abstractions in this file are created and managed by Interpreter. +// +// NOTE: The order of values in these structs are "semi-ABI stable". New values +// should be added only to the end of structs and never reordered. + +#ifndef TENSORFLOW_LITE_C_COMMON_H_ +#define TENSORFLOW_LITE_C_COMMON_H_ + +/// For documentation, see +/// third_party/tensorflow/lite/core/c/common.h. +#include "edge-impulse-sdk/tensorflow/lite/core/c/common.h" // IWYU pragma: export + +#endif // TENSORFLOW_LITE_C_COMMON_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/context_util.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/context_util.h new file mode 100644 index 0000000..8c97a8d --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/context_util.h @@ -0,0 +1,54 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +/// \file +/// +/// This provides a few C++ helpers that are useful for manipulating C +/// structures in C++. +#ifndef TENSORFLOW_LITE_CONTEXT_UTIL_H_ +#define TENSORFLOW_LITE_CONTEXT_UTIL_H_ + +#include + +#include "edge-impulse-sdk/tensorflow/lite/core/c/common.h" + +namespace tflite { + +/// Provides a range iterable wrapper for TfLiteIntArray* (C lists) that TfLite +/// C api uses. +// Can't use the google array_view, since we can't depend on even +// absl for embedded device reasons. +class TfLiteIntArrayView { + public: + /// Construct a view of a TfLiteIntArray*. Note, `int_array` should be + /// non-null and this view does not take ownership of it. 
+ explicit TfLiteIntArrayView(const TfLiteIntArray* int_array) + : int_array_(int_array) {} + + TfLiteIntArrayView(const TfLiteIntArrayView&) = default; + TfLiteIntArrayView& operator=(const TfLiteIntArrayView& rhs) = default; + + typedef const int* const_iterator; + const_iterator begin() const { return int_array_->data; } + const_iterator end() const { return &int_array_->data[int_array_->size]; } + size_t size() const { return end() - begin(); } + int operator[](size_t pos) const { return int_array_->data[pos]; } + + private: + const TfLiteIntArray* int_array_; +}; + +} // namespace tflite + +#endif // TENSORFLOW_LITE_CONTEXT_UTIL_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/core/api/common.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/core/api/common.cpp new file mode 100644 index 0000000..67b8c6c --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/core/api/common.cpp @@ -0,0 +1,354 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/core/c/common.h" + +#include "edge-impulse-sdk/tensorflow/lite/core/c/c_api_types.h" +#ifdef TF_LITE_TENSORFLOW_PROFILER +#include "edge-impulse-sdk/tensorflow/lite/tensorflow_profiler_logger.h" +#endif + +#ifndef TF_LITE_STATIC_MEMORY +#include +#include +#endif // TF_LITE_STATIC_MEMORY + +extern "C" { + +size_t TfLiteIntArrayGetSizeInBytes(int size) { + static TfLiteIntArray dummy; + + size_t computed_size = sizeof(dummy) + sizeof(dummy.data[0]) * size; +#if defined(_MSC_VER) + // Context for why this is needed is in http://b/189926408#comment21 + computed_size -= sizeof(dummy.data[0]); +#endif + return computed_size; +} + +int TfLiteIntArrayEqual(const TfLiteIntArray* a, const TfLiteIntArray* b) { + if (a == b) return 1; + if (a == nullptr || b == nullptr) return 0; + return TfLiteIntArrayEqualsArray(a, b->size, b->data); +} + +int TfLiteIntArrayEqualsArray(const TfLiteIntArray* a, int b_size, + const int b_data[]) { + if (a == nullptr) return (b_size == 0); + if (a->size != b_size) return 0; + int i = 0; + for (; i < a->size; i++) + if (a->data[i] != b_data[i]) return 0; + return 1; +} + +#ifndef TF_LITE_STATIC_MEMORY + +TfLiteIntArray* TfLiteIntArrayCreate(int size) { + size_t alloc_size = TfLiteIntArrayGetSizeInBytes(size); + if (alloc_size <= 0) return nullptr; + TfLiteIntArray* ret = (TfLiteIntArray*)malloc(alloc_size); + if (!ret) return ret; + ret->size = size; + return ret; +} + +TfLiteIntArray* TfLiteIntArrayCopy(const TfLiteIntArray* src) { + if (!src) return nullptr; + TfLiteIntArray* ret = TfLiteIntArrayCreate(src->size); + if (ret) { + memcpy(ret->data, src->data, src->size * sizeof(int)); + } + return ret; +} + +void TfLiteIntArrayFree(TfLiteIntArray* a) { free(a); } + +#endif // TF_LITE_STATIC_MEMORY + +int TfLiteFloatArrayGetSizeInBytes(int size) { + static TfLiteFloatArray dummy; + + int computed_size = sizeof(dummy) + 
sizeof(dummy.data[0]) * size; +#if defined(_MSC_VER) + // Context for why this is needed is in http://b/189926408#comment21 + computed_size -= sizeof(dummy.data[0]); +#endif + return computed_size; +} + +#ifndef TF_LITE_STATIC_MEMORY + +TfLiteFloatArray* TfLiteFloatArrayCreate(int size) { + TfLiteFloatArray* ret = + (TfLiteFloatArray*)malloc(TfLiteFloatArrayGetSizeInBytes(size)); + ret->size = size; + return ret; +} + +void TfLiteFloatArrayFree(TfLiteFloatArray* a) { free(a); } + +void TfLiteTensorDataFree(TfLiteTensor* t) { + if (t->allocation_type == kTfLiteDynamic || + t->allocation_type == kTfLitePersistentRo) { + if (t->data.raw) { +#ifdef TF_LITE_TENSORFLOW_PROFILER + tflite::PauseHeapMonitoring(/*pause=*/true); + tflite::OnTfLiteTensorDealloc(t); +#endif + free(t->data.raw); +#ifdef TF_LITE_TENSORFLOW_PROFILER + tflite::PauseHeapMonitoring(/*pause=*/false); +#endif + } + } + t->data.raw = nullptr; +} + +void TfLiteQuantizationFree(TfLiteQuantization* quantization) { + if (quantization->type == kTfLiteAffineQuantization) { + TfLiteAffineQuantization* q_params = + (TfLiteAffineQuantization*)(quantization->params); + if (q_params->scale) { + TfLiteFloatArrayFree(q_params->scale); + q_params->scale = nullptr; + } + if (q_params->zero_point) { + TfLiteIntArrayFree(q_params->zero_point); + q_params->zero_point = nullptr; + } + free(q_params); + } + quantization->params = nullptr; + quantization->type = kTfLiteNoQuantization; +} + +void TfLiteSparsityFree(TfLiteSparsity* sparsity) { + if (sparsity == nullptr) { + return; + } + + if (sparsity->traversal_order) { + TfLiteIntArrayFree(sparsity->traversal_order); + sparsity->traversal_order = nullptr; + } + + if (sparsity->block_map) { + TfLiteIntArrayFree(sparsity->block_map); + sparsity->block_map = nullptr; + } + + if (sparsity->dim_metadata) { + int i = 0; + for (; i < sparsity->dim_metadata_size; i++) { + TfLiteDimensionMetadata metadata = sparsity->dim_metadata[i]; + if (metadata.format == kTfLiteDimSparseCSR) { + TfLiteIntArrayFree(metadata.array_segments); + metadata.array_segments = nullptr; + TfLiteIntArrayFree(metadata.array_indices); + metadata.array_indices = nullptr; + } + } + free(sparsity->dim_metadata); + sparsity->dim_metadata = nullptr; + } + + free(sparsity); +} + +void TfLiteTensorFree(TfLiteTensor* t) { + TfLiteTensorDataFree(t); + if (t->dims) TfLiteIntArrayFree(t->dims); + t->dims = nullptr; + + if (t->dims_signature) { + TfLiteIntArrayFree((TfLiteIntArray*)t->dims_signature); + } + t->dims_signature = nullptr; + + TfLiteQuantizationFree(&t->quantization); + TfLiteSparsityFree(t->sparsity); + t->sparsity = nullptr; +} + +void TfLiteTensorReset(TfLiteType type, const char* name, TfLiteIntArray* dims, + TfLiteQuantizationParams quantization, char* buffer, + size_t size, TfLiteAllocationType allocation_type, + const void* allocation, bool is_variable, + TfLiteTensor* tensor) { + TfLiteTensorFree(tensor); + tensor->type = type; + tensor->name = name; + tensor->dims = dims; + tensor->params = quantization; + tensor->data.raw = buffer; + tensor->bytes = size; + tensor->allocation_type = allocation_type; + tensor->allocation = allocation; + tensor->is_variable = is_variable; + + tensor->quantization.type = kTfLiteNoQuantization; + tensor->quantization.params = nullptr; +} + +TfLiteStatus TfLiteTensorCopy(const TfLiteTensor* src, TfLiteTensor* dst) { + if (!src || !dst) return kTfLiteOk; + if (src->bytes != dst->bytes) return kTfLiteError; + if (src == dst) return kTfLiteOk; + + dst->type = src->type; + if (dst->dims) 
TfLiteIntArrayFree(dst->dims); + dst->dims = TfLiteIntArrayCopy(src->dims); + memcpy(dst->data.raw, src->data.raw, src->bytes); + dst->buffer_handle = src->buffer_handle; + dst->data_is_stale = src->data_is_stale; + dst->delegate = src->delegate; + + return kTfLiteOk; +} + +TfLiteStatus TfLiteTensorResizeMaybeCopy(size_t num_bytes, TfLiteTensor* tensor, + bool preserve_data) { + if (tensor->allocation_type != kTfLiteDynamic && + tensor->allocation_type != kTfLitePersistentRo) { + return kTfLiteOk; + } +#ifdef TF_LITE_TENSORFLOW_PROFILER + tflite::PauseHeapMonitoring(/*pause=*/true); +#endif + size_t alloc_bytes = num_bytes; + // TODO(b/145340303): Tensor data should be aligned. +#ifdef TFLITE_KERNEL_USE_XNNPACK + alloc_bytes += 16; // XNNPACK_EXTRA_BYTES = 16 +#endif + if (!tensor->data.data) { + tensor->data.data = (char*)malloc(alloc_bytes); +#ifdef TF_LITE_TENSORFLOW_PROFILER + tflite::OnTfLiteTensorAlloc(tensor, alloc_bytes); +#endif + } else if (num_bytes > tensor->bytes) { +#ifdef TF_LITE_TENSORFLOW_PROFILER + tflite::OnTfLiteTensorDealloc(tensor); +#endif + if (preserve_data) { + tensor->data.data = (char*)realloc(tensor->data.data, alloc_bytes); + } else { + // Calling free and malloc can be more efficient as it avoids needlessly + // copying the data when it is not required. + free(tensor->data.data); + tensor->data.data = (char*)malloc(alloc_bytes); + } +#ifdef TF_LITE_TENSORFLOW_PROFILER + tflite::OnTfLiteTensorAlloc(tensor, alloc_bytes); +#endif + } +#ifdef TF_LITE_TENSORFLOW_PROFILER + tflite::PauseHeapMonitoring(/*pause=*/false); +#endif + tensor->bytes = num_bytes; + if (tensor->data.data == nullptr && num_bytes != 0) { + // We are done allocating but tensor is pointing to null and a valid size + // was requested, so we error. + return kTfLiteError; + } + return kTfLiteOk; +} + +TfLiteStatus TfLiteTensorRealloc(size_t num_bytes, TfLiteTensor* tensor) { + return TfLiteTensorResizeMaybeCopy(num_bytes, tensor, true); +} +#endif // TF_LITE_STATIC_MEMORY + +const char* TfLiteTypeGetName(TfLiteType type) { + switch (type) { + case kTfLiteNoType: + return "NOTYPE"; + case kTfLiteFloat32: + return "FLOAT32"; + case kTfLiteUInt16: + return "UINT16"; + case kTfLiteInt16: + return "INT16"; + case kTfLiteInt32: + return "INT32"; + case kTfLiteUInt32: + return "UINT32"; + case kTfLiteUInt8: + return "UINT8"; + case kTfLiteInt8: + return "INT8"; + case kTfLiteInt64: + return "INT64"; + case kTfLiteUInt64: + return "UINT64"; + case kTfLiteBool: + return "BOOL"; + case kTfLiteComplex64: + return "COMPLEX64"; + case kTfLiteComplex128: + return "COMPLEX128"; + case kTfLiteString: + return "STRING"; + case kTfLiteFloat16: + return "FLOAT16"; + case kTfLiteFloat64: + return "FLOAT64"; + case kTfLiteResource: + return "RESOURCE"; + case kTfLiteVariant: + return "VARIANT"; + case kTfLiteInt4: + return "INT4"; + } + return "Unknown type"; +} + +TfLiteDelegate TfLiteDelegateCreate() { return TfLiteDelegate{}; } + +TfLiteOpaqueDelegate* TfLiteOpaqueDelegateCreate( + const TfLiteOpaqueDelegateBuilder* opaque_delegate_builder) { + if (!opaque_delegate_builder) return nullptr; + + TfLiteDelegate* result = new TfLiteDelegate{}; + result->opaque_delegate_builder = new TfLiteOpaqueDelegateBuilder{}; + *(result->opaque_delegate_builder) = *opaque_delegate_builder; + + return reinterpret_cast(result); +} + +void TfLiteOpaqueDelegateDelete(TfLiteOpaqueDelegate* opaque_delegate) { + if (!opaque_delegate) return; + + const TfLiteDelegate* tflite_delegate = + reinterpret_cast(opaque_delegate); + delete 
tflite_delegate->opaque_delegate_builder; + delete tflite_delegate; +} + +void* TfLiteOpaqueDelegateGetData(const TfLiteOpaqueDelegate* delegate) { + if (!delegate) return nullptr; + + // The following cast is safe only because this code is part of the + // TF Lite runtime implementation. Apps using TF Lite should not rely on + // 'TfLiteOpaqueDelegate' and 'TfLiteDelegate' being equivalent. + const auto* tflite_delegate = + reinterpret_cast(delegate); + + if (!tflite_delegate->opaque_delegate_builder) return tflite_delegate->data_; + + return tflite_delegate->opaque_delegate_builder->data; +} + +} // extern "C" diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/core/api/error_reporter.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/core/api/error_reporter.cpp new file mode 100644 index 0000000..e6b84a6 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/core/api/error_reporter.cpp @@ -0,0 +1,38 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "edge-impulse-sdk/tensorflow/lite/core/api/error_reporter.h" +#include + +namespace tflite { + +int ErrorReporter::Report(const char* format, ...) { + va_list args; + va_start(args, format); + int code = Report(format, args); + va_end(args); + return code; +} + +// TODO(aselle): Make the name of ReportError on context the same, so +// we can use the ensure functions w/o a context and w/ a reporter. +int ErrorReporter::ReportError(void*, const char* format, ...) { + va_list args; + va_start(args, format); + int code = Report(format, args); + va_end(args); + return code; +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/core/api/error_reporter.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/core/api/error_reporter.h new file mode 100644 index 0000000..99ab8cf --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/core/api/error_reporter.h @@ -0,0 +1,72 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_CORE_API_ERROR_REPORTER_H_ +#define TENSORFLOW_LITE_CORE_API_ERROR_REPORTER_H_ + +#include + +namespace tflite { + +/// A functor that reports error to supporting system. Invoked similar to +/// printf. 
+/// +/// Usage: +/// ErrorReporter foo; +/// foo.Report("test %d", 5); +/// or +/// va_list args; +/// foo.Report("test %d", args); // where args is va_list +/// +/// Subclass ErrorReporter to provide another reporting destination. +/// For example, if you have a GUI program, you might redirect to a buffer +/// that drives a GUI error log box. +class ErrorReporter { + public: + virtual ~ErrorReporter() = default; + /// Converts `args` to character equivalents according to `format` string, + /// constructs the error string and report it. + /// Returns number of characters written or zero on success, and negative + /// number on error. + virtual int Report(const char* format, va_list args) = 0; + + /// Converts arguments to character equivalents according to `format` string, + /// constructs the error string and report it. + /// Returns number of characters written or zero on success, and negative + /// number on error. + int Report(const char* format, ...); + + /// Equivalent to `Report` above. The additional `void*` parameter is unused. + /// This method is for compatibility with macros that takes `TfLiteContext`, + /// like TF_LITE_ENSURE and related macros. + int ReportError(void*, const char* format, ...); +}; + +} // namespace tflite + +// You should not make bare calls to the error reporter, instead use the +// TF_LITE_REPORT_ERROR macro, since this allows message strings to be +// stripped when the binary size has to be optimized. If you are looking to +// reduce binary size, define TF_LITE_STRIP_ERROR_STRINGS when compiling and +// every call will be stubbed out, taking no memory. +#ifndef TF_LITE_STRIP_ERROR_STRINGS +#define TF_LITE_REPORT_ERROR(reporter, ...) \ + do { \ + static_cast(reporter)->Report(__VA_ARGS__); \ + } while (false) +#else // TF_LITE_STRIP_ERROR_STRINGS +#define TF_LITE_REPORT_ERROR(reporter, ...) +#endif // TF_LITE_STRIP_ERROR_STRINGS + +#endif // TENSORFLOW_LITE_CORE_API_ERROR_REPORTER_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/core/api/flatbuffer_conversions.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/core/api/flatbuffer_conversions.cpp new file mode 100644 index 0000000..31d4af9 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/core/api/flatbuffer_conversions.cpp @@ -0,0 +1,2518 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/core/api/flatbuffer_conversions.h" + +#include +#include +#include + +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/flatbuffers.h" // from @flatbuffers +#include "edge-impulse-sdk/tensorflow/lite/core/api/error_reporter.h" +#include "edge-impulse-sdk/tensorflow/lite/core/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/core/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h" +#include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h" +#include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated_full.h" + +namespace tflite { + +namespace { + +// Utility class for safely allocating POD data. This is useful for avoiding +// leaks in cases where op params are allocated but fail to propagate to the +// parsed op data (e.g., when model parameters are invalid). +class SafeBuiltinDataAllocator { + public: + class BuiltinDataDeleter { + public: + explicit BuiltinDataDeleter(BuiltinDataAllocator* allocator) + : allocator_(allocator) {} + + void operator()(void* data) { allocator_->Deallocate(data); } + + private: + BuiltinDataAllocator* allocator_; + }; + + template + using BuiltinDataPtr = std::unique_ptr; + + explicit SafeBuiltinDataAllocator(BuiltinDataAllocator* allocator) + : allocator_(allocator) {} + + template + BuiltinDataPtr Allocate() { + return BuiltinDataPtr(allocator_->AllocatePOD(), + BuiltinDataDeleter(allocator_)); + } + + private: + BuiltinDataAllocator* allocator_; +}; + +// All the Parse functions take some pointers as params and this function has +// the common DCHECKs to catch if any of those are nullptr. +void CheckParsePointerParams(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data) { + TFLITE_DCHECK(op != nullptr); + TFLITE_DCHECK(error_reporter != nullptr); + TFLITE_DCHECK(allocator != nullptr); + TFLITE_DCHECK(builtin_data != nullptr); +} + +// Copies the contents from the flatbuffer int vector `flatbuffer` into the +// int array `buffer`. `flat_vector` and `buffer` represent the same +// configuration operation for a given operation. +TfLiteStatus FlatBufferIntVectorToArray( + int max_size_of_buffer, const flatbuffers::Vector* flat_vector, + int* buffer, ErrorReporter* error_reporter, const char* op_name) { + if (!flat_vector) { + TF_LITE_REPORT_ERROR(error_reporter, + "Input array not provided for operation '%s'.\n", + op_name); + return kTfLiteError; + } else { + size_t num_dimensions = flat_vector->size(); + if (num_dimensions > max_size_of_buffer / sizeof(int)) { + TF_LITE_REPORT_ERROR( + error_reporter, + "Found too many dimensions in the input array of operation '%s'.\n", + op_name); + return kTfLiteError; + } else { + for (size_t i = 0; i < num_dimensions; ++i) { + buffer[i] = flat_vector->Get(i); + } + } + } + return kTfLiteOk; +} + +// Converts the flatbuffer activation to what is used at runtime. 
+TfLiteFusedActivation ConvertActivation(ActivationFunctionType activation) { + switch (activation) { + case ActivationFunctionType_NONE: + return kTfLiteActNone; + case ActivationFunctionType_RELU: + return kTfLiteActRelu; + case ActivationFunctionType_RELU_N1_TO_1: + return kTfLiteActReluN1To1; + case ActivationFunctionType_RELU6: + return kTfLiteActRelu6; + case ActivationFunctionType_TANH: + return kTfLiteActTanh; + case ActivationFunctionType_SIGN_BIT: + return kTfLiteActSignBit; + } + return kTfLiteActNone; +} + +// Converts the flatbuffer padding enum to what is used at runtime. +TfLitePadding ConvertPadding(Padding padding) { + switch (padding) { + case Padding_SAME: + return kTfLitePaddingSame; + case Padding_VALID: + return kTfLitePaddingValid; + } + return kTfLitePaddingUnknown; +} + +// Converts the flatbuffer mirror padding enum to what is used at runtime. +TfLiteMirrorPaddingMode ConvertMirrorPadding(MirrorPadMode padding) { + switch (padding) { + case MirrorPadMode_REFLECT: + return kTfLiteMirrorPaddingReflect; + case MirrorPadMode_SYMMETRIC: + return kTfLiteMirrorPaddingSymmetric; + } + return kTfLiteMirrorPaddingUnknown; +} + +#ifndef TF_LITE_STATIC_MEMORY +TfLiteStatus ParseOpDataTfLite(const Operator* op, BuiltinOperator op_type, + ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data) { + auto parseLSHProjectionType = [](LSHProjectionType type) { + switch (type) { + case LSHProjectionType_SPARSE: + return kTfLiteLshProjectionSparse; + case LSHProjectionType_DENSE: + return kTfLiteLshProjectionDense; + default: + return kTfLiteLshProjectionUnknown; + } + }; + auto parseCombinerType = [](CombinerType type) { + switch (type) { + case CombinerType_MEAN: + return kTfLiteCombinerTypeMean; + case CombinerType_SQRTN: + return kTfLiteCombinerTypeSqrtn; + case CombinerType_SUM: + default: + return kTfLiteCombinerTypeSum; + } + }; + + SafeBuiltinDataAllocator safe_allocator(allocator); + *builtin_data = nullptr; + switch (op_type) { + case BuiltinOperator_ABS: { + return ParseAbs(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_ADD: { + return ParseAdd(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_ADD_N: { + return ParseAddN(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_ARG_MAX: { + return ParseArgMax(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_ARG_MIN: { + return ParseArgMin(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_ASSIGN_VARIABLE: { + return ParseAssignVariable(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_AVERAGE_POOL_2D: { + return ParsePool(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_BATCH_MATMUL: { + return ParseBatchMatMul(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_BATCH_TO_SPACE_ND: { + return ParseBatchToSpaceNd(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_BROADCAST_ARGS: { + return ParseBroadcastArgs(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_BROADCAST_TO: { + return ParseBroadcastTo(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_CALL_ONCE: { + return ParseCallOnce(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_CEIL: { + return ParseCeil(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_CONCATENATION: { + return ParseConcatenation(op, error_reporter, allocator, 
builtin_data); + } + + case BuiltinOperator_CONV_2D: { + return ParseConv2D(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_CUMSUM: { + return ParseCumsum(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_DEPTH_TO_SPACE: { + return ParseDepthToSpace(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_DEPTHWISE_CONV_2D: { + return ParseDepthwiseConv2D(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_DEQUANTIZE: { + return ParseDequantize(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_DIV: { + return ParseDiv(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_ELU: { + return ParseElu(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_EXP: { + return ParseExp(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_EXPAND_DIMS: { + return ParseExpandDims(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_FILL: { + return ParseFill(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_FLOOR: { + return ParseFloor(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_FLOOR_DIV: { + return ParseFloorDiv(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_FLOOR_MOD: { + return ParseFloorMod(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_FULLY_CONNECTED: { + return ParseFullyConnected(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_GATHER_ND: { + return ParseGatherNd(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_GREATER: { + return ParseGreater(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_GREATER_EQUAL: { + return ParseGreaterEqual(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_HARD_SWISH: { + return ParseHardSwish(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_L2_NORMALIZATION: { + return ParseL2Normalization(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_L2_POOL_2D: { + return ParsePool(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_LEAKY_RELU: { + return ParseLeakyRelu(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_LESS: { + return ParseLess(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_LESS_EQUAL: { + return ParseLessEqual(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_LOG: { + return ParseLog(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_LOGICAL_AND: { + return ParseLogicalAnd(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_LOGICAL_NOT: { + return ParseLogicalNot(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_LOGICAL_OR: { + return ParseLogicalOr(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_LOGISTIC: { + return ParseLogistic(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_LOG_SOFTMAX: { + return ParseLogSoftmax(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_LSTM: { + return ParseLSTM(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_MAXIMUM: { + return ParseMaximum(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_MAX_POOL_2D: { + return ParsePool(op, error_reporter, allocator, builtin_data); + 
} + + case BuiltinOperator_MIRROR_PAD: { + return ParseMirrorPad(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_MEAN: { + return ParseReducer(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_MINIMUM: { + return ParseMinimum(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_MUL: { + return ParseMul(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_NEG: { + return ParseNeg(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_NOT_EQUAL: { + return ParseNotEqual(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_PACK: { + return ParsePack(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_PAD: { + return ParsePad(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_PADV2: { + return ParsePadV2(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_POW: { + return ParsePow(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_PRELU: { + return ParsePrelu(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_QUANTIZE: { + return ParseQuantize(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_READ_VARIABLE: { + return ParseReadVariable(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_REDUCE_ANY: { + return ParseReducer(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_REDUCE_ALL: { + return ParseReducer(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_REDUCE_MAX: { + return ParseReducer(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_REDUCE_MIN: { + return ParseReducer(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_REDUCE_PROD: { + return ParseReducer(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_RELU: { + return ParseRelu(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_RELU6: { + return ParseRelu6(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_RESHAPE: { + return ParseReshape(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_RESIZE_BILINEAR: { + return ParseResizeBilinear(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_RESIZE_NEAREST_NEIGHBOR: { + return ParseResizeNearestNeighbor(op, error_reporter, allocator, + builtin_data); + } + + case BuiltinOperator_ROUND: { + return ParseRound(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_RSQRT: { + return ParseRsqrt(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_SELECT_V2: { + return ParseSelectV2(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_SHAPE: { + return ParseShape(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_SIN: { + return ParseSin(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_SOFTMAX: { + return ParseSoftmax(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_SPACE_TO_BATCH_ND: { + return ParseSpaceToBatchNd(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_SPACE_TO_DEPTH: { + return ParseSpaceToDepth(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_SPLIT: { + return ParseSplit(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_SPLIT_V: { + return ParseSplitV(op, 
error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_SQRT: { + return ParseSqrt(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_SQUARE: { + return ParseSquare(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_SQUARED_DIFFERENCE: { + return ParseSquaredDifference(op, error_reporter, allocator, + builtin_data); + } + + case BuiltinOperator_SQUEEZE: { + return ParseSqueeze(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_STRIDED_SLICE: { + return ParseStridedSlice(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_SUB: { + return ParseSub(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_SUM: { + return ParseReducer(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_SVDF: { + return ParseSvdf(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_TANH: { + return ParseTanh(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_TRANSPOSE_CONV: { + return ParseTransposeConv(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_UNPACK: { + return ParseUnpack(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_VAR_HANDLE: { + return ParseVarHandle(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_ZEROS_LIKE: { + return ParseZerosLike(op, error_reporter, allocator, builtin_data); + } + + case BuiltinOperator_CAST: { + return ParseCast(op, error_reporter, allocator, builtin_data); + } + case BuiltinOperator_LSH_PROJECTION: { + auto params = safe_allocator.Allocate(); + TF_LITE_ENSURE(error_reporter, params != nullptr); + if (const auto* lshParams = + op->builtin_options_as_LSHProjectionOptions()) { + params->type = parseLSHProjectionType(lshParams->type()); + } + *builtin_data = params.release(); + return kTfLiteOk; + } + case BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN: { + auto params = safe_allocator.Allocate(); + TF_LITE_ENSURE(error_reporter, params != nullptr); + if (const auto* sequence_rnn_params = + op->builtin_options_as_SequenceRNNOptions()) { + params->activation = + ConvertActivation(sequence_rnn_params->fused_activation_function()); + params->time_major = sequence_rnn_params->time_major(); + params->asymmetric_quantize_inputs = + sequence_rnn_params->asymmetric_quantize_inputs(); + } + *builtin_data = params.release(); + return kTfLiteOk; + } + case BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN: { + auto params = + safe_allocator.Allocate(); + TF_LITE_ENSURE(error_reporter, params != nullptr); + if (const auto* bidi_sequence_rnn_params = + op->builtin_options_as_BidirectionalSequenceRNNOptions()) { + params->activation = ConvertActivation( + bidi_sequence_rnn_params->fused_activation_function()); + params->time_major = bidi_sequence_rnn_params->time_major(); + params->merge_outputs = bidi_sequence_rnn_params->merge_outputs(); + params->asymmetric_quantize_inputs = + bidi_sequence_rnn_params->asymmetric_quantize_inputs(); + } + *builtin_data = params.release(); + return kTfLiteOk; + } + case BuiltinOperator_RNN: { + auto params = safe_allocator.Allocate(); + TF_LITE_ENSURE(error_reporter, params != nullptr); + if (const auto* rnn_params = op->builtin_options_as_RNNOptions()) { + params->activation = + ConvertActivation(rnn_params->fused_activation_function()); + params->asymmetric_quantize_inputs = + rnn_params->asymmetric_quantize_inputs(); + } + *builtin_data = params.release(); + return kTfLiteOk; + } + case 
BuiltinOperator_EMBEDDING_LOOKUP_SPARSE: { + auto params = + safe_allocator.Allocate(); + TF_LITE_ENSURE(error_reporter, params != nullptr); + if (const auto* embedding_params = + op->builtin_options_as_EmbeddingLookupSparseOptions()) { + params->combiner = parseCombinerType(embedding_params->combiner()); + } + *builtin_data = params.release(); + return kTfLiteOk; + } + + case BuiltinOperator_HASHTABLE_LOOKUP: + // no-op. + return kTfLiteOk; + + case BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION: { + auto params = safe_allocator.Allocate(); + TF_LITE_ENSURE(error_reporter, params != nullptr); + if (const auto* schema_params = + op->builtin_options_as_LocalResponseNormalizationOptions()) { + params->radius = schema_params->radius(); + params->bias = schema_params->bias(); + params->alpha = schema_params->alpha(); + params->beta = schema_params->beta(); + } + *builtin_data = params.release(); + return kTfLiteOk; + } + case BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM: { + return ParseUnidirectionalSequenceLSTM(op, error_reporter, allocator, + builtin_data); + } + case BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM: { + auto params = + safe_allocator.Allocate(); + TF_LITE_ENSURE(error_reporter, params != nullptr); + if (const auto* bidi_lstm_params = + op->builtin_options_as_BidirectionalSequenceLSTMOptions()) { + params->activation = + ConvertActivation(bidi_lstm_params->fused_activation_function()); + params->cell_clip = bidi_lstm_params->cell_clip(); + params->proj_clip = bidi_lstm_params->proj_clip(); + params->merge_outputs = bidi_lstm_params->merge_outputs(); + params->time_major = bidi_lstm_params->time_major(); + params->asymmetric_quantize_inputs = + bidi_lstm_params->asymmetric_quantize_inputs(); + } + *builtin_data = params.release(); + return kTfLiteOk; + } + case BuiltinOperator_SKIP_GRAM: { + auto params = safe_allocator.Allocate(); + TF_LITE_ENSURE(error_reporter, params != nullptr); + if (const auto* skip_gram_params = + op->builtin_options_as_SkipGramOptions()) { + params->ngram_size = skip_gram_params->ngram_size(); + params->max_skip_size = skip_gram_params->max_skip_size(); + params->include_all_ngrams = skip_gram_params->include_all_ngrams(); + } + *builtin_data = params.release(); + return kTfLiteOk; + } + + case BuiltinOperator_GATHER: { + return ParseGather(op, error_reporter, allocator, builtin_data); + } + case BuiltinOperator_SPARSE_TO_DENSE: { + auto params = safe_allocator.Allocate(); + TF_LITE_ENSURE(error_reporter, params != nullptr); + if (const auto* sparse_to_dense_params = + op->builtin_options_as_SparseToDenseOptions()) { + params->validate_indices = sparse_to_dense_params->validate_indices(); + } + *builtin_data = params.release(); + return kTfLiteOk; + } + case BuiltinOperator_DELEGATE: { + TF_LITE_REPORT_ERROR(error_reporter, + "DELEGATE op shouldn't exist in model."); + return kTfLiteError; + } + case BuiltinOperator_FAKE_QUANT: { + auto params = safe_allocator.Allocate(); + TF_LITE_ENSURE(error_reporter, params != nullptr); + if (const auto* schema_params = + op->builtin_options_as_FakeQuantOptions()) { + params->min = schema_params->min(); + params->max = schema_params->max(); + params->num_bits = schema_params->num_bits(); + params->narrow_range = schema_params->narrow_range(); + } + *builtin_data = params.release(); + return kTfLiteOk; + } + case BuiltinOperator_ONE_HOT: { + auto params = safe_allocator.Allocate(); + TF_LITE_ENSURE(error_reporter, params != nullptr); + if (const auto* schema_params = op->builtin_options_as_OneHotOptions()) { + 
params->axis = schema_params->axis(); + } + *builtin_data = params.release(); + return kTfLiteOk; + } + case BuiltinOperator_UNIQUE: { + auto params = safe_allocator.Allocate(); + TF_LITE_ENSURE(error_reporter, params != nullptr); + const auto* unique_params = op->builtin_options_as_UniqueOptions(); + if (unique_params != nullptr) { + params->index_out_type = + unique_params->idx_out_type() == tflite::TensorType_INT64 + ? TfLiteType::kTfLiteInt64 + : TfLiteType::kTfLiteInt32; + } + *builtin_data = params.release(); + return kTfLiteOk; + } + case BuiltinOperator_REVERSE_SEQUENCE: { + auto params = safe_allocator.Allocate(); + TF_LITE_ENSURE(error_reporter, params != nullptr); + if (const auto* reverse_seq_params = + op->builtin_options_as_ReverseSequenceOptions()) { + params->seq_dim = reverse_seq_params->seq_dim(); + params->batch_dim = reverse_seq_params->batch_dim(); + } + *builtin_data = params.release(); + return kTfLiteOk; + } + case BuiltinOperator_IF: { + auto params = safe_allocator.Allocate(); + TF_LITE_ENSURE(error_reporter, params != nullptr); + if (const auto* if_params = op->builtin_options_as_IfOptions()) { + params->then_subgraph_index = if_params->then_subgraph_index(); + params->else_subgraph_index = if_params->else_subgraph_index(); + } + *builtin_data = params.release(); + return kTfLiteOk; + } + case BuiltinOperator_WHILE: { + auto params = safe_allocator.Allocate(); + TF_LITE_ENSURE(error_reporter, params != nullptr); + if (const auto* while_params = op->builtin_options_as_WhileOptions()) { + params->cond_subgraph_index = while_params->cond_subgraph_index(); + params->body_subgraph_index = while_params->body_subgraph_index(); + } + *builtin_data = params.release(); + return kTfLiteOk; + } + case BuiltinOperator_CONV_3D: + case BuiltinOperator_CONV_3D_TRANSPOSE: { + auto params = safe_allocator.Allocate(); + TF_LITE_ENSURE(error_reporter, params != nullptr); + if (const auto* conv3d_params = op->builtin_options_as_Conv3DOptions()) { + params->padding = ConvertPadding(conv3d_params->padding()); + params->activation = + ConvertActivation(conv3d_params->fused_activation_function()); + params->stride_depth = conv3d_params->stride_d(); + params->stride_height = conv3d_params->stride_h(); + params->stride_width = conv3d_params->stride_w(); + params->dilation_depth_factor = conv3d_params->dilation_d_factor(); + params->dilation_height_factor = conv3d_params->dilation_h_factor(); + params->dilation_width_factor = conv3d_params->dilation_w_factor(); + } + *builtin_data = params.release(); + return kTfLiteOk; + } + case BuiltinOperator_HASHTABLE: { + auto params = safe_allocator.Allocate(); + TF_LITE_ENSURE(error_reporter, params != nullptr); + if (const auto* hashtable_params = + op->builtin_options_as_HashtableOptions()) { + params->table_id = hashtable_params->table_id(); + TF_LITE_ENSURE_STATUS(ConvertTensorType( + hashtable_params->key_dtype(), ¶ms->key_dtype, error_reporter)); + TF_LITE_ENSURE_STATUS(ConvertTensorType(hashtable_params->value_dtype(), + ¶ms->value_dtype, + error_reporter)); + } + *builtin_data = params.release(); + return kTfLiteOk; + } + case BuiltinOperator_MULTINOMIAL: { + auto params = safe_allocator.Allocate(); + TF_LITE_ENSURE(error_reporter, params != nullptr); + if (const auto* multinomial_params = + op->builtin_options_as_RandomOptions()) { + params->seed = multinomial_params->seed(); + params->seed2 = multinomial_params->seed2(); + } + *builtin_data = params.release(); + return kTfLiteOk; + } + case BuiltinOperator_RANDOM_STANDARD_NORMAL: { + auto 
params = safe_allocator.Allocate(); + TF_LITE_ENSURE(error_reporter, params != nullptr); + if (const auto* random_std_normal_params = + op->builtin_options_as_RandomOptions()) { + params->seed = random_std_normal_params->seed(); + params->seed2 = random_std_normal_params->seed2(); + } + *builtin_data = params.release(); + return kTfLiteOk; + } + case BuiltinOperator_BUCKETIZE: { + auto params = safe_allocator.Allocate(); + TF_LITE_ENSURE(error_reporter, params != nullptr); + if (const auto* bucketize_params = + op->builtin_options_as_BucketizeOptions()) { + const flatbuffers::Vector* boundaries = + bucketize_params->boundaries(); + if (boundaries == nullptr) { + TF_LITE_REPORT_ERROR( + error_reporter, + "boundaries array not provided for operation 'bucketize'.\n"); + return kTfLiteError; + } + params->num_boundaries = boundaries->size(); + if (boundaries->data() == nullptr) { + TF_LITE_REPORT_ERROR(error_reporter, + "boundaries.data() returned nullptr for " + "operation 'bucketize'.\n"); + return kTfLiteError; + } + params->boundaries = boundaries->data(); + } + *builtin_data = params.release(); + return kTfLiteOk; + } + case BuiltinOperator_RANDOM_UNIFORM: { + auto params = safe_allocator.Allocate(); + TF_LITE_ENSURE(error_reporter, params != nullptr); + if (const auto* random_uniform_params = + op->builtin_options_as_RandomOptions()) { + params->seed = random_uniform_params->seed(); + params->seed2 = random_uniform_params->seed2(); + } + *builtin_data = params.release(); + return kTfLiteOk; + } + case BuiltinOperator_GELU: { + auto params = safe_allocator.Allocate(); + TF_LITE_ENSURE(error_reporter, params != nullptr); + if (const auto* gelu_params = op->builtin_options_as_GeluOptions()) { + params->approximate = gelu_params->approximate(); + } + *builtin_data = params.release(); + return kTfLiteOk; + } + // Below are the ops with no builtin_data structure. + // TODO(aselle): Implement call in BuiltinOptions, but nullptrs are + // ok for now, since there is no call implementation either. 
+ case BuiltinOperator_CALL: + case BuiltinOperator_COMPLEX_ABS: + case BuiltinOperator_CONCAT_EMBEDDINGS: + case BuiltinOperator_COS: + case BuiltinOperator_CUSTOM: + case BuiltinOperator_DENSIFY: + case BuiltinOperator_DYNAMIC_UPDATE_SLICE: + case BuiltinOperator_EMBEDDING_LOOKUP: + case BuiltinOperator_EQUAL: + case BuiltinOperator_HASHTABLE_FIND: + case BuiltinOperator_HASHTABLE_IMPORT: + case BuiltinOperator_HASHTABLE_SIZE: + case BuiltinOperator_IMAG: + case BuiltinOperator_MATRIX_DIAG: + case BuiltinOperator_MATRIX_SET_DIAG: + case BuiltinOperator_NON_MAX_SUPPRESSION_V4: + case BuiltinOperator_NON_MAX_SUPPRESSION_V5: + case BuiltinOperator_RELU_N1_TO_1: + case BuiltinOperator_RELU_0_TO_1: + case BuiltinOperator_SCATTER_ND: + case BuiltinOperator_SELECT: + case BuiltinOperator_SLICE: + case BuiltinOperator_TILE: + case BuiltinOperator_TOPK_V2: + case BuiltinOperator_TRANSPOSE: + case BuiltinOperator_RANGE: + case BuiltinOperator_RANK: + case BuiltinOperator_REAL: + case BuiltinOperator_RFFT2D: + case BuiltinOperator_SEGMENT_SUM: + case BuiltinOperator_REVERSE_V2: + case BuiltinOperator_UNSORTED_SEGMENT_MAX: + case BuiltinOperator_UNSORTED_SEGMENT_MIN: + case BuiltinOperator_UNSORTED_SEGMENT_PROD: + case BuiltinOperator_UNSORTED_SEGMENT_SUM: + case BuiltinOperator_ATAN2: + case BuiltinOperator_SIGN: + case BuiltinOperator_WHERE: + return kTfLiteOk; + case BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES: + return kTfLiteError; + } + return kTfLiteError; +} // NOLINT[readability/fn_size] +#endif // !defined(TF_LITE_STATIC_MEMORY) +} // namespace + +TfLiteStatus ConvertTensorType(TensorType tensor_type, TfLiteType* type, + ErrorReporter* error_reporter) { + switch (tensor_type) { + case TensorType_FLOAT16: + *type = kTfLiteFloat16; + return kTfLiteOk; + case TensorType_FLOAT32: + *type = kTfLiteFloat32; + return kTfLiteOk; + case TensorType_FLOAT64: + *type = kTfLiteFloat64; + return kTfLiteOk; + case TensorType_INT16: + *type = kTfLiteInt16; + return kTfLiteOk; + case TensorType_UINT16: + *type = kTfLiteUInt16; + return kTfLiteOk; + case TensorType_INT32: + *type = kTfLiteInt32; + return kTfLiteOk; + case TensorType_UINT32: + *type = kTfLiteUInt32; + return kTfLiteOk; + case TensorType_UINT8: + *type = kTfLiteUInt8; + return kTfLiteOk; + case TensorType_INT8: + *type = kTfLiteInt8; + return kTfLiteOk; + case TensorType_INT64: + *type = kTfLiteInt64; + return kTfLiteOk; + case TensorType_UINT64: + *type = kTfLiteUInt64; + return kTfLiteOk; + case TensorType_STRING: + *type = kTfLiteString; + return kTfLiteOk; + case TensorType_BOOL: + *type = kTfLiteBool; + return kTfLiteOk; + case TensorType_COMPLEX64: + *type = kTfLiteComplex64; + return kTfLiteOk; + case TensorType_COMPLEX128: + *type = kTfLiteComplex128; + return kTfLiteOk; + case TensorType_RESOURCE: + *type = kTfLiteResource; + return kTfLiteOk; + case TensorType_VARIANT: + *type = kTfLiteVariant; + return kTfLiteOk; + case TensorType_INT4: + *type = kTfLiteInt4; + return kTfLiteOk; + default: + *type = kTfLiteNoType; + TF_LITE_REPORT_ERROR(error_reporter, + "Unsupported data type %d in tensor\n", tensor_type); + return kTfLiteError; + } +} + +// We have this parse function instead of directly returning kTfLiteOk from the +// switch-case in ParseOpData because this function is used as part of the +// selective registration for the OpResolver implementation in micro. 
+TfLiteStatus ParseAbs(const Operator*, ErrorReporter*, BuiltinDataAllocator*, + void**) { + return kTfLiteOk; +} + +TfLiteStatus ParseAdd(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data) { + CheckParsePointerParams(op, error_reporter, allocator, builtin_data); + + SafeBuiltinDataAllocator safe_allocator(allocator); + std::unique_ptr + params = safe_allocator.Allocate(); + TF_LITE_ENSURE(error_reporter, params != nullptr); + + const AddOptions* schema_params = op->builtin_options_as_AddOptions(); + + if (schema_params != nullptr) { + params->activation = + ConvertActivation(schema_params->fused_activation_function()); + params->pot_scale_int16 = schema_params->pot_scale_int16(); + } else { + // TODO(b/157480169): We should either return kTfLiteError or fill in some + // reasonable defaults in the params struct. We are not doing so until we + // better undertand the ramifications of changing the legacy behavior. + } + + *builtin_data = params.release(); + return kTfLiteOk; +} + +TfLiteStatus ParseAddN(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data) { + return kTfLiteOk; +} + +TfLiteStatus ParseArgMax(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data) { + CheckParsePointerParams(op, error_reporter, allocator, builtin_data); + + SafeBuiltinDataAllocator safe_allocator(allocator); + std::unique_ptr + params = safe_allocator.Allocate(); + TF_LITE_ENSURE(error_reporter, params != nullptr); + + const ArgMaxOptions* schema_params = op->builtin_options_as_ArgMaxOptions(); + + if (schema_params != nullptr) { + TF_LITE_ENSURE_STATUS(ConvertTensorType( + schema_params->output_type(), ¶ms->output_type, error_reporter)); + } else { + // TODO(b/157480169): We should either return kTfLiteError or fill in some + // reasonable defaults in the params struct. We are not doing so until we + // better undertand the ramifications of changing the legacy behavior. + } + + *builtin_data = params.release(); + return kTfLiteOk; +} + +TfLiteStatus ParseArgMin(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data) { + CheckParsePointerParams(op, error_reporter, allocator, builtin_data); + + SafeBuiltinDataAllocator safe_allocator(allocator); + std::unique_ptr + params = safe_allocator.Allocate(); + TF_LITE_ENSURE(error_reporter, params != nullptr); + + const ArgMinOptions* schema_params = op->builtin_options_as_ArgMinOptions(); + + if (schema_params != nullptr) { + TF_LITE_ENSURE_STATUS(ConvertTensorType( + schema_params->output_type(), ¶ms->output_type, error_reporter)); + } else { + // TODO(b/157480169): We should either return kTfLiteError or fill in some + // reasonable defaults in the params struct. We are not doing so until we + // better undertand the ramifications of changing the legacy behavior. + } + + *builtin_data = params.release(); + return kTfLiteOk; +} + +// We have this parse function instead of directly returning kTfLiteOk from the +// switch-case in ParseOpData because this function is used as part of the +// selective registration for the OpResolver implementation in micro. 
+TfLiteStatus ParseAssignVariable(const Operator*, ErrorReporter*, + BuiltinDataAllocator*, void**) { + return kTfLiteOk; +} + +// We have this parse function instead of directly returning kTfLiteOk from the +// switch-case in ParseOpData because this function is used as part of the +// selective registration for the OpResolver implementation in micro. +TfLiteStatus ParseBatchMatMul(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data) { + CheckParsePointerParams(op, error_reporter, allocator, builtin_data); + + SafeBuiltinDataAllocator safe_allocator(allocator); + auto params = safe_allocator.Allocate(); + TF_LITE_ENSURE(error_reporter, params != nullptr); + if (const auto* bmm_params = op->builtin_options_as_BatchMatMulOptions()) { + params->adj_x = bmm_params->adj_x(); + params->adj_y = bmm_params->adj_y(); + params->asymmetric_quantize_inputs = + bmm_params->asymmetric_quantize_inputs(); + } + *builtin_data = params.release(); + return kTfLiteOk; +} + +// We have this parse function instead of directly returning kTfLiteOk from the +// switch-case in ParseOpData because this function is used as part of the +// selective registration for the OpResolver implementation in micro. +TfLiteStatus ParseBatchToSpaceNd(const Operator*, ErrorReporter*, + BuiltinDataAllocator*, void**) { + return kTfLiteOk; +} + +// We have this parse function instead of directly returning kTfLiteOk from the +// switch-case in ParseOpData because this function is used as part of the +// selective registration for the OpResolver implementation in micro. +TfLiteStatus ParseBroadcastArgs(const Operator*, ErrorReporter*, + BuiltinDataAllocator*, void**) { + return kTfLiteOk; +} + +// We have this parse function instead of directly returning kTfLiteOk from the +// switch-case in ParseOpData because this function is used as part of the +// selective registration for the OpResolver implementation in micro. +TfLiteStatus ParseBroadcastTo(const Operator*, ErrorReporter*, + BuiltinDataAllocator*, void**) { + return kTfLiteOk; +} + +TfLiteStatus ParseCallOnce(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data) { + CheckParsePointerParams(op, error_reporter, allocator, builtin_data); + + SafeBuiltinDataAllocator safe_allocator(allocator); + std::unique_ptr + params = safe_allocator.Allocate(); + TF_LITE_ENSURE(error_reporter, params != nullptr); + + const CallOnceOptions* schema_params = + op->builtin_options_as_CallOnceOptions(); + + if (schema_params != nullptr) { + params->init_subgraph_index = schema_params->init_subgraph_index(); + + } else { + // TODO(b/157480169): We should either return kTfLiteError or fill in some + // reasonable defaults in the params struct. We are not doing so until we + // better undertand the ramifications of changing the legacy behavior. + } + + *builtin_data = params.release(); + return kTfLiteOk; +} + +// We have this parse function instead of directly returning kTfLiteOk from the +// switch-case in ParseOpData because this function is used as part of the +// selective registration for the OpResolver implementation in micro. 
+TfLiteStatus ParseCast(const Operator* op, ErrorReporter* error_reporter,
+                       BuiltinDataAllocator* allocator, void** builtin_data) {
+  CheckParsePointerParams(op, error_reporter, allocator, builtin_data);
+
+  SafeBuiltinDataAllocator safe_allocator(allocator);
+  auto params = safe_allocator.Allocate<TfLiteCastParams>();
+  TF_LITE_ENSURE(error_reporter, params != nullptr);
+  if (const auto* schema_params = op->builtin_options_as_CastOptions()) {
+    TF_LITE_ENSURE_STATUS(ConvertTensorType(
+        schema_params->in_data_type(), &params->in_data_type, error_reporter));
+    TF_LITE_ENSURE_STATUS(ConvertTensorType(schema_params->out_data_type(),
+                                            &params->out_data_type,
+                                            error_reporter));
+  }
+  *builtin_data = params.release();
+  return kTfLiteOk;
+}
+
+// We have this parse function instead of directly returning kTfLiteOk from the
+// switch-case in ParseOpData because this function is used as part of the
+// selective registration for the OpResolver implementation in micro.
+TfLiteStatus ParseCeil(const Operator*, ErrorReporter*, BuiltinDataAllocator*,
+                       void**) {
+  return kTfLiteOk;
+}
+
+TfLiteStatus ParseConcatenation(const Operator* op,
+                                ErrorReporter* error_reporter,
+                                BuiltinDataAllocator* allocator,
+                                void** builtin_data) {
+  CheckParsePointerParams(op, error_reporter, allocator, builtin_data);
+
+  SafeBuiltinDataAllocator safe_allocator(allocator);
+  std::unique_ptr<TfLiteConcatenationParams,
+                  SafeBuiltinDataAllocator::BuiltinDataDeleter>
+      params = safe_allocator.Allocate<TfLiteConcatenationParams>();
+  TF_LITE_ENSURE(error_reporter, params != nullptr);
+
+  const ConcatenationOptions* schema_params =
+      op->builtin_options_as_ConcatenationOptions();
+
+  if (schema_params != nullptr) {
+    params->activation =
+        ConvertActivation(schema_params->fused_activation_function());
+    params->axis = schema_params->axis();
+  } else {
+    // TODO(b/157480169): We should either return kTfLiteError or fill in some
+    // reasonable defaults in the params struct. We are not doing so until we
+    // better understand the ramifications of changing the legacy behavior.
+  }
+
+  *builtin_data = params.release();
+  return kTfLiteOk;
+}
+
+// We have this parse function instead of directly returning kTfLiteOk from the
+// switch-case in ParseOpData because this function is used as part of the
+// selective registration for the OpResolver implementation in micro.
+TfLiteStatus ParseComplexAbs(const Operator*, ErrorReporter*,
+                             BuiltinDataAllocator*, void**) {
+  return kTfLiteOk;
+}
+
+TfLiteStatus ParseConv2D(const Operator* op, ErrorReporter* error_reporter,
+                         BuiltinDataAllocator* allocator, void** builtin_data) {
+  CheckParsePointerParams(op, error_reporter, allocator, builtin_data);
+
+  SafeBuiltinDataAllocator safe_allocator(allocator);
+  std::unique_ptr<TfLiteConvParams,
+                  SafeBuiltinDataAllocator::BuiltinDataDeleter>
+      params = safe_allocator.Allocate<TfLiteConvParams>();
+  TF_LITE_ENSURE(error_reporter, params != nullptr);
+
+  const Conv2DOptions* schema_params = op->builtin_options_as_Conv2DOptions();
+
+  if (schema_params != nullptr) {
+    params->padding = ConvertPadding(schema_params->padding());
+    params->stride_width = schema_params->stride_w();
+    params->stride_height = schema_params->stride_h();
+    params->activation =
+        ConvertActivation(schema_params->fused_activation_function());
+
+    params->dilation_width_factor = schema_params->dilation_w_factor();
+    params->dilation_height_factor = schema_params->dilation_h_factor();
+  } else {
+    // TODO(b/157480169): We should either return kTfLiteError or fill in some
+    // reasonable defaults in the params struct. We are not doing so until we
+    // better understand the ramifications of changing the legacy behavior.
+ } + + *builtin_data = params.release(); + return kTfLiteOk; +} + +// We have this parse function instead of directly returning kTfLiteOk from the +// switch-case in ParseOpData because this function is used as part of the +// selective registration for the OpResolver implementation in micro. +TfLiteStatus ParseCumsum(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data) { + CheckParsePointerParams(op, error_reporter, allocator, builtin_data); + + SafeBuiltinDataAllocator safe_allocator(allocator); + auto params = safe_allocator.Allocate(); + TF_LITE_ENSURE(error_reporter, params != nullptr); + if (const auto* cumsum_params = op->builtin_options_as_CumsumOptions()) { + params->exclusive = cumsum_params->exclusive(); + params->reverse = cumsum_params->reverse(); + } + *builtin_data = params.release(); + return kTfLiteOk; +} + +// We have this parse function instead of directly returning kTfLiteOk from the +// switch-case in ParseOpData because this function is used as part of the +// selective registration for the OpResolver implementation in micro. +TfLiteStatus ParseCos(const Operator*, ErrorReporter*, BuiltinDataAllocator*, + void**) { + return kTfLiteOk; +} + +TfLiteStatus ParseDepthToSpace(const Operator* op, + ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data) { + CheckParsePointerParams(op, error_reporter, allocator, builtin_data); + + SafeBuiltinDataAllocator safe_allocator(allocator); + std::unique_ptr + params = safe_allocator.Allocate(); + TF_LITE_ENSURE(error_reporter, params != nullptr); + + const auto* schema_params = op->builtin_options_as_DepthToSpaceOptions(); + if (schema_params != nullptr) { + params->block_size = schema_params->block_size(); + } else { + // TODO(b/157480169): We should either return kTfLiteError or fill in some + // reasonable defaults in the params struct. We are not doing so until we + // better undertand the ramifications of changing the legacy behavior. + } + + *builtin_data = params.release(); + return kTfLiteOk; +} + +TfLiteStatus ParseDepthwiseConv2D(const Operator* op, + ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data) { + CheckParsePointerParams(op, error_reporter, allocator, builtin_data); + + SafeBuiltinDataAllocator safe_allocator(allocator); + + std::unique_ptr + params = safe_allocator.Allocate(); + TF_LITE_ENSURE(error_reporter, params != nullptr); + + const DepthwiseConv2DOptions* schema_params = + op->builtin_options_as_DepthwiseConv2DOptions(); + + if (schema_params != nullptr) { + params->padding = ConvertPadding(schema_params->padding()); + params->stride_width = schema_params->stride_w(); + params->stride_height = schema_params->stride_h(); + params->depth_multiplier = schema_params->depth_multiplier(); + params->activation = + ConvertActivation(schema_params->fused_activation_function()); + + params->dilation_width_factor = schema_params->dilation_w_factor(); + params->dilation_height_factor = schema_params->dilation_h_factor(); + } else { + // TODO(b/157480169): We should either return kTfLiteError or fill in some + // reasonable defaults in the params struct. We are not doing so until we + // better undertand the ramifications of changing the legacy behavior. 
+ } + + *builtin_data = params.release(); + return kTfLiteOk; +} + +// We have this parse function instead of directly returning kTfLiteOk from the +// switch-case in ParseOpData because this function is used as part of the +// selective registration for the OpResolver implementation in micro. +TfLiteStatus ParseDequantize(const Operator*, ErrorReporter*, + BuiltinDataAllocator*, void**) { + return kTfLiteOk; +} + +TfLiteStatus ParseDiv(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data) { + CheckParsePointerParams(op, error_reporter, allocator, builtin_data); + + SafeBuiltinDataAllocator safe_allocator(allocator); + auto params = safe_allocator.Allocate(); + TF_LITE_ENSURE(error_reporter, params != nullptr); + if (const auto* schema_params = op->builtin_options_as_DivOptions()) { + params->activation = + ConvertActivation(schema_params->fused_activation_function()); + } + *builtin_data = params.release(); + return kTfLiteOk; +} + +// We have this parse function instead of directly returning kTfLiteOk from the +// switch-case in ParseOpData because this function is used as part of the +// selective registration for the OpResolver implementation in micro. +TfLiteStatus ParseElu(const Operator*, ErrorReporter*, BuiltinDataAllocator*, + void**) { + return kTfLiteOk; +} + +// We have this parse function instead of directly returning kTfLiteOk from the +// switch-case in ParseOpData because this function is used as part of the +// selective registration for the OpResolver implementation in micro. +TfLiteStatus ParseEqual(const Operator*, ErrorReporter*, BuiltinDataAllocator*, + void**) { + return kTfLiteOk; +} + +// We have this parse function instead of directly returning kTfLiteOk from the +// switch-case in ParseOpData because this function is used as part of the +// selective registration for the OpResolver implementation in micro. +TfLiteStatus ParseExp(const Operator*, ErrorReporter*, BuiltinDataAllocator*, + void**) { + return kTfLiteOk; +} + +// We have this parse function instead of directly returning kTfLiteOk from the +// switch-case in ParseOpData because this function is used as part of the +// selective registration for the OpResolver implementation in micro. +TfLiteStatus ParseExpandDims(const Operator*, ErrorReporter*, + BuiltinDataAllocator*, void**) { + return kTfLiteOk; +} + +// We have this parse function instead of directly returning kTfLiteOk from the +// switch-case in ParseOpData because this function is used as part of the +// selective registration for the OpResolver implementation in micro. +TfLiteStatus ParseFill(const Operator*, ErrorReporter*, BuiltinDataAllocator*, + void**) { + return kTfLiteOk; +} + +// We have this parse function instead of directly returning kTfLiteOk from the +// switch-case in ParseOpData because this function is used as part of the +// selective registration for the OpResolver implementation in micro. +TfLiteStatus ParseFloor(const Operator*, ErrorReporter*, BuiltinDataAllocator*, + void**) { + return kTfLiteOk; +} + +// We have this parse function instead of directly returning kTfLiteOk from the +// switch-case in ParseOpData because this function is used as part of the +// selective registration for the OpResolver implementation in micro. 
+TfLiteStatus ParseFloorDiv(const Operator*, ErrorReporter*, + BuiltinDataAllocator*, void**) { + return kTfLiteOk; +} + +// We have this parse function instead of directly returning kTfLiteOk from the +// switch-case in ParseOpData because this function is used as part of the +// selective registration for the OpResolver implementation in micro. +TfLiteStatus ParseFloorMod(const Operator*, ErrorReporter*, + BuiltinDataAllocator*, void**) { + return kTfLiteOk; +} + +TfLiteStatus ParseFullyConnected(const Operator* op, + ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data) { + CheckParsePointerParams(op, error_reporter, allocator, builtin_data); + + SafeBuiltinDataAllocator safe_allocator(allocator); + + std::unique_ptr + params = safe_allocator.Allocate(); + TF_LITE_ENSURE(error_reporter, params != nullptr); + + const FullyConnectedOptions* schema_params = + op->builtin_options_as_FullyConnectedOptions(); + + if (schema_params != nullptr) { + params->activation = + ConvertActivation(schema_params->fused_activation_function()); + params->keep_num_dims = schema_params->keep_num_dims(); + params->asymmetric_quantize_inputs = + schema_params->asymmetric_quantize_inputs(); + + switch (schema_params->weights_format()) { + case FullyConnectedOptionsWeightsFormat_DEFAULT: + params->weights_format = kTfLiteFullyConnectedWeightsFormatDefault; + break; + case FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8: + params->weights_format = + kTfLiteFullyConnectedWeightsFormatShuffled4x16Int8; + break; + default: + TF_LITE_REPORT_ERROR(error_reporter, + "Unhandled fully-connected weights format."); + return kTfLiteError; + } + } else { + // TODO(b/157480169): We should either return kTfLiteError or fill in some + // reasonable defaults in the params struct. We are not doing so until we + // better undertand the ramifications of changing the legacy behavior. + } + + *builtin_data = params.release(); + return kTfLiteOk; +} + +// We have this parse function instead of directly returning kTfLiteOk from the +// switch-case in ParseOpData because this function is used as part of the +// selective registration for the OpResolver implementation in micro. +TfLiteStatus ParseGather(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data) { + CheckParsePointerParams(op, error_reporter, allocator, builtin_data); + + SafeBuiltinDataAllocator safe_allocator(allocator); + auto params = safe_allocator.Allocate(); + TF_LITE_ENSURE(error_reporter, params != nullptr); + params->axis = 0; + params->batch_dims = 0; + if (const auto* gather_params = op->builtin_options_as_GatherOptions()) { + params->axis = gather_params->axis(); + params->batch_dims = gather_params->batch_dims(); + } + + *builtin_data = params.release(); + return kTfLiteOk; +} + +// We have this parse function instead of directly returning kTfLiteOk from the +// switch-case in ParseOpData because this function is used as part of the +// selective registration for the OpResolver implementation in micro. +TfLiteStatus ParseGatherNd(const Operator*, ErrorReporter*, + BuiltinDataAllocator*, void**) { + return kTfLiteOk; +} + +// We have this parse function instead of directly returning kTfLiteOk from the +// switch-case in ParseOpData because this function is used as part of the +// selective registration for the OpResolver implementation in micro. 
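+// For illustration only (an editorial sketch, not part of the upstream TFLite
+// sources): every typed parser in this file follows the same calling contract.
+// On success, *builtin_data points at an allocator-owned params struct that
+// the caller casts to the matching TfLite*Params type and later returns to the
+// same allocator. The helper name below is hypothetical.
+inline TfLiteStatus ExampleReadFullyConnectedActivation(
+    const Operator* op, ErrorReporter* error_reporter,
+    BuiltinDataAllocator* allocator, TfLiteFusedActivation* activation_out) {
+  void* builtin_data = nullptr;
+  TF_LITE_ENSURE_STATUS(
+      ParseFullyConnected(op, error_reporter, allocator, &builtin_data));
+  const auto* params =
+      static_cast<const TfLiteFullyConnectedParams*>(builtin_data);
+  *activation_out = params->activation;
+  allocator->Deallocate(builtin_data);  // The caller owns the parsed params.
+  return kTfLiteOk;
+}
+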
+TfLiteStatus ParseGreater(const Operator*, ErrorReporter*, + BuiltinDataAllocator*, void**) { + return kTfLiteOk; +} + +// We have this parse function instead of directly returning kTfLiteOk from the +// switch-case in ParseOpData because this function is used as part of the +// selective registration for the OpResolver implementation in micro. +TfLiteStatus ParseGreaterEqual(const Operator*, ErrorReporter*, + BuiltinDataAllocator*, void**) { + return kTfLiteOk; +} + +// We have this parse function instead of directly returning kTfLiteOk from the +// switch-case in ParseOpData because this function is used as part of the +// selective registration for the OpResolver implementation in micro. +TfLiteStatus ParseHardSwish(const Operator*, ErrorReporter*, + BuiltinDataAllocator*, void**) { + return kTfLiteOk; +} + +// We have this parse function instead of directly returning kTfLiteOk from the +// switch-case in ParseOpData because this function is used as part of the +// selective registration for the OpResolver implementation in micro. +TfLiteStatus ParseImag(const Operator*, ErrorReporter*, + BuiltinDataAllocator*, void**) { + return kTfLiteOk; +} + +TfLiteStatus ParseIf(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data) { + CheckParsePointerParams(op, error_reporter, allocator, builtin_data); + + SafeBuiltinDataAllocator safe_allocator(allocator); + std::unique_ptr + params = safe_allocator.Allocate(); + TF_LITE_ENSURE(error_reporter, params != nullptr); + + const IfOptions* schema_params = op->builtin_options_as_IfOptions(); + + if (schema_params != nullptr) { + params->then_subgraph_index = schema_params->then_subgraph_index(); + params->else_subgraph_index = schema_params->else_subgraph_index(); + } else { + // TODO(b/157480169): We should either return kTfLiteError or fill in some + // reasonable defaults in the params struct. We are not doing so until we + // better undertand the ramifications of changing the legacy behavior. + } + + *builtin_data = params.release(); + return kTfLiteOk; +} + +TfLiteStatus ParseL2Normalization(const Operator* op, + ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data) { + CheckParsePointerParams(op, error_reporter, allocator, builtin_data); + + SafeBuiltinDataAllocator safe_allocator(allocator); + std::unique_ptr + params = safe_allocator.Allocate(); + TF_LITE_ENSURE(error_reporter, params != nullptr); + + const L2NormOptions* schema_params = op->builtin_options_as_L2NormOptions(); + + if (schema_params != nullptr) { + params->activation = + ConvertActivation(schema_params->fused_activation_function()); + } else { + // TODO(b/157480169): We should either return kTfLiteError or fill in some + // reasonable defaults in the params struct. We are not doing so until we + // better undertand the ramifications of changing the legacy behavior. 
+ } + + *builtin_data = params.release(); + return kTfLiteOk; +} + +TfLiteStatus ParseLeakyRelu(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data) { + CheckParsePointerParams(op, error_reporter, allocator, builtin_data); + + SafeBuiltinDataAllocator safe_allocator(allocator); + auto params = safe_allocator.Allocate(); + TF_LITE_ENSURE(error_reporter, params != nullptr); + if (const auto* leaky_relu_params = + op->builtin_options_as_LeakyReluOptions()) { + params->alpha = leaky_relu_params->alpha(); + } + *builtin_data = params.release(); + return kTfLiteOk; +} + +// We have this parse function instead of directly returning kTfLiteOk from the +// switch-case in ParseOpData because this function is used as part of the +// selective registration for the OpResolver implementation in micro. +TfLiteStatus ParseLess(const Operator*, ErrorReporter*, BuiltinDataAllocator*, + void**) { + return kTfLiteOk; +} + +// We have this parse function instead of directly returning kTfLiteOk from the +// switch-case in ParseOpData because this function is used as part of the +// selective registration for the OpResolver implementation in micro. +TfLiteStatus ParseLessEqual(const Operator*, ErrorReporter*, + BuiltinDataAllocator*, void**) { + return kTfLiteOk; +} + +// We have this parse function instead of directly returning kTfLiteOk from the +// switch-case in ParseOpData because this function is used as part of the +// selective registration for the OpResolver implementation in micro. +TfLiteStatus ParseLog(const Operator*, ErrorReporter*, BuiltinDataAllocator*, + void**) { + return kTfLiteOk; +} + +// We have this parse function instead of directly returning kTfLiteOk from the +// switch-case in ParseOpData because this function is used as part of the +// selective registration for the OpResolver implementation in micro. +TfLiteStatus ParseLogicalAnd(const Operator*, ErrorReporter*, + BuiltinDataAllocator*, void**) { + return kTfLiteOk; +} + +// We have this parse function instead of directly returning kTfLiteOk from the +// switch-case in ParseOpData because this function is used as part of the +// selective registration for the OpResolver implementation in micro. +TfLiteStatus ParseLogicalNot(const Operator*, ErrorReporter*, + BuiltinDataAllocator*, void**) { + return kTfLiteOk; +} + +// We have this parse function instead of directly returning kTfLiteOk from the +// switch-case in ParseOpData because this function is used as part of the +// selective registration for the OpResolver implementation in micro. +TfLiteStatus ParseLogicalOr(const Operator*, ErrorReporter*, + BuiltinDataAllocator*, void**) { + return kTfLiteOk; +} + +// We have this parse function instead of directly returning kTfLiteOk from the +// switch-case in ParseOpData because this function is used as part of the +// selective registration for the OpResolver implementation in micro. +TfLiteStatus ParseLogistic(const Operator*, ErrorReporter*, + BuiltinDataAllocator*, void**) { + return kTfLiteOk; +} + +// We have this parse function instead of directly returning kTfLiteOk from the +// switch-case in ParseOpData because this function is used as part of the +// selective registration for the OpResolver implementation in micro. 
+TfLiteStatus ParseLogSoftmax(const Operator*, ErrorReporter*, + BuiltinDataAllocator*, void**) { + return kTfLiteOk; +} + +TfLiteStatus ParseLSTM(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data) { + CheckParsePointerParams(op, error_reporter, allocator, builtin_data); + + SafeBuiltinDataAllocator safe_allocator(allocator); + auto params = safe_allocator.Allocate(); + TF_LITE_ENSURE(error_reporter, params != nullptr); + if (const auto* lstm_params = op->builtin_options_as_LSTMOptions()) { + params->activation = + ConvertActivation(lstm_params->fused_activation_function()); + params->cell_clip = lstm_params->cell_clip(); + params->proj_clip = lstm_params->proj_clip(); + switch (lstm_params->kernel_type()) { + case LSTMKernelType_FULL: + params->kernel_type = kTfLiteLSTMFullKernel; + break; + case LSTMKernelType_BASIC: + params->kernel_type = kTfLiteLSTMBasicKernel; + break; + default: + TF_LITE_REPORT_ERROR(error_reporter, "Unhandled LSTM kernel type: %d", + lstm_params->kernel_type()); + return kTfLiteError; + } + params->asymmetric_quantize_inputs = + lstm_params->asymmetric_quantize_inputs(); + } else { + TF_LITE_REPORT_ERROR(error_reporter, "No valid LSTM builtin options exist"); + return kTfLiteError; + } + *builtin_data = params.release(); + return kTfLiteOk; +} + +// We have this parse function instead of directly returning kTfLiteOk from the +// switch-case in ParseOpData because this function is used as part of the +// selective registration for the OpResolver implementation in micro. +TfLiteStatus ParseMaximum(const Operator*, ErrorReporter*, + BuiltinDataAllocator*, void**) { + return kTfLiteOk; +} + +// We have this parse function instead of directly returning kTfLiteOk from the +// switch-case in ParseOpData because this function is used as part of the +// selective registration for the OpResolver implementation in micro. +TfLiteStatus ParseMinimum(const Operator*, ErrorReporter*, + BuiltinDataAllocator*, void**) { + return kTfLiteOk; +} + +TfLiteStatus ParseMirrorPad(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data) { + CheckParsePointerParams(op, error_reporter, allocator, builtin_data); + + SafeBuiltinDataAllocator safe_allocator(allocator); + std::unique_ptr + params = safe_allocator.Allocate(); + TF_LITE_ENSURE(error_reporter, params != nullptr); + + const MirrorPadOptions* schema_params = + op->builtin_options_as_MirrorPadOptions(); + + if (schema_params != nullptr) { + params->mode = ConvertMirrorPadding(schema_params->mode()); + } else { + // TODO(b/157480169): We should either return kTfLiteError or fill in some + // reasonable defaults in the params struct. We are not doing so until we + // better undertand the ramifications of changing the legacy behavior. 
+ } + + *builtin_data = params.release(); + return kTfLiteOk; +} + +TfLiteStatus ParseMul(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data) { + CheckParsePointerParams(op, error_reporter, allocator, builtin_data); + + SafeBuiltinDataAllocator safe_allocator(allocator); + std::unique_ptr + params = safe_allocator.Allocate(); + TF_LITE_ENSURE(error_reporter, params != nullptr); + + const MulOptions* schema_params = op->builtin_options_as_MulOptions(); + + if (schema_params != nullptr) { + params->activation = + ConvertActivation(schema_params->fused_activation_function()); + } else { + // TODO(b/157480169): We should either return kTfLiteError or fill in some + // reasonable defaults in the params struct. We are not doing so until we + // better undertand the ramifications of changing the legacy behavior. + } + + *builtin_data = params.release(); + return kTfLiteOk; +} + +// We have this parse function instead of directly returning kTfLiteOk from the +// switch-case in ParseOpData because this function is used as part of the +// selective registration for the OpResolver implementation in micro. +TfLiteStatus ParseNeg(const Operator*, ErrorReporter*, BuiltinDataAllocator*, + void**) { + return kTfLiteOk; +} + +// We have this parse function instead of directly returning kTfLiteOk from the +// switch-case in ParseOpData because this function is used as part of the +// selective registration for the OpResolver implementation in micro. +TfLiteStatus ParseNotEqual(const Operator*, ErrorReporter*, + BuiltinDataAllocator*, void**) { + return kTfLiteOk; +} + +TfLiteStatus ParsePack(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data) { + CheckParsePointerParams(op, error_reporter, allocator, builtin_data); + + SafeBuiltinDataAllocator safe_allocator(allocator); + std::unique_ptr + params = safe_allocator.Allocate(); + TF_LITE_ENSURE(error_reporter, params != nullptr); + + const PackOptions* schema_params = op->builtin_options_as_PackOptions(); + + if (schema_params != nullptr) { + params->values_count = schema_params->values_count(); + params->axis = schema_params->axis(); + } else { + // TODO(b/157480169): We should either return kTfLiteError or fill in some + // reasonable defaults in the params struct. We are not doing so until we + // better undertand the ramifications of changing the legacy behavior. + } + + *builtin_data = params.release(); + return kTfLiteOk; +} + +// We have this parse function instead of directly returning kTfLiteOk from the +// switch-case in ParseOpData because this function is used as part of the +// selective registration for the OpResolver implementation in micro. +TfLiteStatus ParsePad(const Operator*, ErrorReporter*, BuiltinDataAllocator*, + void**) { + return kTfLiteOk; +} + +// We have this parse function instead of directly returning kTfLiteOk from the +// switch-case in ParseOpData because this function is used as part of the +// selective registration for the OpResolver implementation in micro. 
+TfLiteStatus ParsePadV2(const Operator*, ErrorReporter*, BuiltinDataAllocator*, + void**) { + return kTfLiteOk; +} + +TfLiteStatus ParsePool(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data) { + CheckParsePointerParams(op, error_reporter, allocator, builtin_data); + + SafeBuiltinDataAllocator safe_allocator(allocator); + std::unique_ptr + params = safe_allocator.Allocate(); + TF_LITE_ENSURE(error_reporter, params != nullptr); + + const Pool2DOptions* schema_params = op->builtin_options_as_Pool2DOptions(); + + if (schema_params != nullptr) { + params->padding = ConvertPadding(schema_params->padding()); + params->stride_width = schema_params->stride_w(); + params->stride_height = schema_params->stride_h(); + params->filter_width = schema_params->filter_width(); + params->filter_height = schema_params->filter_height(); + params->activation = + ConvertActivation(schema_params->fused_activation_function()); + } else { + // TODO(b/157480169): We should either return kTfLiteError or fill in some + // reasonable defaults in the params struct. We are not doing so until we + // better undertand the ramifications of changing the legacy behavior. + } + + *builtin_data = params.release(); + return kTfLiteOk; +} + +// We have this parse function instead of directly returning kTfLiteOk from the +// switch-case in ParseOpData because this function is used as part of the +// selective registration for the OpResolver implementation in micro. +TfLiteStatus ParsePow(const Operator*, ErrorReporter*, BuiltinDataAllocator*, + void**) { + return kTfLiteOk; +} + +// We have this parse function instead of directly returning kTfLiteOk from the +// switch-case in ParseOpData because this function is used as part of the +// selective registration for the OpResolver implementation in micro. +TfLiteStatus ParsePrelu(const Operator*, ErrorReporter*, BuiltinDataAllocator*, + void**) { + return kTfLiteOk; +} + +// We have this parse function instead of directly returning kTfLiteOk from the +// switch-case in ParseOpData because this function is used as part of the +// selective registration for the OpResolver implementation in micro. +TfLiteStatus ParseQuantize(const Operator*, ErrorReporter*, + BuiltinDataAllocator*, void**) { + return kTfLiteOk; +} + +// We have this parse function instead of directly returning kTfLiteOk from the +// switch-case in ParseOpData because this function is used as part of the +// selective registration for the OpResolver implementation in micro. +TfLiteStatus ParseReal(const Operator*, ErrorReporter*, + BuiltinDataAllocator*, void**) { + return kTfLiteOk; +} + +// We have this parse function instead of directly returning kTfLiteOk from the +// switch-case in ParseOpData because this function is used as part of the +// selective registration for the OpResolver implementation in micro. 
+TfLiteStatus ParseReadVariable(const Operator*, ErrorReporter*, + BuiltinDataAllocator*, void**) { + return kTfLiteOk; +} + +TfLiteStatus ParseReducer(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data) { + CheckParsePointerParams(op, error_reporter, allocator, builtin_data); + + SafeBuiltinDataAllocator safe_allocator(allocator); + + std::unique_ptr + params = safe_allocator.Allocate(); + TF_LITE_ENSURE(error_reporter, params != nullptr); + + const ReducerOptions* schema_params = op->builtin_options_as_ReducerOptions(); + + if (schema_params != nullptr) { + params->keep_dims = schema_params->keep_dims(); + } else { + // TODO(b/157480169): We should either return kTfLiteError or fill in some + // reasonable defaults in the params struct. We are not doing so until we + // better undertand the ramifications of changing the legacy behavior. + } + + *builtin_data = params.release(); + return kTfLiteOk; +} + +// We have this parse function instead of directly returning kTfLiteOk from the +// switch-case in ParseOpData because this function is used as part of the +// selective registration for the OpResolver implementation in micro. +TfLiteStatus ParseRelu(const Operator*, ErrorReporter*, BuiltinDataAllocator*, + void**) { + return kTfLiteOk; +} + +// We have this parse function instead of directly returning kTfLiteOk from the +// switch-case in ParseOpData because this function is used as part of the +// selective registration for the OpResolver implementation in micro. +TfLiteStatus ParseRelu6(const Operator*, ErrorReporter*, BuiltinDataAllocator*, + void**) { + return kTfLiteOk; +} + +TfLiteStatus ParseReshape(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data) { + CheckParsePointerParams(op, error_reporter, allocator, builtin_data); + + SafeBuiltinDataAllocator safe_allocator(allocator); + + std::unique_ptr + params = safe_allocator.Allocate(); + TF_LITE_ENSURE(error_reporter, params != nullptr); + + const ReshapeOptions* schema_params = op->builtin_options_as_ReshapeOptions(); + + if (schema_params != nullptr) { + const flatbuffers::Vector* new_shape = schema_params->new_shape(); + if (new_shape != nullptr) { + TF_LITE_ENSURE_STATUS( + FlatBufferIntVectorToArray(sizeof(params->shape), new_shape, + params->shape, error_reporter, "reshape")); + params->num_dimensions = new_shape->size(); + } else { + // TODO(b/157480169) TODO(b/147203660): We should either return + // kTfLiteError or fill in some reasonable defaults in the params struct. + // We are not doing so until we better undertand the ramifications of + // changing the legacy behavior. + } + } else { + // TODO(b/157480169): We should either return kTfLiteError or fill in some + // reasonable defaults in the params struct. We are not doing so until we + // better undertand the ramifications of changing the legacy behavior. 
+ } + + *builtin_data = params.release(); + return kTfLiteOk; +} + +TfLiteStatus ParseResizeBilinear(const Operator* op, + ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data) { + CheckParsePointerParams(op, error_reporter, allocator, builtin_data); + + SafeBuiltinDataAllocator safe_allocator(allocator); + std::unique_ptr + params = safe_allocator.Allocate(); + TF_LITE_ENSURE(error_reporter, params != nullptr); + + const ResizeBilinearOptions* schema_params = + op->builtin_options_as_ResizeBilinearOptions(); + + if (schema_params != nullptr) { + params->align_corners = schema_params->align_corners(); + params->half_pixel_centers = schema_params->half_pixel_centers(); + } else { + params->align_corners = false; + params->half_pixel_centers = false; + } + + *builtin_data = params.release(); + return kTfLiteOk; +} + +TfLiteStatus ParseResizeNearestNeighbor(const Operator* op, + ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data) { + CheckParsePointerParams(op, error_reporter, allocator, builtin_data); + + SafeBuiltinDataAllocator safe_allocator(allocator); + std::unique_ptr + params = safe_allocator.Allocate(); + TF_LITE_ENSURE(error_reporter, params != nullptr); + + const ResizeNearestNeighborOptions* schema_params = + op->builtin_options_as_ResizeNearestNeighborOptions(); + + if (schema_params != nullptr) { + params->align_corners = schema_params->align_corners(); + params->half_pixel_centers = schema_params->half_pixel_centers(); + } else { + params->align_corners = false; + params->half_pixel_centers = false; + } + + *builtin_data = params.release(); + return kTfLiteOk; +} + +// We have this parse function instead of directly returning kTfLiteOk from the +// switch-case in ParseOpData because this function is used as part of the +// selective registration for the OpResolver implementation in micro. +TfLiteStatus ParseRfft2D(const Operator*, ErrorReporter*, + BuiltinDataAllocator*, void**) { + return kTfLiteOk; +} + +// We have this parse function instead of directly returning kTfLiteOk from the +// switch-case in ParseOpData because this function is used as part of the +// selective registration for the OpResolver implementation in micro. +TfLiteStatus ParseRound(const Operator*, ErrorReporter*, BuiltinDataAllocator*, + void**) { + return kTfLiteOk; +} + +// We have this parse function instead of directly returning kTfLiteOk from the +// switch-case in ParseOpData because this function is used as part of the +// selective registration for the OpResolver implementation in micro. +TfLiteStatus ParseRsqrt(const Operator*, ErrorReporter*, BuiltinDataAllocator*, + void**) { + return kTfLiteOk; +} + +// We have this parse function instead of directly returning kTfLiteOk from the +// switch-case in ParseOpData because this function is used as part of the +// selective registration for the OpResolver implementation in micro. +TfLiteStatus ParseSelect(const Operator*, ErrorReporter*, BuiltinDataAllocator*, + void**) { + return kTfLiteOk; +} + +// We have this parse function instead of directly returning kTfLiteOk from the +// switch-case in ParseOpData because this function is used as part of the +// selective registration for the OpResolver implementation in micro. 
+TfLiteStatus ParseSelectV2(const Operator*, ErrorReporter*,
+                           BuiltinDataAllocator*, void**) {
+  return kTfLiteOk;
+}
+
+TfLiteStatus ParseShape(const Operator* op, ErrorReporter* error_reporter,
+                        BuiltinDataAllocator* allocator, void** builtin_data) {
+  SafeBuiltinDataAllocator safe_allocator(allocator);
+  std::unique_ptr<TfLiteShapeParams,
+                  SafeBuiltinDataAllocator::BuiltinDataDeleter>
+      params = safe_allocator.Allocate<TfLiteShapeParams>();
+  TF_LITE_ENSURE(error_reporter, params != nullptr);
+
+  const ShapeOptions* schema_params = op->builtin_options_as_ShapeOptions();
+
+  if (schema_params != nullptr) {
+    TF_LITE_ENSURE_STATUS(ConvertTensorType(schema_params->out_type(),
+                                            &params->out_type, error_reporter));
+  } else {
+    // TODO(b/157480169): We should either return kTfLiteError or fill in some
+    // reasonable defaults in the params struct. We are not doing so until we
+    // better understand the ramifications of changing the legacy behavior.
+  }
+
+  *builtin_data = params.release();
+  return kTfLiteOk;
+}
+
+// We have this parse function instead of directly returning kTfLiteOk from the
+// switch-case in ParseOpData because this function is used as part of the
+// selective registration for the OpResolver implementation in micro.
+TfLiteStatus ParseSin(const Operator*, ErrorReporter*, BuiltinDataAllocator*,
+                      void**) {
+  return kTfLiteOk;
+}
+
+// We have this parse function instead of directly returning kTfLiteOk from the
+// switch-case in ParseOpData because this function is used as part of the
+// selective registration for the OpResolver implementation in micro.
+TfLiteStatus ParseSlice(const Operator*, ErrorReporter*, BuiltinDataAllocator*,
+                        void**) {
+  return kTfLiteOk;
+}
+
+TfLiteStatus ParseSoftmax(const Operator* op, ErrorReporter* error_reporter,
+                          BuiltinDataAllocator* allocator,
+                          void** builtin_data) {
+  CheckParsePointerParams(op, error_reporter, allocator, builtin_data);
+
+  SafeBuiltinDataAllocator safe_allocator(allocator);
+  std::unique_ptr<TfLiteSoftmaxParams,
+                  SafeBuiltinDataAllocator::BuiltinDataDeleter>
+      params = safe_allocator.Allocate<TfLiteSoftmaxParams>();
+  TF_LITE_ENSURE(error_reporter, params != nullptr);
+
+  const SoftmaxOptions* schema_params = op->builtin_options_as_SoftmaxOptions();
+
+  if (schema_params != nullptr) {
+    params->beta = schema_params->beta();
+  } else {
+    // TODO(b/157480169): We should either return kTfLiteError or fill in some
+    // reasonable defaults in the params struct. We are not doing so until we
+    // better understand the ramifications of changing the legacy behavior.
+  }
+
+  *builtin_data = params.release();
+  return kTfLiteOk;
+}
+
+// We have this parse function instead of directly returning kTfLiteOk from the
+// switch-case in ParseOpData because this function is used as part of the
+// selective registration for the OpResolver implementation in micro.
+TfLiteStatus ParseSpaceToBatchNd(const Operator*, ErrorReporter*,
+                                 BuiltinDataAllocator*, void**) {
+  return kTfLiteOk;
+}
+
+TfLiteStatus ParseSpaceToDepth(const Operator* op,
+                               ErrorReporter* error_reporter,
+                               BuiltinDataAllocator* allocator,
+                               void** builtin_data) {
+  CheckParsePointerParams(op, error_reporter, allocator, builtin_data);
+
+  SafeBuiltinDataAllocator safe_allocator(allocator);
+  std::unique_ptr<TfLiteSpaceToDepthParams,
+                  SafeBuiltinDataAllocator::BuiltinDataDeleter>
+      params = safe_allocator.Allocate<TfLiteSpaceToDepthParams>();
+  TF_LITE_ENSURE(error_reporter, params != nullptr);
+
+  const auto* schema_params = op->builtin_options_as_SpaceToDepthOptions();
+  if (schema_params != nullptr) {
+    params->block_size = schema_params->block_size();
+  } else {
+    // TODO(b/157480169): We should either return kTfLiteError or fill in some
+    // reasonable defaults in the params struct. We are not doing so until we
+    // better understand the ramifications of changing the legacy behavior.
+  }
+
+  *builtin_data = params.release();
+  return kTfLiteOk;
+}
+
+TfLiteStatus ParseSplit(const Operator* op, ErrorReporter* error_reporter,
+                        BuiltinDataAllocator* allocator, void** builtin_data) {
+  CheckParsePointerParams(op, error_reporter, allocator, builtin_data);
+
+  SafeBuiltinDataAllocator safe_allocator(allocator);
+  std::unique_ptr<TfLiteSplitParams,
+                  SafeBuiltinDataAllocator::BuiltinDataDeleter>
+      params = safe_allocator.Allocate<TfLiteSplitParams>();
+  TF_LITE_ENSURE(error_reporter, params != nullptr);
+
+  const SplitOptions* schema_params = op->builtin_options_as_SplitOptions();
+
+  if (schema_params != nullptr) {
+    params->num_splits = schema_params->num_splits();
+  } else {
+    // TODO(b/157480169): We should either return kTfLiteError or fill in some
+    // reasonable defaults in the params struct. We are not doing so until we
+    // better understand the ramifications of changing the legacy behavior.
+  }
+
+  *builtin_data = params.release();
+  return kTfLiteOk;
+}
+
+TfLiteStatus ParseSplitV(const Operator* op, ErrorReporter* error_reporter,
+                         BuiltinDataAllocator* allocator, void** builtin_data) {
+  CheckParsePointerParams(op, error_reporter, allocator, builtin_data);
+  SafeBuiltinDataAllocator safe_allocator(allocator);
+
+  std::unique_ptr<TfLiteSplitVParams,
+                  SafeBuiltinDataAllocator::BuiltinDataDeleter>
+      params = safe_allocator.Allocate<TfLiteSplitVParams>();
+  TF_LITE_ENSURE(error_reporter, params != nullptr);
+
+  const SplitVOptions* schema_params = op->builtin_options_as_SplitVOptions();
+
+  if (schema_params != nullptr) {
+    params->num_splits = schema_params->num_splits();
+  } else {
+    // TODO(b/157480169): We should either return kTfLiteError or fill in some
+    // reasonable defaults in the params struct. We are not doing so until we
+    // better understand the ramifications of changing the legacy behavior.
+ } + + *builtin_data = params.release(); + return kTfLiteOk; +} + +TfLiteStatus ParseUnidirectionalSequenceLSTM(const Operator* op, + ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data) { + CheckParsePointerParams(op, error_reporter, allocator, builtin_data); + SafeBuiltinDataAllocator safe_allocator(allocator); + auto params = + safe_allocator.Allocate(); + TF_LITE_ENSURE(error_reporter, params != nullptr); + if (const auto* seq_lstm_params = + op->builtin_options_as_UnidirectionalSequenceLSTMOptions()) { + params->activation = + ConvertActivation(seq_lstm_params->fused_activation_function()); + params->cell_clip = seq_lstm_params->cell_clip(); + params->proj_clip = seq_lstm_params->proj_clip(); + params->time_major = seq_lstm_params->time_major(); + params->asymmetric_quantize_inputs = + seq_lstm_params->asymmetric_quantize_inputs(); + params->diagonal_recurrent_tensors = + seq_lstm_params->diagonal_recurrent_tensors(); + } + *builtin_data = params.release(); + return kTfLiteOk; +} + +TfLiteStatus ParseSqueeze(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data) { + CheckParsePointerParams(op, error_reporter, allocator, builtin_data); + SafeBuiltinDataAllocator safe_allocator(allocator); + + std::unique_ptr + params = safe_allocator.Allocate(); + TF_LITE_ENSURE(error_reporter, params != nullptr); + + const SqueezeOptions* schema_params = op->builtin_options_as_SqueezeOptions(); + + if (schema_params != nullptr) { + const auto* squeeze_dims = schema_params->squeeze_dims(); + if (squeeze_dims != nullptr) { + TF_LITE_ENSURE_STATUS(FlatBufferIntVectorToArray( + sizeof(params->squeeze_dims), squeeze_dims, params->squeeze_dims, + error_reporter, "squeeze")); + params->num_squeeze_dims = squeeze_dims->size(); + } else { + params->num_squeeze_dims = 0; + } + } else { + // TODO(b/157480169): We should either return kTfLiteError or fill in some + // reasonable defaults in the params struct. We are not doing so until we + // better undertand the ramifications of changing the legacy behavior. + } + + *builtin_data = params.release(); + return kTfLiteOk; +} + +// We have this parse function instead of directly returning kTfLiteOk from the +// switch-case in ParseOpData because this function is used as part of the +// selective registration for the OpResolver implementation in micro. +TfLiteStatus ParseSqrt(const Operator*, ErrorReporter*, BuiltinDataAllocator*, + void**) { + return kTfLiteOk; +} + +// We have this parse function instead of directly returning kTfLiteOk from the +// switch-case in ParseOpData because this function is used as part of the +// selective registration for the OpResolver implementation in micro. +TfLiteStatus ParseSquare(const Operator*, ErrorReporter*, BuiltinDataAllocator*, + void**) { + return kTfLiteOk; +} + +// We have this parse function instead of directly returning kTfLiteOk from the +// switch-case in ParseOpData because this function is used as part of the +// selective registration for the OpResolver implementation in micro. 
+TfLiteStatus ParseSquaredDifference(const Operator*, ErrorReporter*, + BuiltinDataAllocator*, void**) { + return kTfLiteOk; +} + +TfLiteStatus ParseStridedSlice(const Operator* op, + ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data) { + CheckParsePointerParams(op, error_reporter, allocator, builtin_data); + + SafeBuiltinDataAllocator safe_allocator(allocator); + std::unique_ptr + params = safe_allocator.Allocate(); + TF_LITE_ENSURE(error_reporter, params != nullptr); + + const StridedSliceOptions* schema_params = + op->builtin_options_as_StridedSliceOptions(); + + if (schema_params != nullptr) { + params->begin_mask = schema_params->begin_mask(); + params->end_mask = schema_params->end_mask(); + params->ellipsis_mask = schema_params->ellipsis_mask(); + params->new_axis_mask = schema_params->new_axis_mask(); + params->shrink_axis_mask = schema_params->shrink_axis_mask(); + } else { + // TODO(b/157480169): We should either return kTfLiteError or fill in some + // reasonable defaults in the params struct. We are not doing so until we + // better undertand the ramifications of changing the legacy behavior. + } + + *builtin_data = params.release(); + return kTfLiteOk; +} + +TfLiteStatus ParseSub(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data) { + CheckParsePointerParams(op, error_reporter, allocator, builtin_data); + + SafeBuiltinDataAllocator safe_allocator(allocator); + std::unique_ptr + params = safe_allocator.Allocate(); + TF_LITE_ENSURE(error_reporter, params != nullptr); + + const SubOptions* schema_params = op->builtin_options_as_SubOptions(); + + if (schema_params != nullptr) { + params->activation = + ConvertActivation(schema_params->fused_activation_function()); + params->pot_scale_int16 = schema_params->pot_scale_int16(); + } else { + // TODO(b/157480169): We should either return kTfLiteError or fill in some + // reasonable defaults in the params struct. We are not doing so until we + // better undertand the ramifications of changing the legacy behavior. + } + + *builtin_data = params.release(); + return kTfLiteOk; +} + +TfLiteStatus ParseSvdf(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data) { + CheckParsePointerParams(op, error_reporter, allocator, builtin_data); + + SafeBuiltinDataAllocator safe_allocator(allocator); + std::unique_ptr + params = safe_allocator.Allocate(); + TF_LITE_ENSURE(error_reporter, params != nullptr); + + const SVDFOptions* schema_params = op->builtin_options_as_SVDFOptions(); + if (schema_params != nullptr) { + params->rank = schema_params->rank(); + params->activation = + ConvertActivation(schema_params->fused_activation_function()); + params->asymmetric_quantize_inputs = + schema_params->asymmetric_quantize_inputs(); + } else { + // TODO(b/157480169): We should either return kTfLiteError or fill in some + // reasonable defaults in the params struct. We are not doing so until we + // better undertand the ramifications of changing the legacy behavior. + } + + *builtin_data = params.release(); + return kTfLiteOk; +} + +// We have this parse function instead of directly returning kTfLiteOk from the +// switch-case in ParseOpData because this function is used as part of the +// selective registration for the OpResolver implementation in micro. 
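+// For illustration only (an editorial sketch, not part of the upstream TFLite
+// sources): TfLiteSqueezeParams::squeeze_dims is a fixed-size array, which is
+// why ParseSqueeze copies the serialized squeeze_dims vector through
+// FlatBufferIntVectorToArray() with a sizeof() capacity bound. A caller could
+// pre-check the vector against that capacity like this (the helper name is
+// hypothetical, and the element type is assumed to be int as declared in
+// builtin_op_data.h):
+inline bool ExampleSqueezeDimsFit(const SqueezeOptions& options) {
+  const auto* dims = options.squeeze_dims();
+  constexpr size_t kCapacity =
+      sizeof(TfLiteSqueezeParams::squeeze_dims) / sizeof(int);
+  return dims == nullptr || dims->size() <= kCapacity;
+}
+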
+TfLiteStatus ParseTanh(const Operator*, ErrorReporter*, BuiltinDataAllocator*, + void**) { + return kTfLiteOk; +} +// +// We have this parse function instead of directly returning kTfLiteOk from the +// switch-case in ParseOpData because this function is used as part of the +// selective registration for the OpResolver implementation in micro. +TfLiteStatus ParseTranspose(const Operator*, ErrorReporter*, + BuiltinDataAllocator*, void**) { + return kTfLiteOk; +} + +TfLiteStatus ParseTransposeConv(const Operator* op, + ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data) { + CheckParsePointerParams(op, error_reporter, allocator, builtin_data); + + SafeBuiltinDataAllocator safe_allocator(allocator); + std::unique_ptr + params = safe_allocator.Allocate(); + TF_LITE_ENSURE(error_reporter, params != nullptr); + const TransposeConvOptions* transpose_conv_params = + op->builtin_options_as_TransposeConvOptions(); + if (transpose_conv_params != nullptr) { + params->padding = ConvertPadding(transpose_conv_params->padding()); + params->stride_width = transpose_conv_params->stride_w(); + params->stride_height = transpose_conv_params->stride_h(); + + params->activation = + ConvertActivation(transpose_conv_params->fused_activation_function()); + } else { + // TODO(b/157480169): We should either return kTfLiteError or fill in some + // reasonable defaults in the params struct. We are not doing so until we + // better undertand the ramifications of changing the legacy behavior. + } + *builtin_data = params.release(); + return kTfLiteOk; +} + +TfLiteStatus ParseUnpack(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data) { + CheckParsePointerParams(op, error_reporter, allocator, builtin_data); + + SafeBuiltinDataAllocator safe_allocator(allocator); + std::unique_ptr + params = safe_allocator.Allocate(); + TF_LITE_ENSURE(error_reporter, params != nullptr); + + const UnpackOptions* schema_params = op->builtin_options_as_UnpackOptions(); + + if (schema_params != nullptr) { + params->num = schema_params->num(); + params->axis = schema_params->axis(); + } else { + // TODO(b/157480169): We should either return kTfLiteError or fill in some + // reasonable defaults in the params struct. We are not doing so until we + // better undertand the ramifications of changing the legacy behavior. + } + + *builtin_data = params.release(); + return kTfLiteOk; +} + +TfLiteStatus ParseVarHandle(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data) { + CheckParsePointerParams(op, error_reporter, allocator, builtin_data); + + SafeBuiltinDataAllocator safe_allocator(allocator); + std::unique_ptr + params = safe_allocator.Allocate(); + TF_LITE_ENSURE(error_reporter, params != nullptr); + + const VarHandleOptions* schema_params = + op->builtin_options_as_VarHandleOptions(); + + if (schema_params != nullptr) { + if (schema_params->container()) { + params->container = schema_params->container()->c_str(); + } + if (schema_params->shared_name()) { + params->shared_name = schema_params->shared_name()->c_str(); + } + } else { + // TODO(b/157480169): We should either return kTfLiteError or fill in some + // reasonable defaults in the params struct. We are not doing so until we + // better undertand the ramifications of changing the legacy behavior. 
+ } + + *builtin_data = params.release(); + return kTfLiteOk; +} + +TfLiteStatus ParseWhile(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data) { + CheckParsePointerParams(op, error_reporter, allocator, builtin_data); + + SafeBuiltinDataAllocator safe_allocator(allocator); + std::unique_ptr + params = safe_allocator.Allocate(); + TF_LITE_ENSURE(error_reporter, params != nullptr); + + const WhileOptions* schema_params = op->builtin_options_as_WhileOptions(); + + if (schema_params != nullptr) { + params->cond_subgraph_index = schema_params->cond_subgraph_index(); + params->body_subgraph_index = schema_params->body_subgraph_index(); + } else { + // TODO(b/157480169): We should either return kTfLiteError or fill in some + // reasonable defaults in the params struct. We are not doing so until we + // better undertand the ramifications of changing the legacy behavior. + } + + *builtin_data = params.release(); + return kTfLiteOk; +} + +// We have this parse function instead of directly returning kTfLiteOk from the +// switch-case in ParseOpData because this function is used as part of the +// selective registration for the OpResolver implementation in micro. +TfLiteStatus ParseZerosLike(const Operator*, ErrorReporter*, + BuiltinDataAllocator*, void**) { + return kTfLiteOk; +} + +TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type, + ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data) { +// TODO(b/145762662): It would be preferable to have the build graph for TF Lite +// Micro not have the ParseOpData function at all. This would require splitting +// the current file into two separate files, one of which defines the +// ParseOpData function and the other that defines the operator specific parse +// functions (e.g. ParseAdd). +// +// Such a split was attempted but was not worth the effort at the time because +// of the following reasons: +// * We could either duplicate the functions and the SafeBuiltinDataAllocator +// class in the anonymous namespace of this file, or attempt to make a common +// library with these helper functions and class. +// * Making a common library with a separate build target was not feasible as +// it introduced circular dependencies due to the ErrorReporter and a common +// .cc and .h within the same api build target the also cause circular +// dependencies due to the BuiltinDataAllocator class. +// * If all the builtin operators were to have their own parse functions, or we +// were ok with some amount of code duplication, then this split of the .cc +// files would be a lot more feasible. +#ifdef TF_LITE_STATIC_MEMORY + TF_LITE_REPORT_ERROR( + error_reporter, + "ParseOpData is unsupported on TfLiteMicro, please use the operator " + "specific parse functions (e.g. ParseAdd etc.).\n"); + return kTfLiteError; +#else + return ParseOpDataTfLite(op, op_type, error_reporter, allocator, + builtin_data); +#endif +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/core/api/flatbuffer_conversions.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/core/api/flatbuffer_conversions.h new file mode 100644 index 0000000..b8e6019 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/core/api/flatbuffer_conversions.h @@ -0,0 +1,427 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_CORE_API_FLATBUFFER_CONVERSIONS_H_
+#define TENSORFLOW_LITE_CORE_API_FLATBUFFER_CONVERSIONS_H_
+
+// These functions transform codes and data structures that are defined in the
+// flatbuffer serialization format into in-memory values that are used by the
+// runtime API and interpreter.
+
+#include <cstddef>
+#include <new>
+#include <type_traits>
+
+#include "edge-impulse-sdk/tensorflow/lite/core/api/error_reporter.h"
+#include "edge-impulse-sdk/tensorflow/lite/core/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h"
+
+namespace tflite {
+
+// Interface class for builtin data allocations.
+class BuiltinDataAllocator {
+ public:
+  virtual void* Allocate(size_t size, size_t alignment_hint) = 0;
+  virtual void Deallocate(void* data) = 0;
+
+  // Allocate a structure, but make sure it is a POD structure that doesn't
+  // require constructors to run. The reason we do this is that the
+  // Interpreter's C extension part will take ownership, so destructors will
+  // not be run during deallocation.
+  template <typename T>
+  T* AllocatePOD() {
+    // TODO(b/154346074): Change this to is_trivially_destructible when all
+    // platform targets support that properly.
+    static_assert(std::is_pod<T>::value, "Builtin data structure must be POD.");
+    void* allocated_memory = this->Allocate(sizeof(T), alignof(T));
+    return new (allocated_memory) T();
+  }
+
+  virtual ~BuiltinDataAllocator() {}
+};
+
+// Parse the appropriate data out of the op.
+//
+// This handles builtin data explicitly as there are flatbuffer schemas.
+// If it returns kTfLiteOk, it passes the data out with `builtin_data`. The
+// calling function has to pass in an allocator object, and this allocator
+// will be called to reserve space for the output data. If the calling
+// function's allocator reserves memory on the heap, then it's the calling
+// function's responsibility to free it.
+// If it returns kTfLiteError, `builtin_data` will be `nullptr`.
+TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
+                         ErrorReporter* error_reporter,
+                         BuiltinDataAllocator* allocator, void** builtin_data);
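+
+// For illustration only (an editorial sketch, not part of the upstream TFLite
+// API): a minimal heap-backed implementation of the BuiltinDataAllocator
+// interface above. It shows the ownership contract documented for
+// ParseOpData(): whatever Allocate() reserves for `builtin_data`, the caller
+// is responsible for releasing again via Deallocate(). The class name is
+// hypothetical.
+class ExampleHeapBuiltinDataAllocator : public BuiltinDataAllocator {
+ public:
+  void* Allocate(size_t size, size_t /*alignment_hint*/) override {
+    // The alignment hint is ignored in this sketch; global operator new
+    // already provides alignment suitable for the POD params structs.
+    return ::operator new(size);
+  }
+  void Deallocate(void* data) override { ::operator delete(data); }
+};
+
+// Converts the tensor data type used in the flat buffer to the representation
+// used by the runtime.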
+TfLiteStatus ConvertTensorType(TensorType tensor_type, TfLiteType* type, + ErrorReporter* error_reporter); + +TfLiteStatus ParseAbs(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data); + +TfLiteStatus ParseAdd(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data); + +TfLiteStatus ParseAddN(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data); + +TfLiteStatus ParseArgMax(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data); + +TfLiteStatus ParseArgMin(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data); + +TfLiteStatus ParseAssignVariable(const Operator* op, + ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data); + +TfLiteStatus ParseBatchMatMul(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data); + +TfLiteStatus ParseBatchToSpaceNd(const Operator* op, + ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data); + +TfLiteStatus ParseBroadcastArgs(const Operator* op, + ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data); + +TfLiteStatus ParseBroadcastTo(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data); + +TfLiteStatus ParseCallOnce(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data); + +TfLiteStatus ParseCeil(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data); + +TfLiteStatus ParseCast(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data); + +TfLiteStatus ParseComplexAbs(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data); + +TfLiteStatus ParseConcatenation(const Operator* op, + ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data); + +TfLiteStatus ParseConv2D(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data); + +TfLiteStatus ParseCos(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data); + +TfLiteStatus ParseCumsum(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data); + +TfLiteStatus ParseDepthToSpace(const Operator* op, + ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data); + +TfLiteStatus ParseDepthwiseConv2D(const Operator* op, + ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data); + +TfLiteStatus ParseDequantize(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data); + +TfLiteStatus ParseDiv(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data); + +TfLiteStatus ParseElu(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data); + +TfLiteStatus ParseEqual(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data); + +TfLiteStatus ParseExp(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* 
allocator, void** builtin_data); + +TfLiteStatus ParseExpandDims(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data); + +TfLiteStatus ParseFill(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data); + +TfLiteStatus ParseFloor(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data); + +TfLiteStatus ParseFloorDiv(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data); + +TfLiteStatus ParseFloorMod(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data); + +TfLiteStatus ParseFullyConnected(const Operator* op, + ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data); + +TfLiteStatus ParseGather(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data); + +TfLiteStatus ParseGatherNd(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data); + +TfLiteStatus ParseGreater(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data); + +TfLiteStatus ParseGreaterEqual(const Operator* op, + ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data); + +TfLiteStatus ParseHardSwish(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data); + +TfLiteStatus ParseImag(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data); + +TfLiteStatus ParseIf(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data); + +TfLiteStatus ParseL2Normalization(const Operator* op, + ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data); + +TfLiteStatus ParseLeakyRelu(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data); + +TfLiteStatus ParseLess(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data); + +TfLiteStatus ParseLessEqual(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data); + +TfLiteStatus ParseLog(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data); + +TfLiteStatus ParseLogicalAnd(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data); + +TfLiteStatus ParseLogicalNot(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data); + +TfLiteStatus ParseLogicalOr(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data); + +TfLiteStatus ParseLogistic(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data); + +TfLiteStatus ParseLogSoftmax(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data); + +TfLiteStatus ParseLSTM(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data); + +TfLiteStatus ParseMaximum(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data); + +TfLiteStatus ParseMinimum(const 
Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data); + +TfLiteStatus ParseMirrorPad(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data); + +TfLiteStatus ParseMul(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data); + +TfLiteStatus ParseNeg(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data); + +TfLiteStatus ParseNotEqual(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data); + +TfLiteStatus ParsePack(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data); + +TfLiteStatus ParsePad(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data); + +TfLiteStatus ParsePadV2(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data); + +TfLiteStatus ParsePool(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data); + +TfLiteStatus ParsePow(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data); + +TfLiteStatus ParsePrelu(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data); + +TfLiteStatus ParseQuantize(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data); + +TfLiteStatus ParseReal(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data); + +TfLiteStatus ParseReadVariable(const Operator* op, + ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data); + +TfLiteStatus ParseReducer(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data); + +TfLiteStatus ParseRelu(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data); + +TfLiteStatus ParseRelu6(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data); + +TfLiteStatus ParseReshape(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data); + +TfLiteStatus ParseResizeBilinear(const Operator* op, + ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data); + +TfLiteStatus ParseResizeNearestNeighbor(const Operator* op, + ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data); + +TfLiteStatus ParseRfft2D(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data); + +TfLiteStatus ParseRound(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data); + +TfLiteStatus ParseRsqrt(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data); + +TfLiteStatus ParseSelect(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data); + +TfLiteStatus ParseSelectV2(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data); + +TfLiteStatus ParseShape(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data); + +TfLiteStatus 
ParseSin(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data); + +TfLiteStatus ParseSlice(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data); + +TfLiteStatus ParseSoftmax(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data); + +TfLiteStatus ParseSpaceToBatchNd(const Operator* op, + ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data); + +TfLiteStatus ParseSpaceToDepth(const Operator* op, + ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data); + +TfLiteStatus ParseSplit(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data); + +TfLiteStatus ParseSplitV(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data); + +TfLiteStatus ParseSqueeze(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data); + +TfLiteStatus ParseSqrt(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data); + +TfLiteStatus ParseSquare(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data); + +TfLiteStatus ParseSquaredDifference(const Operator* op, + ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data); + +TfLiteStatus ParseStridedSlice(const Operator* op, + ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data); + +TfLiteStatus ParseSub(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data); + +TfLiteStatus ParseSvdf(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data); + +TfLiteStatus ParseTanh(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data); + +TfLiteStatus ParseTranspose(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data); + +TfLiteStatus ParseTransposeConv(const Operator* op, + ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data); + +TfLiteStatus ParseUnpack(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data); + +TfLiteStatus ParseUnidirectionalSequenceLSTM(const Operator* op, + ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data); + +TfLiteStatus ParseVarHandle(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data); + +TfLiteStatus ParseWhile(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data); + +TfLiteStatus ParseZerosLike(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data); + +} // namespace tflite + +#endif // TENSORFLOW_LITE_CORE_API_FLATBUFFER_CONVERSIONS_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/core/api/op_resolver.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/core/api/op_resolver.cpp new file mode 100644 index 0000000..bb2e080 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/core/api/op_resolver.cpp @@ -0,0 +1,68 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "edge-impulse-sdk/tensorflow/lite/core/api/op_resolver.h"
+
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/flatbuffers.h"  // from @flatbuffers
+#include "edge-impulse-sdk/tensorflow/lite/core/api/error_reporter.h"
+#include "edge-impulse-sdk/tensorflow/lite/core/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/schema/schema_utils.h"
+
+namespace tflite {
+
+TfLiteStatus GetRegistrationFromOpCode(
+    const OperatorCode* opcode, const OpResolver& op_resolver,
+    ErrorReporter* error_reporter, const TfLiteRegistration** registration) {
+  TfLiteStatus status = kTfLiteOk;
+  *registration = nullptr;
+  auto builtin_code = GetBuiltinCode(opcode);
+  int version = opcode->version();
+
+  if (builtin_code > BuiltinOperator_MAX) {
+    TF_LITE_REPORT_ERROR(
+        error_reporter,
+        "Op builtin_code out of range: %d. Are you using old TFLite binary "
+        "with newer model?",
+        builtin_code);
+    status = kTfLiteError;
+  } else if (builtin_code != BuiltinOperator_CUSTOM) {
+    *registration = op_resolver.FindOp(builtin_code, version);
+    if (*registration == nullptr) {
+      TF_LITE_REPORT_ERROR(
+          error_reporter,
+          "Didn't find op for builtin opcode '%s' version '%d'. "
+          "This model is not supported by EON Compiler or TensorFlow Lite "
+          "Micro, but is in full TFLite (e.g. on Linux).\n",
+          EnumNameBuiltinOperator(builtin_code), version);
+      status = kTfLiteError;
+    }
+  } else if (!opcode->custom_code()) {
+    TF_LITE_REPORT_ERROR(
+        error_reporter,
+        "Operator with CUSTOM builtin_code has no custom_code.\n");
+    status = kTfLiteError;
+  } else {
+    const char* name = opcode->custom_code()->c_str();
+    *registration = op_resolver.FindOp(name, version);
+    if (*registration == nullptr) {
+      // Do not report an error for an unresolved custom op here; the final
+      // check happens while preparing ops.
+      status = kTfLiteError;
+    }
+  }
+  return status;
+}
+
+}  // namespace tflite
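The ParseOpData() entry point declared in flatbuffer_conversions.h above deliberately leaves the allocation policy to the caller: the allocator you pass in reserves the memory for the builtin-data struct, so the caller also owns and frees it. The following sketch is not part of the vendored sources; it only illustrates that contract with a simple heap-backed allocator, and the names HeapBuiltinDataAllocator and ParseBuiltinDataExample are illustrative.

// Illustrative sketch only -- not part of the vendored SDK sources.
#include <cstdlib>

#include "edge-impulse-sdk/tensorflow/lite/core/api/flatbuffer_conversions.h"

namespace {

// A minimal BuiltinDataAllocator that hands out heap memory.
class HeapBuiltinDataAllocator : public tflite::BuiltinDataAllocator {
 public:
  void* Allocate(size_t size, size_t alignment_hint) override {
    // The builtin-data structs are small PODs, so malloc's default alignment
    // is assumed to be sufficient here and `alignment_hint` is ignored.
    (void)alignment_hint;
    return std::malloc(size);
  }
  void Deallocate(void* data) override { std::free(data); }
};

// Parses the builtin data for one operator and hands ownership to the caller.
TfLiteStatus ParseBuiltinDataExample(const tflite::Operator* op,
                                     tflite::BuiltinOperator op_type,
                                     tflite::ErrorReporter* error_reporter,
                                     void** builtin_data) {
  HeapBuiltinDataAllocator allocator;
  // On kTfLiteOk, `*builtin_data` points to heap memory that the caller must
  // later release via Deallocate() (i.e. free()); on kTfLiteError it is null.
  return tflite::ParseOpData(op, op_type, error_reporter, &allocator,
                             builtin_data);
}

}  // namespace

On microcontroller targets the SDK's runtime typically carves this memory out of the tensor arena rather than the heap; malloc/free is used above only to keep the sketch self-contained.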
diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/core/api/op_resolver.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/core/api/op_resolver.h
new file mode 100644
index 0000000..75fc5d0
--- /dev/null
+++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/core/api/op_resolver.h
@@ -0,0 +1,129 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_CORE_API_OP_RESOLVER_H_
+#define TENSORFLOW_LITE_CORE_API_OP_RESOLVER_H_
+
+#include <functional>
+#include <memory>
+#include <vector>
+
+#include "edge-impulse-sdk/tensorflow/lite/core/api/error_reporter.h"
+#include "edge-impulse-sdk/tensorflow/lite/core/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h"
+
+namespace tflite {
+
+/// Abstract interface that returns TfLiteRegistrations given op codes or
+/// custom op names. This is the mechanism by which ops referenced in the
+/// flatbuffer model are mapped to executable function pointers
+/// (TfLiteRegistrations).
+class OpResolver {
+ public:
+  /// Finds the op registration for a builtin operator by enum code.
+  virtual const TfLiteRegistration* FindOp(tflite::BuiltinOperator op,
+                                           int version) const = 0;
+  /// Finds the op registration of a custom operator by op name.
+  virtual const TfLiteRegistration* FindOp(const char* op,
+                                           int version) const = 0;
+
+  // Represents a sequence of delegates.
+  using TfLiteDelegatePtrVector =
+      std::vector<std::unique_ptr<TfLiteDelegate, void (*)(TfLiteDelegate*)>>;
+
+  // Returns optional delegates for resolving and handling ops in the
+  // flatbuffer model. This may be used in addition to the standard
+  // TfLiteRegistration lookup for graph resolution.
+  // WARNING: This API is deprecated, GetDelegateCreators is preferred.
+  virtual TfLiteDelegatePtrVector GetDelegates(int num_threads) const {
+    return {};
+  }
+
+  // Represents a function that creates a TfLite delegate instance.
+  using TfLiteDelegateCreator =
+      std::function<std::unique_ptr<TfLiteDelegate, void (*)(TfLiteDelegate*)>(
+          TfLiteContext* /*context*/)>;
+
+  // Represents a sequence of delegate creator functions.
+  using TfLiteDelegateCreators = std::vector<TfLiteDelegateCreator>;
+
+  // Returns a vector of delegate creators to create optional delegates for
+  // resolving and handling ops in the flatbuffer model. This may be used in
+  // addition to the standard TfLiteRegistration lookup for graph resolution.
+  //
+  // Note that this method is not used (will not be called) if you are using
+  // TF Lite in Google Play Services; the GetOpaqueDelegateCreators method
+  // (see below) is used for that case.
+  virtual TfLiteDelegateCreators GetDelegateCreators() const { return {}; }
+
+  // TODO(b/202712825): it would be nice if we could avoid the need for
+  // separate "opaque" types & methods for use only with TF Lite in Google
+  // Play Services.
+
+  // Represents an opaque delegate instance.
+  // WARNING: Experimental interface, subject to change.
+  using TfLiteOpaqueDelegatePtr =
+      std::unique_ptr<TfLiteOpaqueDelegate, void (*)(TfLiteOpaqueDelegate*)>;
+
+  // Represents a function that creates an opaque delegate instance.
+  // WARNING: Experimental interface, subject to change.
+  using TfLiteOpaqueDelegateCreator =
+      std::function<TfLiteOpaqueDelegatePtr(int /*num_threads*/)>;
+
+  // Represents a sequence of opaque delegate creator functions.
+  // WARNING: Experimental interface, subject to change.
+  using TfLiteOpaqueDelegateCreators = std::vector<TfLiteOpaqueDelegateCreator>;
+
+  // Returns a vector of opaque delegate creators to create optional opaque
+  // delegates for resolving and handling ops in the flatbuffer model. This may
+  // be used in addition to the standard TfLiteRegistration lookup for graph
+  // resolution.
+  //
+  // Note that this method will be called only if you are using TF Lite in
+  // Google Play Services; if you are using regular TF Lite, GetDelegateCreators
+  // (see above) is used instead.
+  //
+  // WARNING: Experimental interface, subject to change.
+ virtual TfLiteOpaqueDelegateCreators GetOpaqueDelegateCreators() const { + return {}; + } + + virtual ~OpResolver() {} + + private: + /// Returns true if this OpResolver may contain any "user defined" ops. + /// By "user defined" ops, we mean any op definitions other than those + /// contained in tflite::ops::builtin::BuiltinOpResolver. + /// + /// If this method returns true, it doesn't necessarily mean that the + /// OpResolver contains a user-defined op, just that the absence of + /// user-defined ops can't be guaranteed. + /// + /// Note that "user-defined" ops are not the same as "custom" ops; + /// BuiltinOpResolver may support certain "custom" ops, in addition to + /// "builtin" ops, and may not support all of the "builtin" op enum values. + virtual bool MayContainUserDefinedOps() const { return true; } + + friend class OpResolverInternal; +}; + +// Handles the logic for converting between an OperatorCode structure extracted +// from a flatbuffer and information about a registered operator +// implementation. +TfLiteStatus GetRegistrationFromOpCode(const OperatorCode* opcode, + const OpResolver& op_resolver, + ErrorReporter* error_reporter, + const TfLiteRegistration** registration); + +} // namespace tflite + +#endif // TENSORFLOW_LITE_CORE_API_OP_RESOLVER_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/core/api/tensor_utils.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/core/api/tensor_utils.cpp new file mode 100644 index 0000000..b62d50c --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/core/api/tensor_utils.cpp @@ -0,0 +1,50 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/core/api/tensor_utils.h" + +#include + +#include "edge-impulse-sdk/tensorflow/lite/core/c/common.h" + +namespace tflite { + +TfLiteStatus ResetVariableTensor(TfLiteTensor* tensor) { + if (!tensor->is_variable) { + return kTfLiteOk; + } + // TODO(b/115961645): Implement - If a variable tensor has a buffer, reset it + // to the value of the buffer. + int value = 0; + if (tensor->type == kTfLiteInt8) { + value = tensor->params.zero_point; + } + // TODO(b/139446230): Provide a platform header to better handle these + // specific scenarios. 
+#if __ANDROID__ || defined(__x86_64__) || defined(__i386__) || \ + defined(__i386) || defined(__x86__) || defined(__X86__) || \ + defined(_X86_) || defined(_M_IX86) || defined(_M_X64) + memset(tensor->data.raw, value, tensor->bytes); +#else + char* raw_ptr = tensor->data.raw; + for (size_t i = 0; i < tensor->bytes; ++i) { + *raw_ptr = value; + raw_ptr++; + } +#endif + return kTfLiteOk; +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/core/api/tensor_utils.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/core/api/tensor_utils.h new file mode 100644 index 0000000..608128a --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/core/api/tensor_utils.h @@ -0,0 +1,28 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_CORE_API_TENSOR_UTILS_H_ +#define TENSORFLOW_LITE_CORE_API_TENSOR_UTILS_H_ + +#include "edge-impulse-sdk/tensorflow/lite/core/c/common.h" + +namespace tflite { + +// Resets a variable tensor to the default value. +TfLiteStatus ResetVariableTensor(TfLiteTensor* tensor); + +} // namespace tflite + +#endif // TENSORFLOW_LITE_CORE_API_TENSOR_UTILS_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/core/c/builtin_op_data.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/core/c/builtin_op_data.h new file mode 100644 index 0000000..3a1ee0e --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/core/c/builtin_op_data.h @@ -0,0 +1,537 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +/// WARNING: Users of TensorFlow Lite should not include this file directly, +/// but should instead include +/// "third_party/tensorflow/lite/c/builtin_op_data.h". +/// Only the TensorFlow Lite implementation itself should include this +/// file directly. +#ifndef TENSORFLOW_LITE_CORE_C_BUILTIN_OP_DATA_H_ +#define TENSORFLOW_LITE_CORE_C_BUILTIN_OP_DATA_H_ + +#include + +#include "edge-impulse-sdk/tensorflow/lite/core/c/common.h" + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +// TfLiteReshapeParams can't have dynamic data so we fix the maximum possible +// number of dimensions. +#define TFLITE_RESHAPE_PARAMS_MAX_DIMENSION_COUNT 8 + +// TODO(aselle): Consider using "if this then that" for testing. 
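As a brief aside before the builtin-operator parameter structs that follow: the ResetVariableTensor() helper declared in tensor_utils.h above can be exercised as in the sketch below. The sketch is illustrative only and not part of the vendored sources; the hand-built TfLiteTensor is a stand-in for tensors that, in a real application, the interpreter owns and populates.

// Illustrative sketch only -- not part of the vendored SDK sources.
#include <cstdint>

#include "edge-impulse-sdk/tensorflow/lite/core/api/tensor_utils.h"

namespace {

void ResetVariableTensorExample() {
  static int8_t storage[16];

  TfLiteTensor tensor = {};  // zero-initialize every field
  tensor.type = kTfLiteInt8;
  tensor.is_variable = true;
  tensor.data.raw = reinterpret_cast<char*>(storage);
  tensor.bytes = sizeof(storage);
  tensor.params.zero_point = -128;

  // For kTfLiteInt8 variable tensors the buffer is filled with the zero point;
  // for other types it is filled with zeros. Non-variable tensors are left
  // untouched and the call simply returns kTfLiteOk.
  tflite::ResetVariableTensor(&tensor);
}

}  // namespace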
+ +// Useful placeholder to put in otherwise empty structs to avoid size warnings. +typedef struct { + char dummy; +} EmptyStructPlaceholder; + +// IMPORTANT: All new members of structs must be added at the end to ensure +// backwards compatibility. + +// Possible padding types (for convolutions) +typedef enum { + kTfLitePaddingUnknown = 0, + kTfLitePaddingSame, + kTfLitePaddingValid, +} TfLitePadding; + +typedef enum { + kTfLiteMirrorPaddingUnknown = 0, + kTfLiteMirrorPaddingReflect, + kTfLiteMirrorPaddingSymmetric, +} TfLiteMirrorPaddingMode; + +// TODO(b/130259536): We should move this out of builtin_op_data. +typedef struct { + int width; + int height; + int width_offset; + int height_offset; +} TfLitePaddingValues; + +typedef struct { + TfLiteMirrorPaddingMode mode; +} TfLiteMirrorPaddingParams; + +// Possible fused activation functions. +typedef enum { + kTfLiteActNone = 0, + kTfLiteActRelu, + kTfLiteActReluN1To1, // min(max(-1, x), 1) + kTfLiteActRelu6, // min(max(0, x), 6) + kTfLiteActTanh, + kTfLiteActSignBit, + kTfLiteActSigmoid, +} TfLiteFusedActivation; + +typedef struct { + // Parameters for CONV_2D version 1. + TfLitePadding padding; + int stride_width; + int stride_height; + TfLiteFusedActivation activation; + + // Parameters for CONV_2D version 2. + // Note: Version 2 supports dilation values not equal to 1. + int dilation_width_factor; + int dilation_height_factor; +} TfLiteConvParams; + +typedef struct { + TfLitePadding padding; + int stride_width; + int stride_height; + int stride_depth; + int dilation_width_factor; + int dilation_height_factor; + int dilation_depth_factor; + TfLiteFusedActivation activation; +} TfLiteConv3DParams; + +typedef TfLiteConv3DParams TfLiteConv3DTransposeParams; + +typedef struct { + TfLitePadding padding; + int stride_width; + int stride_height; + int filter_width; + int filter_height; + TfLiteFusedActivation activation; + struct { + TfLitePaddingValues padding; + } computed; +} TfLitePoolParams; + +typedef struct { + // Parameters for DepthwiseConv version 1 or above. + TfLitePadding padding; + int stride_width; + int stride_height; + // `depth_multiplier` is redundant. It's used by CPU kernels in + // TensorFlow 2.0 or below, but ignored in versions above. + // + // The information can be deduced from the shape of input and the shape of + // weights. Since the TFLiteConverter toolchain doesn't support partially + // specified shapes, relying on `depth_multiplier` stops us from supporting + // graphs with dynamic shape tensors. + // + // Note: Some of the delegates (e.g. NNAPI, GPU) are still relying on this + // field. + int depth_multiplier; + TfLiteFusedActivation activation; + // Parameters for DepthwiseConv version 2 or above. + int dilation_width_factor; + int dilation_height_factor; +} TfLiteDepthwiseConvParams; + +typedef struct { + int rank; + TfLiteFusedActivation activation; + + // Parameter for SVDF version 4. + bool asymmetric_quantize_inputs; +} TfLiteSVDFParams; + +typedef struct { + TfLiteFusedActivation activation; + + // Parameter for RNN version 3. + bool asymmetric_quantize_inputs; +} TfLiteRNNParams; + +typedef struct { + bool time_major; + TfLiteFusedActivation activation; + + // Parameter for Sequence RNN version 3. + bool asymmetric_quantize_inputs; +} TfLiteSequenceRNNParams; + +typedef struct { + bool time_major; + TfLiteFusedActivation activation; + bool merge_outputs; + + // Parameter for Bidirectional RNN verison 3. 
+ bool asymmetric_quantize_inputs; +} TfLiteBidirectionalSequenceRNNParams; + +typedef enum { + kTfLiteFullyConnectedWeightsFormatDefault = 0, + kTfLiteFullyConnectedWeightsFormatShuffled4x16Int8 = 1, +} TfLiteFullyConnectedWeightsFormat; + +typedef struct { + // Parameters for FullyConnected version 1 or above. + TfLiteFusedActivation activation; + + // Parameters for FullyConnected version 2 or above. + TfLiteFullyConnectedWeightsFormat weights_format; + + // Parameters for FullyConnected version 5 or above. + // If set to true, then the number of dimensions in the input and the output + // tensors are the same. Furthermore, all but the last dimension of the input + // and output shapes will be equal. + bool keep_num_dims; + + // Parameters for FullyConnected version 7 or above. + // If set to true and the weights are quantized, then non constant inputs + // are quantized at evaluation time with asymmetric quantization. + bool asymmetric_quantize_inputs; +} TfLiteFullyConnectedParams; + +typedef enum { + kTfLiteLshProjectionUnknown = 0, + kTfLiteLshProjectionSparse = 1, + kTfLiteLshProjectionDense = 2, +} TfLiteLSHProjectionType; + +typedef struct { + TfLiteLSHProjectionType type; +} TfLiteLSHProjectionParams; + +typedef struct { + float beta; +} TfLiteSoftmaxParams; + +typedef struct { + int axis; + TfLiteFusedActivation activation; +} TfLiteConcatenationParams; + +typedef struct { + TfLiteFusedActivation activation; + // Parameter added for the version 4. + bool pot_scale_int16; +} TfLiteAddParams; + +typedef struct { + EmptyStructPlaceholder placeholder; +} TfLiteSpaceToBatchNDParams; + +typedef struct { + EmptyStructPlaceholder placeholder; +} TfLiteBatchToSpaceNDParams; + +typedef struct { + bool adj_x; + bool adj_y; + // Parameters for BatchMatMul version 4 or above. + // If set to true and the weights are quantized, then non constant inputs + // are quantized at evaluation time with asymmetric quantization. + bool asymmetric_quantize_inputs; +} TfLiteBatchMatMulParams; + +typedef struct { + TfLiteFusedActivation activation; +} TfLiteMulParams; + +typedef struct { + TfLiteFusedActivation activation; + // Parameter added for the version 5. + bool pot_scale_int16; +} TfLiteSubParams; + +typedef struct { + TfLiteFusedActivation activation; +} TfLiteDivParams; + +typedef struct { + TfLiteFusedActivation activation; +} TfLiteL2NormParams; + +typedef struct { + int radius; + float bias; + float alpha; + float beta; +} TfLiteLocalResponseNormParams; + +typedef enum { + kTfLiteLSTMFullKernel = 0, + kTfLiteLSTMBasicKernel +} TfLiteLSTMKernelType; + +typedef struct { + // Parameters for LSTM version 1. + TfLiteFusedActivation activation; + float cell_clip; + float proj_clip; + + // Parameters for LSTM version 2. + // kTfLiteLSTMBasicKernel is only supported in version 2 or above. + TfLiteLSTMKernelType kernel_type; + + // Parameters for LSTM version 4. + bool asymmetric_quantize_inputs; +} TfLiteLSTMParams; + +typedef struct { + // Parameters needed for the underlying LSTM. + TfLiteFusedActivation activation; + float cell_clip; + float proj_clip; + + // If set to true then the first dimension is time, otherwise batch. + bool time_major; + + // Parameter for unidirectional sequence RNN version 3. + bool asymmetric_quantize_inputs; + + // Parameter for unidirectional sequence RNN version 4. + bool diagonal_recurrent_tensors; +} TfLiteUnidirectionalSequenceLSTMParams; + +typedef struct { + // Parameters supported by version 1: + // Parameters inherited for the LSTM kernel. 
+ TfLiteFusedActivation activation; + float cell_clip; + float proj_clip; + + // If true, store the outputs of both directions in the first output. + bool merge_outputs; + + // Parameters supported by version 2: + // If set to true then the first dimension is time, otherwise batch. + bool time_major; + + // Parameters supported by version 3: + // If set to true, then hybrid ops use asymmetric quantization for inputs. + bool asymmetric_quantize_inputs; +} TfLiteBidirectionalSequenceLSTMParams; + +typedef struct { + bool align_corners; + // half_pixel_centers assumes pixels are of half the actual dimensions, and + // yields more accurate resizes. Corresponds to the same argument for the + // original TensorFlow op in TF2.0. + bool half_pixel_centers; +} TfLiteResizeBilinearParams; + +typedef struct { + bool align_corners; + bool half_pixel_centers; +} TfLiteResizeNearestNeighborParams; + +typedef struct { + EmptyStructPlaceholder placeholder; +} TfLitePadParams; + +typedef struct { + EmptyStructPlaceholder placeholder; +} TfLitePadV2Params; + +typedef struct { + // These fields are only used in old models for backward compatibility. + // In the current implementation, we use the 2nd input of the op as the shape, + // and these fields are unused. + int shape[TFLITE_RESHAPE_PARAMS_MAX_DIMENSION_COUNT]; + int num_dimensions; +} TfLiteReshapeParams; + +typedef struct { + int ngram_size; + int max_skip_size; + bool include_all_ngrams; +} TfLiteSkipGramParams; + +typedef struct { + int block_size; +} TfLiteSpaceToDepthParams; + +typedef struct { + int block_size; +} TfLiteDepthToSpaceParams; + +typedef struct { + TfLiteType in_data_type; + TfLiteType out_data_type; +} TfLiteCastParams; + +typedef enum { + kTfLiteCombinerTypeSum = 0, + kTfLiteCombinerTypeMean = 1, + kTfLiteCombinerTypeSqrtn = 2, +} TfLiteCombinerType; + +typedef struct { + TfLiteCombinerType combiner; +} TfLiteEmbeddingLookupSparseParams; + +typedef struct { + int axis; + int batch_dims; +} TfLiteGatherParams; + +typedef struct { + EmptyStructPlaceholder placeholder; +} TfLiteTransposeParams; + +typedef struct { + bool keep_dims; +} TfLiteReducerParams; + +typedef struct { + int num_splits; +} TfLiteSplitParams; + +typedef struct { + int num_splits; +} TfLiteSplitVParams; + +typedef struct { + // TODO(ahentz): We can't have dynamic data in this struct, at least not yet. + // For now we will fix the maximum possible number of dimensions. 
+ int squeeze_dims[8]; + int num_squeeze_dims; +} TfLiteSqueezeParams; + +typedef struct { + int begin_mask; + int end_mask; + int ellipsis_mask; + int new_axis_mask; + int shrink_axis_mask; +} TfLiteStridedSliceParams; + +typedef struct { + TfLiteType output_type; +} TfLiteArgMaxParams; + +typedef struct { + TfLiteType output_type; +} TfLiteArgMinParams; + +typedef struct { + // Parameters supported by version 1: + TfLitePadding padding; + int stride_width; + int stride_height; + + // Parameters supported by version 4: + TfLiteFusedActivation activation; +} TfLiteTransposeConvParams; + +typedef struct { + bool validate_indices; +} TfLiteSparseToDenseParams; + +typedef struct { + TfLiteType out_type; +} TfLiteShapeParams; + +typedef struct { + EmptyStructPlaceholder placeholder; +} TfLiteRankParams; + +typedef struct { + // Parameters supported by version 1: + float min; + float max; + int num_bits; + + // Parameters supported by version 2: + bool narrow_range; +} TfLiteFakeQuantParams; + +typedef struct { + int values_count; + int axis; +} TfLitePackParams; + +typedef struct { + int axis; +} TfLiteOneHotParams; + +typedef struct { + int num; + int axis; +} TfLiteUnpackParams; + +typedef struct { + float alpha; +} TfLiteLeakyReluParams; + +typedef struct { + TfLiteType index_out_type; +} TfLiteUniqueParams; + +typedef struct { + int seq_dim; + int batch_dim; +} TfLiteReverseSequenceParams; + +typedef struct { + EmptyStructPlaceholder placeholder; +} TfLiteMatrixDiagParams; + +typedef struct { + EmptyStructPlaceholder placeholder; +} TfLiteMatrixSetDiagParams; + +typedef struct { + int then_subgraph_index; + int else_subgraph_index; +} TfLiteIfParams; + +typedef struct { + int cond_subgraph_index; + int body_subgraph_index; +} TfLiteWhileParams; + +typedef struct { + bool exclusive; + bool reverse; +} TfLiteCumsumParams; + +typedef struct { + int init_subgraph_index; +} TfLiteCallOnceParams; + +typedef struct { + int table_id; + TfLiteType key_dtype; + TfLiteType value_dtype; +} TfLiteHashtableParams; + +typedef struct { + const char* container; + const char* shared_name; +} TfLiteVarHandleParams; + +typedef struct { + int seed; + int seed2; +} TfLiteRandomParams; + +typedef struct { + int num_boundaries; + // This points to the memory stored in the model (flatbuffer), + // and is not owned. + const float* boundaries; +} TfLiteBucketizeParams; + +typedef struct { + bool approximate; +} TfLiteGeluParams; + +#ifdef __cplusplus +} // extern "C" +#endif // __cplusplus + +#endif // TENSORFLOW_LITE_CORE_C_BUILTIN_OP_DATA_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/core/c/c_api_types.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/core/c/c_api_types.h new file mode 100644 index 0000000..3aab43f --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/core/c/c_api_types.h @@ -0,0 +1,168 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +// This file declares types used by the pure C inference API defined in c_api.h, +// some of which are also used in the C++ and C kernel and interpreter APIs. + +/// WARNING: Users of TensorFlow Lite should not include this file directly, +/// but should instead include +/// "third_party/tensorflow/lite/c/c_api_types.h". +/// Only the TensorFlow Lite implementation itself should include this +/// file directly. + +#ifndef TENSORFLOW_LITE_CORE_C_C_API_TYPES_H_ +#define TENSORFLOW_LITE_CORE_C_C_API_TYPES_H_ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +// Define TFL_CAPI_EXPORT macro to export a function properly with a shared +// library. +#ifdef SWIG +#define TFL_CAPI_EXPORT +#elif defined(TFL_STATIC_LIBRARY_BUILD) +#define TFL_CAPI_EXPORT +#else // not definded TFL_STATIC_LIBRARY_BUILD +#if defined(_WIN32) +#ifdef TFL_COMPILE_LIBRARY +#define TFL_CAPI_EXPORT __declspec(dllexport) +#else +#define TFL_CAPI_EXPORT __declspec(dllimport) +#endif // TFL_COMPILE_LIBRARY +#else +#define TFL_CAPI_EXPORT __attribute__((visibility("default"))) +#endif // _WIN32 +#endif // SWIG + +// Note that new error status values may be added in future in order to +// indicate more fine-grained internal states, therefore, applications should +// not rely on status values being members of the enum. +typedef enum TfLiteStatus { + kTfLiteOk = 0, + + // Generally referring to an error in the runtime (i.e. interpreter) + kTfLiteError = 1, + + // Generally referring to an error from a TfLiteDelegate itself. + kTfLiteDelegateError = 2, + + // Generally referring to an error in applying a delegate due to + // incompatibility between runtime and delegate, e.g., this error is returned + // when trying to apply a TF Lite delegate onto a model graph that's already + // immutable. + kTfLiteApplicationError = 3, + + // Generally referring to serialized delegate data not being found. + // See tflite::delegates::Serialization. + kTfLiteDelegateDataNotFound = 4, + + // Generally referring to data-writing issues in delegate serialization. + // See tflite::delegates::Serialization. + kTfLiteDelegateDataWriteError = 5, + + // Generally referring to data-reading issues in delegate serialization. + // See tflite::delegates::Serialization. + kTfLiteDelegateDataReadError = 6, + + // Generally referring to issues when the TF Lite model has ops that cannot be + // resolved at runtime. This could happen when the specific op is not + // registered or built with the TF Lite framework. + kTfLiteUnresolvedOps = 7, + + // Generally referring to invocation cancelled by the user. + // See `interpreter::Cancel`. + // TODO(b/194915839): Implement `interpreter::Cancel`. + // TODO(b/250636993): Cancellation triggered by `SetCancellationFunction` + // should also return this status code. + kTfLiteCancelled = 8, +} TfLiteStatus; + +// Types supported by tensor +typedef enum { + kTfLiteNoType = 0, + kTfLiteFloat32 = 1, + kTfLiteInt32 = 2, + kTfLiteUInt8 = 3, + kTfLiteInt64 = 4, + kTfLiteString = 5, + kTfLiteBool = 6, + kTfLiteInt16 = 7, + kTfLiteComplex64 = 8, + kTfLiteInt8 = 9, + kTfLiteFloat16 = 10, + kTfLiteFloat64 = 11, + kTfLiteComplex128 = 12, + kTfLiteUInt64 = 13, + kTfLiteResource = 14, + kTfLiteVariant = 15, + kTfLiteUInt32 = 16, + kTfLiteUInt16 = 17, + kTfLiteInt4 = 18, +} TfLiteType; + +// Legacy. Will be deprecated in favor of TfLiteAffineQuantization. 
+// If per-layer quantization is specified this field will still be populated in +// addition to TfLiteAffineQuantization. +// Parameters for asymmetric quantization. Quantized values can be converted +// back to float using: +// real_value = scale * (quantized_value - zero_point) +typedef struct TfLiteQuantizationParams { + float scale; + int32_t zero_point; +} TfLiteQuantizationParams; + +// -------------------------------------------------------------------------- +// Opaque types used by c_api.h, c_api_opaque.h and common.h. + +// TfLiteOpaqueContext is an opaque version of TfLiteContext; +typedef struct TfLiteOpaqueContext TfLiteOpaqueContext; + +// TfLiteOpaqueNode is an opaque version of TfLiteNode; +typedef struct TfLiteOpaqueNode TfLiteOpaqueNode; + +// TfLiteOpaqueTensor is an opaque version of TfLiteTensor; +typedef struct TfLiteOpaqueTensor TfLiteOpaqueTensor; + +// TfLiteDelegate: allows delegation of nodes to alternative backends. +// Forward declaration of concrete type declared in common.h. +typedef struct TfLiteDelegate TfLiteDelegate; + +// TfLiteOpaqueDelegateStruct: unconditionally opaque version of +// TfLiteDelegate; allows delegation of nodes to alternative backends. +// +// This is an abstract type that is intended to have the same +// role as TfLiteDelegate, but without exposing the implementation +// details of how delegates are implemented. +// WARNING: This is an experimental type and subject to change. +typedef struct TfLiteOpaqueDelegateStruct TfLiteOpaqueDelegateStruct; + +// TfLiteOpaqueDelegate: conditionally opaque version of +// TfLiteDelegate; allows delegation of nodes to alternative backends. +// For TF Lite in Play Services, this is an opaque type, +// but for regular TF Lite, this is just a typedef for TfLiteDelegate. +// WARNING: This is an experimental type and subject to change. +#if TFLITE_WITH_STABLE_ABI || TFLITE_USE_OPAQUE_DELEGATE +typedef TfLiteOpaqueDelegateStruct TfLiteOpaqueDelegate; +#else +typedef TfLiteDelegate TfLiteOpaqueDelegate; +#endif + +#ifdef __cplusplus +} // extern C +#endif +#endif // TENSORFLOW_LITE_CORE_C_C_API_TYPES_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/core/c/common.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/core/c/common.h new file mode 100644 index 0000000..83b4a31 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/core/c/common.h @@ -0,0 +1,1170 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// This file defines common C types and APIs for implementing operations, +// delegates and other constructs in TensorFlow Lite. The actual operations and +// delegates can be defined using C++, but the interface between the interpreter +// and the operations are C. 
+// +// Summary of abstractions +// TF_LITE_ENSURE - Self-sufficient error checking +// TfLiteStatus - Status reporting +// TfLiteIntArray - stores tensor shapes (dims), +// TfLiteContext - allows an op to access the tensors +// TfLiteTensor - tensor (a multidimensional array) +// TfLiteNode - a single node or operation +// TfLiteRegistration - the implementation of a conceptual operation. +// TfLiteDelegate - allows delegation of nodes to alternative backends. +// +// Some abstractions in this file are created and managed by Interpreter. +// +// NOTE: The order of values in these structs are "semi-ABI stable". New values +// should be added only to the end of structs and never reordered. + +/// WARNING: Users of TensorFlow Lite should not include this file directly, +/// but should instead include +/// "third_party/tensorflow/lite/c/common.h". +/// Only the TensorFlow Lite implementation itself should include this +/// file directly. + +#ifndef TENSORFLOW_LITE_CORE_C_COMMON_H_ +#define TENSORFLOW_LITE_CORE_C_COMMON_H_ + +#include +#include +#include +#include + +#include "edge-impulse-sdk/tensorflow/lite/core/c/c_api_types.h" // IWYU pragma: export + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +// The list of external context types known to TF Lite. This list exists solely +// to avoid conflicts and to ensure ops can share the external contexts they +// need. Access to the external contexts is controlled by one of the +// corresponding support files. +typedef enum TfLiteExternalContextType { + kTfLiteEigenContext = 0, // include eigen_support.h to use. + kTfLiteGemmLowpContext = 1, // include gemm_support.h to use. + kTfLiteEdgeTpuContext = 2, // Placeholder for Edge TPU support. + kTfLiteCpuBackendContext = 3, // include cpu_backend_context.h to use. + kTfLiteMaxExternalContexts = 4 +} TfLiteExternalContextType; + +// Forward declare so dependent structs and methods can reference these types +// prior to the struct definitions. +struct TfLiteContext; +struct TfLiteDelegate; +struct TfLiteRegistration; +struct TfLiteOpaqueDelegateBuilder; + +// An external context is a collection of information unrelated to the TF Lite +// framework, but useful to a subset of the ops. TF Lite knows very little +// about the actual contexts, but it keeps a list of them, and is able to +// refresh them if configurations like the number of recommended threads +// change. +typedef struct TfLiteExternalContext { + TfLiteExternalContextType type; + TfLiteStatus (*Refresh)(struct TfLiteContext* context); +} TfLiteExternalContext; + +#define kTfLiteOptionalTensor (-1) + +// Fixed size list of integers. Used for dimensions and inputs/outputs tensor +// indices +typedef struct TfLiteIntArray { + int size; + +#if defined(_MSC_VER) + // Context for why this is needed is in http://b/189926408#comment21 + int data[1]; +#elif (!defined(__clang__) && defined(__GNUC__) && __GNUC__ == 6 && \ + __GNUC_MINOR__ >= 1) || \ + defined(HEXAGON) || \ + (defined(__clang__) && __clang_major__ == 7 && __clang_minor__ == 1) + // gcc 6.1+ have a bug where flexible members aren't properly handled + // https://github.com/google/re2/commit/b94b7cd42e9f02673cd748c1ac1d16db4052514c + int data[0]; +#else + int data[]; +#endif +} TfLiteIntArray; + +// Given the size (number of elements) in a TfLiteIntArray, calculate its size +// in bytes. +size_t TfLiteIntArrayGetSizeInBytes(int size); + +#ifndef TF_LITE_STATIC_MEMORY +// Create a array of a given `size` (uninitialized entries). 
+// This returns a pointer, that you must free using TfLiteIntArrayFree(). +TfLiteIntArray* TfLiteIntArrayCreate(int size); +#endif + +// Check if two intarrays are equal. Returns 1 if they are equal, 0 otherwise. +int TfLiteIntArrayEqual(const TfLiteIntArray* a, const TfLiteIntArray* b); + +// Check if an intarray equals an array. Returns 1 if equals, 0 otherwise. +int TfLiteIntArrayEqualsArray(const TfLiteIntArray* a, int b_size, + const int b_data[]); + +#ifndef TF_LITE_STATIC_MEMORY +// Create a copy of an array passed as `src`. +// You are expected to free memory with TfLiteIntArrayFree +TfLiteIntArray* TfLiteIntArrayCopy(const TfLiteIntArray* src); + +// Free memory of array `a`. +void TfLiteIntArrayFree(TfLiteIntArray* a); +#endif // TF_LITE_STATIC_MEMORY + +// Fixed size list of floats. Used for per-channel quantization. +typedef struct TfLiteFloatArray { + int size; +#if defined(_MSC_VER) + // Context for why this is needed is in http://b/189926408#comment21 + float data[1]; +#elif (!defined(__clang__) && defined(__GNUC__) && __GNUC__ == 6 && \ + __GNUC_MINOR__ >= 1) || \ + defined(HEXAGON) || \ + (defined(__clang__) && __clang_major__ == 7 && __clang_minor__ == 1) + // gcc 6.1+ have a bug where flexible members aren't properly handled + // https://github.com/google/re2/commit/b94b7cd42e9f02673cd748c1ac1d16db4052514c + float data[0]; +#else + float data[]; +#endif +} TfLiteFloatArray; + +// Given the size (number of elements) in a TfLiteFloatArray, calculate its size +// in bytes. +int TfLiteFloatArrayGetSizeInBytes(int size); + +#ifndef TF_LITE_STATIC_MEMORY +// Create a array of a given `size` (uninitialized entries). +// This returns a pointer, that you must free using TfLiteFloatArrayFree(). +TfLiteFloatArray* TfLiteFloatArrayCreate(int size); + +// Free memory of array `a`. +void TfLiteFloatArrayFree(TfLiteFloatArray* a); +#endif // TF_LITE_STATIC_MEMORY + +// Since we must not depend on any libraries, define a minimal subset of +// error macros while avoiding names that have pre-conceived meanings like +// assert and check. + +// Try to make all reporting calls through TF_LITE_KERNEL_LOG rather than +// calling the context->ReportError function directly, so that message strings +// can be stripped out if the binary size needs to be severely optimized. +#ifndef TF_LITE_STRIP_ERROR_STRINGS +#ifdef TF_LITE_LOG_FILE_NAME +#define TF_LITE_KERNEL_LOG(context, ...) \ + do { \ + (context)->ReportError((context), __FILE__ " " __VA_ARGS__); \ + } while (false) + +#define TF_LITE_MAYBE_KERNEL_LOG(context, ...) \ + do { \ + if ((context) != nullptr) { \ + (context)->ReportError((context), __FILE__ " " __VA_ARGS__); \ + } \ + } while (false) +#else // TF_LITE_LOG_FILE_NAME +#define TF_LITE_KERNEL_LOG(context, ...) \ + do { \ + (context)->ReportError((context), __VA_ARGS__); \ + } while (false) + +#define TF_LITE_MAYBE_KERNEL_LOG(context, ...) \ + do { \ + if ((context) != nullptr) { \ + (context)->ReportError((context), __VA_ARGS__); \ + } \ + } while (false) +#endif // TF_LITE_LOG_FILE_NAME +#else // TF_LITE_STRIP_ERROR_STRINGS +#define ARGS_UNUSED(...) (void)sizeof(#__VA_ARGS__) +#define TF_LITE_KERNEL_LOG(context, ...) ARGS_UNUSED(__VA_ARGS__) +#define TF_LITE_MAYBE_KERNEL_LOG(context, ...) ARGS_UNUSED(__VA_ARGS__) +#endif // TF_LITE_STRIP_ERROR_STRINGS + +// Check whether value is true, and if not return kTfLiteError from +// the current function (and report the error string msg). 
+#define TF_LITE_ENSURE_MSG(context, value, msg) \ + do { \ + if (!(value)) { \ + TF_LITE_KERNEL_LOG((context), __FILE__ " " msg); \ + return kTfLiteError; \ + } \ + } while (0) + +// Check whether the value `a` is true, and if not return kTfLiteError from +// the current function, while also reporting the location of the error. +#define TF_LITE_ENSURE(context, a) \ + do { \ + if (!(a)) { \ + TF_LITE_KERNEL_LOG((context), "%s:%d %s was not true.", __FILE__, \ + __LINE__, #a); \ + return kTfLiteError; \ + } \ + } while (0) + +#define TF_LITE_ENSURE_STATUS(a) \ + do { \ + const TfLiteStatus s = (a); \ + if (s != kTfLiteOk) { \ + return s; \ + } \ + } while (0) + +// Check whether the value `a == b` is true, and if not return kTfLiteError from +// the current function, while also reporting the location of the error. +// `a` and `b` may be evaluated more than once, so no side effects or +// extremely expensive computations should be done. +// NOTE: Use TF_LITE_ENSURE_TYPES_EQ if comparing TfLiteTypes. +#define TF_LITE_ENSURE_EQ(context, a, b) \ + do { \ + if ((a) != (b)) { \ + TF_LITE_KERNEL_LOG((context), "%s:%d %s != %s (%d != %d)", __FILE__, \ + __LINE__, #a, #b, (a), (b)); \ + return kTfLiteError; \ + } \ + } while (0) + +#define TF_LITE_ENSURE_TYPES_EQ(context, a, b) \ + do { \ + if ((a) != (b)) { \ + TF_LITE_KERNEL_LOG((context), "%s:%d %s != %s (%s != %s)", __FILE__, \ + __LINE__, #a, #b, TfLiteTypeGetName(a), \ + TfLiteTypeGetName(b)); \ + return kTfLiteError; \ + } \ + } while (0) + +#define TF_LITE_ENSURE_NEAR(context, a, b, epsilon) \ + do { \ + auto delta = ((a) > (b)) ? ((a) - (b)) : ((b) - (a)); \ + if (delta > epsilon) { \ + TF_LITE_KERNEL_LOG((context), "%s:%d %s not near %s (%f != %f)", \ + __FILE__, __LINE__, #a, #b, static_cast(a), \ + static_cast(b)); \ + return kTfLiteError; \ + } \ + } while (0) + +#define TF_LITE_ENSURE_OK(context, status) \ + do { \ + const TfLiteStatus s = (status); \ + if ((s) != kTfLiteOk) { \ + return s; \ + } \ + } while (0) + +// Single-precision complex data type compatible with the C99 definition. +typedef struct TfLiteComplex64 { + float re, im; // real and imaginary parts, respectively. +} TfLiteComplex64; + +// Double-precision complex data type compatible with the C99 definition. +typedef struct TfLiteComplex128 { + double re, im; // real and imaginary parts, respectively. +} TfLiteComplex128; + +// Half precision data type compatible with the C99 definition. +typedef struct TfLiteFloat16 { + uint16_t data; +} TfLiteFloat16; + +// Return the name of a given type, for error reporting purposes. +const char* TfLiteTypeGetName(TfLiteType type); + +// SupportedQuantizationTypes. +typedef enum TfLiteQuantizationType { + // No quantization. + kTfLiteNoQuantization = 0, + // Affine quantization (with support for per-channel quantization). + // Corresponds to TfLiteAffineQuantization. + kTfLiteAffineQuantization = 1, +} TfLiteQuantizationType; + +// Structure specifying the quantization used by the tensor, if-any. +typedef struct TfLiteQuantization { + // The type of quantization held by params. + TfLiteQuantizationType type; + // Holds an optional reference to a quantization param structure. The actual + // type depends on the value of the `type` field (see the comment there for + // the values and corresponding types). + void* params; +} TfLiteQuantization; + +// Parameters for asymmetric quantization across a dimension (i.e per output +// channel quantization). 
+// quantized_dimension specifies which dimension the scales and zero_points +// correspond to. +// For a particular value in quantized_dimension, quantized values can be +// converted back to float using: +// real_value = scale * (quantized_value - zero_point) +typedef struct TfLiteAffineQuantization { + TfLiteFloatArray* scale; + TfLiteIntArray* zero_point; + int32_t quantized_dimension; +} TfLiteAffineQuantization; + +/* A union of pointers that points to memory for a given tensor. */ +typedef union TfLitePtrUnion { + /* Do not access these members directly, if possible, use + * GetTensorData(tensor) instead, otherwise only access .data, as other + * members are deprecated. */ + int32_t* i32; + uint32_t* u32; + int64_t* i64; + uint64_t* u64; + float* f; + TfLiteFloat16* f16; + double* f64; + char* raw; + const char* raw_const; + uint8_t* uint8; + bool* b; + int16_t* i16; + uint16_t* ui16; + TfLiteComplex64* c64; + TfLiteComplex128* c128; + int8_t* int8; + /* Only use this member. */ + void* data; +} TfLitePtrUnion; + +// Memory allocation strategies. +// * kTfLiteMmapRo: Read-only memory-mapped data, or data externally allocated. +// * kTfLiteArenaRw: Arena allocated with no guarantees about persistence, +// and available during eval. +// * kTfLiteArenaRwPersistent: Arena allocated but persistent across eval, and +// only available during eval. +// * kTfLiteDynamic: Allocated during eval, or for string tensors. +// * kTfLitePersistentRo: Allocated and populated during prepare. This is +// useful for tensors that can be computed during prepare and treated +// as constant inputs for downstream ops (also in prepare). +// * kTfLiteCustom: Custom memory allocation provided by the user. See +// TfLiteCustomAllocation below. +typedef enum TfLiteAllocationType { + kTfLiteMemNone = 0, + kTfLiteMmapRo, + kTfLiteArenaRw, + kTfLiteArenaRwPersistent, + kTfLiteDynamic, + kTfLitePersistentRo, + kTfLiteCustom, +} TfLiteAllocationType; + +// The delegates should use zero or positive integers to represent handles. +// -1 is reserved from unallocated status. +typedef int TfLiteBufferHandle; +enum { + kTfLiteNullBufferHandle = -1, +}; + +// Storage format of each dimension in a sparse tensor. +typedef enum TfLiteDimensionType { + kTfLiteDimDense = 0, + kTfLiteDimSparseCSR, +} TfLiteDimensionType; + +// Metadata to encode each dimension in a sparse tensor. +typedef struct TfLiteDimensionMetadata { + TfLiteDimensionType format; + int dense_size; + TfLiteIntArray* array_segments; + TfLiteIntArray* array_indices; +} TfLiteDimensionMetadata; + +// Parameters used to encode a sparse tensor. For detailed explanation of each +// field please refer to lite/schema/schema.fbs. +typedef struct TfLiteSparsity { + TfLiteIntArray* traversal_order; + TfLiteIntArray* block_map; + TfLiteDimensionMetadata* dim_metadata; + int dim_metadata_size; +} TfLiteSparsity; + +// Defines a custom memory allocation not owned by the runtime. +// `data` should be aligned to kDefaultTensorAlignment defined in +// lite/util.h. (Currently 64 bytes) +// NOTE: See Interpreter.SetCustomAllocationForTensor for details on usage. +typedef struct TfLiteCustomAllocation { + void* data; + size_t bytes; +} TfLiteCustomAllocation; + +// The flags used in `Interpreter::SetCustomAllocationForTensor`. +// Note that this is a bitmask, so the values should be 1, 2, 4, 8, ...etc. 
+typedef enum TfLiteCustomAllocationFlags { + kTfLiteCustomAllocationFlagsNone = 0, + // Skips checking whether allocation.data points to an aligned buffer as + // expected by the TFLite runtime. + // NOTE: Setting this flag can cause crashes when calling Invoke(). + // Use with caution. + kTfLiteCustomAllocationFlagsSkipAlignCheck = 1, +} TfLiteCustomAllocationFlags; + +// A tensor in the interpreter system which is a wrapper around a buffer of +// data including a dimensionality (or NULL if not currently defined). +#ifndef TF_LITE_STATIC_MEMORY +typedef struct TfLiteTensor { + // The data type specification for data stored in `data`. This affects + // what member of `data` union should be used. + TfLiteType type; + // A union of data pointers. The appropriate type should be used for a typed + // tensor based on `type`. + TfLitePtrUnion data; + // A pointer to a structure representing the dimensionality interpretation + // that the buffer should have. NOTE: the product of elements of `dims` + // and the element datatype size should be equal to `bytes` below. + TfLiteIntArray* dims; + // Quantization information. + TfLiteQuantizationParams params; + // How memory is mapped + // kTfLiteMmapRo: Memory mapped read only. + // i.e. weights + // kTfLiteArenaRw: Arena allocated read write memory + // (i.e. temporaries, outputs). + TfLiteAllocationType allocation_type; + // The number of bytes required to store the data of this Tensor. I.e. + // (bytes of each element) * dims[0] * ... * dims[n-1]. For example, if + // type is kTfLiteFloat32 and dims = {3, 2} then + // bytes = sizeof(float) * 3 * 2 = 4 * 3 * 2 = 24. + size_t bytes; + + // An opaque pointer to a tflite::MMapAllocation + const void* allocation; + + // Null-terminated name of this tensor. + const char* name; + + // The delegate which knows how to handle `buffer_handle`. + // WARNING: This is an experimental interface that is subject to change. + struct TfLiteDelegate* delegate; + + // An integer buffer handle that can be handled by `delegate`. + // The value is valid only when delegate is not null. + // WARNING: This is an experimental interface that is subject to change. + TfLiteBufferHandle buffer_handle; + + // If the delegate uses its own buffer (e.g. GPU memory), the delegate is + // responsible to set data_is_stale to true. + // `delegate->CopyFromBufferHandle` can be called to copy the data from + // delegate buffer. + // WARNING: This is an // experimental interface that is subject to change. + bool data_is_stale; + + // True if the tensor is a variable. + bool is_variable; + + // Quantization information. Replaces params field above. + TfLiteQuantization quantization; + + // Parameters used to encode a sparse tensor. + // This is optional. The field is NULL if a tensor is dense. + // WARNING: This is an experimental interface that is subject to change. + TfLiteSparsity* sparsity; + + // Optional. Encodes shapes with unknown dimensions with -1. This field is + // only populated when unknown dimensions exist in a read-write tensor (i.e. + // an input or output tensor). (e.g. `dims` contains [1, 1, 1, 3] and + // `dims_signature` contains [1, -1, -1, 3]). If no unknown dimensions exist + // then `dims_signature` is either null, or set to an empty array. Note that + // this field only exists when TF_LITE_STATIC_MEMORY is not defined. + const TfLiteIntArray* dims_signature; +} TfLiteTensor; + +// A structure representing an instance of a node. 
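// Editor's note -- illustrative sketch, not part of this header: the `bytes`
// field of the TfLiteTensor above is expected to equal the element size times
// the product of all entries in `dims`, and typed data is read through the
// matching TfLitePtrUnion member. A hypothetical consistency check for a
// float tensor:
//
//   static bool ExampleCheckFloatTensor(const TfLiteTensor* t) {
//     if (t->type != kTfLiteFloat32) return false;
//     size_t elements = 1;
//     for (int i = 0; i < t->dims->size; ++i) elements *= t->dims->data[i];
//     const float* values = t->data.f;  // valid only for kTfLiteFloat32
//     return values != nullptr && t->bytes == elements * sizeof(float);
//   }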
+// This structure only exhibits the inputs, outputs, user defined data and some +// node properties (like statefulness), not other features like the type. +typedef struct TfLiteNode { + // Inputs to this node expressed as indices into the simulator's tensors. + TfLiteIntArray* inputs; + + // Outputs to this node expressed as indices into the simulator's tensors. + TfLiteIntArray* outputs; + + // intermediate tensors to this node expressed as indices into the simulator's + // tensors. + TfLiteIntArray* intermediates; + + // Temporary tensors uses during the computations. This usually contains no + // tensors, but ops are allowed to change that if they need scratch space of + // any sort. + TfLiteIntArray* temporaries; + + // Opaque data provided by the node implementer through `Registration.init`. + void* user_data; + + // Opaque data provided to the node if the node is a builtin. This is usually + // a structure defined in builtin_op_data.h + void* builtin_data; + + // Custom initial data. This is the opaque data provided in the flatbuffer. + // WARNING: This is an experimental interface that is subject to change. + const void* custom_initial_data; + int custom_initial_data_size; + + // The pointer to the delegate. This is non-null only when the node is + // created by calling `interpreter.ModifyGraphWithDelegate`. + // WARNING: This is an experimental interface that is subject to change. + struct TfLiteDelegate* delegate; + + // Whether this op might have side effect (e.g. stateful op). + bool might_have_side_effect; +} TfLiteNode; +#else // defined(TF_LITE_STATIC_MEMORY)? +// NOTE: This flag is opt-in only at compile time. +// +// Specific reduced TfLiteTensor struct for TF Micro runtime. This struct +// contains only the minimum fields required to initialize and prepare a micro +// inference graph. The fields in this struct have been ordered from +// largest-to-smallest for optimal struct sizeof. +// +// This struct does not use: +// - allocation +// - buffer_handle +// - data_is_stale +// - delegate +// - dims_signature +// - name +// - sparsity +typedef struct TfLiteTensor { + // TODO(b/155784997): Consider consolidating these quantization fields: + // Quantization information. Replaces params field above. + TfLiteQuantization quantization; + + // Quantization information. + TfLiteQuantizationParams params; + + // A union of data pointers. The appropriate type should be used for a typed + // tensor based on `type`. + TfLitePtrUnion data; + + // A pointer to a structure representing the dimensionality interpretation + // that the buffer should have. NOTE: the product of elements of `dims` + // and the element datatype size should be equal to `bytes` below. + TfLiteIntArray* dims; + + // The number of bytes required to store the data of this Tensor. I.e. + // (bytes of each element) * dims[0] * ... * dims[n-1]. For example, if + // type is kTfLiteFloat32 and dims = {3, 2} then + // bytes = sizeof(float) * 3 * 2 = 4 * 3 * 2 = 24. + size_t bytes; + + // The data type specification for data stored in `data`. This affects + // what member of `data` union should be used. + TfLiteType type; + + // How memory is mapped + // kTfLiteMmapRo: Memory mapped read only. + // i.e. weights + // kTfLiteArenaRw: Arena allocated read write memory + // (i.e. temporaries, outputs). + TfLiteAllocationType allocation_type; + + // True if the tensor is a variable. + bool is_variable; +} TfLiteTensor; + +// Specific reduced TfLiteNode struct for TF Micro runtime. 
This struct contains +// only the minimum fields required to represent a node. +// +// This struct does not use: +// - delegate +// - intermediates +// - temporaries +typedef struct TfLiteNode { + // Inputs to this node expressed as indices into the simulator's tensors. + TfLiteIntArray* inputs; + + // Outputs to this node expressed as indices into the simulator's tensors. + TfLiteIntArray* outputs; + + // intermediate tensors to this node expressed as indices into the simulator's + // tensors. + TfLiteIntArray* intermediates; + + // Opaque data provided by the node implementer through `Registration.init`. + void* user_data; + + // Opaque data provided to the node if the node is a builtin. This is usually + // a structure defined in builtin_op_data.h + void* builtin_data; + + // Custom initial data. This is the opaque data provided in the flatbuffer. + // WARNING: This is an experimental interface that is subject to change. + const void* custom_initial_data; + int custom_initial_data_size; +} TfLiteNode; +#endif // TF_LITE_STATIC_MEMORY + +// Light-weight tensor struct for TF Micro runtime. Provides the minimal amount +// of information required for a kernel to run during TfLiteRegistration::Eval. +// TODO(b/160955687): Move this field into TF_LITE_STATIC_MEMORY when TFLM +// builds with this flag by default internally. +typedef struct TfLiteEvalTensor { + // A union of data pointers. The appropriate type should be used for a typed + // tensor based on `type`. + TfLitePtrUnion data; + + // A pointer to a structure representing the dimensionality interpretation + // that the buffer should have. + TfLiteIntArray* dims; + + // The data type specification for data stored in `data`. This affects + // what member of `data` union should be used. + TfLiteType type; +} TfLiteEvalTensor; + +#ifndef TF_LITE_STATIC_MEMORY +// Free data memory of tensor `t`. +void TfLiteTensorDataFree(TfLiteTensor* t); + +// Free quantization data. +void TfLiteQuantizationFree(TfLiteQuantization* quantization); + +// Free sparsity parameters. +void TfLiteSparsityFree(TfLiteSparsity* sparsity); + +// Free memory of tensor `t`. +void TfLiteTensorFree(TfLiteTensor* t); + +// Set all of a tensor's fields (and free any previously allocated data). +void TfLiteTensorReset(TfLiteType type, const char* name, TfLiteIntArray* dims, + TfLiteQuantizationParams quantization, char* buffer, + size_t size, TfLiteAllocationType allocation_type, + const void* allocation, bool is_variable, + TfLiteTensor* tensor); + +// Copies the contents of 'src' in 'dst'. +// Function does nothing if either 'src' or 'dst' is passed as nullptr and +// return kTfLiteOk. +// Returns kTfLiteError if 'src' and 'dst' doesn't have matching data size. +// Note function copies contents, so it won't create new data pointer +// or change allocation type. +// All Tensor related properties will be copied from 'src' to 'dst' like +// quantization, sparsity, ... +TfLiteStatus TfLiteTensorCopy(const TfLiteTensor* src, TfLiteTensor* dst); + +// Change the size of the memory block owned by `tensor` to `num_bytes`. +// Tensors with allocation types other than `kTfLiteDynamic` will be ignored and +// a kTfLiteOk will be returned. +// `tensor`'s internal data buffer will be assigned a pointer +// which can safely be passed to free or realloc if `num_bytes` is zero. +// If `preserve_data` is true, tensor data will be unchanged in the range from +// the start of the region up to the minimum of the old and new sizes. 
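// Editor's illustrative note (not part of the original comment): for a
// kTfLiteDynamic tensor currently holding 8 bytes, a hypothetical call
//
//   TfLiteTensorResizeMaybeCopy(16, tensor, /*preserve_data=*/true);
//
// grows the buffer to 16 bytes and keeps the original 8 bytes at the start of
// the region, while a call with num_bytes == 4 keeps only the first 4 bytes.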
In the +// case of NULL tensor, or an error allocating new memory, returns +// `kTfLiteError`. +TfLiteStatus TfLiteTensorResizeMaybeCopy(size_t num_bytes, TfLiteTensor* tensor, + bool preserve_data); + +// Change the size of the memory block owned by `tensor` to `num_bytes`. +// Tensors with allocation types other than kTfLiteDynamic will be ignored and +// a kTfLiteOk will be returned. +// `tensor`'s internal data buffer will be assigned a pointer +// which can safely be passed to free or realloc if `num_bytes` is zero. +// Tensor data will be unchanged in the range from the start of the region up to +// the minimum of the old and new sizes. In the case +// of NULL tensor, or an error allocating new memory, returns `kTfLiteError`. +TfLiteStatus TfLiteTensorRealloc(size_t num_bytes, TfLiteTensor* tensor); +#endif // TF_LITE_STATIC_MEMORY + +// WARNING: This is an experimental interface that is subject to change. +// +// Currently, TfLiteDelegateParams has to be allocated in a way that it's +// trivially destructable. It will be stored as `builtin_data` field in +// `TfLiteNode` of the delegate node. +// +// See also the `CreateDelegateParams` function in `interpreter.cc` details. +typedef struct TfLiteDelegateParams { + struct TfLiteDelegate* delegate; + TfLiteIntArray* nodes_to_replace; + TfLiteIntArray* input_tensors; + TfLiteIntArray* output_tensors; +} TfLiteDelegateParams; + +// WARNING: This is an experimental interface that is subject to change. +// +// Currently, TfLiteOpaqueDelegateParams has to be allocated in a way that it's +// trivially destructable. It will be stored as `builtin_data` field in +// `TfLiteNode` of the delegate node. +// +// See also the `CreateOpaqueDelegateParams` function in `subgraph.cc` +// details. +typedef struct TfLiteOpaqueDelegateParams { + TfLiteOpaqueDelegate* delegate; + void* delegate_data; + TfLiteIntArray* nodes_to_replace; + TfLiteIntArray* input_tensors; + TfLiteIntArray* output_tensors; +} TfLiteOpaqueDelegateParams; + +typedef struct TfLiteContext { + // Number of tensors in the context. + size_t tensors_size; + + // The execution plan contains a list of the node indices in execution + // order. execution_plan->size is the current number of nodes. And, + // execution_plan->data[0] is the first node that needs to be run. + // TfLiteDelegates can traverse the current execution plan by iterating + // through each member of this array and using GetNodeAndRegistration() to + // access details about a node. i.e. + // + // TfLiteIntArray* execution_plan; + // TF_LITE_ENSURE_STATUS(context->GetExecutionPlan(context, &execution_plan)); + // for (int exec_index = 0; exec_index < execution_plan->size; exec_index++) { + // int node_index = execution_plan->data[exec_index]; + // TfLiteNode* node; + // TfLiteRegistration* reg; + // context->GetNodeAndRegistration(context, node_index, &node, ®); + // } + // Note: the memory pointed by '`*execution_plan` is OWNED by TfLite runtime. + // Future calls to GetExecutionPlan invalidates earlier outputs. The following + // code snippet shows the issue of such an invocation pattern. After calling + // CheckNode, subsequent access to `plan_1st` is undefined. + // + // void CheckNode(const TfLiteNode* node) { + // ... + // TfLiteIntArray* plan_2nd; + // TF_LITE_ENSURE_STATUS(context->GetExecutionPlan(context, &plan_2nd)); + // ... 
+ // } + // + // TfLiteIntArray* plan_1st; + // TF_LITE_ENSURE_STATUS(context->GetExecutionPlan(context, &plan_1st)); + // for (int exec_index = 0; exec_index < plan_1st->size; exec_index++) { + // int node_index = plan_1st->data[exec_index]; + // TfLiteNode* node; + // TfLiteRegistration* reg; + // context->GetNodeAndRegistration(context, node_index, &node, ®); + // CheckNode(node); + // } + // + // WARNING: This is an experimental interface that is subject to change. + TfLiteStatus (*GetExecutionPlan)(struct TfLiteContext* context, + TfLiteIntArray** execution_plan); + + // opaque full context ptr (an opaque c++ data structure) + void* impl_; + + // Request memory pointer be resized. Updates dimensions on the tensor. + // NOTE: ResizeTensor takes ownership of newSize. + TfLiteStatus (*ResizeTensor)(struct TfLiteContext*, TfLiteTensor* tensor, + TfLiteIntArray* new_size); + // Request that an error be reported with format string msg. + void (*ReportError)(struct TfLiteContext*, const char* msg, ...); + + // Add `tensors_to_add` tensors, preserving pre-existing Tensor entries. If + // non-null, the value pointed to by `first_new_tensor_index` will be set to + // the index of the first new tensor. + TfLiteStatus (*AddTensors)(struct TfLiteContext*, int tensors_to_add, + int* first_new_tensor_index); + + // Get a Tensor node by node_index. + // WARNING: This is an experimental interface that is subject to change. + TfLiteStatus (*GetNodeAndRegistration)( + struct TfLiteContext*, int node_index, TfLiteNode** node, + struct TfLiteRegistration** registration); + + // Replace ops with one or more stub delegate operations. This function + // does not take ownership of `nodes_to_replace`. + TfLiteStatus (*ReplaceNodeSubsetsWithDelegateKernels)( + struct TfLiteContext*, struct TfLiteRegistration registration, + const TfLiteIntArray* nodes_to_replace, struct TfLiteDelegate* delegate); + + // Number of threads that are recommended to subsystems like gemmlowp and + // eigen. + int recommended_num_threads; + + // Access external contexts by type. + // WARNING: This is an experimental interface that is subject to change. + TfLiteExternalContext* (*GetExternalContext)(struct TfLiteContext*, + TfLiteExternalContextType); + // Set the value of a external context. Does not take ownership of the + // pointer. + // WARNING: This is an experimental interface that is subject to change. + void (*SetExternalContext)(struct TfLiteContext*, TfLiteExternalContextType, + TfLiteExternalContext*); + + // Flag for allowing float16 precision for FP32 calculation. + // default: false. + // WARNING: This is an experimental API and subject to change. + bool allow_fp32_relax_to_fp16; + + // Pointer to the op-level profiler, if set; nullptr otherwise. + void* profiler; + + // Allocate persistent buffer which has the same life time as the interpreter. + // Returns nullptr on failure. + // The memory is allocated from heap for TFL, and from tail in TFLM. + // This method is only available in Init or Prepare stage. + // WARNING: This is an experimental interface that is subject to change. + void* (*AllocatePersistentBuffer)(struct TfLiteContext* ctx, size_t bytes); + + // Allocate a buffer which will be deallocated right after invoke phase. + // The memory is allocated from heap in TFL, and from volatile arena in TFLM. + // This method is only available in invoke stage. + // NOTE: If possible use RequestScratchBufferInArena method to avoid memory + // allocation during inference time. 
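  // Editor's illustrative note (not part of this header): in TFLM the usual
  // pattern is to reserve long-lived op state with AllocatePersistentBuffer()
  // during init, request temporary space with RequestScratchBufferInArena()
  // during Prepare, and fetch it with GetScratchBuffer() during Eval, e.g.
  // (hypothetical kernel code, `scratch_bytes` chosen by the op):
  //
  //   // In Prepare:
  //   int scratch_idx = -1;
  //   TF_LITE_ENSURE_STATUS(context->RequestScratchBufferInArena(
  //       context, scratch_bytes, &scratch_idx));
  //   // In Eval:
  //   int32_t* scratch = reinterpret_cast<int32_t*>(
  //       context->GetScratchBuffer(context, scratch_idx));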
+ // WARNING: This is an experimental interface that is subject to change. + TfLiteStatus (*AllocateBufferForEval)(struct TfLiteContext* ctx, size_t bytes, + void** ptr); + + // Request a scratch buffer in the arena through static memory planning. + // This method is only available in Prepare stage and the buffer is allocated + // by the interpreter between Prepare and Eval stage. In Eval stage, + // GetScratchBuffer API can be used to fetch the address. + // WARNING: This is an experimental interface that is subject to change. + TfLiteStatus (*RequestScratchBufferInArena)(struct TfLiteContext* ctx, + size_t bytes, int* buffer_idx); + + // Get the scratch buffer pointer. + // This method is only available in Eval stage. + // WARNING: This is an experimental interface that is subject to change. + void* (*GetScratchBuffer)(struct TfLiteContext* ctx, int buffer_idx); + + // Resize the memory pointer of the `tensor`. This method behaves the same as + // `ResizeTensor`, except that it makes a copy of the shape array internally + // so the shape array could be deallocated right afterwards. + // WARNING: This is an experimental interface that is subject to change. + TfLiteStatus (*ResizeTensorExplicit)(struct TfLiteContext* ctx, + TfLiteTensor* tensor, int dims, + const int* shape); + + // This method provides a preview of post-delegation partitioning. Each + // TfLiteDelegateParams in the referenced array corresponds to one instance of + // the delegate kernel. + // Example usage: + // + // TfLiteIntArray* nodes_to_replace = ...; + // TfLiteDelegateParams* params_array; + // int num_partitions = 0; + // TF_LITE_ENSURE_STATUS(context->PreviewDelegatePartitioning( + // context, delegate, nodes_to_replace, ¶ms_array, &num_partitions)); + // for (int idx = 0; idx < num_partitions; idx++) { + // const auto& partition_params = params_array[idx]; + // ... + // } + // + // NOTE: The context owns the memory referenced by partition_params_array. It + // will be cleared with another call to PreviewDelegateParitioning, or after + // TfLiteDelegateParams::Prepare returns. + // + // WARNING: This is an experimental interface that is subject to change. + TfLiteStatus (*PreviewDelegatePartitioning)( + struct TfLiteContext* context, const TfLiteIntArray* nodes_to_replace, + TfLiteDelegateParams** partition_params_array, int* num_partitions); + + // Returns a TfLiteTensor struct for a given index. + // WARNING: This is an experimental interface that is subject to change. + // WARNING: This method may not be available on all platforms. + TfLiteTensor* (*GetTensor)(const struct TfLiteContext* context, + int tensor_idx); + + // Returns a TfLiteEvalTensor struct for a given index. + // WARNING: This is an experimental interface that is subject to change. + // WARNING: This method may not be available on all platforms. + TfLiteEvalTensor* (*GetEvalTensor)(const struct TfLiteContext* context, + int tensor_idx); + + // Retrieves named metadata buffer from the TFLite model. + // Returns kTfLiteOk if metadata is successfully obtained from the flatbuffer + // Model: that is, there exists a `metadata` entry with given `name` string. + // (see TFLite's schema.fbs). + // The corresponding `buffer` information is populated in `ptr` & `bytes`. + // The data from `ptr` is valid for the lifetime of the Interpreter. + // + // WARNING: This is an experimental interface that is subject to change. 
+ TfLiteStatus (*GetModelMetadata)(const struct TfLiteContext* context, + const char* name, const char** ptr, + size_t* bytes); +} TfLiteContext; + +// `TfLiteRegistrationExternal` is an external version of `TfLiteRegistration` +// for C API which doesn't use internal types (such as `TfLiteContext`) but only +// uses stable API types (such as `TfLiteOpaqueContext`). The purpose of each +// field is the exactly the same as with `TfLiteRegistration`. +typedef struct TfLiteRegistrationExternal TfLiteRegistrationExternal; + +typedef struct TfLiteRegistration { + // Initializes the op from serialized data. + // Called only *once* for the lifetime of the op, so any one-time allocations + // should be made here (unless they depend on tensor sizes). + // + // If a built-in op: + // `buffer` is the op's params data (TfLiteLSTMParams*). + // `length` is zero. + // If custom op: + // `buffer` is the op's `custom_options`. + // `length` is the size of the buffer. + // + // Returns a type-punned (i.e. void*) opaque data (e.g. a primitive pointer + // or an instance of a struct). + // + // The returned pointer will be stored with the node in the `user_data` field, + // accessible within prepare and invoke functions below. + // NOTE: if the data is already in the desired format, simply implement this + // function to return `nullptr` and implement the free function to be a no-op. + void* (*init)(TfLiteContext* context, const char* buffer, size_t length); + + // The pointer `buffer` is the data previously returned by an init invocation. + void (*free)(TfLiteContext* context, void* buffer); + + // prepare is called when the inputs this node depends on have been resized. + // context->ResizeTensor() can be called to request output tensors to be + // resized. + // Can be called multiple times for the lifetime of the op. + // + // Returns kTfLiteOk on success. + TfLiteStatus (*prepare)(TfLiteContext* context, TfLiteNode* node); + + // Execute the node (should read node->inputs and output to node->outputs). + // Returns kTfLiteOk on success. + TfLiteStatus (*invoke)(TfLiteContext* context, TfLiteNode* node); + + // profiling_string is called during summarization of profiling information + // in order to group executions together. Providing a value here will cause a + // given op to appear multiple times is the profiling report. This is + // particularly useful for custom ops that can perform significantly + // different calculations depending on their `user-data`. + const char* (*profiling_string)(const TfLiteContext* context, + const TfLiteNode* node); + + // Builtin codes. If this kernel refers to a builtin this is the code + // of the builtin. This is so we can do marshaling to other frameworks like + // NN API. + // Note: It is the responsibility of the registration binder to set this + // properly. + int32_t builtin_code; + + // Custom op name. If the op is a builtin, this will be null. + // Note: It is the responsibility of the registration binder to set this + // properly. + // WARNING: This is an experimental interface that is subject to change. + const char* custom_name; + + // The version of the op. + // Note: It is the responsibility of the registration binder to set this + // properly. + int version; + + // The external version of `TfLiteRegistration`. Since we can't use internal + // types (such as `TfLiteContext`) for C API to maintain ABI stability. + // C API user will provide `TfLiteRegistrationExternal` to implement custom + // ops. 
We keep it inside of `TfLiteRegistration` and use it to route + // callbacks properly. + TfLiteRegistrationExternal* registration_external; +} TfLiteRegistration; + +// Old version of `TfLiteRegistration` to maintain binary backward +// compatibility. +// WARNING: This structure is deprecated / not an official part of the API. +// It should be only used for binary backward compatibility. +typedef struct TfLiteRegistration_V1 { + void* (*init)(TfLiteContext* context, const char* buffer, size_t length); + void (*free)(TfLiteContext* context, void* buffer); + TfLiteStatus (*prepare)(TfLiteContext* context, TfLiteNode* node); + TfLiteStatus (*invoke)(TfLiteContext* context, TfLiteNode* node); + const char* (*profiling_string)(const TfLiteContext* context, + const TfLiteNode* node); + int32_t builtin_code; + const char* custom_name; + int version; +} TfLiteRegistration_V1; + +// The flags used in `TfLiteDelegate`. Note that this is a bitmask, so the +// values should be 1, 2, 4, 8, ...etc. +typedef enum TfLiteDelegateFlags { + kTfLiteDelegateFlagsNone = 0, + // The flag is set if the delegate can handle dynamic sized tensors. + // For example, the output shape of a `Resize` op with non-constant shape + // can only be inferred when the op is invoked. + // In this case, the Delegate is responsible for calling + // `SetTensorToDynamic` to mark the tensor as a dynamic tensor, and calling + // `ResizeTensor` when invoking the op. + // + // If the delegate isn't capable to handle dynamic tensors, this flag need + // to be set to false. + kTfLiteDelegateFlagsAllowDynamicTensors = 1, + + // This flag can be used by delegates (that allow dynamic tensors) to ensure + // applicable tensor shapes are automatically propagated in the case of tensor + // resizing. + // This means that non-dynamic (allocation_type != kTfLiteDynamic) I/O tensors + // of a delegate kernel will have correct shapes before its Prepare() method + // is called. The runtime leverages TFLite builtin ops in the original + // execution plan to propagate shapes. + // + // A few points to note: + // 1. This requires kTfLiteDelegateFlagsAllowDynamicTensors. If that flag is + // false, this one is redundant since the delegate kernels are re-initialized + // every time tensors are resized. + // 2. Enabling this flag adds some overhead to AllocateTensors(), since extra + // work is required to prepare the original execution plan. + // 3. This flag requires that the original execution plan only have ops with + // valid registrations (and not 'dummy' custom ops like with Flex). + // WARNING: This feature is experimental and subject to change. + kTfLiteDelegateFlagsRequirePropagatedShapes = 2, + + // This flag can be used by delegates to request per-operator profiling. If a + // node is a delegate node, this flag will be checked before profiling. If + // set, then the node will not be profiled. The delegate will then add per + // operator information using Profiler::EventType::OPERATOR_INVOKE_EVENT and + // the results will appear in the operator-wise Profiling section and not in + // the Delegate internal section. + kTfLiteDelegateFlagsPerOperatorProfiling = 4 +} TfLiteDelegateFlags; + +// WARNING: This is an experimental interface that is subject to change. +typedef struct TfLiteDelegate { + // Data that delegate needs to identify itself. This data is owned by the + // delegate. The delegate is owned in the user code, so the delegate is + // responsible for deallocating this when it is destroyed. 
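  // Editor's illustrative note (not part of this header): a delegate
  // implementation typically initializes this struct roughly as follows
  // (MyDelegateState and MyDelegatePrepare are hypothetical):
  //
  //   TfLiteDelegate d = TfLiteDelegateCreate();  // fields set to defaults
  //   d.data_ = new MyDelegateState();
  //   d.Prepare = MyDelegatePrepare;
  //   d.flags = kTfLiteDelegateFlagsNone;
  //
  // and releases data_ itself when the delegate is destroyed, as described
  // above.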
+ void* data_; + + // Invoked by ModifyGraphWithDelegate. This prepare is called, giving the + // delegate a view of the current graph through TfLiteContext*. It typically + // will look at the nodes and call ReplaceNodeSubsetsWithDelegateKernels() + // to ask the TensorFlow lite runtime to create macro-nodes to represent + // delegated subgraphs of the original graph. + TfLiteStatus (*Prepare)(TfLiteContext* context, + struct TfLiteDelegate* delegate); + + // Copy the data from delegate buffer handle into raw memory of the given + // 'tensor'. Note that the delegate is allowed to allocate the raw bytes as + // long as it follows the rules for kTfLiteDynamic tensors, in which case this + // cannot be null. + TfLiteStatus (*CopyFromBufferHandle)(TfLiteContext* context, + struct TfLiteDelegate* delegate, + TfLiteBufferHandle buffer_handle, + TfLiteTensor* tensor); + + // Copy the data from raw memory of the given 'tensor' to delegate buffer + // handle. This can be null if the delegate doesn't use its own buffer. + TfLiteStatus (*CopyToBufferHandle)(TfLiteContext* context, + struct TfLiteDelegate* delegate, + TfLiteBufferHandle buffer_handle, + TfLiteTensor* tensor); + + // Free the Delegate Buffer Handle. Note: This only frees the handle, but + // this doesn't release the underlying resource (e.g. textures). The + // resources are either owned by application layer or the delegate. + // This can be null if the delegate doesn't use its own buffer. + void (*FreeBufferHandle)(TfLiteContext* context, + struct TfLiteDelegate* delegate, + TfLiteBufferHandle* handle); + + // Bitmask flags. See the comments in `TfLiteDelegateFlags`. + int64_t flags; + + // The opaque delegate builder associated with this object. If set then the + // TF Lite runtime will give precedence to this field. E.g. instead of + // invoking 'Prepare' via the function pointer inside the 'TfLiteDelegate' + // object, the runtime will first check if the corresponding function + // pointer inside 'opaque_delegate_builder' is set and if so invoke that. + // + // If this field is non-null, then the 'Prepare' field (of the + // 'TfLiteDelegate') should be null. + struct TfLiteOpaqueDelegateBuilder* opaque_delegate_builder; +} TfLiteDelegate; + +// Build a 'null' delegate, with all the fields properly set to their default +// values. +TfLiteDelegate TfLiteDelegateCreate(void); + +// `TfLiteOpaqueDelegateBuilder` is used for constructing +// `TfLiteOpaqueDelegate`, see `TfLiteOpaqueDelegateCreate` below. Note: +// This struct is not ABI stable. +// +// For forward source compatibility `TfLiteOpaqueDelegateBuilder` objects should +// be brace-initialized, so that all fields (including any that might be added +// in the future) get zero-initialized. The purpose of each field is exactly +// the same as with `TfLiteDelegate`. +// +// WARNING: This is an experimental interface that is subject to change. +typedef struct TfLiteOpaqueDelegateBuilder { + // Data that delegate needs to identify itself. This data is owned by the + // delegate. The delegate is owned in the user code, so the delegate is + // responsible for deallocating this when it is destroyed. + void* data; + // Invoked by ModifyGraphWithDelegate. This prepare is called, giving the + // delegate a view of the current graph through TfLiteContext*. It typically + // will look at the nodes and call ReplaceNodeSubsetsWithDelegateKernels() + // to ask the TensorFlow lite runtime to create macro-nodes to represent + // delegated subgraphs of the original graph. 
+ TfLiteStatus (*Prepare)(TfLiteOpaqueContext* context, // NOLINT + TfLiteOpaqueDelegate* delegate, void* data); + // Copies the data from delegate buffer handle into raw memory of the given + // 'tensor'. Note that the delegate is allowed to allocate the raw bytes as + // long as it follows the rules for kTfLiteDynamic tensors, in which case this + // cannot be null. + TfLiteStatus (*CopyFromBufferHandle)( // NOLINT + TfLiteOpaqueContext* context, TfLiteOpaqueDelegate* delegate, void* data, + TfLiteBufferHandle buffer_handle, TfLiteOpaqueTensor* tensor); + // Copies the data from raw memory of the given 'tensor' to delegate buffer + // handle. This can be null if the delegate doesn't use its own buffer. + TfLiteStatus (*CopyToBufferHandle)( // NOLINT + TfLiteOpaqueContext* context, TfLiteOpaqueDelegate* delegate, void* data, + TfLiteBufferHandle buffer_handle, TfLiteOpaqueTensor* tensor); + // Frees the Delegate Buffer Handle. Note: This only frees the handle, but + // this doesn't release the underlying resource (e.g. textures). The + // resources are either owned by application layer or the delegate. + // This can be null if the delegate doesn't use its own buffer. + void (*FreeBufferHandle)(TfLiteOpaqueContext* context, // NOLINT + TfLiteOpaqueDelegate* delegate, void* data, + TfLiteBufferHandle* handle); + // Bitmask flags. See the comments in `TfLiteDelegateFlags`. + int64_t flags; +} TfLiteOpaqueDelegateBuilder; + +// Creates an opaque delegate and returns its address. The opaque delegate will +// behave according to the provided 'opaque_delegate_builder'. The lifetime of +// the objects pointed to by any of the fields within the +// 'opaque_delegate_builder' must outlive the returned +// 'TfLiteOpaqueDelegate' and any 'TfLiteInterpreter', +// 'TfLiteInterpreterOptions', 'tflite::Interpreter', or +// 'tflite::InterpreterBuilder' that the delegate is added to. The returned +// address should be passed to 'TfLiteOpaqueDelegateDelete' for deletion. If +// 'opaque_delegate_builder' is a null pointer, then a null pointer will be +// returned. +TfLiteOpaqueDelegate* TfLiteOpaqueDelegateCreate( + const TfLiteOpaqueDelegateBuilder* opaque_delegate_builder); + +// Deletes the provided opaque 'delegate'. This function has no effect if the +// 'delegate' is a null pointer. +void TfLiteOpaqueDelegateDelete(TfLiteOpaqueDelegate* delegate); + +// Returns a pointer to the data associated with the provided opaque 'delegate'. +// +// A null pointer will be returned when: +// - The 'delegate' is null. +// - The 'data' field of the 'TfLiteOpaqueDelegateBuilder' used to construct the +// 'delegate' was null. +// - Or in case of any other error. +// - The 'delegate' has been constructed via a 'TfLiteOpaqueDelegateBuilder', +// but the 'data' field of the 'TfLiteOpaqueDelegateBuilder' is null. +// +// The data_ field of 'delegate' will be returned if the +// 'opaque_delegate_builder' field is null. +void* TfLiteOpaqueDelegateGetData(const TfLiteOpaqueDelegate* delegate); + +#ifdef __cplusplus +} // extern "C" +#endif // __cplusplus +#endif // TENSORFLOW_LITE_CORE_C_COMMON_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h new file mode 100644 index 0000000..05af6fd --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h @@ -0,0 +1,1272 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_COMMON_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_COMMON_H_ + +#include +#ifndef ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK +#ifdef GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK +#define ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK +#endif +#endif + +#include +#include + +#include "edge-impulse-sdk/third_party/gemmlowp/fixedpoint/fixedpoint.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/cppmath.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/optimized/neon_check.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" + +namespace tflite { + +constexpr int kReverseShift = -1; + +inline void GetActivationMinMax(FusedActivationFunctionType ac, + float* output_activation_min, + float* output_activation_max) { + switch (ac) { + case FusedActivationFunctionType::kNone: + *output_activation_min = std::numeric_limits::lowest(); + *output_activation_max = std::numeric_limits::max(); + break; + case FusedActivationFunctionType::kRelu: + *output_activation_min = 0.f; + *output_activation_max = std::numeric_limits::max(); + break; + case FusedActivationFunctionType::kRelu1: + *output_activation_min = -1.f; + *output_activation_max = 1.f; + break; + case FusedActivationFunctionType::kRelu6: + *output_activation_min = 0.f; + *output_activation_max = 6.f; + break; + } +} + +template +inline T ActivationFunctionWithMinMax(T x, T output_activation_min, + T output_activation_max) { + using std::max; + using std::min; + return min(max(x, output_activation_min), output_activation_max); +} + +// Legacy function, left for compatibility only. +template +float ActivationFunction(float x) { + float output_activation_min, output_activation_max; + GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); + return ActivationFunctionWithMinMax(x, output_activation_min, + output_activation_max); +} + +inline void BiasAndClamp(float clamp_min, float clamp_max, int bias_size, + const float* bias_data, int array_size, + float* array_data) { + if (bias_size == 0) return; + // Note: see b/132215220: in May 2019 we thought it would be OK to replace + // this with the Eigen one-liner: + // return (array.colwise() + bias).cwiseMin(clamp_max).cwiseMin(clamp_max). + // This turned out to severely regress performance: +4ms (i.e. 8%) on + // MobileNet v2 / 1.0 / 224. So we keep custom NEON code for now. 
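  // Editor's note: the DCHECK below asserts that array_size is a whole number
  // of bias_size-long rows. The NEON path then walks each row adding the bias
  // and clamping 16 floats at a time, then 4 at a time, then one by one for
  // the remainder; the scalar fallback does the same work one element at a
  // time via ActivationFunctionWithMinMax.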
+ TFLITE_DCHECK_EQ((array_size % bias_size), 0); +#ifdef USE_NEON + float* array_ptr = array_data; + float* array_end_ptr = array_ptr + array_size; + const auto clamp_min_vec = vdupq_n_f32(clamp_min); + const auto clamp_max_vec = vdupq_n_f32(clamp_max); + for (; array_ptr != array_end_ptr; array_ptr += bias_size) { + int i = 0; + for (; i <= bias_size - 16; i += 16) { + auto b0 = vld1q_f32(bias_data + i); + auto b1 = vld1q_f32(bias_data + i + 4); + auto b2 = vld1q_f32(bias_data + i + 8); + auto b3 = vld1q_f32(bias_data + i + 12); + auto a0 = vld1q_f32(array_ptr + i); + auto a1 = vld1q_f32(array_ptr + i + 4); + auto a2 = vld1q_f32(array_ptr + i + 8); + auto a3 = vld1q_f32(array_ptr + i + 12); + auto x0 = vaddq_f32(a0, b0); + auto x1 = vaddq_f32(a1, b1); + auto x2 = vaddq_f32(a2, b2); + auto x3 = vaddq_f32(a3, b3); + x0 = vmaxq_f32(clamp_min_vec, x0); + x1 = vmaxq_f32(clamp_min_vec, x1); + x2 = vmaxq_f32(clamp_min_vec, x2); + x3 = vmaxq_f32(clamp_min_vec, x3); + x0 = vminq_f32(clamp_max_vec, x0); + x1 = vminq_f32(clamp_max_vec, x1); + x2 = vminq_f32(clamp_max_vec, x2); + x3 = vminq_f32(clamp_max_vec, x3); + vst1q_f32(array_ptr + i, x0); + vst1q_f32(array_ptr + i + 4, x1); + vst1q_f32(array_ptr + i + 8, x2); + vst1q_f32(array_ptr + i + 12, x3); + } + for (; i <= bias_size - 4; i += 4) { + auto b = vld1q_f32(bias_data + i); + auto a = vld1q_f32(array_ptr + i); + auto x = vaddq_f32(a, b); + x = vmaxq_f32(clamp_min_vec, x); + x = vminq_f32(clamp_max_vec, x); + vst1q_f32(array_ptr + i, x); + } + for (; i < bias_size; i++) { + array_ptr[i] = ActivationFunctionWithMinMax(array_ptr[i] + bias_data[i], + clamp_min, clamp_max); + } + } +#else // not NEON + for (int array_offset = 0; array_offset < array_size; + array_offset += bias_size) { + for (int i = 0; i < bias_size; i++) { + array_data[array_offset + i] = ActivationFunctionWithMinMax( + array_data[array_offset + i] + bias_data[i], clamp_min, clamp_max); + } + } +#endif +} + +// Single-rounding MultiplyByQuantizedMultiplier +#if TFLITE_SINGLE_ROUNDING +inline int32_t MultiplyByQuantizedMultiplier(int32_t x, + int32_t quantized_multiplier, + int shift) { + TFLITE_DCHECK(quantized_multiplier >= 0); + TFLITE_DCHECK(shift >= -31 && shift <= 30); + + const int64_t total_shift = 31 - shift; + const int64_t round = static_cast(1) << (total_shift - 1); + int64_t result = x * static_cast(quantized_multiplier) + round; + result = result >> total_shift; + + TFLITE_DCHECK(result >= std::numeric_limits::min() && + result <= std::numeric_limits::max()); + return static_cast(result); +} + +inline int32_t MultiplyByQuantizedMultiplierSmallerThanOneExp( + int32_t x, int32_t quantized_multiplier, int shift) { + TFLITE_DCHECK_LE(shift, 0); + return MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift); +} + +inline int32_t MultiplyByQuantizedMultiplierGreaterThanOne( + int32_t x, int32_t quantized_multiplier, int shift) { + TFLITE_DCHECK_GE(shift, 0); + return MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift); +} + +inline int32_t MultiplyByQuantizedMultiplier(int64_t x, + int32_t quantized_multiplier, + int shift) { + // Inputs: + // - quantized_multiplier has fixed point at bit 31 + // - shift is -31 to +7 (negative for right shift) + // + // Assumptions: The following input ranges are assumed + // - quantize_scale>=0 (the usual range is (1<<30) to (1>>31)-1) + // - scaling is chosen so final scaled result fits in int32_t + // - input x is in the range -(1<<47) <= x < (1<<47) + TFLITE_DCHECK(quantized_multiplier >= 0); + TFLITE_DCHECK(shift >= 
-31 && shift < 8); + TFLITE_DCHECK(x >= -(static_cast(1) << 47) && + x < (static_cast(1) << 47)); + + const int32_t reduced_multiplier = + (quantized_multiplier < 0x7FFF0000) + ? ((quantized_multiplier + (1 << 15)) >> 16) + : 0x7FFF; + const int64_t total_shift = 15 - shift; + const int64_t round = static_cast(1) << (total_shift - 1); + int64_t result = x * static_cast(reduced_multiplier) + round; + result = result >> total_shift; + + TFLITE_DCHECK(result >= std::numeric_limits::min() && + result <= std::numeric_limits::max()); + return static_cast(result); +} + +#ifdef USE_NEON +inline int32x4x4_t MultiplyByQuantizedMultiplier4Rows( + int32x4x4_t input_val, int32_t quantized_multiplier, int shift) { + TFLITE_DCHECK(quantized_multiplier >= 0); + + const int right_shift = std::min(-1, shift); + const int left_shift = shift - right_shift; + + const int32x4_t multiplier_dup = vdupq_n_s32(quantized_multiplier); + const int32x4_t left_shift_dup = vdupq_n_s32(left_shift); + const int32x4_t right_shift_dup = vdupq_n_s32(right_shift); + + int32x4x4_t result; + result.val[0] = vrshlq_s32( + vqdmulhq_s32(vshlq_s32(input_val.val[0], left_shift_dup), multiplier_dup), + right_shift_dup); + + result.val[1] = vrshlq_s32( + vqdmulhq_s32(vshlq_s32(input_val.val[1], left_shift_dup), multiplier_dup), + right_shift_dup); + + result.val[2] = vrshlq_s32( + vqdmulhq_s32(vshlq_s32(input_val.val[2], left_shift_dup), multiplier_dup), + right_shift_dup); + + result.val[3] = vrshlq_s32( + vqdmulhq_s32(vshlq_s32(input_val.val[3], left_shift_dup), multiplier_dup), + right_shift_dup); + + return result; +} +#endif // USE_NEON +// Double-rounding MultiplyByQuantizedMultiplier +#else +inline int32_t MultiplyByQuantizedMultiplierSmallerThanOneExp( + int32_t x, int32_t quantized_multiplier, int left_shift) { + using gemmlowp::RoundingDivideByPOT; + using gemmlowp::SaturatingRoundingDoublingHighMul; + return RoundingDivideByPOT( + SaturatingRoundingDoublingHighMul(x, quantized_multiplier), -left_shift); +} + +inline int32_t MultiplyByQuantizedMultiplierGreaterThanOne( + int32_t x, int32_t quantized_multiplier, int left_shift) { + using gemmlowp::SaturatingRoundingDoublingHighMul; + return SaturatingRoundingDoublingHighMul(x * (1 << left_shift), + quantized_multiplier); +} + +inline int32_t MultiplyByQuantizedMultiplier(int32_t x, + int32_t quantized_multiplier, + int shift) { + using gemmlowp::RoundingDivideByPOT; + using gemmlowp::SaturatingRoundingDoublingHighMul; + int left_shift = shift > 0 ? shift : 0; + int right_shift = shift > 0 ? 0 : -shift; + return RoundingDivideByPOT(SaturatingRoundingDoublingHighMul( + x * (1 << left_shift), quantized_multiplier), + right_shift); +} + +inline int32_t MultiplyByQuantizedMultiplier(int64_t x, + int32_t quantized_multiplier, + int shift) { + // Inputs: + // - quantized_multiplier has fixed point at bit 31 + // - shift is -31 to +7 (negative for right shift) + // + // Assumptions: The following input ranges are assumed + // - quantize_scale>=0 (the usual range is (1<<30) to (1>>31)-1) + // - scaling is chosen so final scaled result fits in int32_t + // - input x is in the range -(1<<47) <= x < (1<<47) + assert(quantized_multiplier >= 0); + assert(shift >= -31 && shift < 8); + assert(x >= -(static_cast(1) << 47) && + x < (static_cast(1) << 47)); + + int32_t reduced_multiplier = (quantized_multiplier < 0x7FFF0000) + ? 
((quantized_multiplier + (1 << 15)) >> 16) + : 0x7FFF; + int total_shift = 15 - shift; + x = (x * (int64_t)reduced_multiplier) + ((int64_t)1 << (total_shift - 1)); + int32_t result = x >> total_shift; + return result; +} + +#ifdef USE_NEON +// Round uses ARM's rounding shift right. +inline int32x4x4_t MultiplyByQuantizedMultiplier4Rows( + int32x4x4_t input_val, int32_t quantized_multiplier, int shift) { + const int left_shift = std::max(shift, 0); + const int right_shift = std::min(shift, 0); + int32x4x4_t result; + + int32x4_t multiplier_dup = vdupq_n_s32(quantized_multiplier); + int32x4_t left_shift_dup = vdupq_n_s32(left_shift); + int32x4_t right_shift_dup = vdupq_n_s32(right_shift); + + result.val[0] = + vrshlq_s32(vqrdmulhq_s32(vshlq_s32(input_val.val[0], left_shift_dup), + multiplier_dup), + right_shift_dup); + + result.val[1] = + vrshlq_s32(vqrdmulhq_s32(vshlq_s32(input_val.val[1], left_shift_dup), + multiplier_dup), + right_shift_dup); + + result.val[2] = + vrshlq_s32(vqrdmulhq_s32(vshlq_s32(input_val.val[2], left_shift_dup), + multiplier_dup), + right_shift_dup); + + result.val[3] = + vrshlq_s32(vqrdmulhq_s32(vshlq_s32(input_val.val[3], left_shift_dup), + multiplier_dup), + right_shift_dup); + + return result; +} +#endif // USE_NEON +#endif // TFLITE_SINGLE_ROUNDING + +template +int CountLeadingZeros(T integer_input) { + static_assert(std::is_unsigned::value, + "Only unsigned integer types handled."); +#if defined(__GNUC__) + return integer_input ? __builtin_clz(integer_input) + : std::numeric_limits::digits; +#else + if (integer_input == 0) { + return std::numeric_limits::digits; + } + + const T one_in_leading_positive = static_cast(1) + << (std::numeric_limits::digits - 1); + int leading_zeros = 0; + while (integer_input < one_in_leading_positive) { + integer_input <<= 1; + ++leading_zeros; + } + return leading_zeros; +#endif +} + +template +inline int CountLeadingSignBits(T integer_input) { + static_assert(std::is_signed::value, "Only signed integer types handled."); +#if defined(__GNUC__) && !defined(__clang__) + return integer_input ? __builtin_clrsb(integer_input) + : std::numeric_limits::digits; +#else + using U = typename std::make_unsigned::type; + return integer_input >= 0 + ? CountLeadingZeros(static_cast(integer_input)) - 1 + : integer_input != std::numeric_limits::min() + ? CountLeadingZeros(2 * static_cast(-integer_input) - 1) + : 0; +#endif +} + +// Use "count leading zeros" helper functions to do a fast Floor(log_2(x)). +template +inline Integer FloorLog2(Integer n) { + static_assert(std::is_integral::value, ""); + static_assert(std::is_signed::value, ""); + static_assert(sizeof(Integer) == 4 || sizeof(Integer) == 8, ""); + TFLITE_CHECK_GT(n, 0); + if (sizeof(Integer) == 4) { + return 30 - CountLeadingSignBits(n); + } else { + return 62 - CountLeadingSignBits(n); + } +} + +namespace detail { + +// LUTPopulate takes an optional type-erased transform_params to allow passing +// extra parameters to the transform function pointer. 
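// Editor's illustrative note on the MultiplyByQuantizedMultiplier() helpers
// above (not part of this header): a real-valued scale S is typically
// factored as S = M * 2^shift with M normalized to [0.5, 1) and stored as the
// Q31 integer quantized_multiplier = round(M * 2^31). For example,
// S = 0.375 = 0.75 * 2^-1 gives quantized_multiplier = 1610612736 and
// shift = -1, so
//
//   MultiplyByQuantizedMultiplier(1000, 1610612736, -1)
//
// returns 375 (i.e. 1000 * 0.375) in both the single-rounding and
// double-rounding builds.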
const void* is used +// instead of std::function to be compatible with TFLite Micro +template +inline typename std::enable_if::value, + FloatT>::type +LUTTransform(Func transform, const void* /*transform_params*/, FloatT value) { + static_assert(std::is_floating_point::value, + "FloatT must be a floating-point type."); + return transform(value); +} + +template +inline typename std::enable_if< + std::is_same::value, FloatT>::type +LUTTransform(Func transform, const void* transform_params, FloatT value) { + static_assert(std::is_floating_point::value, + "FloatT must be a floating-point type."); + return transform(value, transform_params); +} + +// Use the same LUT generation code for both uint8_t and int8_t. Int8_t indexes +// will be directly casted to uint8_t, the int8 LUT will thus be ordered as [0, +// 1, ..., 127, -128, ..., -2, -1] instead of [-128, -127, ..., -1, 0, 1, ..., +// 126, 127]. +template +inline void LUTPopulateInt8(float input_scale, int32_t input_zero_point, + float output_scale, int32_t output_zero_point, + Func transform, const void* transform_params, + T* lut) { + static_assert( + std::is_same::value || std::is_same::value, + "T must be an uint8 or int8 type."); + uint8_t* lut_uint8 = reinterpret_cast(lut); + const float inverse_scale = 1 / output_scale; + int32_t maxval = std::numeric_limits::max(); + int32_t minval = std::numeric_limits::min(); + for (int32_t val = minval; val <= maxval; ++val) { + const float dequantized = input_scale * (val - input_zero_point); + const float transformed = + LUTTransform(transform, transform_params, dequantized); + const float rescaled = TfLiteRound(transformed * inverse_scale); + const int32_t quantized = + static_cast(rescaled + output_zero_point); + lut_uint8[static_cast(static_cast(val))] = static_cast( + static_cast(std::max(std::min(maxval, quantized), minval))); + } +} + +// Keep floating-point type configurable for backward compatibility. float +// should be used for FloatT by default. 
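// Editor's illustrative note (not part of this header): because
// LUTPopulateInt8() above indexes the table with the int8 input cast to
// uint8_t, entry 0 holds the output for input 0, entry 127 the output for
// input 127, and entries 128..255 the outputs for inputs -128..-1. The int8
// LUTLookup() further below performs the same cast, so the two stay
// consistent, e.g.
//
//   int8_t y = lut[static_cast<uint8_t>(x)];  // what LUTLookup(int8) does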
+template +inline void LUTPopulateInt16(FloatT input_scale, int32_t input_zero_point, + FloatT output_scale, int32_t output_zero_point, + Func transform, const void* transform_params, + int16_t* lut) { + static_assert(std::is_floating_point::value, + "FloatT must be a floating-point type."); + const FloatT input_min = + input_scale * (std::numeric_limits::min() - input_zero_point); + const FloatT input_max = + input_scale * (std::numeric_limits::max() - input_zero_point); + const FloatT output_min = + output_scale * (std::numeric_limits::min() - output_zero_point); + const FloatT output_max = + output_scale * (std::numeric_limits::max() - output_zero_point); + + const int nb_steps = 512; + const FloatT step = (input_max - input_min) / nb_steps; + const FloatT half_step = step / 2; + const FloatT output_scaling_inv = + static_cast(std::numeric_limits::max() - + std::numeric_limits::min() + 1) / + (output_max - output_min); + const FloatT table_min = + static_cast(std::numeric_limits::min()); + const FloatT table_max = + static_cast(std::numeric_limits::max()); + + for (int i = 0; i < nb_steps; i++) { + const FloatT val = + LUTTransform(transform, transform_params, input_min + i * step); + const FloatT val_midpoint = LUTTransform( + transform, transform_params, input_min + i * step + half_step); + const FloatT val_next = LUTTransform(transform, transform_params, + input_min + (i + 1) * step); + + const FloatT sample_val = TfLiteRound(val * output_scaling_inv); + const FloatT midpoint_interp_val = + TfLiteRound((val_next * output_scaling_inv + + TfLiteRound(val * output_scaling_inv)) / + 2); + const FloatT midpoint_val = TfLiteRound(val_midpoint * output_scaling_inv); + const FloatT midpoint_err = midpoint_interp_val - midpoint_val; + const FloatT bias = TfLiteRound(midpoint_err / 2); + + lut[i] = static_cast(std::min( + std::max(sample_val - bias, table_min), table_max)); + } + + lut[nb_steps] = static_cast(std::min( + std::max(TfLiteRound(LUTTransform( + transform, transform_params, input_max) * + output_scaling_inv), + table_min), + table_max)); +} + +} // namespace detail + +template +inline typename std::enable_if::value || + std::is_same::value, + void>::type +LUTPopulate(float input_scale, int32_t input_zero_point, float output_scale, + int32_t output_zero_point, float (*transform)(float), T* lut) { + detail::LUTPopulateInt8(input_scale, input_zero_point, output_scale, + output_zero_point, transform, nullptr, lut); +} + +template +inline typename std::enable_if::value || + std::is_same::value, + void>::type +LUTPopulate(float input_scale, int32_t input_zero_point, float output_scale, + int32_t output_zero_point, float (*transform)(float, const void*), + const void* transform_params, T* lut) { + detail::LUTPopulateInt8(input_scale, input_zero_point, output_scale, + output_zero_point, transform, transform_params, lut); +} + +template +inline typename std::enable_if::value, void>::type +LUTPopulate(float input_scale, int32_t input_zero_point, float output_scale, + int32_t output_zero_point, float (*transform)(float), T* lut) { + detail::LUTPopulateInt16(input_scale, input_zero_point, output_scale, + output_zero_point, transform, nullptr, lut); +} + +template +inline typename std::enable_if::value, void>::type +LUTPopulate(float input_scale, int32_t input_zero_point, float output_scale, + int32_t output_zero_point, float (*transform)(float, const void*), + const void* transform_params, T* lut) { + detail::LUTPopulateInt16(input_scale, input_zero_point, output_scale, + output_zero_point, 
transform, + transform_params, lut); +} + +// Deprecated, avoid usage and prefer the float version. Kept for +// backward-compatiblity. +template +inline typename std::enable_if::value, void>::type +LUTPopulate(double input_scale, int32_t input_zero_point, double output_scale, + int32_t output_zero_point, double (*transform)(double), T* lut) { + detail::LUTPopulateInt16(input_scale, input_zero_point, output_scale, + output_zero_point, transform, nullptr, lut); +} + +// The size of the LUT depends on the type of input. For uint8 and int8 inputs a +// simple 256 entries LUT is used. For int16 inputs the high 9 bits are used for +// indexing and the 7 remaining bits are used for interpolation. We thus use a +// 513-entries LUT for int16 cases, 512 for the 9-bit indexing and 1 extra entry +// to interpolate the last value. +template +constexpr int LUTSize() { + static_assert(std::is_same::value || + std::is_same::value || + std::is_same::value, + "Only LUTs with uint8, int8 or int16 inputs are supported."); + // As per c++11: constexpr methods cannot have more than one return statement. + return (std::is_same::value || std::is_same::value) + ? 256 + : 513; +} + +// int16_t -> int16_t table lookup with interpolation +// LUT must have 513 values +inline int16_t LUTLookup(int16_t value, const int16_t* lut) { + // 512 base values, lut[513] is only used to calculate the slope + const uint16_t index = static_cast(256 + (value >> 7)); + assert(index < 512 && "LUT index out of range."); + const int16_t offset = value & 0x7f; + + // Base and slope are Q0.x + const int16_t base = lut[index]; + const int16_t slope = lut[index + 1] - lut[index]; + + // Q0.x * Q0.7 = Q0.(x + 7) + // Round and convert from Q0.(x + 7) to Q0.x + const int delta = (slope * offset + 64) >> 7; + + // Q0.15 + Q0.15 + return static_cast(base + delta); +} + +// int8_t -> int8_t table lookup without interpolation +// LUT must have 256 values +// LUTPopulate has ordered the LUT so that indexing it with an +// int8_t is just done by casting it to an uint8_t. +inline int8_t LUTLookup(int8_t value, const int8_t* lut) { + return lut[static_cast(value)]; +} + +// uint8_t -> uint8_t table lookup without interpolation +// LUT must have 256 values +inline uint8_t LUTLookup(uint8_t value, const uint8_t* lut) { + return lut[value]; +} + +// Table of sigmoid(i/24) at 0.16 format - 256 elements. + +// We use combined sigmoid and tanh look-up table, since +// tanh(x) = 2*sigmoid(2*x) -1. +// Both functions are symmetric, so the LUT table is only needed +// for the absolute value of the input. 
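// Editor's illustrative sketch (not part of this header): putting the pieces
// above together, an int8 kernel could build and apply a sigmoid lookup table
// as follows. ExampleSigmoid, the input/output buffers, and the scale and
// zero-point values are hypothetical, and std::exp is assumed to be available
// via <cmath>:
//
//   static float ExampleSigmoid(float x) { return 1.0f / (1.0f + std::exp(-x)); }
//
//   int8_t lut[LUTSize<int8_t>()];  // 256 entries for int8 inputs
//   LUTPopulate<int8_t>(input_scale, input_zero_point,
//                       output_scale, output_zero_point, ExampleSigmoid, lut);
//   for (int i = 0; i < size; ++i) {
//     output[i] = LUTLookup(input[i], lut);
//   }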
+static const uint16_t sigmoid_table_uint16[256] = { + 32768, 33451, 34133, 34813, 35493, 36169, 36843, 37513, 38180, 38841, 39498, + 40149, 40794, 41432, 42064, 42688, 43304, 43912, 44511, 45102, 45683, 46255, + 46817, 47369, 47911, 48443, 48964, 49475, 49975, 50464, 50942, 51409, 51865, + 52311, 52745, 53169, 53581, 53983, 54374, 54755, 55125, 55485, 55834, 56174, + 56503, 56823, 57133, 57433, 57724, 58007, 58280, 58544, 58800, 59048, 59288, + 59519, 59743, 59959, 60168, 60370, 60565, 60753, 60935, 61110, 61279, 61441, + 61599, 61750, 61896, 62036, 62172, 62302, 62428, 62549, 62666, 62778, 62886, + 62990, 63090, 63186, 63279, 63368, 63454, 63536, 63615, 63691, 63765, 63835, + 63903, 63968, 64030, 64090, 64148, 64204, 64257, 64308, 64357, 64405, 64450, + 64494, 64536, 64576, 64614, 64652, 64687, 64721, 64754, 64786, 64816, 64845, + 64873, 64900, 64926, 64950, 64974, 64997, 65019, 65039, 65060, 65079, 65097, + 65115, 65132, 65149, 65164, 65179, 65194, 65208, 65221, 65234, 65246, 65258, + 65269, 65280, 65291, 65301, 65310, 65319, 65328, 65337, 65345, 65352, 65360, + 65367, 65374, 65381, 65387, 65393, 65399, 65404, 65410, 65415, 65420, 65425, + 65429, 65433, 65438, 65442, 65445, 65449, 65453, 65456, 65459, 65462, 65465, + 65468, 65471, 65474, 65476, 65479, 65481, 65483, 65485, 65488, 65489, 65491, + 65493, 65495, 65497, 65498, 65500, 65501, 65503, 65504, 65505, 65507, 65508, + 65509, 65510, 65511, 65512, 65513, 65514, 65515, 65516, 65517, 65517, 65518, + 65519, 65520, 65520, 65521, 65522, 65522, 65523, 65523, 65524, 65524, 65525, + 65525, 65526, 65526, 65526, 65527, 65527, 65528, 65528, 65528, 65529, 65529, + 65529, 65529, 65530, 65530, 65530, 65530, 65531, 65531, 65531, 65531, 65531, + 65532, 65532, 65532, 65532, 65532, 65532, 65533, 65533, 65533, 65533, 65533, + 65533, 65533, 65533, 65534, 65534, 65534, 65534, 65534, 65534, 65534, 65534, + 65534, 65534, 65535}; + +// TODO(b/77858996): Add these to gemmlowp. +template +IntegerType SaturatingAddNonGemmlowp(IntegerType a, IntegerType b) { + static_assert(std::is_same::value, "unimplemented"); + return a; +} + +template <> +inline std::int32_t SaturatingAddNonGemmlowp(std::int32_t a, std::int32_t b) { + std::int64_t a64 = a; + std::int64_t b64 = b; + std::int64_t sum = a64 + b64; + return static_cast(std::min( + static_cast(std::numeric_limits::max()), + std::max( + static_cast(std::numeric_limits::min()), + sum))); +} + +template +gemmlowp::FixedPoint SaturatingAddNonGemmlowp( + gemmlowp::FixedPoint a, + gemmlowp::FixedPoint b) { + return gemmlowp::FixedPoint::FromRaw( + SaturatingAddNonGemmlowp(a.raw(), b.raw())); +} + +template +IntegerType SaturatingSub(IntegerType a, IntegerType b) { + static_assert(std::is_same::value, "unimplemented"); + return a; +} + +template <> +inline std::int16_t SaturatingSub(std::int16_t a, std::int16_t b) { + std::int32_t a32 = a; + std::int32_t b32 = b; + std::int32_t diff = a32 - b32; + return static_cast( + std::min(static_cast(32767), + std::max(static_cast(-32768), diff))); +} + +template <> +inline std::int32_t SaturatingSub(std::int32_t a, std::int32_t b) { + std::int64_t a64 = a; + std::int64_t b64 = b; + std::int64_t diff = a64 - b64; + return static_cast(std::min( + static_cast(std::numeric_limits::max()), + std::max( + static_cast(std::numeric_limits::min()), + diff))); +} + +template +gemmlowp::FixedPoint SaturatingSub( + gemmlowp::FixedPoint a, + gemmlowp::FixedPoint b) { + return gemmlowp::FixedPoint::FromRaw( + SaturatingSub(a.raw(), b.raw())); +} +// End section to be moved to gemmlowp. 
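// Editor's illustrative note (not part of this header): the saturating
// helpers above clamp to the limits of the integer type instead of wrapping
// around on overflow. For example (values chosen purely for illustration):
//
//   SaturatingSub(static_cast<std::int16_t>(-30000),
//                 static_cast<std::int16_t>(10000));   // == -32768, not 25536
//   SaturatingAddNonGemmlowp(static_cast<std::int32_t>(2000000000),
//                            static_cast<std::int32_t>(2000000000));
//                                                      // == 2147483647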
+ +template +IntegerType SaturatingRoundingMultiplyByPOTParam(IntegerType x, int exponent) { + if (exponent == 0) { + return x; + } + using ScalarIntegerType = + typename gemmlowp::FixedPointRawTypeTraits::ScalarRawType; + const IntegerType min = + gemmlowp::Dup(std::numeric_limits::min()); + const IntegerType max = + gemmlowp::Dup(std::numeric_limits::max()); + const int ScalarIntegerTypeBits = 8 * sizeof(ScalarIntegerType); + + const std::int32_t threshold = + ((1 << (ScalarIntegerTypeBits - 1 - exponent)) - 1); + const IntegerType positive_mask = + gemmlowp::MaskIfGreaterThan(x, gemmlowp::Dup(threshold)); + const IntegerType negative_mask = + gemmlowp::MaskIfLessThan(x, gemmlowp::Dup(-threshold)); + + IntegerType result = gemmlowp::ShiftLeft(x, exponent); + result = gemmlowp::SelectUsingMask(positive_mask, max, result); + result = gemmlowp::SelectUsingMask(negative_mask, min, result); + return result; +} + +// If we want to leave IntegerBits fixed, then multiplication +// by a power of two has to be saturating/rounding, not exact anymore. +template +gemmlowp::FixedPoint +SaturatingRoundingMultiplyByPOTParam( + gemmlowp::FixedPoint a, int exponent) { + return gemmlowp::FixedPoint::FromRaw( + SaturatingRoundingMultiplyByPOTParam(a.raw(), exponent)); +} + +// Convert int32_t multiplier to int16_t with rounding. +inline void DownScaleInt32ToInt16Multiplier(int32_t multiplier_int32_t, + int16_t* multiplier_int16_t) { + TFLITE_DCHECK_GE(multiplier_int32_t, 0); + static constexpr int32_t kRoundingOffset = 1 << 15; + if (multiplier_int32_t >= + std::numeric_limits::max() - kRoundingOffset) { + *multiplier_int16_t = std::numeric_limits::max(); + return; + } + const int32_t result = (multiplier_int32_t + kRoundingOffset) >> 16; + TFLITE_DCHECK_LE(result << 16, multiplier_int32_t + kRoundingOffset); + TFLITE_DCHECK_GT(result << 16, multiplier_int32_t - kRoundingOffset); + *multiplier_int16_t = result; + TFLITE_DCHECK_EQ(*multiplier_int16_t, result); +} + +// Minimum output bits to accommodate log of maximum input range. It actually +// does not matter if one considers, say, [-64,64] or [-64,64). +// +// For example, run this through Octave: +// [0:127; ... +// ceil(log(abs( log(2.^(0:127))+1 ))/log(2)); ... +// ceil(log(abs( log(2.^(0:127))+1 ))/log(2))] +constexpr int min_log_x_output_bits(int input_bits) { + return input_bits > 90 ? 7 + : input_bits > 44 ? 6 + : input_bits > 21 ? 5 + : input_bits > 10 ? 4 + : input_bits > 4 ? 3 + : input_bits > 1 ? 2 + : 1; +} + +// Although currently the name of this function says that it cannot handle +// values less than 1, in practice it can handle as low as 1/x_max, where +// x_max is the largest representable input. In other words, the output range +// is symmetric. +template +inline gemmlowp::FixedPoint +log_x_for_x_greater_than_or_equal_to_1_impl( + gemmlowp::FixedPoint input_val) { + // assert(__builtin_clz(0u) >= std::numeric_limits::digits - 1); + // assert(__builtin_clz(0u) <= std::numeric_limits::digits); + using FixedPoint0 = gemmlowp::FixedPoint; + // The reason for accumulating the result with an extra bit of headroom is + // that z_pow_2_adj * log_2 might be saturated, and adding num_scaled * + // recip_denom will otherwise introduce an error. 
+ static constexpr int kAccumIntegerBits = OutputIntegerBits + 1; + using FixedPointAccum = gemmlowp::FixedPoint; + + const FixedPoint0 log_2 = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT( + FixedPoint0, 1488522236, std::log(2.0)); + const FixedPoint0 sqrt_sqrt_half = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT( + FixedPoint0, 1805811301, std::sqrt(std::sqrt(0.5))); + const FixedPoint0 sqrt_half = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT( + FixedPoint0, 1518500250, std::sqrt(0.5)); + const FixedPoint0 one_quarter = + GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(FixedPoint0, 536870912, 1.0 / 4.0); + + const FixedPoint0 alpha_n = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT( + FixedPoint0, 117049297, 11.0 / 240.0 * std::sqrt(std::sqrt(2.0))); + const FixedPoint0 alpha_d = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT( + FixedPoint0, 127690142, 1.0 / 20.0 * std::sqrt(std::sqrt(2.0))); + const FixedPoint0 alpha_i = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT( + FixedPoint0, 1057819769, + 2.0 / std::sqrt(std::sqrt(2.0)) - std::sqrt(std::sqrt(2.0))); + const FixedPoint0 alpha_f = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT( + FixedPoint0, 638450708, 1.0 / 4.0 * std::sqrt(std::sqrt(2.0))); + + const FixedPointAccum shifted_quarter = + gemmlowp::Rescale(one_quarter); + + // Reinterpret the input value as Q0.31, because we will figure out the + // required shift "ourselves" instead of using, say, Rescale. + FixedPoint0 z_a = FixedPoint0::FromRaw(input_val.raw()); + // z_a_pow_2 = input_integer_bits - z_a_headroom; + int z_a_headroom_plus_1 = CountLeadingZeros(static_cast(z_a.raw())); + FixedPoint0 r_a_tmp = + SaturatingRoundingMultiplyByPOTParam(z_a, (z_a_headroom_plus_1 - 1)); + const int32_t r_a_raw = + SaturatingRoundingMultiplyByPOTParam((r_a_tmp * sqrt_half).raw(), 1); + // z_pow_2_adj = max(z_pow_2_a - 0.75, z_pow_2_b - 0.25); + // z_pow_2_adj = max(InputIntegerBits - z_a_headroom_plus_1 + 0.25, + // InputIntegerBits - z_b_headroom - 0.25); + const FixedPointAccum z_a_pow_2_adj = SaturatingAddNonGemmlowp( + FixedPointAccum::FromRaw(SaturatingRoundingMultiplyByPOTParam( + static_cast(InputIntegerBits - z_a_headroom_plus_1), + 31 - kAccumIntegerBits)), + shifted_quarter); + + // z_b is treated like z_a, but premultiplying by sqrt(0.5). 
+ FixedPoint0 z_b = z_a * sqrt_half; + int z_b_headroom = CountLeadingZeros(static_cast(z_b.raw())) - 1; + const int32_t r_b_raw = + SaturatingRoundingMultiplyByPOTParam(z_a.raw(), z_b_headroom); + const FixedPointAccum z_b_pow_2_adj = SaturatingSub( + FixedPointAccum::FromRaw(SaturatingRoundingMultiplyByPOTParam( + static_cast(InputIntegerBits - z_b_headroom), + 31 - kAccumIntegerBits)), + shifted_quarter); + + const FixedPoint0 r = FixedPoint0::FromRaw(std::min(r_a_raw, r_b_raw)); + const FixedPointAccum z_pow_2_adj = FixedPointAccum::FromRaw( + std::max(z_a_pow_2_adj.raw(), z_b_pow_2_adj.raw())); + + const FixedPoint0 p = gemmlowp::RoundingHalfSum(r, sqrt_sqrt_half); + FixedPoint0 q = r - sqrt_sqrt_half; + q = q + q; + + const FixedPoint0 common_sq = q * q; + const FixedPoint0 num = q * r + q * common_sq * alpha_n; + const FixedPoint0 denom_minus_one_0 = + p * (alpha_i + q + alpha_d * common_sq) + alpha_f * q; + const FixedPoint0 recip_denom = + one_over_one_plus_x_for_x_in_0_1(denom_minus_one_0); + + const FixedPointAccum num_scaled = gemmlowp::Rescale(num); + return gemmlowp::Rescale(z_pow_2_adj * log_2 + + num_scaled * recip_denom); +} + +template +inline gemmlowp::FixedPoint +log_x_for_x_greater_than_or_equal_to_1( + gemmlowp::FixedPoint input_val) { + static_assert( + OutputIntegerBits >= min_log_x_output_bits(InputIntegerBits), + "Output integer bits must be sufficient to accommodate logs of inputs."); + return log_x_for_x_greater_than_or_equal_to_1_impl( + input_val); +} + +inline int32_t GetReciprocal(int32_t x, int x_integer_digits, + int* num_bits_over_unit) { + int headroom_plus_one = CountLeadingZeros(static_cast(x)); + // This is the number of bits to the left of the binary point above 1.0. + // Consider x=1.25. In that case shifted_scale=0.8 and + // no later adjustment will be needed. + *num_bits_over_unit = x_integer_digits - headroom_plus_one; + const int32_t shifted_sum_minus_one = + static_cast((static_cast(x) << headroom_plus_one) - + (static_cast(1) << 31)); + + gemmlowp::FixedPoint shifted_scale = + gemmlowp::one_over_one_plus_x_for_x_in_0_1( + gemmlowp::FixedPoint::FromRaw(shifted_sum_minus_one)); + return shifted_scale.raw(); +} + +inline void GetInvSqrtQuantizedMultiplierExp(int32_t input, int reverse_shift, + int32_t* output_inv_sqrt, + int* output_shift) { + TFLITE_DCHECK_GE(input, 0); + if (input <= 1) { + // Handle the input value 1 separately to avoid overflow in that case + // in the general computation below (b/143972021). Also handle 0 as if it + // were a 1. 0 is an invalid input here (divide by zero) and 1 is a valid + // but rare/unrealistic input value. We can expect both to occur in some + // incompletely trained models, but probably not in fully trained models. 
+ *output_inv_sqrt = std::numeric_limits::max(); + *output_shift = 0; + return; + } + TFLITE_DCHECK_GT(input, 1); + *output_shift = 11; + while (input >= (1 << 29)) { + input /= 4; + ++*output_shift; + } + const unsigned max_left_shift_bits = + CountLeadingZeros(static_cast(input)) - 1; + const unsigned max_left_shift_bit_pairs = max_left_shift_bits / 2; + const unsigned left_shift_bit_pairs = max_left_shift_bit_pairs - 1; + *output_shift -= left_shift_bit_pairs; + input <<= 2 * left_shift_bit_pairs; + TFLITE_DCHECK_GE(input, (1 << 27)); + TFLITE_DCHECK_LT(input, (1 << 29)); + using gemmlowp::FixedPoint; + using gemmlowp::Rescale; + using gemmlowp::SaturatingRoundingMultiplyByPOT; + // Using 3 integer bits gives us enough room for the internal arithmetic in + // this Newton-Raphson iteration. + using F3 = FixedPoint; + using F0 = FixedPoint; + const F3 fixedpoint_input = F3::FromRaw(input >> 1); + const F3 fixedpoint_half_input = + SaturatingRoundingMultiplyByPOT<-1>(fixedpoint_input); + const F3 fixedpoint_half_three = + GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(F3, (1 << 28) + (1 << 27), 1.5); + // Newton-Raphson iteration + // Naive unoptimized starting guess: x = 1 + F3 x = F3::One(); + // Naive unoptimized number of iterations: 5 + for (int i = 0; i < 5; i++) { + const F3 x3 = Rescale<3>(x * x * x); + x = Rescale<3>(fixedpoint_half_three * x - fixedpoint_half_input * x3); + } + const F0 fixedpoint_half_sqrt_2 = + GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(F0, 1518500250, std::sqrt(2.) / 2.); + x = x * fixedpoint_half_sqrt_2; + *output_inv_sqrt = x.raw(); + if (*output_shift < 0) { + *output_inv_sqrt <<= -*output_shift; + *output_shift = 0; + } + // Convert right shift (right is positive) to left shift. + *output_shift *= reverse_shift; +} + +// DO NOT USE THIS STRUCT FOR NEW FUNCTIONALITY BEYOND IMPLEMENTING +// BROADCASTING. +// +// NdArrayDesc describes the shape and memory layout of an N-dimensional +// rectangular array of numbers. +// +// NdArrayDesc is basically identical to Dims defined in types.h. +// However, as Dims is to be deprecated, this class exists as an adaptor +// to enable simple unoptimized implementations of element-wise broadcasting +// operations. +template +struct NdArrayDesc { + // The "extent" of each dimension. Indices along dimension d must be in the + // half-open interval [0, extents[d]). + int extents[N]; + + // The number of *elements* (not bytes) between consecutive indices of each + // dimension. + int strides[N]; +}; + +// DO NOT USE THIS FUNCTION FOR NEW FUNCTIONALITY BEYOND IMPLEMENTING +// BROADCASTING. +// +// Same as Offset(), except takes as NdArrayDesc instead of Dims. 
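The NdArrayDesc machinery above is easiest to see with a toy broadcast. The sketch below is standalone and illustrative only (`Desc4` and `FlatIndex` are hypothetical stand-ins, not SDK types): it adds a 4-element bias to a 1x2x2x4 activation by stretching the bias extents and zeroing its strides in the broadcast dimensions, which is the same trick `NdArrayDescsForElementwiseBroadcast` applies; the flat-index arithmetic mirrors the `SubscriptToIndex` overloads that follow.

```cpp
#include <cstdio>

// Minimal stand-in for NdArrayDesc<4>: per-dimension extents and strides.
struct Desc4 {
  int extents[4];
  int strides[4];
};

// Same computation as SubscriptToIndex(const NdArrayDesc<4>&, ...).
static int FlatIndex(const Desc4& d, int i0, int i1, int i2, int i3) {
  return i0 * d.strides[0] + i1 * d.strides[1] + i2 * d.strides[2] +
         i3 * d.strides[3];
}

int main() {
  // 1x2x2x4 activation, row-major.
  const float act[16] = {0, 1, 2,  3,  4,  5,  6,  7,
                         8, 9, 10, 11, 12, 13, 14, 15};
  // Logically 1x1x1x4 bias.
  const float bias[4] = {100, 200, 300, 400};

  // Row-major strides for the 1x2x2x4 shape.
  const Desc4 act_desc = {{1, 2, 2, 4}, {16, 8, 4, 1}};
  // Bias descriptor after broadcast adjustment: extents stretched to match,
  // strides set to 0 wherever the original extent was 1.
  const Desc4 bias_desc = {{1, 2, 2, 4}, {0, 0, 0, 1}};

  for (int y = 0; y < 2; ++y) {
    for (int x = 0; x < 2; ++x) {
      for (int c = 0; c < 4; ++c) {
        const float sum = act[FlatIndex(act_desc, 0, y, x, c)] +
                          bias[FlatIndex(bias_desc, 0, y, x, c)];
        std::printf("%g ", sum);  // bias[c] is reused for every (y, x)
      }
    }
  }
  std::printf("\n");
  return 0;
}
```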
+inline int SubscriptToIndex(const NdArrayDesc<4>& desc, int i0, int i1, int i2, + int i3) { + TFLITE_DCHECK(i0 >= 0 && i0 < desc.extents[0]); + TFLITE_DCHECK(i1 >= 0 && i1 < desc.extents[1]); + TFLITE_DCHECK(i2 >= 0 && i2 < desc.extents[2]); + TFLITE_DCHECK(i3 >= 0 && i3 < desc.extents[3]); + return i0 * desc.strides[0] + i1 * desc.strides[1] + i2 * desc.strides[2] + + i3 * desc.strides[3]; +} + +inline int SubscriptToIndex(const NdArrayDesc<5>& desc, int indexes[5]) { + return indexes[0] * desc.strides[0] + indexes[1] * desc.strides[1] + + indexes[2] * desc.strides[2] + indexes[3] * desc.strides[3] + + indexes[4] * desc.strides[4]; +} + +inline int SubscriptToIndex(const NdArrayDesc<8>& desc, int indexes[8]) { + return indexes[0] * desc.strides[0] + indexes[1] * desc.strides[1] + + indexes[2] * desc.strides[2] + indexes[3] * desc.strides[3] + + indexes[4] * desc.strides[4] + indexes[5] * desc.strides[5] + + indexes[6] * desc.strides[6] + indexes[7] * desc.strides[7]; +} + +// Given the dimensions of the operands for an element-wise binary broadcast, +// adjusts them so that they can be directly iterated over with simple loops. +// Returns the adjusted dims as instances of NdArrayDesc in 'desc0_out' and +// 'desc1_out'. 'desc0_out' and 'desc1_out' cannot be nullptr. +// +// This function assumes that the two input shapes are compatible up to +// broadcasting and the shorter one has already been prepended with 1s to be the +// same length. E.g., if shape0 is (1, 16, 16, 64) and shape1 is (1, 64), +// shape1 must already have been prepended to be (1, 1, 1, 64). Recall that +// Dims refer to shapes in reverse order. In this case, input0_dims will be +// (64, 16, 16, 1) and input1_dims will be (64, 1, 1, 1). +// +// When two shapes are compatible up to broadcasting, for each dimension d, +// the input extents are either equal, or one of them is 1. +// +// This function performs the following for each dimension d: +// - If the extents are equal, then do nothing since the loop that walks over +// both of the input arrays is correct. +// - Otherwise, one (and only one) of the extents must be 1. Say extent0 is 1 +// and extent1 is e1. Then set extent0 to e1 and stride0 *to 0*. This allows +// array0 to be referenced *at any index* in dimension d and still access the +// same slice. +template +inline void NdArrayDescsForElementwiseBroadcast(const Dims& input0_dims, + const Dims& input1_dims, + NdArrayDesc* desc0_out, + NdArrayDesc* desc1_out) { + TFLITE_DCHECK(desc0_out != nullptr); + TFLITE_DCHECK(desc1_out != nullptr); + + // Copy dims to desc. + for (int i = 0; i < N; ++i) { + desc0_out->extents[i] = input0_dims.sizes[i]; + desc0_out->strides[i] = input0_dims.strides[i]; + desc1_out->extents[i] = input1_dims.sizes[i]; + desc1_out->strides[i] = input1_dims.strides[i]; + } + + // Walk over each dimension. If the extents are equal do nothing. + // Otherwise, set the desc with extent 1 to have extent equal to the other and + // stride 0. + for (int i = 0; i < N; ++i) { + const int extent0 = ArraySize(input0_dims, i); + const int extent1 = ArraySize(input1_dims, i); + if (extent0 != extent1) { + if (extent0 == 1) { + desc0_out->strides[i] = 0; + desc0_out->extents[i] = extent1; + } else { + TFLITE_DCHECK_EQ(extent1, 1); + desc1_out->strides[i] = 0; + desc1_out->extents[i] = extent0; + } + } + } +} + +// Copies dims to desc, calculating strides. 
+template +inline void CopyDimsToDesc(const RuntimeShape& input_shape, + NdArrayDesc* desc_out) { + int desc_stride = 1; + for (int i = N - 1; i >= 0; --i) { + desc_out->extents[i] = input_shape.Dims(i); + desc_out->strides[i] = desc_stride; + desc_stride *= input_shape.Dims(i); + } +} + +template +inline void NdArrayDescsForElementwiseBroadcast( + const RuntimeShape& input0_shape, const RuntimeShape& input1_shape, + NdArrayDesc* desc0_out, NdArrayDesc* desc1_out) { + TFLITE_DCHECK(desc0_out != nullptr); + TFLITE_DCHECK(desc1_out != nullptr); + + auto extended_input0_shape = RuntimeShape::ExtendedShape(N, input0_shape); + auto extended_input1_shape = RuntimeShape::ExtendedShape(N, input1_shape); + + // Copy dims to desc, calculating strides. + CopyDimsToDesc(extended_input0_shape, desc0_out); + CopyDimsToDesc(extended_input1_shape, desc1_out); + + // Walk over each dimension. If the extents are equal do nothing. + // Otherwise, set the desc with extent 1 to have extent equal to the other and + // stride 0. + for (int i = 0; i < N; ++i) { + const int extent0 = extended_input0_shape.Dims(i); + const int extent1 = extended_input1_shape.Dims(i); + if (extent0 != extent1) { + if (extent0 == 1) { + desc0_out->strides[i] = 0; + desc0_out->extents[i] = extent1; + } else { + TFLITE_DCHECK_EQ(extent1, 1); + desc1_out->strides[i] = 0; + desc1_out->extents[i] = extent0; + } + } + } +} + +template +inline void NdArrayDescsForElementwiseBroadcast( + const RuntimeShape& input0_shape, const RuntimeShape& input1_shape, + const RuntimeShape& input2_shape, NdArrayDesc* desc0_out, + NdArrayDesc* desc1_out, NdArrayDesc* desc2_out) { + TFLITE_DCHECK(desc0_out != nullptr); + TFLITE_DCHECK(desc1_out != nullptr); + TFLITE_DCHECK(desc2_out != nullptr); + + auto extended_input0_shape = RuntimeShape::ExtendedShape(N, input0_shape); + auto extended_input1_shape = RuntimeShape::ExtendedShape(N, input1_shape); + auto extended_input2_shape = RuntimeShape::ExtendedShape(N, input2_shape); + + // Copy dims to desc, calculating strides. + CopyDimsToDesc(extended_input0_shape, desc0_out); + CopyDimsToDesc(extended_input1_shape, desc1_out); + CopyDimsToDesc(extended_input2_shape, desc2_out); + + // Walk over each dimension. If the extents are equal do nothing. + // Otherwise, set the desc with extent 1 to have extent equal to the other and + // stride 0. + for (int i = 0; i < N; ++i) { + const int extent0 = extended_input0_shape.Dims(i); + const int extent1 = extended_input1_shape.Dims(i); + const int extent2 = extended_input2_shape.Dims(i); + + int extent = extent0; + if (extent1 != 1) extent = extent1; + if (extent2 != 1) extent = extent2; + + TFLITE_DCHECK(extent0 == 1 || extent0 == extent); + TFLITE_DCHECK(extent1 == 1 || extent1 == extent); + TFLITE_DCHECK(extent2 == 1 || extent2 == extent); + + if (!(extent0 == extent1 && extent1 == extent2)) { + if (extent0 == 1) { + desc0_out->strides[i] = 0; + desc0_out->extents[i] = extent; + } + if (extent1 == 1) { + desc1_out->strides[i] = 0; + desc1_out->extents[i] = extent; + } + if (extent2 == 1) { + desc2_out->strides[i] = 0; + desc2_out->extents[i] = extent; + } + } + } +} + +// Detailed implementation of NDOpsHelper, the indexes must be a zero array. +// This implementation is equivalent to N nested loops. 
Ex, if N=4, it can be +// re-writen as: +// for (int b = 0; b < output.extents[0]; ++b) { +// for (int y = 0; y < output.extents[1]; ++y) { +// for (int x = 0; x < output.extents[2]; ++x) { +// for (int c = 0; c < output.extents[3]; ++c) { +// calc({b,y,x,c}); +// } +// } +// } +// } +template +typename std::enable_if::type NDOpsHelperImpl( + const NdArrayDesc& output, const Calc& calc, int indexes[N]) { + for (indexes[DIM] = 0; indexes[DIM] < output.extents[DIM]; ++indexes[DIM]) { + NDOpsHelperImpl(output, calc, indexes); + } +} + +template +typename std::enable_if::type NDOpsHelperImpl( + const NdArrayDesc& output, const Calc& calc, int indexes[N]) { + for (indexes[DIM] = 0; indexes[DIM] < output.extents[DIM]; ++indexes[DIM]) { + calc(indexes); + } +} + +// Execute the calc function in the innermost iteration based on the shape of +// the output. The calc function should take a single argument of type int[N]. +template +inline void NDOpsHelper(const NdArrayDesc& output, const Calc& calc) { + int indexes[N] = {0}; + NDOpsHelperImpl(output, calc, indexes); +} +// Copied from gemmlowp::RoundDown when we dropped direct dependency on +// gemmlowp. +// +// Returns the runtime argument rounded down to the nearest multiple of +// the fixed Modulus. +template +Integer RoundDown(Integer i) { + return i - (i % Modulus); +} + +// Copied from gemmlowp::RoundUp when we dropped direct dependency on +// gemmlowp. +// +// Returns the runtime argument rounded up to the nearest multiple of +// the fixed Modulus. +template +Integer RoundUp(Integer i) { + return RoundDown(i + Modulus - 1); +} + +// Copied from gemmlowp::CeilQuotient when we dropped direct dependency on +// gemmlowp. +// +// Returns the quotient a / b rounded up ('ceil') to the nearest integer. +template +Integer CeilQuotient(Integer a, Integer b) { + return (a + b - 1) / b; +} + +// This function is a copy of gemmlowp::HowManyThreads, copied when we dropped +// the direct dependency of internal/optimized/ on gemmlowp. +// +// It computes a reasonable number of threads to use for a GEMM of shape +// (rows, cols, depth). +// +// TODO(b/131910176): get rid of this function by switching each call site +// to its own more sensible logic for its own workload. +template +inline int LegacyHowManyThreads(int max_num_threads, int rows, int cols, + int depth) { + // Early-exit in the default case where multi-threading is disabled. + if (max_num_threads == 1) { + return 1; + } + + // Ensure that each thread has KernelRows rows to process, if at all possible. + int thread_count = std::min(max_num_threads, rows / KernelRows); + + // Limit the number of threads according to the overall size of the problem. + if (thread_count > 1) { + // Empirically determined value. 
+ static constexpr std::uint64_t min_cubic_size_per_thread = 64 * 1024; + + // We can only multiply two out of three sizes without risking overflow + const std::uint64_t cubic_size = + std::uint64_t(rows) * std::uint64_t(cols) * std::uint64_t(depth); + + thread_count = std::min( + thread_count, static_cast(cubic_size / min_cubic_size_per_thread)); + } + + if (thread_count < 1) { + thread_count = 1; + } + + assert(thread_count > 0 && thread_count <= max_num_threads); + return thread_count; +} + +template +void optimized_ops_preload_l1_stream(const T* ptr) { +#ifdef __GNUC__ + // builtin offered by GCC-compatible compilers including clang + __builtin_prefetch(ptr, /* 0 means read */ 0, /* 0 means no locality */ 0); +#else + (void)ptr; +#endif +} + +template +void optimized_ops_preload_l1_keep(const T* ptr) { +#ifdef __GNUC__ + // builtin offered by GCC-compatible compilers including clang + __builtin_prefetch(ptr, /* 0 means read */ 0, /* 3 means high locality */ 3); +#else + (void)ptr; +#endif +} + +template +void optimized_ops_prefetch_write_l1_keep(const T* ptr) { +#ifdef __GNUC__ + // builtin offered by GCC-compatible compilers including clang + __builtin_prefetch(ptr, /* 1 means write */ 1, /* 3 means high locality */ 3); +#else + (void)ptr; +#endif +} + +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_COMMON_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h new file mode 100644 index 0000000..ede9cd6 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h @@ -0,0 +1,122 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_COMPATIBILITY_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_COMPATIBILITY_H_ + +#include + +#include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h" + +#ifndef TFLITE_DCHECK +#define TFLITE_DCHECK(condition) (condition) ? (void)0 : TFLITE_ASSERT_FALSE +#endif + +#ifndef TFLITE_DCHECK_EQ +#define TFLITE_DCHECK_EQ(x, y) ((x) == (y)) ? (void)0 : TFLITE_ASSERT_FALSE +#endif + +#ifndef TFLITE_DCHECK_NE +#define TFLITE_DCHECK_NE(x, y) ((x) != (y)) ? (void)0 : TFLITE_ASSERT_FALSE +#endif + +#ifndef TFLITE_DCHECK_GE +#define TFLITE_DCHECK_GE(x, y) ((x) >= (y)) ? (void)0 : TFLITE_ASSERT_FALSE +#endif + +#ifndef TFLITE_DCHECK_GT +#define TFLITE_DCHECK_GT(x, y) ((x) > (y)) ? (void)0 : TFLITE_ASSERT_FALSE +#endif + +#ifndef TFLITE_DCHECK_LE +#define TFLITE_DCHECK_LE(x, y) ((x) <= (y)) ? (void)0 : TFLITE_ASSERT_FALSE +#endif + +#ifndef TFLITE_DCHECK_LT +#define TFLITE_DCHECK_LT(x, y) ((x) < (y)) ? (void)0 : TFLITE_ASSERT_FALSE +#endif + +// TODO(ahentz): Clean up: We should stick to the DCHECK versions. +#ifndef TFLITE_CHECK +#define TFLITE_CHECK(condition) (condition) ? 
(void)0 : TFLITE_ABORT +#endif + +#ifndef TFLITE_CHECK_EQ +#define TFLITE_CHECK_EQ(x, y) ((x) == (y)) ? (void)0 : TFLITE_ABORT +#endif + +#ifndef TFLITE_CHECK_NE +#define TFLITE_CHECK_NE(x, y) ((x) != (y)) ? (void)0 : TFLITE_ABORT +#endif + +#ifndef TFLITE_CHECK_GE +#define TFLITE_CHECK_GE(x, y) ((x) >= (y)) ? (void)0 : TFLITE_ABORT +#endif + +#ifndef TFLITE_CHECK_GT +#define TFLITE_CHECK_GT(x, y) ((x) > (y)) ? (void)0 : TFLITE_ABORT +#endif + +#ifndef TFLITE_CHECK_LE +#define TFLITE_CHECK_LE(x, y) ((x) <= (y)) ? (void)0 : TFLITE_ABORT +#endif + +#ifndef TFLITE_CHECK_LT +#define TFLITE_CHECK_LT(x, y) ((x) < (y)) ? (void)0 : TFLITE_ABORT +#endif + +#ifndef TF_LITE_STATIC_MEMORY +// TODO(b/162019032): Consider removing these type-aliases. +using int8 = std::int8_t; +using uint8 = std::uint8_t; +using int16 = std::int16_t; +using uint16 = std::uint16_t; +using int32 = std::int32_t; +using uint32 = std::uint32_t; +#endif // !defined(TF_LITE_STATIC_MEMORY) + +// Allow for cross-compiler usage of function signatures - currently used for +// specifying named RUY profiler regions in templated methods. +#if defined(_MSC_VER) +#define TFLITE_PRETTY_FUNCTION __FUNCSIG__ +#elif defined(__GNUC__) +#define TFLITE_PRETTY_FUNCTION __PRETTY_FUNCTION__ +#else +#define TFLITE_PRETTY_FUNCTION __func__ +#endif + +// TFLITE_DEPRECATED() +// +// Duplicated from absl/base/macros.h to avoid pulling in that library. +// Marks a deprecated class, struct, enum, function, method and variable +// declarations. The macro argument is used as a custom diagnostic message (e.g. +// suggestion of a better alternative). +// +// Example: +// +// class TFLITE_DEPRECATED("Use Bar instead") Foo {...}; +// TFLITE_DEPRECATED("Use Baz instead") void Bar() {...} +// +// Every usage of a deprecated entity will trigger a warning when compiled with +// clang's `-Wdeprecated-declarations` option. This option is turned off by +// default, but the warnings will be reported by clang-tidy. +#if defined(__clang__) && __cplusplus >= 201103L +#define TFLITE_DEPRECATED(message) __attribute__((deprecated(message))) +#endif + +#ifndef TFLITE_DEPRECATED +#define TFLITE_DEPRECATED(message) +#endif + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_COMPATIBILITY_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/cppmath.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/cppmath.h new file mode 100644 index 0000000..c97cc31 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/cppmath.h @@ -0,0 +1,40 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_CPPMATH_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_CPPMATH_H_ + +#include + +namespace tflite { + +#if defined(TF_LITE_USE_GLOBAL_CMATH_FUNCTIONS) || \ + (defined(__ANDROID__) && !defined(__NDK_MAJOR__)) || defined(__ZEPHYR__) +#define TF_LITE_GLOBAL_STD_PREFIX +#else +#define TF_LITE_GLOBAL_STD_PREFIX std +#endif + +#define DECLARE_STD_GLOBAL_SWITCH1(tf_name, std_name) \ + template \ + inline T tf_name(const T x) { \ + return TF_LITE_GLOBAL_STD_PREFIX::std_name(x); \ + } + +DECLARE_STD_GLOBAL_SWITCH1(TfLiteRound, round); +DECLARE_STD_GLOBAL_SWITCH1(TfLiteExpm1, expm1); + +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_CPPMATH_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/max.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/max.h new file mode 100644 index 0000000..d636564 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/max.h @@ -0,0 +1,30 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_MAX_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_MAX_H_ + +#include + +namespace tflite { + +// Patched by Edge Impulse, remove std::fmax +template +inline T TfLiteMax(const T& x, const T& y) { + return std::max(x, y); +} + +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_MAX_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/min.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/min.h new file mode 100644 index 0000000..68049b4 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/min.h @@ -0,0 +1,30 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_MIN_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_MIN_H_ + +#include + +namespace tflite { + +// Patched by Edge Impulse, remove std::fmin +template +inline T TfLiteMin(const T& x, const T& y) { + return std::min(x, y); +} + +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_MIN_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/optimized/neon_check.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/optimized/neon_check.h new file mode 100644 index 0000000..7df1129 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/optimized/neon_check.h @@ -0,0 +1,20 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_NEON_CHECK_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_NEON_CHECK_H_ + +// TFLM does not need to utilize any Neon optimizations. + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_NEON_CHECK_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/portable_tensor.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/portable_tensor.h new file mode 100644 index 0000000..a03e502 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/portable_tensor.h @@ -0,0 +1,122 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_PORTABLE_TENSOR_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_PORTABLE_TENSOR_H_ + +#include + +#include "edge-impulse-sdk/tensorflow/lite/core/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" + +namespace tflite { + +inline RuntimeShape GetTensorShape(std::vector data) { + return RuntimeShape(data.size(), data.data()); +} + +// A list of tensors in a format that can be used by kernels like split and +// concatenation. +template +class VectorOfTensors { + public: + // Build with the tensors in 'tensor_list'. 
+ VectorOfTensors(const TfLiteContext& context, + const TfLiteIntArray& tensor_list) { + int num_tensors = tensor_list.size; + + all_data_.reserve(num_tensors); + all_shape_.reserve(num_tensors); + all_shape_ptr_.reserve(num_tensors); + + for (int i = 0; i < num_tensors; ++i) { + TfLiteTensor* t = context.GetTensor(&context, tensor_list.data[i]); + all_data_.push_back(GetTensorData(t)); + all_shape_.push_back(GetTensorShape(t)); + } + + // Taking the pointer from inside a std::vector is only OK if the vector is + // never modified, so we populate all_shape in the previous loop and then we + // are free to grab iterators here. + for (int i = 0; i < num_tensors; ++i) { + all_shape_ptr_.push_back(&all_shape_[i]); + } + } + // Return a pointer to the data pointers of all tensors in the list. For + // example: + // float* const* f = v.data(); + // f[0][1] is the second element of the first tensor. + T* const* data() const { return all_data_.data(); } + + // Return a pointer the shape pointers of all tensors in the list. For + // example: + // const RuntimeShape* const* d = v.dims(); + // dims[1] are the dimensions of the second tensor in the list. + const RuntimeShape* const* shapes() const { return all_shape_ptr_.data(); } + + private: + std::vector all_data_; + std::vector all_shape_; + std::vector all_shape_ptr_; +}; + +// A list of quantized tensors in a format that can be used by kernels like +// split and concatenation. +class VectorOfQuantizedTensors : public VectorOfTensors { + public: + // Build with the tensors in 'tensor_list'. + VectorOfQuantizedTensors(const TfLiteContext& context, + const TfLiteIntArray& tensor_list) + : VectorOfTensors(context, tensor_list) { + for (int i = 0; i < tensor_list.size; ++i) { + TfLiteTensor* t = context.GetTensor(&context, tensor_list.data[i]); + zero_point_.push_back(t->params.zero_point); + scale_.push_back(t->params.scale); + } + } + + const float* scale() const { return scale_.data(); } + const int32_t* zero_point() const { return zero_point_.data(); } + + private: + std::vector zero_point_; + std::vector scale_; +}; + +// Writes randomly accessed values from `input` sequentially into `output`. +template +class SequentialTensorWriter { + public: + SequentialTensorWriter(const TfLiteTensor* input, TfLiteTensor* output) { + input_data_ = GetTensorData(input); + output_ptr_ = GetTensorData(output); + } + SequentialTensorWriter(const T* input_data, T* output_data) + : input_data_(input_data), output_ptr_(output_data) {} + + void Write(int position) { *output_ptr_++ = input_data_[position]; } + void WriteN(int position, int len) { + memcpy(output_ptr_, &input_data_[position], sizeof(T) * len); + output_ptr_ += len; + } + + private: + const T* input_data_; + T* output_ptr_; +}; + +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_PORTABLE_TENSOR_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/portable_tensor_utils.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/portable_tensor_utils.cpp new file mode 100644 index 0000000..ec7ad76 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/portable_tensor_utils.cpp @@ -0,0 +1,86 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_TENSOR_UTILS_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_TENSOR_UTILS_H_ + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/portable_tensor_utils.h" + +#include +#include +#include + +#include "edge-impulse-sdk/tensorflow/lite/core/c/builtin_op_data.h" + +#if defined(_MSC_VER) +#define __restrict__ __restrict +#endif + +namespace tflite { + +// Not all backends support CpuBackendContext usage, so forward declare to avoid +// pulling in its implementation. Use of CpuBackendContext in method +// implementations is purely optional. +class CpuBackendContext; + +namespace tensor_utils { + +// Apply Rectified Linear to elements of a vector. +void ApplyReluToVector(const float* __restrict__ vector, int v_size, + float* __restrict__ result) { + for (int v = 0; v < v_size; v++) { + result[v] = std::max(0.0f, vector[v]); + } +} + +// Apply Rectified Linear 1 (cap to [-1;1]) to elements of a vector +void ApplyRelu1ToVector(const float* __restrict__ vector, int v_size, + float* __restrict__ result) { + for (int v = 0; v < v_size; v++) { + result[v] = std::max(-1.0f, std::min(vector[v], 1.0f)); + } +} + +// Apply Rectified Linear 6 (cap to [0;6]) to elements of a vector +void ApplyRelu6ToVector(const float* __restrict__ vector, int v_size, + float* __restrict__ result) { + for (int v = 0; v < v_size; v++) { + result[v] = std::max(0.0f, std::min(vector[v], 6.0f)); + } +} + +// Apply signbit to elements of a vector +void ApplySignbitToVector(const float* __restrict__ vector, int v_size, + float* __restrict__ result) { + for (int v = 0; v < v_size; v++) { + result[v] = std::signbit(vector[v]); + } +} + +void UnpackDenseInt4IntoInt8(const int8_t* src_buffer, int num_elements, + int8_t* dst_buffer) { + for (int i = 0; i < num_elements; i += 2) { + // Shift left first so that sign is properly extended when shifted right + dst_buffer[i] = static_cast(src_buffer[i / 2] << 4) >> 4; + // Break early if the tensor has odd length and the higher nibble should be + // ignored. + if (i + 1 == num_elements) break; + dst_buffer[i + 1] = static_cast(src_buffer[i / 2]) >> 4; + } +} + +} // namespace tensor_utils +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_TENSOR_UTILS_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/portable_tensor_utils.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/portable_tensor_utils.h new file mode 100644 index 0000000..5674e2e --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/portable_tensor_utils.h @@ -0,0 +1,623 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_PORTABLE_TENSOR_UTILS_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_PORTABLE_TENSOR_UTILS_H_ + +#include +#include +#include + +#include "edge-impulse-sdk/tensorflow/lite/core/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/core/c/common.h" + +#if defined(_MSC_VER) +#define __restrict__ __restrict +#endif + +namespace tflite { + +// Not all backends support CpuBackendContext usage, so forward declare to avoid +// pulling in its implementation. Use of CpuBackendContext in method +// implementations is purely optional. +class CpuBackendContext; + +namespace tensor_utils { + +// Multiplies a matrix with a scalar and reduce the result on each row to a +// scalar. +// Parameters: +// - matrix: matrix of size n_row * n_col +// - scalar: the scalar that is multiplied to each element in the matrix +// - n_row: the row count of the matrix +// - n_col: the column count of the matrix +// - output: the 32bit output +// Note: We do not need saturation because the int8 * int8 is safe from overflow +// in (2^31-1) / (2^14) = 131072, which is bigger than the n_row. Non-zero +// initial output value is not exceptionally large. +void MatrixScalarMultiplyAccumulate(const int8_t* matrix, int32_t scalar, + int32_t n_row, int32_t n_col, + int32_t* output); + +// Add another vector for each batch in the batch vector. +template +void VectorBatchVectorAdd(const T* vector, int v_size, int n_batch, + T* batch_vector) { + for (int b = 0; b < n_batch; b++) { + for (int i = 0; i < v_size; ++i) { + batch_vector[i] += vector[i]; + } + batch_vector += v_size; + } +} + +// Cwise product of two vectors. +template +inline void VectorVectorCwiseProduct(const T* vector1, const T* vector2, + int v_size, T* result) { + for (int v = 0; v < v_size; v++) { + *result++ = *vector1++ * *vector2++; + } +} + +// Cwise product of a vector and a batch-vector. +template +inline void VectorBatchVectorCwiseProduct(const T* vector, int v_size, + const T* batch_vector, int n_batch, + T* result) { + for (int b = 0; b < n_batch; b++) { + VectorVectorCwiseProduct(vector, batch_vector, v_size, result); + // Update the pointers. + result += v_size; + batch_vector += v_size; + } +} + +// Cwise product and accumulate of two vectors. Since it's a MAC operation, the +// assumption here is that result array is initialized to valid values. +template +inline void VectorVectorCwiseProductAccumulate(const T* __restrict__ vector1, + const T* __restrict__ vector2, + int v_size, + T* __restrict__ result) { + for (int v = 0; v < v_size; v++) { + *result++ += *vector1++ * *vector2++; + } +} + +// Cwise product and accumulate of a vector and a batch-vector. Since it's a MAC +// operation, the assumption here is that result array is initialized to valid +// values. 
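As a quick reference for the batched element-wise helpers above (and the accumulating variant declared just below), here is a standalone sketch of the layout they assume: one shared vector of length `v_size` applied to `n_batch` consecutive vectors stored back to back. `CwiseProductBatched` is a hypothetical name used for illustration, not the SDK function.

```cpp
#include <cstdio>

// Mirrors what VectorBatchVectorCwiseProduct computes:
// result[b][i] = vector[i] * batch_vector[b][i], batches stored contiguously.
template <typename T>
void CwiseProductBatched(const T* vector, int v_size, const T* batch_vector,
                         int n_batch, T* result) {
  for (int b = 0; b < n_batch; ++b) {
    for (int i = 0; i < v_size; ++i) {
      result[b * v_size + i] = vector[i] * batch_vector[b * v_size + i];
    }
  }
}

int main() {
  const float gate[3] = {0.5f, 1.0f, 2.0f};   // shared vector (v_size = 3)
  const float batch[6] = {1, 2, 3, 4, 5, 6};  // n_batch = 2, back to back
  float out[6];
  CwiseProductBatched(gate, 3, batch, 2, out);
  for (float v : out) std::printf("%g ", v);  // 0.5 2 6 2 5 12
  std::printf("\n");
  return 0;
}
```

The Accumulate variants use the same layout but add into `result` instead of overwriting it, which is why the comments above require `result` to be initialized to valid values first.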
+template +inline void VectorBatchVectorCwiseProductAccumulate(const T* vector, int v_size, + const T* batch_vector, + int n_batch, T* result) { + for (int b = 0; b < n_batch; b++) { + VectorVectorCwiseProductAccumulate(vector, batch_vector, v_size, result); + // Update the pointers. + result += v_size; + batch_vector += v_size; + } +} + +// Batch vector initialization with another vector. +template +void VectorBatchVectorAssign(const T* vector, int v_size, int n_batch, + T* batch_vector) { + for (int b = 0; b < n_batch; b++) { + std::copy_n(vector, v_size, batch_vector + b * v_size); + } +} + +// Checks if all entries of vector are zero for float. +bool IsZeroVector(const float* vector, int v_size); + +// Checks if all entries of vector are zero for int8. +bool IsZeroVector(const int8_t* vector, int v_size); + +// Quantizes a buffer of floating point values using a symmetric quantization +// (i.e. linear quantization without an offset) to 8-bit signed integers. +// It also outputs the range (min, max) of the floating point buffer, and the +// scaling factor used to quantize the values. +void SymmetricQuantizeFloats(const float* values, const int size, + int8_t* quantized_values, float* min_value, + float* max_value, float* scaling_factor); + +// Quantizes a buffer of floating point values using a symmetric quantization +// (i.e. linear quantization without an offset) to 8-bit signed integers. +// It uses the range (min, max) provided to the function to calculate the +// appropriate scaling factor to quantize the values. +void SymmetricQuantizeFloats(const float* values, const int size, + int8_t* quantized_values, float min_value, + float max_value, float* scaling_factor); + +void AsymmetricQuantizeFloats(const float* values, const int size, + int8_t* quantized_values, float* scaling_factor, + int32_t* offset); + +// Helper function to quantize floats. +// float_data_ptr input float vectors +// n_batch number of input vectors +// n_data size of a single input vector +// quantized_data_ptr (out) vector with quantized data +// scaling_factors (out) scaling factors (one per vector) +// zero_points (out) zero points (one per vector) +// do_asymmetric controls if the quantization should be asymmetric. +inline void BatchQuantizeFloats(const float* float_data_ptr, int n_batch, + int n_data, int8_t* quantized_data_ptr, + float* scaling_factors, int32_t* zero_points, + bool do_asymmetric) { + for (int b = 0; b < n_batch; ++b) { + const int offset = b * n_data; + if (do_asymmetric) { + tensor_utils::AsymmetricQuantizeFloats( + float_data_ptr + offset, n_data, quantized_data_ptr + offset, + &scaling_factors[b], &zero_points[b]); + } else { + float unused_min, unused_max; + tensor_utils::SymmetricQuantizeFloats( + float_data_ptr + offset, n_data, quantized_data_ptr + offset, + &unused_min, &unused_max, &scaling_factors[b]); + } + } +} + +// Multiplies a matrix by a "batched" vector (i.e. a matrix with a batch +// dimension composed by input vectors independent from each other). The result +// of the multiplication is accumulated to the passed result buffer. +// More specifically, for a matrix M of shape [n, i] and a batched-vector +// of shape [i, batch] it will first compute the product of shape [n, batch]. +// This product will be accumulated to the result buffer. +void MatrixBatchVectorMultiplyAccumulate(const float* matrix, int m_rows, + int m_cols, const float* vector, + int n_batch, float* result); + +// Same as the function above, but the matrix is a sparse tensor with block +// pattern 1x4. 
+// This function assumes that m_cols is a multiple of the block size (4 in this +// case) so that there's no incomplete block. +void SparseMatrixBatchVectorMultiplyAccumulate1x4( + const float* __restrict__ matrix, const int32_t* __restrict__ segments, + const int32_t* __restrict__ indices, int m_rows, int m_cols, + const float* __restrict__ vector, int n_batch, float* __restrict__ result); + +// Same as the function above, but the matrix is stored in block compressed +// sparse row format with block pattern 1x16 which consists of two arrays: +// 1. A matrix array stores non-zero blocks of the matrix in row major. +// 2. A ledger array stores nrows groups, one group per row. Each group starts +// with an integer representing the number of non-zero blocks for the +// corresponding row and follows with column indexes of the first element +// of each non-zero block. +// This function assumes that +// 1. m_cols is a multiple of 16 so that all blocks are full blocks. +// 2. m_cols < 254 * 16 so that block index can be represented by uint8. +void SparseMatrixBatchVectorMultiplyAccumulate( + const float* __restrict__ matrix, const uint8_t* __restrict__ ledger, + int m_rows, int m_cols, const float* __restrict__ vector, int n_batch, + float* __restrict__ result); + +// Same as the function above, but for values quantized using symmetric +// quantization (e.g. by calling SymmetricQuantizeFloats). +// The passed scaling factors is a buffer of the quantization scaling factors +// that will be used to dequentize the products into the final result buffer. +// These scaling factors are the multiplication of the matrix scaling factor +// by the vector's scaling factor, one per batch (i.e. this allows quantizing +// each batch in the batch-vector matrix independently). +void MatrixBatchVectorMultiplyAccumulate( + const int8_t* __restrict__ matrix, const int m_rows, const int m_cols, + const int8_t* __restrict__ vectors, + const float* __restrict__ scaling_factors, int n_batch, + float* __restrict__ result); + +// Same as the function above except that vector values +// are quantized with asymmetric quantization per-batch and the matrix +// is quantized per row. +void MatrixBatchVectorMultiplyAccumulate( + const int8_t* __restrict__ matrix, const int m_rows, const int m_cols, + const int8_t* __restrict__ vectors, + const float* __restrict__ scaling_factors, int n_batch, + float* __restrict__ result, const float* __restrict__ per_channel_scale, + const int32_t* __restrict__ input_offset); + +// Same as the function above, but the matrix is a sparse tensor with block +// pattern 1x16. +// This function assumes that m_cols is a multiple of the block size (16 in this +// case) so that there's no incomplete block. Also, it assumes all offsets of +// input, output and filter are zero. +void SparseMatrixBatchVectorMultiplyAccumulate1x16( + const int8_t* __restrict__ matrix, const int32_t* __restrict__ segments, + const int32_t* __restrict__ indices, int m_rows, int m_cols, + const int8_t* __restrict__ vector, const int32_t* __restrict__ bias_vector, + int n_batch, const int32_t input_offset, const int32_t output_multiplier, + const int32_t output_shift, const int32_t output_offset, + const int32_t output_activation_min, const int32_t output_activation_max, + int8_t* __restrict__ result); + +// Same as the function above, but the matrix is stored in block compressed +// sparse row format with block pattern 1x16 which consists of two arrays: +// 1. A matrix array stores non-zero blocks of the matrix in row major. 
+// 2. A ledger array stores nrows groups, one group per row. Each group starts +// with an integer representing the number of non-zero blocks for the +// corresponding row followed by column index of the first element of +// each non-zero block. +// This function assumes that +// 1. m_cols is a multiple of 16 so that all blocks are full blocks. +// 2. m_cols < 254 * 16 so that block index can be represented by uint8. +void SparseMatrixBatchVectorMultiplyAccumulate( + const int8_t* __restrict__ matrix, const uint8_t* __restrict__ ledger, + const int m_rows, const int m_cols, const int8_t* __restrict__ vectors, + const float* __restrict__ scaling_factors, int n_batch, + float* __restrict__ result); + +// Same as the above 8, 8, 8 integer matmul except for the presence of zero +// point and non-accumulative. +// TODO(b/148688698): remove this function by folding zero point calculation in +// prepare() function. +void MatrixBatchVectorMultiply(const int8_t* input, int32_t input_zeropoint, + const int8_t* input_to_gate_weights, + int32_t input_to_gate_effective_scale_a, + int32_t input_to_gate_effective_scale_b, + int32_t n_batch, int32_t n_input, int32_t n_cell, + int8_t* gate_output, int8_t gate_output_zp); + +// Same as above but has 16 bit and 8 bit input and 8 bit output. +// Used in projection when hidden is 16bit. +void MatrixBatchVectorMultiply(const int16_t* hidden, + const int8_t* hidden_to_output_weights, + int32_t proj_effective_scale_a, + int32_t proj_effective_scale_b, + const int32_t* gate_bias, int32_t n_batch, + int32_t n_hidden, int32_t n_output, + int32_t output_zp, int8_t* proj_output); + +// Apply Layer Normalization (https://arxiv.org/abs/1607.06450) to a Quantized +// vector. +// Parameters: +// - input: batch vector of size n_batch * n_input; 16 bit. +// - layer_norm_weights: the quantized layer normalization weights. +// - bias: the bias for the layer normalization. +// - layer_norm_scale_a: multiplier for scale factor. +// - layer_norm_scale_b: shift for scale factor. +// - variance_limit: the guard to make sure the inverse does not overflow. +// - n_batch: the number of batches. +// - n_input: the size for input and output. +// - output: the 16 bit output +void ApplyLayerNorm(const int16_t* input, const int16_t* layer_norm_weights, + const int32_t* bias, int32_t layer_norm_scale_a, + int32_t layer_norm_scale_b, int32_t variance_limit, + int n_batch, int n_input, int16_t* output); + +// Same as above but the internal calculation is done in float. +void ApplyLayerNormFloat(const int16_t* input, + const int16_t* layer_norm_weights, + int32_t layer_norm_scale_a, int32_t layer_norm_scale_b, + const int32_t* bias, int n_batch, int n_input, + int16_t* output); + +// Apply Sigmoid to a quantized vector. +// Parameters: +// - input: batch vector of size n_batch * n_input; 16 bit. +// - n_batch: the number of batches. +// - n_input: the size for input and output. +// - output: the 16 bit output +// The input is in Q3.12 format and the output is in Q0.15 format. +void ApplySigmoid(const int16_t* input, int32_t n_batch, int32_t n_input, + int16_t* output); + +// Same as above but the internal calcualtion is float. +void ApplySigmoidFloat(const int16_t* input, int32_t n_batch, int32_t n_input, + int16_t* output); + +// Apply Tanh to a quantized vector. +// Parameters: +// - integer_bits: the integer bits of the input. +// Currently supports 0, 1, 2, 3, 4, 5, 6. +// - input: batch vector of size n_batch * n_input; 16 bit. +// - n_batch: the number of batches. 
+// - n_input: the size for input and output. +// - output: the 16 bit output +// The input is in Qm.15-m format and the output is in Q0.15 format. +void ApplyTanh(int32_t intger_bits, const int16_t* input, int32_t n_batch, + int32_t n_input, int16_t* output); + +// Apply Tanh to a quantized vector. Tbe internal calculation is in float. +// - Input has 2^(integer_bits) as scale. +// - Output has Q0.15 as scale. +void ApplyTanhFloat(const int16_t* input, int32_t n_batch, int32_t n_input, + int32_t integer_bits, int16_t* output); + +// Element-wise multiplication of two quantized vectors. +// Parameters: +// - input_1: batch vector of size n_batch * n_input; 16 bit. +// - input_2: batch vector of size n_batch * n_input; 16 bit. +// - n_batch: the number of batches. +// - n_input: the size for input and output. +// - shift: the shift needed to produce the output. +// - output: the 16 bit output of size n_batch * n_input. +// Output does not need to be initialized. +void CwiseMul(const int16_t* input_1, const int16_t* input_2, int n_batch, + int n_input, int shift, int16_t* output); + +// Element-wise multiplication of two quantized vectors. +// Parameters: +// - input_1: batch vector of size n_batch * n_input; 16 bit. +// - input_2: batch vector of size n_batch * n_input; 16 bit. +// - n_batch: the number of batches. +// - n_input: the size for input and output. +// - shift: the shift needed to produce the output. +// - output: the 8 bit output of size n_batch * n_input. +// Output does not need to be initialized. +void CwiseMul(const int16_t* input_1, const int16_t* input_2, int n_batch, + int n_input, int shift, int8_t* output); + +// Element-wise multiplication of two quantized vectors with rescaling. +// Parameters: +// - input_1: batch vector of size n_batch * n_input; 16 bit. +// - input_2: batch vector of size n_batch * n_input; 16 bit. +// - multiplier: the multiplier part of scale. +// - shift: the shift part of scale. +// - n_batch: the number of batches. +// - n_input: the size for input and output. +// - output: the 8 bit output of size n_batch * n_input. +// - output_zp: the zero point of output. +// Output does not need to be initialized. +// Multiplier ("m") and shift ("s") are connected to scale ("s") with s = m * +// 2^(s - 31). +void CwiseMul(const int16_t* input_1, const int16_t* input_2, + int32_t multiplier, int32_t shift, int32_t n_batch, + int32_t n_input, int32_t output_zp, int8_t* output); + +// Element-wise saturating addition of two quantized vectors without rescaling. +// Parameters: +// - input_1: batch vector of size n_batch * n_input; 16 bit. +// - input_2: batch vector of size n_batch * n_input; 16 bit. +// - n_batch: the number of batches. +// - n_input: the size for input and output. +// - output: the 8 bit output of size n_batch * n_input. +// Output does not need to be initialized. +void CwiseAdd(const int16_t* input_1, const int16_t* input_2, int n_batch, + int n_input, int16_t* output); + +// Element-wise in-place clipping of a vector. Overloaded for float, int16_t, +// int8_t. Parameters: +// - vector: vector of size v_size. +// - v_size: the size of the vector. +// - clipping_value: the value used for clipping. +void CwiseClipping(float* vector, const int v_size, const float clipping_value); +void CwiseClipping(int16_t* vector, const int v_size, + const int16_t clipping_value); +void CwiseClipping(int8_t* vector, const int v_size, + const int8_t clipping_value); + +// Dot product of two vectors. 
+float VectorVectorDotProduct(const float* vector1, const float* vector2,
+    int v_size);
+
+// Dot product of two batch vectors of size n_batch * v_size:
+// vector1 = [x_1_1, x_1_2, ..., x_1_vsize,
+//            x_2_1, x_2_2, ..., x_2_vsize,
+//            ...
+//            x_nbatch_1, ..., x_nbatch_vsize]
+// vector2 = [y_1_1, y_1_2, ..., y_1_vsize,
+//            y_2_1, y_2_2, ..., y_2_vsize,
+//            ...
+//            y_nbatch_1, ..., y_nbatch_vsize]
+// Then result will be a vector of n_batch size starting from 'result':
+// [x_1_1 * y_1_1 + x_1_2 * y_1_2 + ... + x_1_vsize * y_1_vsize,
+//  x_2_1 * y_2_1 + x_2_2 * y_2_2 + ... + x_2_vsize * y_2_vsize,
+//  ...
+//  x_nbatch_1 * y_nbatch_1 + ... + x_nbatch_vsize * y_nbatch_vsize]
+template <typename T>
+inline void BatchVectorBatchVectorDotProduct(const T* vector1, const T* vector2,
+                                             int v_size, int n_batch,
+                                             T* result) {
+  for (int b = 0; b < n_batch; b++) {
+    result[b] = VectorVectorDotProduct(vector1, vector2, v_size);
+    vector1 += v_size;
+    vector2 += v_size;
+  }
+}
+
+// Same as above but input is 16 bit and output is 32 bit.
+void BatchVectorBatchVectorDotProduct(const int16_t* vector1,
+    const int16_t* vector2, int v_size,
+    int n_batch, int32_t* result);
+
+// Same as above, but inputs are 16 bit integer and output is 16 bit integer.
+void VectorBatchVectorCwiseProductAccumulate(const int16_t* vector, int v_size,
+    const int16_t* batch_vector,
+    int n_batch, int32_t multiplier,
+    int shift, int16_t* result);
+
+// Compute "1.0f - elements of vector" (used in CIFG).
+void Sub1Vector(const float* vector, int v_size, float* result);
+
+// Compute "1.0f - elements of vector" (used in CIFG) for int16 input.
+// "vector" has range [0, 32767] because it is the output of the sigmoid
+// function.
+void Sub1Vector(const int16_t* vector, int v_size, int16_t* result);
+
+// Reduce-sum on a float input vector:
+// input_vector: float pointer to the input vector.
+// output_vector: float pointer to the output vector.
+// output_size: output vector size.
+// reduction_size: number of consecutive elements from the input vector which
+// are added to get one element of the output.
+void ReductionSumVector(const float* input_vector, float* output_vector,
+    int output_size, int reduction_size);
+
+// Same as above but input/output is 32 bit integer.
+void ReductionSumVector(const int32_t* input_vector, int32_t* output_vector,
+    int output_size, int reduction_size);
+
+// Same as above but input is 8 bit integer.
+void ReductionSumVector(const int8_t* input_vector, int32_t* output_vector,
+    int output_size, int reduction_size);
+
+// Multiply all elements of a vector by a scalar.
+void VectorScalarMultiply(const int8_t* vector, int v_size, float scale,
+    float* result);
+
+// Layer norm for each batch.
+void MeanStddevNormalization(const float* input_vector, float* output_vector,
+    int v_size, int n_batch);
+
+// Saturating add with rescale on both inputs.
+void TwoGateSaturatingAdd(const int8_t* input, int8_t input_zp,
+    const int8_t* recurrent, int8_t recurrent_zp,
+    int32_t input_effective_scale_a,
+    int32_t input_effective_scale_b,
+    int32_t recurrent_effective_scale_a,
+    int32_t recurrent_effective_scale_b, int32_t n_batch,
+    int32_t n_cell, int16_t* output);
+
+// Same as the function above, but provides a scratch buffer for the
+// int8 x int8 -> int32 accumulation and a CpuBackendContext for the
+// accumulator computation.
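+// A rough call sketch (editor's note; the buffer sizes are assumptions
+// inferred from the parameter names, not a documented contract):
+//   matrix          : m_rows x m_cols int8 weights, row-major.
+//   vectors         : n_batch vectors of m_cols int8 values each.
+//   scaling_factors : one float scaling factor per batch.
+//   scratch         : assumed to hold n_batch * m_rows int32 accumulators.
+//   result          : n_batch * m_rows floats, accumulated into (not zeroed).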
+void MatrixBatchVectorMultiplyAccumulate( + const int8_t* __restrict__ matrix, const int m_rows, const int m_cols, + const int8_t* __restrict__ vectors, + const float* __restrict__ scaling_factors, int n_batch, + int32_t* __restrict__ scratch, float* __restrict__ result, + CpuBackendContext* __restrict__ context); + +// Same as the function above except that can make use of cached row sums. +void MatrixBatchVectorMultiplyAccumulate( + const int8_t* __restrict__ matrix, const int m_rows, const int m_cols, + const int8_t* __restrict__ vectors, const float* scaling_factors, + int n_batch, float* __restrict__ result, const float* per_channel_scale, + const int32_t* input_offset, int32_t* scratch, int32_t* row_sums, + bool* compute_row_sums, CpuBackendContext* context); + +// Same as the function above, but provides separate scaling factor for the +// matrix and the vectors. The scaling factors are multiplied in the +// scaling_factor_scratch buffer. +inline void MatrixBatchVectorMultiplyAccumulate( + const int8_t* __restrict__ matrix, const int m_rows, const int m_cols, + const int8_t* __restrict__ vectors, const float matrix_scaling_factor, + const float* vector_scaling_factors, int n_batch, + float* __restrict__ result, const float* per_channel_scale, + const int32_t* input_offset, int32_t* scratch, int32_t* row_sums, + bool* compute_row_sums, float* scaling_factor_scratch, + CpuBackendContext* context) { + for (int b = 0; b < n_batch; ++b) { + scaling_factor_scratch[b] = + vector_scaling_factors[b] * matrix_scaling_factor; + } + MatrixBatchVectorMultiplyAccumulate(matrix, m_rows, m_cols, vectors, + scaling_factor_scratch, n_batch, result, + per_channel_scale, input_offset, scratch, + row_sums, compute_row_sums, context); +} + +// Multiplies a matrix by a "batched" vector (i.e. a matrix with a batch +// dimension composed by input vectors independent from each other). The result +// of the multiplication is accumulated to the passed result buffer. +// More specifically, for a matrix M of shape [n, i] and a batched-vector +// of shape [i, batch] it will first compute the product of shape [n, batch]. +// This product will be accumulated to the result buffer, +// Parameters: +// - input: batch vector of size n_batch * n_input +// - bias: vector of size b_input +// - input_to_gate_weights: matrix of size n_input * n_output +// - multiplier: scalar +// - shift: scalar +// - n_batch: the batch size +// - n_input: the input size +// - n_output: the output size +// - output_zp: the zero point of the output. +// - scratch: batch vector of size n_batch * n_output +// - output: the 16 bit output +// Notes: +// - this is used for gate matmul: for non-cifg it is for input, forget, +// cell, output gates; for cifg, it is for forget, cell, output gates. +// - multiplier and shift combined gives the scale. +// - assumes input zero point is 0. +// - scratch is created for optimization purpose only. +// TODO(b/152066492): this can be removed if some future optimization +// work makes it unnecessary. +void MatrixBatchVectorMultiplyAccumulate( + const int8_t* input, const int32_t* bias, + const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift, + int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp, + int32_t* scratch, int16_t* output, CpuBackendContext* context); + +// Multiplies a matrix by a "batched" vector (i.e. a matrix with a batch +// dimension composed by input vectors independent from each other). 
The result +// of the multiplication is accumulated to the passed result buffer. +// More specifically, for a matrix M of shape [n, i] and a batched-vector +// of shape [i, batch] it will first compute the product of shape [n, batch]. +// This product will be accumulated to the result buffer, +// Parameters: +// - input: batch vector of size n_batch * n_input +// - bias: vector of size b_input +// - input_to_gate_weights: matrix of size n_input * n_output +// - multiplier: scalar +// - shift: scalar +// - n_batch: the batch size +// - n_input: the input size +// - n_output: the output size +// - output_zp: the zero point of the output. +// - scratch: batch vector of size n_batch * n_output +// - output: the 8 bit output +// Notes: +// - this is used for projection matmul. +// - multiplier and shift combined gives the scale. +// - assumes input zero point is 0. +// - scratch is created for optimization purpose only. +// TODO(b/152066492): this can be removed if some future optimization +// work makes it unnecessary. +void MatrixBatchVectorMultiplyAccumulate( + const int8_t* input, const int32_t* bias, + const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift, + int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp, + int32_t* scratch, int8_t* output, CpuBackendContext* context); + +// Apply Rectified Linear to elements of a vector. +void ApplyReluToVector(const float* __restrict__ vector, int v_size, + float* __restrict__ result); + +// Apply Rectified Linear 1 (cap to [-1;1]) to elements of a vector +void ApplyRelu1ToVector(const float* __restrict__ vector, int v_size, + float* __restrict__ result); + +// Apply Rectified Linear 6 (cap to [0;6]) to elements of a vector +void ApplyRelu6ToVector(const float* __restrict__ vector, int v_size, + float* __restrict__ result); + +// Apply signbit to elements of a vector +void ApplySignbitToVector(const float* __restrict__ vector, int v_size, + float* __restrict__ result); + +// Unpack or inflate `src_buffer` by taking each element and splitting it as +// two elements into `dst_buffer`. +// Parameters: +// src_buffer : Densely packed buffer containing int4 values +// num_elements : Number of elements stored in the buffer. Note that this can +// be smaller than the size of `src_buffer` by 1 if it's odd, +// in which case the last nibble in `src_buffer` is ignored. +// This should be equal to the size of `dst_buffer`. +// dst_buffer : Buffer to unpack into. Should be allocated by the caller. +// Size should be at least `num_elements`. +// Notes: +// For example, given `src_buffer = {0x12, 0x34};`, calling this function +// will return `dst_buffer = {0x02, 0x01, 0x04, 0x03}`. +void UnpackDenseInt4IntoInt8(const int8_t* src_buffer, int num_elements, + int8_t* dst_buffer); + +} // namespace tensor_utils + +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_PORTABLE_TENSOR_UTILS_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.cpp new file mode 100644 index 0000000..efd57db --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.cpp @@ -0,0 +1,416 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" + +#include +#include +#include + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/cppmath.h" + +namespace tflite { + +namespace { +// These constants are used to manipulate the binary representation of doubles. +// Double-precision binary64 floating point format is: +// Bit | 63 | 62-52 | 51-0 | +// | Sign | Exponent | Fraction | +// To avoid 64-bit integers as much as possible, I break this into high and +// low 32-bit chunks. High is: +// Bit | 31 | 30-20 | 19-0 | +// | Sign | Exponent | High Fraction | +// Low is: +// Bit | 31-0 | +// | Low Fraction | +// We then access the components through logical bit-wise operations to +// extract the parts needed, with the positions and masks derived from the +// layout shown above. +constexpr uint64_t kSignMask = 0x8000000000000000LL; +constexpr uint64_t kExponentMask = 0x7ff0000000000000LL; +constexpr int32_t kExponentShift = 52; +constexpr int32_t kExponentBias = 1023; +constexpr uint32_t kExponentIsBadNum = 0x7ff; +constexpr uint64_t kFractionMask = 0x000fffffffc00000LL; +constexpr uint32_t kFractionShift = 22; +constexpr uint32_t kFractionRoundingMask = 0x003fffff; +constexpr uint32_t kFractionRoundingThreshold = 0x00200000; +} // namespace + +void QuantizeMultiplier(double double_multiplier, int32_t* quantized_multiplier, + int* shift) { +#if TFLITE_SINGLE_ROUNDING + // Single-rounding MultiplyByQuantizedMultiplier only supports positive + // multipliers. + // TFLITE_DCHECK(double_multiplier >= 0); +#endif + if (double_multiplier == 0.) { + *quantized_multiplier = 0; + *shift = 0; + return; + } +#ifdef TFLITE_EMULATE_FLOAT + // If we're trying to avoid the use of floating-point instructions (for + // example on microcontrollers) then use an alternative implementation + // that only requires integer and bitwise operations. To enable this, you + // need to set the define during the build process for your platform. + int64_t q_fixed = IntegerFrExp(double_multiplier, shift); +#else // TFLITE_EMULATE_FLOAT + const double q = std::frexp(double_multiplier, shift); + auto q_fixed = static_cast(TfLiteRound(q * (1LL << 31))); +#endif // TFLITE_EMULATE_FLOAT + TFLITE_CHECK(q_fixed <= (1LL << 31)); + if (q_fixed == (1LL << 31)) { + q_fixed /= 2; + ++*shift; + } + TFLITE_CHECK_LE(q_fixed, std::numeric_limits::max()); + // A shift amount smaller than -31 would cause all bits to be shifted out + // and thus all results would be zero. We implement that instead with + // q_fixed==0, so as to avoid hitting issues with right-shift + // operations with shift amounts greater than 31. Note that this happens + // roughly when abs(double_multiplier) < 2^-31 and the present handling means + // that we're effectively flushing tiny double_multiplier's to zero. + // We could conceivably handle values in the range (roughly) [32, 63] + // as 'denormals' i.e. (shift==0, q_fixed < 2^30). 
In that point of view + // the present handling is just doing 'flush denormals to zero'. We could + // reconsider and actually generate nonzero denormals if a need arises. + if (*shift < -31) { + *shift = 0; + q_fixed = 0; + } +#if TFLITE_SINGLE_ROUNDING + // Single-rounding MultiplyByQuantizedMultiplier doesn't support a shift > 30, + // saturate it. + if (*shift > 30) { + *shift = 30; + q_fixed = (1LL << 31) - 1; + } +#endif + *quantized_multiplier = static_cast(q_fixed); +} + +void QuantizeMultiplierGreaterThanOne(double double_multiplier, + int32_t* quantized_multiplier, + int* left_shift) { + TFLITE_CHECK_GT(double_multiplier, 1.); + QuantizeMultiplier(double_multiplier, quantized_multiplier, left_shift); + TFLITE_CHECK_GE(*left_shift, 0); +} + +void QuantizeMultiplierSmallerThanOneExp(double double_multiplier, + int32_t* quantized_multiplier, + int* left_shift) { + TFLITE_CHECK_LT(double_multiplier, 1.); + TFLITE_CHECK_GT(double_multiplier, 0.); + int shift; + QuantizeMultiplier(double_multiplier, quantized_multiplier, &shift); + TFLITE_CHECK_LE(shift, 0); + *left_shift = shift; +} + +int64_t IntegerFrExp(double input, int* shift) { + // Make sure our assumptions about the double layout hold. + TFLITE_CHECK_EQ(8, sizeof(double)); + + // We want to access the bits of the input double value directly, which is + // tricky to do safely, so use a union to handle the casting. + union { + double double_value; + uint64_t double_as_uint; + } cast_union; + cast_union.double_value = input; + const uint64_t u = cast_union.double_as_uint; + + // If the bitfield is all zeros apart from the sign bit, this is a normalized + // zero value, so return standard values for this special case. + if ((u & ~kSignMask) == 0) { + *shift = 0; + return 0; + } + + // Deal with NaNs and Infs, which are always indicated with a fixed pattern in + // the exponent, and distinguished by whether the fractions are zero or + // non-zero. + const uint32_t exponent_part = ((u & kExponentMask) >> kExponentShift); + if (exponent_part == kExponentIsBadNum) { + *shift = std::numeric_limits::max(); + if (u & kFractionMask) { + // NaN, so just return zero (with the exponent set to INT_MAX). + return 0; + } else { + // Infinity, so return +/- INT_MAX. + if (u & kSignMask) { + return std::numeric_limits::min(); + } else { + return std::numeric_limits::max(); + } + } + } + + // The shift is fairly easy to extract from the high bits of the double value, + // just by masking it out and applying a bias. The std::frexp() implementation + // always returns values between 0.5 and 1.0 though, whereas the exponent + // assumes 1.0 to 2.0 is the standard range, so I add on one to match that + // interface. + *shift = (exponent_part - kExponentBias) + 1; + + // There's an implicit high bit in the double format definition, so make sure + // we include that at the top, and then reconstruct the rest of the fractional + // value from the remaining fragments. + int64_t fraction = 0x40000000 + ((u & kFractionMask) >> kFractionShift); + + // We're cutting off some bits at the bottom, so to exactly match the standard + // frexp implementation here we'll apply rounding by adding one to the least + // significant bit of the result if the discarded portion is over half of the + // maximum. + if ((u & kFractionRoundingMask) > kFractionRoundingThreshold) { + fraction += 1; + } + // Negate the fraction if the sign bit was set. 
+ if (u & kSignMask) { + fraction *= -1; + } + + return fraction; +} + +double DoubleFromFractionAndShift(int64_t fraction, int shift) { + union { + double double_value; + uint64_t double_as_uint; + } result; + + // Detect NaNs and infinities. + if (shift == std::numeric_limits::max()) { + if (fraction == 0) { + return std::numeric_limits::quiet_NaN(); + } else if (fraction > 0) { + return std::numeric_limits::infinity(); + } else { + return -std::numeric_limits::infinity(); + } + } + + // Return a normalized zero for a zero fraction. + if (fraction == 0) { + result.double_as_uint = 0; + return result.double_value; + } + + bool is_negative = (fraction < 0); + int64_t encoded_fraction = is_negative ? -fraction : fraction; + int64_t encoded_shift = (shift - 1); + while (encoded_fraction < 0x40000000) { + encoded_fraction *= 2; + encoded_shift -= 1; + } + while (encoded_fraction > 0x80000000) { + encoded_fraction /= 2; + encoded_shift += 1; + } + encoded_fraction -= 0x40000000; + if (encoded_shift < -1022) { + encoded_shift = -1023; + } else if (encoded_shift > 1022) { + encoded_shift = 1023; + } + encoded_shift += kExponentBias; + uint64_t encoded_sign = is_negative ? kSignMask : 0; + result.double_as_uint = encoded_sign | (encoded_shift << kExponentShift) | + (encoded_fraction << kFractionShift); + return result.double_value; +} + +double IntegerDoubleMultiply(double a, double b) { + int a_shift; + const int64_t a_fraction = IntegerFrExp(a, &a_shift); + int b_shift; + const int64_t b_fraction = IntegerFrExp(b, &b_shift); + // Detect NaNs and infinities. + if (a_shift == std::numeric_limits::max() || + (b_shift == std::numeric_limits::max())) { + return std::numeric_limits::quiet_NaN(); + } + const int result_shift = a_shift + b_shift + 1; + const int64_t result_fraction = (a_fraction * b_fraction) >> 32; + return DoubleFromFractionAndShift(result_fraction, result_shift); +} + +int IntegerDoubleCompare(double a, double b) { + int a_shift; + const int64_t a_fraction = IntegerFrExp(a, &a_shift); + int b_shift; + const int64_t b_fraction = IntegerFrExp(b, &b_shift); + + // Detect NaNs and infinities. + if (a_shift == std::numeric_limits::max() || + (b_shift == std::numeric_limits::max())) { + return 1; + } + + if ((a_fraction == 0) && (b_fraction < 0)) { + return 1; + } else if ((a_fraction < 0) && (b_fraction == 0)) { + return -1; + } else if (a_shift < b_shift) { + return -1; + } else if (a_shift > b_shift) { + return 1; + } else if (a_fraction < b_fraction) { + return -1; + } else if (a_fraction > b_fraction) { + return 1; + } else { + return 0; + } +} + +void PreprocessSoftmaxScaling(double beta, double input_scale, + int input_integer_bits, + int32_t* quantized_multiplier, int* left_shift) { + // If the overall multiplier (input and beta) is large, then exp() of an + // input difference of 1 scaled by this will be large. In other words, we + // can cap the multiplier and know that, when it is used, the output will be + // (round to) zero wherever the input is not at the maximum value. + + // If the overall scale is less than one, and input_integer_bits=0, then the + // result is double equivalent of Q0.31 (actually with more precision). Thus + // this generates a Q(input_integer_bits).(31-input_integer_bits) + // representation. 
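+  // Worked example (editor's illustration with hypothetical values): for
+  // beta = 1.0, input_scale = 1/256 and input_integer_bits = 5, the real
+  // multiplier is 1.0 * (1/256) * 2^(31-5) = 2^18 = 262144, well below the
+  // cap. QuantizeMultiplierGreaterThanOne then splits it into the significand
+  // 2^30 (0.5 in Q0.31) and left_shift 19, since 2^18 = 0.5 * 2^19.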
+#if TFLITE_SINGLE_ROUNDING + const double max_real_multiplier = (1LL << 30) - 1.0; +#else + const double max_real_multiplier = (1LL << 31) - 1.0; +#endif + +#ifdef TFLITE_EMULATE_FLOAT + const double input_beta = IntegerDoubleMultiply(beta, input_scale); + int shift; + int64_t fraction = IntegerFrExp(input_beta, &shift); + shift += (31 - input_integer_bits); + double input_beta_real_multiplier = + DoubleFromFractionAndShift(fraction, shift); + if (IntegerDoubleCompare(input_beta_real_multiplier, max_real_multiplier) > + 0) { + input_beta_real_multiplier = max_real_multiplier; + } +#else // TFLITE_EMULATE_FLOAT + const double input_beta_real_multiplier = + std::min(beta * input_scale * (1 << (31 - input_integer_bits)), + max_real_multiplier); +#endif // TFLITE_EMULATE_FLOAT + + QuantizeMultiplierGreaterThanOne(input_beta_real_multiplier, + quantized_multiplier, left_shift); +} + +void PreprocessLogSoftmaxScalingExp(double beta, double input_scale, + int input_integer_bits, + int32_t* quantized_multiplier, + int* left_shift, + int32_t* reverse_scaling_divisor, + int* reverse_scaling_left_shift) { + PreprocessSoftmaxScaling(beta, input_scale, input_integer_bits, + quantized_multiplier, left_shift); + + // Also calculate what amounts to the inverse scaling factor for the input. + const double real_reverse_scaling_divisor = + (1 << (31 - *left_shift)) / static_cast(*quantized_multiplier); + tflite::QuantizeMultiplierSmallerThanOneExp(real_reverse_scaling_divisor, + reverse_scaling_divisor, + reverse_scaling_left_shift); +} + +int CalculateInputRadius(int input_integer_bits, int input_left_shift, + int total_signed_bits) { +#ifdef TFLITE_EMULATE_FLOAT + int64_t result = (1 << input_integer_bits) - 1; + result <<= (total_signed_bits - input_integer_bits); + result >>= input_left_shift; + return result; +#else // TFLITE_EMULATE_FLOAT + const double max_input_rescaled = + 1.0 * ((1 << input_integer_bits) - 1) * + (1LL << (total_signed_bits - input_integer_bits)) / + (1LL << input_left_shift); + // Tighten bound using floor. Suppose that we could use the exact value. + // After scaling the difference, the result would be at the maximum. Thus we + // must ensure that our value has lower magnitude. + return static_cast(std::floor(max_input_rescaled)); +#endif // TFLITE_EMULATE_FLOAT +} + +void NudgeQuantizationRange(const float min, const float max, + const int quant_min, const int quant_max, + float* nudged_min, float* nudged_max, + float* nudged_scale) { + // This code originates from tensorflow/core/kernels/fake_quant_ops_functor.h. + const float quant_min_float = static_cast(quant_min); + const float quant_max_float = static_cast(quant_max); + *nudged_scale = (max - min) / (quant_max_float - quant_min_float); + const float zero_point_from_min = quant_min_float - min / *nudged_scale; + uint16_t nudged_zero_point; + if (zero_point_from_min < quant_min_float) { + nudged_zero_point = static_cast(quant_min); + } else if (zero_point_from_min > quant_max_float) { + nudged_zero_point = static_cast(quant_max); + } else { + nudged_zero_point = static_cast(TfLiteRound(zero_point_from_min)); + } + *nudged_min = (quant_min_float - nudged_zero_point) * (*nudged_scale); + *nudged_max = (quant_max_float - nudged_zero_point) * (*nudged_scale); +} + +void FakeQuantizeArray(const float nudged_scale, const float nudged_min, + const float nudged_max, const float* input_data, + float* output_data, const float size) { + // This code originates from tensorflow/core/kernels/fake_quant_ops_functor.h. 
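+  // Worked example (editor's illustration with hypothetical values): with
+  // nudged_scale = 0.5, nudged_min = -64.0 and nudged_max = 63.5 (255 steps),
+  // an input of 10.3 is clamped to 10.3, shifted to 74.3, rounded to 149
+  // steps, and mapped back to 149 * 0.5 + (-64.0) = 10.5.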
+ const float inv_nudged_scale = 1.0f / nudged_scale; + + for (int i = 0; i < size; i++) { + const float src_val = input_data[i]; + const float clamped = std::min(nudged_max, std::max(nudged_min, src_val)); + const float clamped_shifted = clamped - nudged_min; + const float dst_val = + TfLiteRound(clamped_shifted * inv_nudged_scale) * nudged_scale + + nudged_min; + output_data[i] = dst_val; + } +} + +bool CheckedLog2(const float x, int* log2_result) { + // Using TfLiteRound instead of std::round and std::log instead of + // std::log2 to work around these functions being missing in a toolchain + // used in some TensorFlow tests as of May 2018. + const float x_log2 = std::log(x) * (1.0f / std::log(2.0f)); + const float x_log2_rounded = TfLiteRound(x_log2); + const float x_log2_fracpart = x_log2 - x_log2_rounded; + + *log2_result = static_cast(x_log2_rounded); + return std::abs(x_log2_fracpart) < 1e-3f; +} + +void QuantizeMultiplierArray(const double* effective_scales, size_t size, + int32_t* effective_scale_significand, + int* effective_shift) { + for (size_t i = 0; i < size; ++i) { + QuantizeMultiplier(effective_scales[i], &effective_scale_significand[i], + &effective_shift[i]); + } +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h new file mode 100644 index 0000000..9571d1f --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h @@ -0,0 +1,292 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_QUANTIZATION_UTIL_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_QUANTIZATION_UTIL_H_ + +#include +#include +#include + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/cppmath.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" + +namespace tflite { + +// Given the min and max values of a float array, return +// reasonable quantization parameters to use for this array. +template +QuantizationParams ChooseQuantizationParams(double rmin, double rmax, + bool narrow_range) { + const T qmin = std::numeric_limits::min() + (narrow_range ? 1 : 0); + const T qmax = std::numeric_limits::max(); + const double qmin_double = qmin; + const double qmax_double = qmax; + // 0 should always be a representable value. Let's assume that the initial + // min,max range contains 0. + TFLITE_CHECK_LE(rmin, 0.); + TFLITE_CHECK_GE(rmax, 0.); + if (rmin == rmax) { + // Special case where the min,max range is a point. Should be {0}. + TFLITE_CHECK_EQ(rmin, 0.); + TFLITE_CHECK_EQ(rmax, 0.); + QuantizationParams quantization_params; + quantization_params.zero_point = 0; + quantization_params.scale = 0.; + return quantization_params; + } + + // General case. 
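+  // Worked example (editor's note, hypothetical range): for T = uint8_t with
+  // rmin = 0.0 and rmax = 255.0, the code below yields scale = 1.0 and a
+  // nudged zero_point of 0, so the real value 0.0 is represented exactly.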
+ // + // First determine the scale. + const double scale = (rmax - rmin) / (qmax_double - qmin_double); + + // Zero-point computation. + // First the initial floating-point computation. The zero-point can be + // determined from solving an affine equation for any known pair + // (real value, corresponding quantized value). + // We know two such pairs: (rmin, qmin) and (rmax, qmax). + // The arithmetic error on the zero point computed from either pair + // will be roughly machine_epsilon * (sum of absolute values of terms) + // so we want to use the variant that adds the smaller terms. + const double zero_point_from_min = qmin_double - rmin / scale; + const double zero_point_from_max = qmax_double - rmax / scale; + const double zero_point_from_min_error = + std::abs(qmin_double) + std::abs(rmin / scale); + const double zero_point_from_max_error = + std::abs(qmax_double) + std::abs(rmax / scale); + + const double zero_point_double = + zero_point_from_min_error < zero_point_from_max_error + ? zero_point_from_min + : zero_point_from_max; + + // Now we need to nudge the zero point to be an integer + // (our zero points are integer, and this is motivated by the requirement + // to be able to represent the real value "0" exactly as a quantized value, + // which is required in multiple places, for example in Im2col with SAME + // padding). + T nudged_zero_point = 0; + if (zero_point_double < qmin_double) { + nudged_zero_point = qmin; + } else if (zero_point_double > qmax_double) { + nudged_zero_point = qmax; + } else { + nudged_zero_point = static_cast(round(zero_point_double)); + } + // The zero point should always be in the range of quantized value, + // [qmin, qmax]. + TFLITE_CHECK_GE(nudged_zero_point, qmin); + TFLITE_CHECK_LE(nudged_zero_point, qmax); + + // Finally, store the result nudged quantization params. + QuantizationParams quantization_params; + quantization_params.zero_point = nudged_zero_point; + quantization_params.scale = scale; + return quantization_params; +} + +template +QuantizationParams ChooseQuantizationParams(double rmin, double rmax) { + return ChooseQuantizationParams(rmin, rmax, false); +} + +// Converts a floating-point number to an integer. For all inputs x where +// static_cast(x) is legal according to the C++ standard, the result +// is identical to that cast (i.e. the result is x with its fractional part +// truncated whenever that is representable as IntOut). +// +// static_cast would cause undefined behavior for the following cases, which +// have well-defined behavior for this function: +// +// 1. If x is NaN, the result is zero. +// +// 2. If the truncated form of x is above the representable range of IntOut, +// the result is std::numeric_limits::max(). +// +// 3. If the truncated form of x is below the representable range of IntOut, +// the result is std::numeric_limits::min(). +// +// Note that cases #2 and #3 cover infinities as well as finite numbers. +// +// The range of FloatIn must include the range of IntOut, otherwise +// the results are undefined. +// TODO(sfeuz): Replace by absl::SafeCast once available. +template +IntOut SafeCast(FloatIn x) { + static_assert(!std::numeric_limits::is_integer, + "FloatIn is integer"); + static_assert(std::numeric_limits::is_integer, + "IntOut is not integer"); + static_assert(std::numeric_limits::radix == 2, "IntOut is base 2"); + + // Special case NaN, for which the logic below doesn't work. + if (std::isnan(x)) { + return 0; + } + + // Negative values all clip to zero for unsigned results. 
+ if (!std::numeric_limits::is_signed && x < 0) { + return 0; + } + + // Handle infinities. + if (std::isinf(x)) { + return x < 0 ? std::numeric_limits::min() + : std::numeric_limits::max(); + } + + // Set exp such that x == f * 2^exp for some f with |f| in [0.5, 1.0), + // unless x is zero in which case exp == 0. Note that this implies that the + // magnitude of x is strictly less than 2^exp. + int exp = 0; + std::frexp(x, &exp); + + // Let N be the number of non-sign bits in the representation of IntOut. If + // the magnitude of x is strictly less than 2^N, the truncated version of x + // is representable as IntOut. The only representable integer for which this + // is not the case is kMin for signed types (i.e. -2^N), but that is covered + // by the fall-through below. + if (exp <= std::numeric_limits::digits) { + return x; + } + + // Handle numbers with magnitude >= 2^N. + return x < 0 ? std::numeric_limits::min() + : std::numeric_limits::max(); +} + +// Decompose a double multiplier into a Q0.31 int32 representation of its +// significand, and shift representation of NEGATIVE its exponent --- +// this is intended as a RIGHT-shift. +// +// Restricted to the case where the multiplier < 1 (and non-negative). +void QuantizeMultiplierSmallerThanOneExp(double double_multiplier, + int32_t* quantized_multiplier, + int* left_shift); + +// Decompose a double multiplier into a Q0.31 int32 representation of its +// significand, and shift representation of its exponent. +// +// Restricted to the case where the multiplier > 1. +void QuantizeMultiplierGreaterThanOne(double double_multiplier, + int32_t* quantized_multiplier, + int* left_shift); + +// Decompose a double multiplier into a Q0.31 int32 representation of its +// significand, and shift representation of its exponent. +// +// Handles an arbitrary positive multiplier. The 'shift' output-value is +// basically the 'floating-point exponent' of the multiplier: +// Negative for a right-shift (when the multiplier is <1), positive for a +// left-shift (when the multiplier is >1) +void QuantizeMultiplier(double double_multiplier, int32_t* quantized_multiplier, + int* shift); + +// Splits a double input value into a returned fraction, and a shift value from +// the exponent, using only bitwise and integer operations to support +// microcontrollers and other environments without floating-point support. +// +// This is designed to be a replacement for how std::frexp() is used within the +// QuantizeMultiplier() function, and so has a different signature than the +// standard version, returning a 64-bit integer rather than a double. This +// result has a maximum value of 1<<31, with the fraction expressed as a +// proportion of that maximum. +// +// std::frexp() returns NaNs and infinities unmodified, but since we're +// returning integers that can't represent those values, instead we return +// a shift of std::numeric_limits::max() for all bad numbers, with an int64 +// result of 0 for NaNs, std:numeric_limits::max() for +INFINITY, and +// std::numeric_limits::min() for -INFINITY. Denormalized inputs will +// result in return values that end up truncating some bits at the end, +// reflecting the loss of precision inherent in denormalization. +int64_t IntegerFrExp(double input, int* shift); + +// Converts an integer fraction in the format produced by IntegerFrExp (where +// 0x40000000 is 1.0) and an exponent shift (between -1022 and +1022) into an +// IEEE binary64 double format result. 
The implementation uses only integer and +// bitwise operators, so no floating point hardware support or emulation is +// needed. This is here so quantized operations can run non-time-critical +// preparation calculations on microcontrollers and other platforms without +// float support. +double DoubleFromFractionAndShift(int64_t fraction, int shift); + +// Performs a multiplication of two numbers in double format, using only integer +// and bitwise instructions. This is aimed at supporting housekeeping functions +// for quantized operations on microcontrollers without floating-point hardware. +double IntegerDoubleMultiply(double a, double b); + +// Returns -1 if a is less than b, 0 if a and b are equal, and +1 if a is +// greater than b. It is implemented using only integer and logical instructions +// so that it can be easily run on microcontrollers for quantized operations. +int IntegerDoubleCompare(double a, double b); + +// This first creates a multiplier in a double equivalent of +// Q(input_integer_bits).(31-input_integer_bits) representation, with extra +// precision in the double's fractional bits. It then splits the result into +// significand and exponent. +void PreprocessSoftmaxScaling(double beta, double input_scale, + int input_integer_bits, + int32_t* quantized_multiplier, int* left_shift); +// Like PreprocessSoftmaxScaling, but inverse scaling factors also calculated. +void PreprocessLogSoftmaxScalingExp(double beta, double input_scale, + int input_integer_bits, + int32_t* quantized_multiplier, + int* left_shift, + int32_t* reverse_scaling_divisor, + int* reverse_scaling_left_shift); +// Calculate the largest input that will result in a within-bounds intermediate +// result within MultiplyByQuantizedMultiplierGreaterThanOne. In other words, +// it must not overflow before we reduce the value by multiplication by the +// input multiplier. The negative radius is used as the minimum difference in +// Softmax. +int CalculateInputRadius(int input_integer_bits, int input_left_shift, + int total_signed_bits = 31); + +// Nudges a min/max quantization range to ensure zero is zero. +// Gymnastics with nudged zero point is to ensure that real zero maps to +// an integer, which is required for e.g. zero-padding in convolutional layers. +// Outputs nudged_min, nudged_max, nudged_scale. +void NudgeQuantizationRange(const float min, const float max, + const int quant_min, const int quant_max, + float* nudged_min, float* nudged_max, + float* nudged_scale); + +// Fake quantizes (quantizes and dequantizes) input_data using the scale, +// nudged_min, and nudged_max from NudgeQuantizationRange. This matches the code +// in TensorFlow's FakeQuantizeWithMinMaxVarsFunctor. +void FakeQuantizeArray(const float nudged_scale, const float nudged_min, + const float nudged_max, const float* input_data, + float* output_data, const float size); + +// If x is approximately a power of two (with any positive or negative +// exponent), stores that exponent (i.e. log2(x)) in *log2_result, otherwise +// returns false. +bool CheckedLog2(const float x, int* log2_result); + +// Decomposes an array of double multipliers into a Q0.31 int32 representation +// of its significand, and shift representation of its exponent. +// +// Handles an arbitrary multiplier. 
The 'shift' output-value is +// basically the 'floating-point exponent' of the multiplier: +// Negative for a right-shift (when the multiplier is <1), positive for a +// left-shift (when the multiplier is >1) +void QuantizeMultiplierArray(const double* effective_scales, size_t size, + int32_t* effective_scale_significand, + int* effective_shift); + +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_QUANTIZATION_UTIL_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/add.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/add.h new file mode 100644 index 0000000..ada6696 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/add.h @@ -0,0 +1,400 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_H_ + +#include +#include + +#include "edge-impulse-sdk/third_party/gemmlowp/fixedpoint/fixedpoint.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" + +namespace tflite { + +namespace reference_ops { + +template +inline void Add(const ArithmeticParams& params, + const RuntimeShape& input1_shape, const T* input1_data, + const RuntimeShape& input2_shape, const T* input2_data, + const RuntimeShape& output_shape, T* output_data) { + T activation_min, activation_max; + GetActivationParams(params, &activation_min, &activation_max); + + const int flat_size = + MatchingElementsSize(input1_shape, input2_shape, output_shape); + for (int i = 0; i < flat_size; ++i) { + output_data[i] = ActivationFunctionWithMinMax( + input1_data[i] + input2_data[i], activation_min, activation_max); + } +} + +// Element-wise add that can often be used for inner loop of broadcast add as +// well as the non-broadcast add. + +// This function is used for 8-bit as well as for 16-bit, but the accumulator +// is 32-bit for both cases. The overflow does not happen due to the +// choice of the shift (20 or 15, accordingly - see add.cc for more comments). 
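+// A minimal set-up sketch (editor's illustration; the way ArithmeticParams is
+// populated here is an assumption about the usual Prepare-stage flow, with the
+// multiplier/shift pairs produced by QuantizeMultiplier from the tensor
+// scales):
+//
+//   ArithmeticParams params = {};
+//   params.left_shift = 20;                     // 20 for 8 bit, 15 for 16 bit
+//   params.input1_offset = -input1_zero_point;  // negated input zero points
+//   params.input2_offset = -input2_zero_point;
+//   params.output_offset = output_zero_point;
+//   // input1/input2/output multiplier and shift filled from the real scales.
+//   params.quantized_activation_min = std::numeric_limits<uint8_t>::min();
+//   params.quantized_activation_max = std::numeric_limits<uint8_t>::max();
+//   AddElementwise(flat_size, params, input1_data, input2_data, output_data);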
+template +inline void AddElementwise(int size, const ArithmeticParams& params, + const T* input1_data, const T* input2_data, + T* output_data) { + TFLITE_DCHECK_GT(params.input1_offset, -std::numeric_limits::max()); + TFLITE_DCHECK_GT(params.input2_offset, -std::numeric_limits::max()); + TFLITE_DCHECK_LT(params.input1_offset, std::numeric_limits::max()); + TFLITE_DCHECK_LT(params.input2_offset, std::numeric_limits::max()); + + for (int i = 0; i < size; ++i) { + const int32_t input1_val = params.input1_offset + input1_data[i]; + const int32_t input2_val = params.input2_offset + input2_data[i]; + const int32_t shifted_input1_val = input1_val * (1 << params.left_shift); + const int32_t shifted_input2_val = input2_val * (1 << params.left_shift); + const int32_t scaled_input1_val = + MultiplyByQuantizedMultiplierSmallerThanOneExp( + shifted_input1_val, params.input1_multiplier, params.input1_shift); + const int32_t scaled_input2_val = + MultiplyByQuantizedMultiplierSmallerThanOneExp( + shifted_input2_val, params.input2_multiplier, params.input2_shift); + const int32_t raw_sum = scaled_input1_val + scaled_input2_val; + const int32_t raw_output = + MultiplyByQuantizedMultiplierSmallerThanOneExp( + raw_sum, params.output_multiplier, params.output_shift) + + params.output_offset; + const int32_t clamped_output = + std::min(params.quantized_activation_max, + std::max(params.quantized_activation_min, raw_output)); + output_data[i] = static_cast(clamped_output); + } +} + +// Scalar-broadcast add that can be used for inner loop of more general +// broadcast add, so that, for example, scalar-broadcast with batch will still +// be fast. +inline void AddScalarBroadcast(int size, const ArithmeticParams& params, + uint8_t input1_data, const uint8_t* input2_data, + uint8_t* output_data) { + TFLITE_DCHECK_GT(params.input1_offset, -256); + TFLITE_DCHECK_GT(params.input2_offset, -256); + TFLITE_DCHECK_LT(params.input1_offset, 256); + TFLITE_DCHECK_LT(params.input2_offset, 256); + + const int32_t input1_val = params.input1_offset + input1_data; + const int32_t shifted_input1_val = input1_val * (1 << params.left_shift); + const int32_t scaled_input1_val = + MultiplyByQuantizedMultiplierSmallerThanOneExp( + shifted_input1_val, params.input1_multiplier, params.input1_shift); + for (int i = 0; i < size; ++i) { + const int32_t input2_val = params.input2_offset + input2_data[i]; + const int32_t shifted_input2_val = input2_val * (1 << params.left_shift); + const int32_t scaled_input2_val = + MultiplyByQuantizedMultiplierSmallerThanOneExp( + shifted_input2_val, params.input2_multiplier, params.input2_shift); + const int32_t raw_sum = scaled_input1_val + scaled_input2_val; + const int32_t raw_output = + MultiplyByQuantizedMultiplierSmallerThanOneExp( + raw_sum, params.output_multiplier, params.output_shift) + + params.output_offset; + const int32_t clamped_output = + std::min(params.quantized_activation_max, + std::max(params.quantized_activation_min, raw_output)); + output_data[i] = static_cast(clamped_output); + } +} + +inline void Add(const ArithmeticParams& params, + const RuntimeShape& input1_shape, const uint8_t* input1_data, + const RuntimeShape& input2_shape, const uint8_t* input2_data, + const RuntimeShape& output_shape, uint8_t* output_data) { + TFLITE_DCHECK_LE(params.quantized_activation_min, + params.quantized_activation_max); + const int flat_size = + MatchingElementsSize(input1_shape, input2_shape, output_shape); + + TFLITE_DCHECK_GT(params.input1_offset, -256); + TFLITE_DCHECK_GT(params.input2_offset, 
-256); + TFLITE_DCHECK_LT(params.input1_offset, 256); + TFLITE_DCHECK_LT(params.input2_offset, 256); + AddElementwise(flat_size, params, input1_data, input2_data, output_data); +} + +inline void AddGeneralParamScale(const ArithmeticParams& params, + const RuntimeShape& input1_shape, + const int16_t* input1_data, + const RuntimeShape& input2_shape, + const int16_t* input2_data, + const RuntimeShape& output_shape, + int16_t* output_data) { + TFLITE_DCHECK_LE(params.quantized_activation_min, + params.quantized_activation_max); + const int flat_size = + MatchingElementsSize(input1_shape, input2_shape, output_shape); + + int max_value = std::numeric_limits::max(); + + TFLITE_DCHECK_GT(params.input1_offset, -max_value); + TFLITE_DCHECK_GT(params.input2_offset, -max_value); + TFLITE_DCHECK_LT(params.input1_offset, max_value); + TFLITE_DCHECK_LT(params.input2_offset, max_value); + AddElementwise(flat_size, params, input1_data, input2_data, output_data); +} + +inline void Add(const ArithmeticParams& params, + const RuntimeShape& input1_shape, const int16_t* input1_data, + const RuntimeShape& input2_shape, const int16_t* input2_data, + const RuntimeShape& output_shape, int16_t* output_data, + bool pot_scale = true) { + if (!pot_scale) { + AddGeneralParamScale(params, input1_shape, input1_data, input2_shape, + input2_data, output_shape, output_data); + return; + } + + TFLITE_DCHECK_LE(params.quantized_activation_min, + params.quantized_activation_max); + + const int input1_shift = params.input1_shift; + const int flat_size = + MatchingElementsSize(input1_shape, input2_shape, output_shape); + const int16_t output_activation_min = params.quantized_activation_min; + const int16_t output_activation_max = params.quantized_activation_max; + + TFLITE_DCHECK(input1_shift == 0 || params.input2_shift == 0); + TFLITE_DCHECK_LE(input1_shift, 0); + TFLITE_DCHECK_LE(params.input2_shift, 0); + const int16_t* not_shift_input = + input1_shift == 0 ? input1_data : input2_data; + const int16_t* shift_input = input1_shift == 0 ? input2_data : input1_data; + const int input_right_shift = + input1_shift == 0 ? -params.input2_shift : -input1_shift; + + for (int i = 0; i < flat_size; i++) { + // F0 uses 0 integer bits, range [-1, 1]. 
+ using F0 = gemmlowp::FixedPoint; + + F0 input_ready_scaled = F0::FromRaw(not_shift_input[i]); + F0 scaled_input = F0::FromRaw( + gemmlowp::RoundingDivideByPOT(shift_input[i], input_right_shift)); + F0 result = gemmlowp::SaturatingAdd(scaled_input, input_ready_scaled); + const int16_t raw_output = result.raw(); + const int16_t clamped_output = std::min( + output_activation_max, std::max(output_activation_min, raw_output)); + output_data[i] = clamped_output; + } +} + +template +inline typename std::enable_if::value, void>::type +BroadcastAdd4DSlow(const ArithmeticParams& params, + const RuntimeShape& input1_shape, const T* input1_data, + const RuntimeShape& input2_shape, const T* input2_data, + const RuntimeShape& output_shape, T* output_data) { + NdArrayDesc<4> desc1; + NdArrayDesc<4> desc2; + NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, + &desc2); + const RuntimeShape extended_output_shape = + RuntimeShape::ExtendedShape(4, output_shape); + + T activation_min, activation_max; + GetActivationParams(params, &activation_min, &activation_max); + + // In Tensorflow, the dimensions are canonically named (batch_number, row, + // col, channel), with extents (batches, height, width, depth), with the + // trailing dimension changing most rapidly (channels has the smallest stride, + // typically 1 element). + // + // In generated C code, we store arrays with the dimensions reversed. The + // first dimension has smallest stride. + // + // We name our variables by their Tensorflow convention, but generate C code + // nesting loops such that the innermost loop has the smallest stride for the + // best cache behavior. + for (int b = 0; b < extended_output_shape.Dims(0); ++b) { + for (int y = 0; y < extended_output_shape.Dims(1); ++y) { + for (int x = 0; x < extended_output_shape.Dims(2); ++x) { + for (int c = 0; c < extended_output_shape.Dims(3); ++c) { + output_data[Offset(extended_output_shape, b, y, x, c)] = + ActivationFunctionWithMinMax( + input1_data[SubscriptToIndex(desc1, b, y, x, c)] + + input2_data[SubscriptToIndex(desc2, b, y, x, c)], + activation_min, activation_max); + } + } + } + } +} + +// This function is used for 8-bit as well as for 16-bit, but the accumulator +// is 32-bit for both cases. The overflow does not happen due to the +// choice of the shift (20 or 15, accordingly - see add.cc for more comments). +template +inline typename std::enable_if::value, void>::type +BroadcastAdd4DSlow(const ArithmeticParams& params, + const RuntimeShape& input1_shape, const T* input1_data, + const RuntimeShape& input2_shape, const T* input2_data, + const RuntimeShape& output_shape, T* output_data) { + NdArrayDesc<4> desc1; + NdArrayDesc<4> desc2; + NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, + &desc2); + const RuntimeShape extended_output_shape = + RuntimeShape::ExtendedShape(4, output_shape); + + // In Tensorflow, the dimensions are canonically named (batch_number, row, + // col, channel), with extents (batches, height, width, depth), with the + // trailing dimension changing most rapidly (channels has the smallest stride, + // typically 1 element). + // + // In generated C code, we store arrays with the dimensions reversed. The + // first dimension has smallest stride. + // + // We name our variables by their Tensorflow convention, but generate C code + // nesting loops such that the innermost loop has the smallest stride for the + // best cache behavior. 
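+  // Shape example (editor's note): broadcasting a per-channel tensor of shape
+  // [1, 1, 1, 8] against an activation of shape [2, 4, 4, 8] produces an
+  // output of shape [2, 4, 4, 8]; the size-1 dimensions of the smaller input
+  // are simply re-read for every (b, y, x) index in the loops below.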
+ for (int b = 0; b < extended_output_shape.Dims(0); ++b) { + for (int y = 0; y < extended_output_shape.Dims(1); ++y) { + for (int x = 0; x < extended_output_shape.Dims(2); ++x) { + for (int c = 0; c < extended_output_shape.Dims(3); ++c) { + const int32_t input1_val = + params.input1_offset + + input1_data[SubscriptToIndex(desc1, b, y, x, c)]; + const int32_t input2_val = + params.input2_offset + + input2_data[SubscriptToIndex(desc2, b, y, x, c)]; + const int32_t shifted_input1_val = + input1_val * (1 << params.left_shift); + const int32_t shifted_input2_val = + input2_val * (1 << params.left_shift); + const int32_t scaled_input1_val = + MultiplyByQuantizedMultiplierSmallerThanOneExp( + shifted_input1_val, params.input1_multiplier, + params.input1_shift); + const int32_t scaled_input2_val = + MultiplyByQuantizedMultiplierSmallerThanOneExp( + shifted_input2_val, params.input2_multiplier, + params.input2_shift); + const int32_t raw_sum = scaled_input1_val + scaled_input2_val; + const int32_t raw_output = + MultiplyByQuantizedMultiplierSmallerThanOneExp( + raw_sum, params.output_multiplier, params.output_shift) + + params.output_offset; + const int32_t clamped_output = + std::min(params.quantized_activation_max, + std::max(params.quantized_activation_min, raw_output)); + output_data[Offset(extended_output_shape, b, y, x, c)] = + static_cast(clamped_output); + } + } + } + } +} + +inline void BroadcastAddFivefold(const ArithmeticParams& unswitched_params, + const RuntimeShape& unswitched_input1_shape, + const uint8_t* unswitched_input1_data, + const RuntimeShape& unswitched_input2_shape, + const uint8_t* unswitched_input2_data, + const RuntimeShape& output_shape, + uint8_t* output_data) { + ArithmeticParams switched_params = unswitched_params; + switched_params.input1_offset = unswitched_params.input2_offset; + switched_params.input1_multiplier = unswitched_params.input2_multiplier; + switched_params.input1_shift = unswitched_params.input2_shift; + switched_params.input2_offset = unswitched_params.input1_offset; + switched_params.input2_multiplier = unswitched_params.input1_multiplier; + switched_params.input2_shift = unswitched_params.input1_shift; + + const bool use_unswitched = + unswitched_params.broadcast_category == + tflite::BroadcastableOpCategory::kFirstInputBroadcastsFast; + + const ArithmeticParams& params = + use_unswitched ? unswitched_params : switched_params; + const uint8_t* input1_data = + use_unswitched ? unswitched_input1_data : unswitched_input2_data; + const uint8_t* input2_data = + use_unswitched ? unswitched_input2_data : unswitched_input1_data; + + // Fivefold nested loops. The second input resets its position for each + // iteration of the second loop. The first input resets its position at the + // beginning of the fourth loop. The innermost loop is an elementwise add of + // sections of the arrays. + uint8_t* output_data_ptr = output_data; + const uint8_t* input1_data_ptr = input1_data; + const uint8_t* input2_data_reset = input2_data; + // In the fivefold pattern, y0, y2 and y4 are not broadcast, and so shared + // between input shapes. y3 for input 1 is always broadcast, and so the + // dimension there is 1, whereas optionally y1 might be broadcast for input 2. + // Put another way, + // input1.shape.FlatSize = y0 * y1 * y2 * y4, + // input2.shape.FlatSize = y0 * y2 * y3 * y4. 
+ int y0 = params.broadcast_shape[0]; + int y1 = params.broadcast_shape[1]; + int y2 = params.broadcast_shape[2]; + int y3 = params.broadcast_shape[3]; + int y4 = params.broadcast_shape[4]; + if (y4 > 1) { + // General fivefold pattern, with y4 > 1 so there is a non-broadcast inner + // dimension. + for (int i0 = 0; i0 < y0; ++i0) { + const uint8_t* input2_data_ptr; + for (int i1 = 0; i1 < y1; ++i1) { + input2_data_ptr = input2_data_reset; + for (int i2 = 0; i2 < y2; ++i2) { + for (int i3 = 0; i3 < y3; ++i3) { + AddElementwise(y4, params, input1_data_ptr, input2_data_ptr, + output_data_ptr); + input2_data_ptr += y4; + output_data_ptr += y4; + } + // We have broadcast y4 of input1 data y3 times, and now move on. + input1_data_ptr += y4; + } + } + // We have broadcast y2*y3*y4 of input2 data y1 times, and now move on. + input2_data_reset = input2_data_ptr; + } + } else { + // Special case of y4 == 1, in which the innermost loop is a single element + // and can be combined with the next (y3) as an inner broadcast. + // + // Note that this handles the case of pure scalar broadcast when + // y0 == y1 == y2 == 1. With low overhead it handles cases such as scalar + // broadcast with batch (as y2 > 1). + // + // NOTE The process is the same as the above general case except simplified + // for y4 == 1 and the loop over y3 is contained within the + // AddScalarBroadcast function. + for (int i0 = 0; i0 < y0; ++i0) { + const uint8_t* input2_data_ptr; + for (int i1 = 0; i1 < y1; ++i1) { + input2_data_ptr = input2_data_reset; + for (int i2 = 0; i2 < y2; ++i2) { + AddScalarBroadcast(y3, params, *input1_data_ptr, input2_data_ptr, + output_data_ptr); + input2_data_ptr += y3; + output_data_ptr += y3; + input1_data_ptr += 1; + } + } + input2_data_reset = input2_data_ptr; + } + } +} + +} // namespace reference_ops +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/add_n.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/add_n.h new file mode 100644 index 0000000..7b5424c --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/add_n.h @@ -0,0 +1,86 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_N_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_N_H_ + +#include +#include + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" + +namespace tflite { +namespace reference_ops { + +// T is expected to be either float or int. +template +inline void AddN(const RuntimeShape& input_shape, const size_t num_inputs, + const T* const* input_data, T* output_data) { + // All inputs and output should have the same shape, this is checked during + // Prepare stage. 
+ const size_t size = input_shape.FlatSize(); + for (size_t i = 0; i < size; ++i) { + T x = 0; + for (size_t j = 0; j < num_inputs; ++j) { + x += input_data[j][i]; + } + output_data[i] = x; + } +} + +inline void AddN(const ArithmeticParams& params, + const RuntimeShape& input_shape, const size_t num_inputs, + const int8_t* const* input_data, int8_t* output_data) { + TFLITE_DCHECK_LE(params.quantized_activation_min, + params.quantized_activation_max); + // Input offset is negative input zero point. Activation tensors are + // asymmetric quantized so they span the full int8 range. + // All inputs should have same zero-point and scale, this is checked during + // Prepare stage. + TFLITE_DCHECK_GE(-params.input1_offset, std::numeric_limits::min()); + TFLITE_DCHECK_LE(-params.input1_offset, std::numeric_limits::max()); + + // All inputs and output should have the same shape, this is checked during + // Prepare stage. + const size_t size = input_shape.FlatSize(); + for (size_t i = 0; i < size; ++i) { + // accumulate in scaled_x before clamping to avoid overflow + const int32_t x = params.input1_offset; // x = 0 + const int32_t shifted_x = x * (1 << params.left_shift); + int32_t scaled_x = MultiplyByQuantizedMultiplierSmallerThanOneExp( + shifted_x, params.input1_multiplier, params.input1_shift); + + for (size_t j = 0; j < num_inputs; ++j) { + const int32_t y = params.input1_offset + input_data[j][i]; + const int32_t shifted_y = y * (1 << params.left_shift); + int32_t scaled_y = MultiplyByQuantizedMultiplierSmallerThanOneExp( + shifted_y, params.input1_multiplier, params.input1_shift); + scaled_x += scaled_y; + } + + const int32_t raw_output = + MultiplyByQuantizedMultiplierSmallerThanOneExp( + scaled_x, params.output_multiplier, params.output_shift) + + params.output_offset; + const int32_t clamped_output = + std::min(params.quantized_activation_max, + std::max(params.quantized_activation_min, raw_output)); + output_data[i] = static_cast(clamped_output); + } +} + +} // namespace reference_ops +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_N_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/arg_min_max.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/arg_min_max.h new file mode 100644 index 0000000..7de12f7 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/arg_min_max.h @@ -0,0 +1,88 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ARG_MIN_MAX_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ARG_MIN_MAX_H_ + +#include + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" + +namespace tflite { + +namespace reference_ops { + +template +std::function GetComparefunction(bool is_arg_max) { + if (is_arg_max) { + return std::greater(); + } else { + return std::less(); + } +} + +template +void ArgMinMax(const RuntimeShape& input1_shape, const T1* input1_data, + const T3* input2_data, const RuntimeShape& output_shape, + T2* output_data, const Cmp& cmp) { + TFLITE_DCHECK_GT(input1_shape.DimensionsCount(), 0); + TFLITE_DCHECK_EQ(input1_shape.DimensionsCount() - 1, + output_shape.DimensionsCount()); + int axis = input2_data[0]; + if (axis < 0) { + axis += input1_shape.DimensionsCount(); + } + const int axis_size = input1_shape.Dims(axis); + + int outer_size = 1; + for (int i = 0; i < axis; ++i) { + TFLITE_DCHECK_EQ(input1_shape.Dims(i), output_shape.Dims(i)); + outer_size *= input1_shape.Dims(i); + } + + int inner_size = 1; + const int dims_count = input1_shape.DimensionsCount(); + for (int i = axis + 1; i < dims_count; ++i) { + TFLITE_DCHECK_EQ(input1_shape.Dims(i), output_shape.Dims(i - 1)); + inner_size *= input1_shape.Dims(i); + } + for (int outer = 0; outer < outer_size; ++outer) { + for (int inner = 0; inner < inner_size; ++inner) { + auto min_max_value = input1_data[outer * axis_size * inner_size + inner]; + T2 min_max_index = 0; + for (int i = 1; i < axis_size; ++i) { + const auto& curr_value = + input1_data[(outer * axis_size + i) * inner_size + inner]; + if (cmp(curr_value, min_max_value)) { + min_max_value = curr_value; + min_max_index = static_cast(i); + } + } + output_data[outer * inner_size + inner] = min_max_index; + } + } +} + +template +void ArgMinMax(const RuntimeShape& input1_shape, const T1* input1_data, + const T3* input2_data, const RuntimeShape& output_shape, + T2* output_data, const bool is_arg_max) { + ArgMinMax(input1_shape, input1_data, input2_data, output_shape, output_data, + GetComparefunction(is_arg_max)); +} + +} // namespace reference_ops +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ARG_MIN_MAX_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/batch_matmul.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/batch_matmul.h new file mode 100644 index 0000000..3695bad --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/batch_matmul.h @@ -0,0 +1,275 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BATCH_MATMUL_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BATCH_MATMUL_H_ + +#include +#include + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/portable_tensor_utils.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" + +namespace tflite { +namespace reference_ops { +namespace batch_matmul { + +// Determine which dimension is the broadcast dimension. +inline int broadcast_dim(int lhs_dim, int rhs_dim) { + if (lhs_dim == rhs_dim) return lhs_dim; + if (lhs_dim == 1) return rhs_dim; + TFLITE_DCHECK_EQ(rhs_dim, 1); + return lhs_dim; +} + +// Compute the "extent" for iterating on this dimension. +// If we are broadcasting, then don't advance (i.e return 0). +inline int extent(const RuntimeShape& shape, int x) { + if (shape.Dims(x) == 1) { + return 0; + } + int prod = 1; + for (int i = x + 1; i < shape.DimensionsCount(); ++i) { + prod *= shape.Dims(i); + } + return prod; +} + +} // namespace batch_matmul + +template +inline void BatchMatMul(const RuntimeShape& lhs_shape, const Ta* lhs_data, + const RuntimeShape& rhs_shape, const Tb* rhs_data, + const RuntimeShape& output_shape, Tout* output_data) { + const RuntimeShape extended_lhs_shape = + RuntimeShape::ExtendedShape(5, lhs_shape); + const RuntimeShape extended_rhs_shape = + RuntimeShape::ExtendedShape(5, rhs_shape); + + const int batch_dim0 = batch_matmul::broadcast_dim( + extended_lhs_shape.Dims(0), extended_rhs_shape.Dims(0)); + const int batch_dim1 = batch_matmul::broadcast_dim( + extended_lhs_shape.Dims(1), extended_rhs_shape.Dims(1)); + const int batch_dim2 = batch_matmul::broadcast_dim( + extended_lhs_shape.Dims(2), extended_rhs_shape.Dims(2)); + + const int lhs_ext0 = batch_matmul::extent(extended_lhs_shape, 0); + const int lhs_ext1 = batch_matmul::extent(extended_lhs_shape, 1); + const int lhs_ext2 = batch_matmul::extent(extended_lhs_shape, 2); + const int rhs_ext0 = batch_matmul::extent(extended_rhs_shape, 0); + const int rhs_ext1 = batch_matmul::extent(extended_rhs_shape, 1); + const int rhs_ext2 = batch_matmul::extent(extended_rhs_shape, 2); + + // Set params for each matrix multiply. 
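+  // Both shapes were extended to rank 5 above; dims 0-2 are batch dimensions
+  // that broadcast against each other (they must match or be 1, see
+  // batch_matmul::broadcast_dim), and batch_matmul::extent returns a stride
+  // of 0 for a size-1 dim so the same lhs/rhs block is reused across that
+  // batch dimension. Illustrative example (hypothetical shapes, not from the
+  // upstream sources): lhs batch dims [2, 1, 3] and rhs batch dims [1, 4, 3]
+  // broadcast to output batch dims [2, 4, 3].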
+ const int lhs_rows = extended_lhs_shape.Dims(3); + const int rhs_cols = extended_rhs_shape.Dims(4); + const int accum_depth = extended_lhs_shape.Dims(4); + + for (int b0 = 0; b0 < batch_dim0; ++b0) { + const Ta* lhs_ptr0 = lhs_data + (b0 * lhs_ext0); + const Tb* rhs_ptr0 = rhs_data + (b0 * rhs_ext0); + for (int b1 = 0; b1 < batch_dim1; ++b1) { + const Ta* lhs_ptr1 = lhs_ptr0 + b1 * lhs_ext1; + const Tb* rhs_ptr1 = rhs_ptr0 + b1 * rhs_ext1; + for (int b2 = 0; b2 < batch_dim2; ++b2) { + const Ta* lhs_ptr2 = lhs_ptr1 + b2 * lhs_ext2; + const Tb* rhs_ptr2 = rhs_ptr1 + b2 * rhs_ext2; + Tout* out_ptr = output_data + ((b0 * batch_dim1 * batch_dim2) + + b1 * batch_dim2 + b2) * + lhs_rows * rhs_cols; + for (int j = 0; j < rhs_cols; ++j) { + for (int i = 0; i < lhs_rows; ++i) { + Tout total = 0; + for (int k = 0; k < accum_depth; ++k) { + total += static_cast(lhs_ptr2[accum_depth * i + k]) * + static_cast(rhs_ptr2[j * accum_depth + k]); + } + int idx = lhs_rows * j + i; + out_ptr[idx] = total; + } + } + } + } + } +} + +inline void BatchMatMul(const RuntimeShape& lhs_shape, const int8_t* lhs_data, + const RuntimeShape& rhs_shape, const int8_t* rhs_data, + const float* scaling_factors, + const int32_t* input_offset, int32_t* row_sums, + const RuntimeShape& output_shape, float* output_data, + bool* compute_row_sums) { + const RuntimeShape extended_lhs_shape = + RuntimeShape::ExtendedShape(5, lhs_shape); + const RuntimeShape extended_rhs_shape = + RuntimeShape::ExtendedShape(5, rhs_shape); + + const int batch_dim0 = batch_matmul::broadcast_dim( + extended_lhs_shape.Dims(0), extended_rhs_shape.Dims(0)); + const int batch_dim1 = batch_matmul::broadcast_dim( + extended_lhs_shape.Dims(1), extended_rhs_shape.Dims(1)); + const int batch_dim2 = batch_matmul::broadcast_dim( + extended_lhs_shape.Dims(2), extended_rhs_shape.Dims(2)); + + const int lhs_ext0 = batch_matmul::extent(extended_lhs_shape, 0); + const int lhs_ext1 = batch_matmul::extent(extended_lhs_shape, 1); + const int lhs_ext2 = batch_matmul::extent(extended_lhs_shape, 2); + const int rhs_ext0 = batch_matmul::extent(extended_rhs_shape, 0); + const int rhs_ext1 = batch_matmul::extent(extended_rhs_shape, 1); + const int rhs_ext2 = batch_matmul::extent(extended_rhs_shape, 2); + + // Set params for each matrix multiply. + const int lhs_rows = extended_lhs_shape.Dims(3); + const int rhs_cols = extended_rhs_shape.Dims(4); + const int accum_depth = extended_lhs_shape.Dims(4); + + const int ioff_ext0 = rhs_ext0 == 0 ? 0 : rhs_cols; + const int ioff_ext1 = rhs_ext1 == 0 ? 0 : rhs_cols; + const int ioff_ext2 = rhs_ext2 == 0 ? 0 : rhs_cols; + const int woff_ext0 = lhs_ext0 == 0 ? 0 : lhs_rows; + const int woff_ext1 = lhs_ext1 == 0 ? 0 : lhs_rows; + const int woff_ext2 = lhs_ext2 == 0 ? 
0 : lhs_rows; + + if (!compute_row_sums || *compute_row_sums) { + int num_weights_matrices = 1; + for (int i = 1; i < extended_lhs_shape.DimensionsCount() - 2; ++i) { + num_weights_matrices *= extended_lhs_shape.Dims(i); + } + tensor_utils::ReductionSumVector( + lhs_data, row_sums, num_weights_matrices * lhs_rows, accum_depth); + if (compute_row_sums) { + *compute_row_sums = false; + } + } + + for (int b0 = 0; b0 < batch_dim0; ++b0) { + const int8_t* lhs_ptr0 = lhs_data + (b0 * lhs_ext0); + const int8_t* rhs_ptr0 = rhs_data + (b0 * rhs_ext0); + const int32_t* ioff_ptr0 = input_offset + (b0 * ioff_ext0); + const float* scale_ptr0 = scaling_factors + (b0 * ioff_ext0); + const int32_t* woff_ptr0 = row_sums + (b0 * woff_ext0); + for (int b1 = 0; b1 < batch_dim1; ++b1) { + const int8_t* lhs_ptr1 = lhs_ptr0 + b1 * lhs_ext1; + const int8_t* rhs_ptr1 = rhs_ptr0 + b1 * rhs_ext1; + const int32_t* ioff_ptr1 = ioff_ptr0 + (b1 * ioff_ext1); + const float* scale_ptr1 = scale_ptr0 + (b1 * ioff_ext1); + const int32_t* woff_ptr1 = woff_ptr0 + (b1 * woff_ext1); + for (int b2 = 0; b2 < batch_dim2; ++b2) { + const int8_t* lhs_ptr2 = lhs_ptr1 + b2 * lhs_ext2; + const int8_t* rhs_ptr2 = rhs_ptr1 + b2 * rhs_ext2; + const int32_t* ioff_ptr2 = ioff_ptr1 + (b2 * ioff_ext2); + const float* scale_ptr2 = scale_ptr1 + (b2 * ioff_ext2); + const int32_t* woff_ptr2 = woff_ptr1 + (b2 * woff_ext2); + float* out_ptr = output_data + ((b0 * batch_dim1 * batch_dim2) + + b1 * batch_dim2 + b2) * + lhs_rows * rhs_cols; + for (int j = 0; j < rhs_cols; ++j) { + const float batch_scaling_factor = scale_ptr2[j]; + const float batch_offset = static_cast(ioff_ptr2[j]); + for (int i = 0; i < lhs_rows; ++i) { + int32_t total = 0; + for (int k = 0; k < accum_depth; ++k) { + total += + lhs_ptr2[accum_depth * i + k] * rhs_ptr2[j * accum_depth + k]; + } + int32_t row_sum = woff_ptr2[i]; + total -= row_sum * batch_offset; + int idx = lhs_rows * j + i; + out_ptr[idx] += batch_scaling_factor * total; + } + } + } + } + } +} + +template +inline void BatchMatMul(const FullyConnectedParams& params, + const RuntimeShape& lhs_shape, const T* lhs_data, + const RuntimeShape& rhs_shape, const T* rhs_data, + const RuntimeShape& output_shape, T* output_data) { + const RuntimeShape extended_lhs_shape = + RuntimeShape::ExtendedShape(5, lhs_shape); + const RuntimeShape extended_rhs_shape = + RuntimeShape::ExtendedShape(5, rhs_shape); + + const int batch_dim0 = batch_matmul::broadcast_dim( + extended_lhs_shape.Dims(0), extended_rhs_shape.Dims(0)); + const int batch_dim1 = batch_matmul::broadcast_dim( + extended_lhs_shape.Dims(1), extended_rhs_shape.Dims(1)); + const int batch_dim2 = batch_matmul::broadcast_dim( + extended_lhs_shape.Dims(2), extended_rhs_shape.Dims(2)); + + const int lhs_ext0 = batch_matmul::extent(extended_lhs_shape, 0); + const int lhs_ext1 = batch_matmul::extent(extended_lhs_shape, 1); + const int lhs_ext2 = batch_matmul::extent(extended_lhs_shape, 2); + const int rhs_ext0 = batch_matmul::extent(extended_rhs_shape, 0); + const int rhs_ext1 = batch_matmul::extent(extended_rhs_shape, 1); + const int rhs_ext2 = batch_matmul::extent(extended_rhs_shape, 2); + + // Set params for each matrix multiply. 
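+  // Sketch of the quantized math used below (the offsets are typically the
+  // negated zero points): each product term is accumulated as
+  //   (lhs_val + filter_offset) * (rhs_val + input_offset)
+  // in AccumT (normally int32_t), then rescaled with
+  // MultiplyByQuantizedMultiplier, shifted by output_offset and clamped to
+  // the activation range before being written back as T.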
+ const int lhs_rows = extended_lhs_shape.Dims(3); + const int rhs_cols = extended_rhs_shape.Dims(4); + const int accum_depth = extended_lhs_shape.Dims(4); + + const int32_t input_offset = params.input_offset; + const int32_t filter_offset = params.weights_offset; + const int32_t output_offset = params.output_offset; + const int32_t output_multiplier = params.output_multiplier; + const int output_shift = params.output_shift; + const int32_t output_activation_min = params.quantized_activation_min; + const int32_t output_activation_max = params.quantized_activation_max; + TFLITE_DCHECK_LE(output_activation_min, output_activation_max); + + for (int b0 = 0; b0 < batch_dim0; ++b0) { + const T* lhs_ptr0 = lhs_data + (b0 * lhs_ext0); + const T* rhs_ptr0 = rhs_data + (b0 * rhs_ext0); + for (int b1 = 0; b1 < batch_dim1; ++b1) { + const T* lhs_ptr1 = lhs_ptr0 + b1 * lhs_ext1; + const T* rhs_ptr1 = rhs_ptr0 + b1 * rhs_ext1; + for (int b2 = 0; b2 < batch_dim2; ++b2) { + const T* lhs_ptr2 = lhs_ptr1 + b2 * lhs_ext2; + const T* rhs_ptr2 = rhs_ptr1 + b2 * rhs_ext2; + T* out_ptr = output_data + + ((b0 * batch_dim1 * batch_dim2) + b1 * batch_dim2 + b2) * + lhs_rows * rhs_cols; + + for (int j = 0; j < rhs_cols; ++j) { + for (int i = 0; i < lhs_rows; ++i) { + AccumT total = 0; + for (int k = 0; k < accum_depth; ++k) { + AccumT lhs_val = lhs_ptr2[accum_depth * i + k]; + AccumT rhs_val = rhs_ptr2[accum_depth * j + k]; + total += (lhs_val + filter_offset) * (rhs_val + input_offset); + } + int32_t total_scaled = MultiplyByQuantizedMultiplier( + total, output_multiplier, output_shift); + total_scaled += output_offset; + total_scaled = std::max(total_scaled, output_activation_min); + total_scaled = std::min(total_scaled, output_activation_max); + const int idx = lhs_rows * j + i; + out_ptr[idx] = static_cast(total_scaled); + } + } + } + } + } +} + +} // namespace reference_ops +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BATCH_MATMUL_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h new file mode 100644 index 0000000..72c39e6 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h @@ -0,0 +1,101 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BATCH_TO_SPACE_ND_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BATCH_TO_SPACE_ND_H_ + +#include + +#include "edge-impulse-sdk/third_party/ruy/ruy/profiler/instrumentation.h" // from @ruy +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" + +namespace tflite { +namespace reference_ops { + +// TODO(b/135760455): Move this method anonymous namespace in a cc file. 
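+// The helper below pads a 3-D [batch, spatial, depth] shape to the 4-D
+// [batch, spatial, 1, depth] form that BatchToSpaceND works on. As an
+// illustration of the op itself (hypothetical shapes, not from the upstream
+// sources): with block_shape = {2, 2} and zero crops, an input of shape
+// [4, 1, 1, 1] becomes an output of shape [1, 2, 2, 1] where
+// output[0][h][w][c] = input[2 * h + w][0][0][c].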
+inline RuntimeShape ExtendShapeBatchToSpace(const RuntimeShape& shape) { + if (shape.DimensionsCount() == 4) { + return shape; + } + RuntimeShape new_shape(4, 1); + new_shape.SetDim(0, shape.Dims(0)); + new_shape.SetDim(1, shape.Dims(1)); + new_shape.SetDim(3, shape.Dims(2)); + return new_shape; +} + +template +inline void BatchToSpaceND(const RuntimeShape& unextended_input1_shape, + const T* input1_data, + const RuntimeShape& unextended_input2_shape, + const int32_t* block_shape_data, + const RuntimeShape& unextended_input3_shape, + const int32_t* crops_data, + const RuntimeShape& unextended_output_shape, + T* output_data) { + ruy::profiler::ScopeLabel label("BatchToSpaceND"); + TFLITE_DCHECK_GE(unextended_input1_shape.DimensionsCount(), 3); + TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(unextended_input1_shape.DimensionsCount(), + unextended_output_shape.DimensionsCount()); + + const RuntimeShape input1_shape = + ExtendShapeBatchToSpace(unextended_input1_shape); + const RuntimeShape output_shape = + ExtendShapeBatchToSpace(unextended_output_shape); + + const int output_width = output_shape.Dims(2); + const int output_height = output_shape.Dims(1); + const int output_batch_size = output_shape.Dims(0); + + const int depth = input1_shape.Dims(3); + const int input_width = input1_shape.Dims(2); + const int input_height = input1_shape.Dims(1); + const int input_batch_size = input1_shape.Dims(0); + + const int block_shape_height = block_shape_data[0]; + const int block_shape_width = + unextended_input1_shape.DimensionsCount() == 4 ? block_shape_data[1] : 1; + const int crops_top = crops_data[0]; + const int crops_left = + unextended_input1_shape.DimensionsCount() == 4 ? crops_data[2] : 0; + for (int in_batch = 0; in_batch < input_batch_size; ++in_batch) { + const int out_batch = in_batch % output_batch_size; + const int spatial_offset = in_batch / output_batch_size; + for (int in_h = 0; in_h < input_height; ++in_h) { + const int out_h = in_h * block_shape_height + + spatial_offset / block_shape_width - crops_top; + if (out_h < 0 || out_h >= output_height) { + continue; + } + for (int in_w = 0; in_w < input_width; ++in_w) { + const int out_w = in_w * block_shape_width + + spatial_offset % block_shape_width - crops_left; + + if (out_w < 0 || out_w >= output_width) { + continue; + } + T* out = output_data + Offset(output_shape, out_batch, out_h, out_w, 0); + const T* in = + input1_data + Offset(input1_shape, in_batch, in_h, in_w, 0); + memcpy(out, in, depth * sizeof(T)); + } + } + } +} + +} // namespace reference_ops +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BATCH_TO_SPACE_ND_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/binary_function.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/binary_function.h new file mode 100644 index 0000000..66101d9 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/binary_function.h @@ -0,0 +1,91 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BINARY_FUNCTION_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BINARY_FUNCTION_H_ + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" + +namespace tflite { + +namespace reference_ops { + +// Also appears to duplicate MinimumMaximum. +// +// R: Result type. T1: Input 1 type. T2: Input 2 type. +template +inline void BroadcastBinaryFunction4DSlow( + const RuntimeShape& unextended_input1_shape, const T1* input1_data, + const RuntimeShape& unextended_input2_shape, const T2* input2_data, + const RuntimeShape& unextended_output_shape, R* output_data, + R (*func)(T1, T2)) { + TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4); + const RuntimeShape output_shape = + RuntimeShape::ExtendedShape(4, unextended_output_shape); + + NdArrayDesc<4> desc1; + NdArrayDesc<4> desc2; + NdArrayDescsForElementwiseBroadcast(unextended_input1_shape, + unextended_input2_shape, &desc1, &desc2); + + const int* dims_data = + reinterpret_cast(output_shape.DimsDataUpTo5D()); + for (int b = 0; b < output_shape.Dims(0); ++b) { + int out_idx_b = b * dims_data[1]; + int in_idx1_b = desc1.strides[0] * b; + int in_idx2_b = desc2.strides[0] * b; + for (int y = 0; y < output_shape.Dims(1); ++y) { + int out_idx_y = (out_idx_b + y) * dims_data[2]; + int in_idx1_y = in_idx1_b + desc1.strides[1] * y; + int in_idx2_y = in_idx2_b + desc2.strides[1] * y; + for (int x = 0; x < output_shape.Dims(2); ++x) { + int out_idx_x = (out_idx_y + x) * dims_data[3]; + int in1_idx = in_idx1_y + desc1.strides[2] * x; + int in2_idx = in_idx2_y + desc2.strides[2] * x; + for (int c = 0; c < output_shape.Dims(3); ++c) { + auto out_idx = out_idx_x + c; + auto in1_val = input1_data[in1_idx]; + auto in2_val = input2_data[in2_idx]; + output_data[out_idx] = func(in1_val, in2_val); + in1_idx += desc1.strides[3]; + in2_idx += desc2.strides[3]; + } + } + } + } +} + +// R: Result type. T1: Input 1 type. T2: Input 2 type. 
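+//
+// Usage sketch (illustrative, hypothetical names): for two equally shaped
+// float tensors,
+//   bool Less(float a, float b) { return a < b; }
+//   BinaryFunction<bool, float, float>(shape, a_data, shape, b_data, shape,
+//                                      out_data, Less);
+// applies Less element-by-element. The shapes must already match;
+// MatchingFlatSize only validates and flattens them, it does not broadcast.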
+template +inline void BinaryFunction(const RuntimeShape& input1_shape, + const T1* input1_data, + const RuntimeShape& input2_shape, + const T2* input2_data, + const RuntimeShape& output_shape, R* output_data, + R (*func)(T1, T2)) { + const int flat_size = + MatchingFlatSize(input1_shape, input2_shape, output_shape); + for (int i = 0; i < flat_size; ++i) { + output_data[i] = func(input1_data[i], input2_data[i]); + } +} + +} // namespace reference_ops +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BINARY_FUNCTION_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/broadcast_args.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/broadcast_args.h new file mode 100644 index 0000000..341c418 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/broadcast_args.h @@ -0,0 +1,56 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BROADCAST_ARGS_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BROADCAST_ARGS_H_ + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" + +namespace tflite { +namespace reference_ops { + +template +void BroadcastArgs(const RuntimeShape& input1_shape, const T* input1_data, + const RuntimeShape& input2_shape, const T* input2_data, + const RuntimeShape& output_shape, T* output_data) { + // Gets data at the backward index i of the shape tensor. Returns 1 if the + // index is out of range. + auto get_shape_data = [](const RuntimeShape& shape, const T* data, + int backward_idx) -> T { + int forward_idx = shape.FlatSize() - 1 - backward_idx; + if (forward_idx < 0) return 1; + return data[forward_idx]; + }; + + int output_num_elements = output_shape.FlatSize(); + for (int i = 0; i < output_num_elements; ++i) { + int backward_i = output_num_elements - 1 - i; + int shape1_i = get_shape_data(input1_shape, input1_data, i); + int shape2_i = get_shape_data(input2_shape, input2_data, i); + if (shape1_i == 1) { + output_data[backward_i] = shape2_i; + } else if (shape2_i == 1) { + output_data[backward_i] = shape1_i; + } else { + TFLITE_CHECK_EQ(shape1_i, shape2_i); + output_data[backward_i] = shape1_i; + } + } +} + +} // namespace reference_ops +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BROADCAST_ARGS_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/broadcast_to.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/broadcast_to.h new file mode 100644 index 0000000..79756cb --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/broadcast_to.h @@ -0,0 +1,97 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BROADCAST_TO_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BROADCAST_TO_H_ + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" + +namespace tflite { +namespace reference_ops { +template +void BroadcastImpl(const NdArrayDesc& input_desc, const char* input_data, + const NdArrayDesc& output_desc, char* output_data, + int indexes[N], int dim, const int last_broadcasting_dim, + const int type_size) { + // Copy data from input to output. + if (dim == last_broadcasting_dim) { + int copy_size = output_desc.strides[dim] * type_size; + const char* data_src = + input_data + SubscriptToIndex(input_desc, indexes) * type_size; + char* data_dst = + output_data + SubscriptToIndex(output_desc, indexes) * type_size; + for (int i = 0; i < output_desc.extents[dim]; ++i, data_dst += copy_size) { + memcpy(data_dst, data_src, copy_size); + } + return; + } + + // Recursive call to find the next broadcasting. + for (indexes[dim] = 0; indexes[dim] < input_desc.extents[dim]; + ++indexes[dim]) { + BroadcastImpl(input_desc, input_data, output_desc, output_data, indexes, + dim + 1, last_broadcasting_dim, type_size); + } + + // Duplicate data in output tensor. + indexes[dim] = 0; + if (input_desc.extents[dim] != output_desc.extents[dim]) { + int copy_size = output_desc.strides[dim] * type_size; + char* data_src = + output_data + SubscriptToIndex(output_desc, indexes) * type_size; + char* data_dst = data_src + copy_size; + for (int i = 1; i < output_desc.extents[dim]; ++i, data_dst += copy_size) { + memcpy(data_dst, data_src, copy_size); + } + } +} + +template +inline void BroadcastTo(const RuntimeShape& unextended_input_shape, + const char* input_data, + const RuntimeShape& unextended_output_shape, + char* output_data, TfLiteType data_type) { + NdArrayDesc input_desc; + NdArrayDesc output_desc; + CopyDimsToDesc(RuntimeShape::ExtendedShape(N, unextended_input_shape), + &input_desc); + CopyDimsToDesc(RuntimeShape::ExtendedShape(N, unextended_output_shape), + &output_desc); + + // Get the last dimension has broadcasting. At this dimension, the data is + // copied from input tensor to output tensor. + int last_broadcast_dim = -1; + for (int i = N - 1; i >= 0; --i) { + if (input_desc.extents[i] != output_desc.extents[i]) { + last_broadcast_dim = i; + break; + } + } + + // If non-broadcasting, just copy data from input to output tensor. + if (last_broadcast_dim == -1) { + memcpy(output_data, input_data, + unextended_input_shape.FlatSize() * TfLiteTypeGetSize(data_type)); + return; + } + + // Broadcasting using memcpy. 
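+  // Illustrative example (hypothetical shapes): broadcasting a [1, 3] input
+  // to a [4, 3] output leaves only the row dimension differing, so the
+  // 3-element source row is memcpy'd into each of the 4 output rows. When no
+  // dimension differs at all, the early return above already copied the
+  // whole tensor with a single memcpy.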
+ int indexes[N] = {0}; + BroadcastImpl(input_desc, input_data, output_desc, output_data, indexes, 0, + last_broadcast_dim, TfLiteTypeGetSize(data_type)); +} +} // namespace reference_ops +} // namespace tflite +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BROADCAST_TO_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/ceil.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/ceil.h new file mode 100644 index 0000000..5be295d --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/ceil.h @@ -0,0 +1,37 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CEIL_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CEIL_H_ + +#include + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" + +namespace tflite { + +namespace reference_ops { + +inline void Ceil(const RuntimeShape& input_shape, const float* input_data, + const RuntimeShape& output_shape, float* output_data) { + const int flat_size = MatchingFlatSize(input_shape, output_shape); + + for (int i = 0; i < flat_size; ++i) { + output_data[i] = std::ceil(input_data[i]); + } +} + +} // namespace reference_ops +} // namespace tflite +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CEIL_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/comparisons.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/comparisons.h new file mode 100644 index 0000000..f3d6bcc --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/comparisons.h @@ -0,0 +1,280 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_COMPARISONS_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_COMPARISONS_H_ + +#include "edge-impulse-sdk/tensorflow/lite/core/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" + +namespace tflite { + +namespace reference_ops { + +template +inline bool EqualFn(T lhs, T rhs) { + return lhs == rhs; +} + +template +inline bool NotEqualFn(T lhs, T rhs) { + return lhs != rhs; +} + +template +inline bool GreaterFn(T lhs, T rhs) { + return lhs > rhs; +} +template +inline bool GreaterEqualFn(T lhs, T rhs) { + return lhs >= rhs; +} +template +inline bool LessFn(T lhs, T rhs) { + return lhs < rhs; +} +template +inline bool LessEqualFn(T lhs, T rhs) { + return lhs <= rhs; +} + +template +using ComparisonFn = bool (*)(T, T); + +template F> +inline void ComparisonImpl( + const ComparisonParams& op_params, const RuntimeShape& input1_shape, + const T* input1_data, const RuntimeShape& input2_shape, + const T* input2_data, const RuntimeShape& output_shape, bool* output_data) { + const int64_t flatsize = + MatchingFlatSize(input1_shape, input2_shape, output_shape); + for (int64_t i = 0; i < flatsize; ++i) { + output_data[i] = F(input1_data[i], input2_data[i]); + } +} + +template F> +inline void Comparison(const ComparisonParams& op_params, + const RuntimeShape& input1_shape, + const float* input1_data, + const RuntimeShape& input2_shape, + const float* input2_data, + const RuntimeShape& output_shape, bool* output_data) { + ComparisonImpl(op_params, input1_shape, input1_data, input2_shape, + input2_data, output_shape, output_data); +} + +template F> +inline void ComparisonWithScaling( + const ComparisonParams& op_params, const RuntimeShape& input1_shape, + const T* input1_data, const RuntimeShape& input2_shape, + const T* input2_data, const RuntimeShape& output_shape, bool* output_data) { + int left_shift = op_params.left_shift; + int32_t input1_offset = op_params.input1_offset; + int32_t input1_multiplier = op_params.input1_multiplier; + int input1_shift = op_params.input1_shift; + int32_t input2_offset = op_params.input2_offset; + int32_t input2_multiplier = op_params.input2_multiplier; + int input2_shift = op_params.input2_shift; + + const int64_t flatsize = + MatchingFlatSize(input1_shape, input2_shape, output_shape); + for (int64_t i = 0; i < flatsize; ++i) { + const int32_t input1_val = input1_offset + input1_data[i]; + const int32_t input2_val = input2_offset + input2_data[i]; + const int32_t shifted_input1_val = input1_val * (1 << left_shift); + const int32_t shifted_input2_val = input2_val * (1 << left_shift); + const int32_t scaled_input1_val = + MultiplyByQuantizedMultiplierSmallerThanOneExp( + shifted_input1_val, input1_multiplier, input1_shift); + const int32_t scaled_input2_val = + MultiplyByQuantizedMultiplierSmallerThanOneExp( + shifted_input2_val, input2_multiplier, input2_shift); + output_data[i] = F(scaled_input1_val, scaled_input2_val); + } +} + +struct BroadcastComparison4DSlowCommon { + const RuntimeShape output_shape; + NdArrayDesc<4> desc1; + NdArrayDesc<4> desc2; +}; + +inline BroadcastComparison4DSlowCommon BroadcastComparison4DSlowPreprocess( + const RuntimeShape& unextended_input1_shape, + const RuntimeShape& unextended_input2_shape, + const RuntimeShape& unextended_output_shape) { + TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 
4); + TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4); + NdArrayDesc<4> desc1; + NdArrayDesc<4> desc2; + NdArrayDescsForElementwiseBroadcast(unextended_input1_shape, + unextended_input2_shape, &desc1, &desc2); + return {RuntimeShape::ExtendedShape(4, unextended_output_shape), desc1, + desc2}; +} + +template F> +inline void BroadcastComparison4DSlowImpl( + const ComparisonParams& op_params, + const RuntimeShape& unextended_input1_shape, const T* input1_data, + const RuntimeShape& unextended_input2_shape, const T* input2_data, + const RuntimeShape& unextended_output_shape, bool* output_data) { + const BroadcastComparison4DSlowCommon dims = + BroadcastComparison4DSlowPreprocess(unextended_input1_shape, + unextended_input2_shape, + unextended_output_shape); + + for (int b = 0; b < dims.output_shape.Dims(0); ++b) { + for (int y = 0; y < dims.output_shape.Dims(1); ++y) { + for (int x = 0; x < dims.output_shape.Dims(2); ++x) { + for (int c = 0; c < dims.output_shape.Dims(3); ++c) { + output_data[Offset(dims.output_shape, b, y, x, c)] = + F(input1_data[SubscriptToIndex(dims.desc1, b, y, x, c)], + input2_data[SubscriptToIndex(dims.desc2, b, y, x, c)]); + } + } + } + } +} + +template F> +inline void BroadcastComparison4DSlow(const ComparisonParams& op_params, + const RuntimeShape& input1_shape, + const float* input1_data, + const RuntimeShape& input2_shape, + const float* input2_data, + const RuntimeShape& output_shape, + bool* output_data) { + BroadcastComparison4DSlowImpl(op_params, input1_shape, input1_data, + input2_shape, input2_data, + output_shape, output_data); +} + +template F> +inline void BroadcastComparison4DSlowWithScaling( + const ComparisonParams& op_params, + const RuntimeShape& unextended_input1_shape, const T* input1_data, + const RuntimeShape& unextended_input2_shape, const T* input2_data, + const RuntimeShape& unextended_output_shape, bool* output_data) { + const BroadcastComparison4DSlowCommon dims = + BroadcastComparison4DSlowPreprocess(unextended_input1_shape, + unextended_input2_shape, + unextended_output_shape); + + int left_shift = op_params.left_shift; + int32_t input1_offset = op_params.input1_offset; + int32_t input1_multiplier = op_params.input1_multiplier; + int input1_shift = op_params.input1_shift; + int32_t input2_offset = op_params.input2_offset; + int32_t input2_multiplier = op_params.input2_multiplier; + int input2_shift = op_params.input2_shift; + + for (int b = 0; b < dims.output_shape.Dims(0); ++b) { + for (int y = 0; y < dims.output_shape.Dims(1); ++y) { + for (int x = 0; x < dims.output_shape.Dims(2); ++x) { + for (int c = 0; c < dims.output_shape.Dims(3); ++c) { + const int32_t input1_val = + input1_offset + + input1_data[SubscriptToIndex(dims.desc1, b, y, x, c)]; + const int32_t input2_val = + input2_offset + + input2_data[SubscriptToIndex(dims.desc2, b, y, x, c)]; + const int32_t shifted_input1_val = input1_val * (1 << left_shift); + const int32_t shifted_input2_val = input2_val * (1 << left_shift); + const int32_t scaled_input1_val = + MultiplyByQuantizedMultiplierSmallerThanOneExp( + shifted_input1_val, input1_multiplier, input1_shift); + const int32_t scaled_input2_val = + MultiplyByQuantizedMultiplierSmallerThanOneExp( + shifted_input2_val, input2_multiplier, input2_shift); + output_data[Offset(dims.output_shape, b, y, x, c)] = + F(scaled_input1_val, scaled_input2_val); + } + } + } + } +} + +#define TFLITE_COMPARISON_OP(name) \ + inline void name(const ComparisonParams& 
op_params, \ + const RuntimeShape& input1_shape, const float* input1_data, \ + const RuntimeShape& input2_shape, const float* input2_data, \ + const RuntimeShape& output_shape, bool* output_data) { \ + Comparison(op_params, input1_shape, input1_data, input2_shape, \ + input2_data, output_shape, output_data); \ + } \ + template \ + inline void name##NoScaling( \ + const ComparisonParams& op_params, const RuntimeShape& input1_shape, \ + const T* input1_data, const RuntimeShape& input2_shape, \ + const T* input2_data, const RuntimeShape& output_shape, \ + bool* output_data) { \ + ComparisonImpl(op_params, input1_shape, input1_data, \ + input2_shape, input2_data, output_shape, \ + output_data); \ + } \ + template \ + inline void name##WithScaling( \ + const ComparisonParams& op_params, const RuntimeShape& input1_shape, \ + const T* input1_data, const RuntimeShape& input2_shape, \ + const T* input2_data, const RuntimeShape& output_shape, \ + bool* output_data) { \ + ComparisonWithScaling(op_params, input1_shape, input1_data, \ + input2_shape, input2_data, \ + output_shape, output_data); \ + } \ + template \ + inline void Broadcast4DSlow##name##NoScaling( \ + const ComparisonParams& op_params, const RuntimeShape& input1_shape, \ + const T* input1_data, const RuntimeShape& input2_shape, \ + const T* input2_data, const RuntimeShape& output_shape, \ + bool* output_data) { \ + BroadcastComparison4DSlowImpl( \ + op_params, input1_shape, input1_data, input2_shape, input2_data, \ + output_shape, output_data); \ + } \ + inline void Broadcast4DSlow##name( \ + const ComparisonParams& op_params, const RuntimeShape& input1_shape, \ + const float* input1_data, const RuntimeShape& input2_shape, \ + const float* input2_data, const RuntimeShape& output_shape, \ + bool* output_data) { \ + BroadcastComparison4DSlow(op_params, input1_shape, input1_data, \ + input2_shape, input2_data, \ + output_shape, output_data); \ + } \ + template \ + inline void Broadcast4DSlow##name##WithScaling( \ + const ComparisonParams& op_params, const RuntimeShape& input1_shape, \ + const T* input1_data, const RuntimeShape& input2_shape, \ + const T* input2_data, const RuntimeShape& output_shape, \ + bool* output_data) { \ + BroadcastComparison4DSlowWithScaling( \ + op_params, input1_shape, input1_data, input2_shape, input2_data, \ + output_shape, output_data); \ + } +TFLITE_COMPARISON_OP(Equal); +TFLITE_COMPARISON_OP(NotEqual); +TFLITE_COMPARISON_OP(Greater); +TFLITE_COMPARISON_OP(GreaterEqual); +TFLITE_COMPARISON_OP(Less); +TFLITE_COMPARISON_OP(LessEqual); +#undef TFLITE_COMPARISON_OP + +} // namespace reference_ops +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_COMPARISONS_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/concatenation.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/concatenation.h new file mode 100644 index 0000000..9d03523 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/concatenation.h @@ -0,0 +1,141 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONCATENATION_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONCATENATION_H_ + +#include + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/cppmath.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" + +namespace tflite { +namespace reference_ops { + +template +inline void Concatenation(const ConcatenationParams& params, + const RuntimeShape* const* input_shapes, + const Scalar* const* input_data, + const RuntimeShape& output_shape, + Scalar* output_data) { + int axis = params.axis; + int inputs_count = params.inputs_count; + const int concat_dimensions = output_shape.DimensionsCount(); + TFLITE_DCHECK_LT(axis, concat_dimensions); + + int64_t concat_size = 0; + for (int i = 0; i < inputs_count; i++) { + TFLITE_DCHECK_EQ(input_shapes[i]->DimensionsCount(), concat_dimensions); + for (int j = 0; j < concat_dimensions; j++) { + if (j != axis) { + MatchingDim(*input_shapes[i], j, output_shape, j); + } + } + concat_size += input_shapes[i]->Dims(axis); + } + TFLITE_DCHECK_EQ(concat_size, output_shape.Dims(axis)); + int64_t outer_size = 1; + for (int i = 0; i < axis; ++i) { + outer_size *= output_shape.Dims(i); + } + // For all input arrays, + // FlatSize() = outer_size * Dims(axis) * base_inner_size; + int64_t base_inner_size = 1; + for (int i = axis + 1; i < concat_dimensions; ++i) { + base_inner_size *= output_shape.Dims(i); + } + + Scalar* output_ptr = output_data; + for (int k = 0; k < outer_size; k++) { + for (int i = 0; i < inputs_count; ++i) { + const int copy_size = input_shapes[i]->Dims(axis) * base_inner_size; + const Scalar* input_ptr = input_data[i] + k * copy_size; + memcpy(output_ptr, input_ptr, copy_size * sizeof(Scalar)); + output_ptr += copy_size; + } + } +} + +// TODO(b/174275780): The quantized implementation of concatentation isn't fully +// quantized as it takes scale as a floating point value. This should be fixed +// when optimizng this routine further. 
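+//
+// Per-input requantization used below (sketch): with
+// scale = input_scale[i] / output_scale and bias = -input_zeropoint[i] * scale,
+// each byte is mapped as
+//   out = clamp(round(in * scale + bias) + output_zeropoint, 0, 255).
+// For example (hypothetical params), input_scale = 0.5, output_scale = 1.0,
+// input_zeropoint = 0 and output_zeropoint = 128 give out = round(in / 2) + 128.
+// Inputs whose scale and zero point already match the output are memcpy'd.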
+inline void ConcatenationWithScaling(const ConcatenationParams& params, + const RuntimeShape* const* input_shapes, + const uint8_t* const* input_data, + const RuntimeShape& output_shape, + uint8_t* output_data) { + int axis = params.axis; + const int32_t* input_zeropoint = params.input_zeropoint; + const float* input_scale = params.input_scale; + int inputs_count = params.inputs_count; + const int32_t output_zeropoint = params.output_zeropoint; + const float output_scale = params.output_scale; + + const int concat_dimensions = output_shape.DimensionsCount(); + TFLITE_DCHECK_LT(axis, concat_dimensions); + + int64_t concat_size = 0; + for (int i = 0; i < inputs_count; i++) { + TFLITE_DCHECK_EQ(input_shapes[i]->DimensionsCount(), concat_dimensions); + for (int j = 0; j < concat_dimensions; j++) { + if (j != axis) { + MatchingDim(*input_shapes[i], j, output_shape, j); + } + } + concat_size += input_shapes[i]->Dims(axis); + } + TFLITE_DCHECK_EQ(concat_size, output_shape.Dims(axis)); + int64_t outer_size = 1; + for (int i = 0; i < axis; ++i) { + outer_size *= output_shape.Dims(i); + } + // For all input arrays, + // FlatSize() = outer_size * Dims(axis) * base_inner_size; + int64_t base_inner_size = 1; + for (int i = axis + 1; i < concat_dimensions; ++i) { + base_inner_size *= output_shape.Dims(i); + } + + const float inverse_output_scale = 1.f / output_scale; + uint8_t* output_ptr = output_data; + for (int k = 0; k < outer_size; k++) { + for (int i = 0; i < inputs_count; ++i) { + const int copy_size = input_shapes[i]->Dims(axis) * base_inner_size; + const uint8_t* input_ptr = input_data[i] + k * copy_size; + if (input_zeropoint[i] == output_zeropoint && + input_scale[i] == output_scale) { + memcpy(output_ptr, input_ptr, copy_size); + } else { + const float scale = input_scale[i] * inverse_output_scale; + const float bias = -input_zeropoint[i] * scale; + for (int j = 0; j < copy_size; ++j) { + const int32_t value = static_cast(tflite::TfLiteRound( + input_ptr[j] * scale + bias)) + + output_zeropoint; + output_ptr[j] = static_cast( + std::max(std::min(255, value), 0)); + } + } + output_ptr += copy_size; + } + } +} + +} // namespace reference_ops +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONCATENATION_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/conv.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/conv.h new file mode 100644 index 0000000..a244ec0 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/conv.h @@ -0,0 +1,287 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONV_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONV_H_ + +#include + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" + +namespace tflite { + +namespace reference_ops { + +inline void Conv(const ConvParams& params, const RuntimeShape& input_shape, + const float* input_data, const RuntimeShape& filter_shape, + const float* filter_data, const RuntimeShape& bias_shape, + const float* bias_data, const RuntimeShape& output_shape, + float* output_data, const RuntimeShape& im2col_shape, + float* im2col_data) { + const int stride_width = params.stride_width; + const int stride_height = params.stride_height; + const int dilation_width_factor = params.dilation_width_factor; + const int dilation_height_factor = params.dilation_height_factor; + const int pad_width = params.padding_values.width; + const int pad_height = params.padding_values.height; + const float output_activation_min = params.float_activation_min; + const float output_activation_max = params.float_activation_max; + TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); + + (void)im2col_data; // only used in optimized code. + (void)im2col_shape; // only used in optimized code. + const int batches = MatchingDim(input_shape, 0, output_shape, 0); + const int input_depth = input_shape.Dims(3); + const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3); + if (bias_data) { + TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth); + } + const int input_height = input_shape.Dims(1); + const int input_width = input_shape.Dims(2); + const int filter_height = filter_shape.Dims(1); + const int filter_width = filter_shape.Dims(2); + const int filter_input_depth = filter_shape.Dims(3); + const int groups = input_depth / filter_input_depth; + TFLITE_DCHECK_EQ(input_depth % filter_input_depth, 0); + const int filters_per_group = output_depth / groups; + const int output_height = output_shape.Dims(1); + const int output_width = output_shape.Dims(2); + + for (int batch = 0; batch < batches; ++batch) { + for (int out_y = 0; out_y < output_height; ++out_y) { + const int in_y_origin = (out_y * stride_height) - pad_height; + for (int out_x = 0; out_x < output_width; ++out_x) { + const int in_x_origin = (out_x * stride_width) - pad_width; + for (int out_channel = 0; out_channel < output_depth; ++out_channel) { + auto group = out_channel / filters_per_group; + float total = 0.f; + for (int filter_y = 0; filter_y < filter_height; ++filter_y) { + const int in_y = in_y_origin + dilation_height_factor * filter_y; + for (int filter_x = 0; filter_x < filter_width; ++filter_x) { + const int in_x = in_x_origin + dilation_width_factor * filter_x; + + // Zero padding by omitting the areas outside the image. 
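+            // Restating the arithmetic above: the input coordinates are
+            //   in_x = out_x * stride_width  - pad_width  + dilation_width_factor  * filter_x
+            //   in_y = out_y * stride_height - pad_height + dilation_height_factor * filter_y
+            // so taps that fall outside the input contribute nothing, which
+            // is equivalent to padding the input with zeros.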
+ const bool is_point_inside_image = + (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && + (in_y < input_height); + + if (!is_point_inside_image) { + continue; + } + for (int in_channel = 0; in_channel < filter_input_depth; + ++in_channel) { + float input_value = + input_data[Offset(input_shape, batch, in_y, in_x, + in_channel + group * filter_input_depth)]; + float filter_value = filter_data[Offset( + filter_shape, out_channel, filter_y, filter_x, in_channel)]; + total += (input_value * filter_value); + } + } + } + float bias_value = 0.0f; + if (bias_data) { + bias_value = bias_data[out_channel]; + } + output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] = + ActivationFunctionWithMinMax(total + bias_value, + output_activation_min, + output_activation_max); + } + } + } + } +} + +inline void Conv(const ConvParams& params, const RuntimeShape& input_shape, + const uint8_t* input_data, const RuntimeShape& filter_shape, + const uint8_t* filter_data, const RuntimeShape& bias_shape, + const int32_t* bias_data, const RuntimeShape& output_shape, + uint8_t* output_data, const RuntimeShape& im2col_shape, + uint8_t* im2col_data, void* cpu_backend_context) { + (void)cpu_backend_context; // only used in optimized code. + (void)im2col_data; // only used in optimized code. + (void)im2col_shape; // only used in optimized code. + const int stride_width = params.stride_width; + const int stride_height = params.stride_height; + const int dilation_width_factor = params.dilation_width_factor; + const int dilation_height_factor = params.dilation_height_factor; + const int pad_width = params.padding_values.width; + const int pad_height = params.padding_values.height; + const int32_t input_offset = params.input_offset; + const int32_t filter_offset = params.weights_offset; + const int32_t output_offset = params.output_offset; + const int32_t output_multiplier = params.output_multiplier; + const int output_shift = params.output_shift; + const int32_t output_activation_min = params.quantized_activation_min; + const int32_t output_activation_max = params.quantized_activation_max; + TFLITE_DCHECK_LE(output_activation_min, output_activation_max); + + TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); + const int batches = MatchingDim(input_shape, 0, output_shape, 0); + const int input_depth = input_shape.Dims(3); + const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3); + if (bias_data) { + TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth); + } + const int input_height = input_shape.Dims(1); + const int input_width = input_shape.Dims(2); + const int filter_height = filter_shape.Dims(1); + const int filter_width = filter_shape.Dims(2); + const int filter_input_depth = filter_shape.Dims(3); + const int groups = input_depth / filter_input_depth; + TFLITE_DCHECK_EQ(input_depth % filter_input_depth, 0); + const int filters_per_group = output_depth / groups; + const int output_height = output_shape.Dims(1); + const int output_width = output_shape.Dims(2); + for (int batch = 0; batch < batches; ++batch) { + for (int out_y = 0; out_y < output_height; ++out_y) { + const int in_y_origin = (out_y * stride_height) - pad_height; + for (int out_x = 0; out_x < output_width; ++out_x) { + const int in_x_origin = (out_x * stride_width) - pad_width; + for (int out_channel = 0; out_channel < output_depth; ++out_channel) { + auto group = out_channel / filters_per_group; + int32_t acc = 0; + 
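+          // Sketch of the quantized arithmetic below: acc accumulates
+          //   (input_val + input_offset) * (filter_val + filter_offset)
+          // in 32-bit (the offsets are typically the negated zero points),
+          // the int32 bias is added, MultiplyByQuantizedMultiplier rescales
+          // the sum to the output scale, and the result is shifted by
+          // output_offset and clamped to the uint8 activation range.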
for (int filter_y = 0; filter_y < filter_height; ++filter_y) { + const int in_y = in_y_origin + dilation_height_factor * filter_y; + for (int filter_x = 0; filter_x < filter_width; ++filter_x) { + const int in_x = in_x_origin + dilation_width_factor * filter_x; + + // Zero padding by omitting the areas outside the image. + const bool is_point_inside_image = + (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && + (in_y < input_height); + + if (!is_point_inside_image) { + continue; + } + + for (int in_channel = 0; in_channel < filter_input_depth; + ++in_channel) { + int32_t input_val = + input_data[Offset(input_shape, batch, in_y, in_x, + in_channel + group * filter_input_depth)]; + int32_t filter_val = filter_data[Offset( + filter_shape, out_channel, filter_y, filter_x, in_channel)]; + acc += + (filter_val + filter_offset) * (input_val + input_offset); + } + } + } + if (bias_data) { + acc += bias_data[out_channel]; + } + acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, + output_shift); + acc += output_offset; + acc = std::max(acc, output_activation_min); + acc = std::min(acc, output_activation_max); + output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] = + static_cast(acc); + } + } + } + } +} + +inline void HybridConvPerChannel( + const ConvParams& params, float* scaling_factors_ptr, + const RuntimeShape& input_shape, const int8_t* input_data, + const RuntimeShape& filter_shape, const int8_t* filter_data, + const RuntimeShape& bias_shape, const float* bias_data, + const RuntimeShape& output_shape, float* output_data, + const RuntimeShape& im2col_shape, int8_t* im2col_data, + const float* per_channel_scale, int32_t* input_offset) { + (void)im2col_data; // only used in optimized code. + (void)im2col_shape; // only used in optimized code. 
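+  // Hybrid convolution (sketch of the scheme, as far as it can be read from
+  // this routine): the filter is int8 with one scale per output channel
+  // (per_channel_scale), while each input batch carries its own scaling
+  // factor and offset (scaling_factors_ptr[batch], input_offset[batch]),
+  // apparently from dynamic quantization of the float input. The integer
+  // accumulator is mapped back to float as
+  //   acc * per_channel_scale[out_channel] * scaling_factors_ptr[batch]
+  // before the float bias and activation clamp are applied.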
+ const int stride_width = params.stride_width; + const int stride_height = params.stride_height; + const int dilation_width_factor = params.dilation_width_factor; + const int dilation_height_factor = params.dilation_height_factor; + const int pad_width = params.padding_values.width; + const int pad_height = params.padding_values.height; + const float output_activation_min = params.float_activation_min; + const float output_activation_max = params.float_activation_max; + TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); + const int batches = MatchingDim(input_shape, 0, output_shape, 0); + const int input_depth = input_shape.Dims(3); + const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3); + if (bias_data) { + TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth); + } + const int input_height = input_shape.Dims(1); + const int input_width = input_shape.Dims(2); + const int filter_height = filter_shape.Dims(1); + const int filter_width = filter_shape.Dims(2); + const int filter_input_depth = filter_shape.Dims(3); + const int groups = input_depth / filter_input_depth; + TFLITE_DCHECK_EQ(input_depth % filter_input_depth, 0); + const int filters_per_group = output_depth / groups; + const int output_height = output_shape.Dims(1); + const int output_width = output_shape.Dims(2); + for (int batch = 0; batch < batches; ++batch) { + for (int out_y = 0; out_y < output_height; ++out_y) { + for (int out_x = 0; out_x < output_width; ++out_x) { + for (int out_channel = 0; out_channel < output_depth; ++out_channel) { + auto group = out_channel / filters_per_group; + const int in_x_origin = (out_x * stride_width) - pad_width; + const int in_y_origin = (out_y * stride_height) - pad_height; + int32_t acc = 0; + for (int filter_y = 0; filter_y < filter_height; ++filter_y) { + for (int filter_x = 0; filter_x < filter_width; ++filter_x) { + for (int in_channel = 0; in_channel < filter_input_depth; + ++in_channel) { + const int in_x = in_x_origin + dilation_width_factor * filter_x; + const int in_y = + in_y_origin + dilation_height_factor * filter_y; + // If the location is outside the bounds of the input image, + // use zero as a default value. + if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && + (in_y < input_height)) { + int32_t input_val = input_data[Offset( + input_shape, batch, in_y, in_x, + in_channel + group * filter_input_depth)]; + int32_t filter_val = + filter_data[Offset(filter_shape, out_channel, filter_y, + filter_x, in_channel)]; + acc += filter_val * (input_val - input_offset[batch]); + } + } + } + } + float acc_float = + acc * per_channel_scale[out_channel] * scaling_factors_ptr[batch]; + if (bias_data) { + acc_float += bias_data[out_channel]; + } + output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] = + ActivationFunctionWithMinMax(acc_float, output_activation_min, + output_activation_max); + } + } + } + } +} + +} // namespace reference_ops +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONV_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/cumsum.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/cumsum.h new file mode 100644 index 0000000..56698a0 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/cumsum.h @@ -0,0 +1,175 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CUMSUM_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CUMSUM_H_ + +#include +#include +#include + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h" + +namespace tflite { +namespace reference_ops { + +template +inline void CumSum(const T* input_data, const RuntimeShape& shape, int32_t axis, + bool exclusive, bool reverse, T* output_data) { + const int32_t rank = shape.DimensionsCount(); + TFLITE_DCHECK_GE(rank, 1); + TFLITE_DCHECK_GE(axis, 0); + TFLITE_DCHECK_LT(axis, rank); + + size_t inner = 1; + size_t outer = 1; + size_t depth = 1; + for (int32_t i = 0; i < rank; i++) { + if (i < axis) + inner *= shape.Dims(i); + else if (i > axis) + outer *= shape.Dims(i); + else + depth = shape.Dims(i); + } + + for (size_t outer_index = 0; outer_index < outer; outer_index++) { + size_t outer_index_adj; + if (reverse) + outer_index_adj = (outer - 1) - outer_index; + else + outer_index_adj = outer_index; + for (size_t inner_index = 0; inner_index < inner; inner_index++) { + T accumulator = 0; + size_t inner_index_adj; + if (reverse) + inner_index_adj = (inner - 1) - inner_index; + else + inner_index_adj = inner_index; + for (size_t depth_index = 0; depth_index < depth; depth_index++) { + size_t depth_index_adj; + if (reverse) + depth_index_adj = (depth - 1) - depth_index; + else + depth_index_adj = depth_index; + + size_t index = outer_index_adj; + index += inner_index_adj * depth * outer; + index += depth_index_adj * outer; + + if (exclusive) { + output_data[index] = accumulator; + accumulator += input_data[index]; + } else { + accumulator += input_data[index]; + output_data[index] = accumulator; + } + } + } + } +} + +// +// Quantized INT8 CUMSUM +// +inline void CumSum(const ArithmeticParams& params, const int8_t* input_data, + const RuntimeShape& shape, int32_t axis, bool exclusive, + bool reverse, int8_t* output_data) { + TFLITE_DCHECK_LE(params.quantized_activation_min, + params.quantized_activation_max); + // Input offset is negative input zero point. Activation tensors are + // asymmetric quantized so they span the full int8 range. + // All inputs should have same zero-point and scale, this is checked during + // Prepare stage. 
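+ // Example of the cumulative sum these kernels compute (illustrative values):
+ // for a 1-D tensor {1, 2, 3, 4} with axis = 0,
+ //   exclusive = false, reverse = false  ->  {1, 3, 6, 10}
+ //   exclusive = true,  reverse = false  ->  {0, 1, 3, 6}
+ //   exclusive = false, reverse = true   ->  {10, 9, 7, 4}
+ // A hypothetical call to the float template above would look like
+ //   CumSum<float>(input_data, RuntimeShape({4}), /*axis=*/0,
+ //                 /*exclusive=*/false, /*reverse=*/false, output_data);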
+ TFLITE_DCHECK_GE(-params.input1_offset, std::numeric_limits::min()); + TFLITE_DCHECK_LE(-params.input1_offset, std::numeric_limits::max()); + + const int32_t rank = shape.DimensionsCount(); + TFLITE_DCHECK_GE(rank, 1); + TFLITE_DCHECK_GE(axis, 0); + TFLITE_DCHECK_LT(axis, rank); + + size_t inner = 1; + size_t outer = 1; + size_t depth = 1; + for (int32_t i = 0; i < rank; i++) { + if (i < axis) + inner *= shape.Dims(i); + else if (i > axis) + outer *= shape.Dims(i); + else + depth = shape.Dims(i); + } + + for (size_t outer_index = 0; outer_index < outer; outer_index++) { + size_t outer_index_adj; + if (reverse) + outer_index_adj = (outer - 1) - outer_index; + else + outer_index_adj = outer_index; + for (size_t inner_index = 0; inner_index < inner; inner_index++) { + int32_t accumulator = params.input1_offset; // accumulator = 0 + accumulator *= (1 << params.left_shift); + accumulator = MultiplyByQuantizedMultiplierSmallerThanOneExp( + accumulator, params.input1_multiplier, params.input1_shift); + + size_t inner_index_adj; + if (reverse) + inner_index_adj = (inner - 1) - inner_index; + else + inner_index_adj = inner_index; + + for (size_t depth_index = 0; depth_index < depth; depth_index++) { + size_t depth_index_adj; + if (reverse) + depth_index_adj = (depth - 1) - depth_index; + else + depth_index_adj = depth_index; + + size_t index = outer_index_adj; + index += inner_index_adj * depth * outer; + index += depth_index_adj * outer; + + const int32_t y = params.input1_offset + input_data[index]; + const int32_t shifted_y = y * (1 << params.left_shift); + const int32_t scaled_y = MultiplyByQuantizedMultiplierSmallerThanOneExp( + shifted_y, params.input1_multiplier, params.input1_shift); + + int32_t scaled_output; + if (exclusive) { + scaled_output = accumulator; + accumulator += scaled_y; + } else { + accumulator += scaled_y; + scaled_output = accumulator; + } + + const int32_t raw_output = + MultiplyByQuantizedMultiplierSmallerThanOneExp( + scaled_output, params.output_multiplier, params.output_shift) + + params.output_offset; + const int32_t clamped_output = + std::min(params.quantized_activation_max, + std::max(params.quantized_activation_min, raw_output)); + output_data[index] = static_cast(clamped_output); + } + } + } +} + +} // namespace reference_ops +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CUMSUM_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/depth_to_space.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/depth_to_space.h new file mode 100644 index 0000000..41b2679 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/depth_to_space.h @@ -0,0 +1,79 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTH_TO_SPACE_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTH_TO_SPACE_H_ + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" + +namespace tflite { +namespace reference_ops { + +template +inline void DepthToSpace(const tflite::DepthToSpaceParams& op_params, + const RuntimeShape& unextended_input_shape, + const T* input_data, + const RuntimeShape& unextended_output_shape, + T* output_data) { + TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4); + const RuntimeShape input_shape = + RuntimeShape::ExtendedShape(4, unextended_input_shape); + const RuntimeShape output_shape = + RuntimeShape::ExtendedShape(4, unextended_output_shape); + + const int input_depth = input_shape.Dims(3); + const int input_width = input_shape.Dims(2); + const int input_height = input_shape.Dims(1); + const int input_batch = input_shape.Dims(0); + + const int output_depth = output_shape.Dims(3); + const int output_width = output_shape.Dims(2); + const int output_height = output_shape.Dims(1); + const int output_batch = output_shape.Dims(0); + + const int32_t block_size = op_params.block_size; + + TFLITE_DCHECK_EQ(input_width * block_size, output_width); + TFLITE_DCHECK_EQ(input_height * block_size, output_height); + TFLITE_DCHECK_EQ(input_depth, output_depth * block_size * block_size); + TFLITE_DCHECK_EQ(input_batch, output_batch); + + for (int out_b = 0; out_b < output_batch; ++out_b) { + for (int out_h = 0; out_h < output_height; ++out_h) { + for (int out_w = 0; out_w < output_width; ++out_w) { + for (int out_d = 0; out_d < output_depth; ++out_d) { + const int in_d = + out_d + ((out_h % block_size) * block_size + out_w % block_size) * + output_depth; + + const int in_w = out_w / block_size; + const int in_h = out_h / block_size; + const int in_b = out_b; + + const int input_index = Offset(input_shape, in_b, in_h, in_w, in_d); + const int output_index = + Offset(output_shape, out_b, out_h, out_w, out_d); + + output_data[output_index] = input_data[input_index]; + } + } + } + } +} + +} // namespace reference_ops +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTH_TO_SPACE_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h new file mode 100644 index 0000000..33a1b9b --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h @@ -0,0 +1,100 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
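+ // (Note on the DepthToSpace kernel above, illustrative shapes only: with
+ // block_size = 2, an input of shape 1x1x1x4 holding {a, b, c, d} becomes an
+ // output of shape 1x2x2x1 laid out spatially as
+ //   a b
+ //   c d
+ // because in_d = out_d + ((out_h % 2) * 2 + out_w % 2) * output_depth.)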
+==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_FLOAT_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_FLOAT_H_ + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" + +namespace tflite { +namespace reference_ops { + +inline void DepthwiseConv( + const DepthwiseParams& params, const RuntimeShape& input_shape, + const float* input_data, const RuntimeShape& filter_shape, + const float* filter_data, const RuntimeShape& bias_shape, + const float* bias_data, const RuntimeShape& output_shape, + float* output_data) { + const int stride_width = params.stride_width; + const int stride_height = params.stride_height; + const int dilation_width_factor = params.dilation_width_factor; + const int dilation_height_factor = params.dilation_height_factor; + const int pad_width = params.padding_values.width; + const int pad_height = params.padding_values.height; + const int depth_multiplier = params.depth_multiplier; + const float output_activation_min = params.float_activation_min; + const float output_activation_max = params.float_activation_max; + TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); + + const int batches = MatchingDim(input_shape, 0, output_shape, 0); + const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3); + const int input_height = input_shape.Dims(1); + const int input_width = input_shape.Dims(2); + const int input_depth = input_shape.Dims(3); + const int filter_height = filter_shape.Dims(1); + const int filter_width = filter_shape.Dims(2); + const int output_height = output_shape.Dims(1); + const int output_width = output_shape.Dims(2); + TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier); + TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth); + + for (int b = 0; b < batches; ++b) { + for (int out_y = 0; out_y < output_height; ++out_y) { + for (int out_x = 0; out_x < output_width; ++out_x) { + for (int ic = 0; ic < input_depth; ++ic) { + for (int m = 0; m < depth_multiplier; m++) { + const int oc = m + ic * depth_multiplier; + const int in_x_origin = (out_x * stride_width) - pad_width; + const int in_y_origin = (out_y * stride_height) - pad_height; + float total = 0.f; + for (int filter_y = 0; filter_y < filter_height; ++filter_y) { + for (int filter_x = 0; filter_x < filter_width; ++filter_x) { + const int in_x = in_x_origin + dilation_width_factor * filter_x; + const int in_y = + in_y_origin + dilation_height_factor * filter_y; + // If the location is outside the bounds of the input image, + // use zero as a default value. 
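+ // The depthwise output channel is derived from the input channel as
+ //   oc = m + ic * depth_multiplier,
+ // so with depth_multiplier = 2 (illustrative value) input channel 3 feeds
+ // output channels 6 and 7, each with its own filter_height x filter_width
+ // slice of filter_data.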
+ if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && + (in_y < input_height)) { + float input_value = + input_data[Offset(input_shape, b, in_y, in_x, ic)]; + float filter_value = filter_data[Offset( + filter_shape, 0, filter_y, filter_x, oc)]; + total += (input_value * filter_value); + } + } + } + float bias_value = 0.0f; + if (bias_data) { + bias_value = bias_data[oc]; + } + output_data[Offset(output_shape, b, out_y, out_x, oc)] = + ActivationFunctionWithMinMax(total + bias_value, + output_activation_min, + output_activation_max); + } + } + } + } + } +} + +} // end namespace reference_ops +} // end namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_FLOAT_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h new file mode 100644 index 0000000..4dc5245 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h @@ -0,0 +1,319 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_UINT8_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_UINT8_H_ + +#include + +#include "edge-impulse-sdk/third_party/gemmlowp/fixedpoint/fixedpoint.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" + +namespace tflite { + +// Used in tests and template parameters to control which version of depthwise +// convolution is called. Primarily for reference code, and specializations +// forced in tests. +enum class DepthwiseConvImplementation { + // Run all tests against kUseStandardEntry even if also testing another + // kernel, since we need to be sure that the main DepthwiseConv() function in + // optimized_ops.h dispatches to a correctly-executing kernel. + kNone = 0, // The "default" option: use the normal + // DepthwiseConv kernel (entry) function. + kUseGenericKernel, // Forced use of generic kernel. + kUseNeon3x3, // 3x3 kernel that uses NEON when available. + kUseNeon3x3DotProduct, // 3x3 kernel that uses dot-product enabled NEON + // when available. + kUseCModel3x3DotProduct, // 3x3 kernel, reference C model that is intended + // to match overall design NEON code. + kUseUnwound3x3DotProduct, // 3x3 kernel, reference C model with unwound loops + // and some arrays. + kUseIntrinsics3x3DotProduct, // 3x3 kernel using NEON intrinsics. +}; + +// Category of depthwise convolution output rounding. +enum class DepthwiseConvOutputRounding { + kNone = 0, // Invalid: specific method must be specified. + kAwayFromZero, // Original method: exact halves rounded away from zero. 
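+ // Example of the difference (illustrative): a scaled
+ // accumulator of -2.5 rounds to -3 with kAwayFromZero
+ // and to -2 with kUpward.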
+ kUpward, // Halves towards +infinity: adds 0.5 before truncate. + // This is where a future kNearestEven would be placed. +}; + +// Category of depthwise convolution depth multiplication. +enum class DepthwiseConvDepthMultiplication { + kNoMultiplication = 0, // Depth multiplier = 1. + kUnitInputDepth, // Input depth = 1, output depth = depth multiplier. +}; + +namespace reference_ops { +namespace depthwise_conv { + +template +inline int32_t DepthwiseConvRound(int32_t x, int32_t quantized_multiplier, + int shift) { + TFLITE_DCHECK_NE(output_rounding, DepthwiseConvOutputRounding::kNone); + return MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift); +} + +// Single-rounding MultiplyByQuantizedMultiplier +#if TFLITE_SINGLE_ROUNDING +template <> +inline int32_t DepthwiseConvRound( + int32_t x, int32_t quantized_multiplier, int shift) { + using gemmlowp::RoundingDivideByPOT; + using gemmlowp::SaturatingRoundingDoublingHighMul; + int left_shift = shift > 0 ? shift : 0; + int right_shift = shift > 0 ? 0 : -shift; + return RoundingDivideByPOT(SaturatingRoundingDoublingHighMul( + x * (1 << left_shift), quantized_multiplier), + right_shift); +} + +template <> +inline int32_t DepthwiseConvRound( + int32_t x, int32_t quantized_multiplier, int shift) { + return MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift); +} +// Double-rounding MultiplyByQuantizedMultiplier +#else +template <> +inline int32_t DepthwiseConvRound( + int32_t x, int32_t quantized_multiplier, int shift) { + return MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift); +} + +template <> +inline int32_t DepthwiseConvRound( + int32_t x, int32_t quantized_multiplier, int shift) { + using gemmlowp::SaturatingRoundingDoublingHighMul; + const int left_shift = shift > 0 ? shift : 0; + const int right_shift = shift > 0 ? 0 : -shift; + const int rounding_offset = right_shift > 0 ? 
1 << (right_shift - 1) : 0; + return (SaturatingRoundingDoublingHighMul(x * (1 << left_shift), + quantized_multiplier) + + rounding_offset) >> + right_shift; +} +#endif // TFLITE_SINGLE_ROUNDING + +template +struct DepthwiseConvBasicKernel { + static inline void Run( + const DepthwiseParams& params, const RuntimeShape& input_shape, + const uint8_t* input_data, const RuntimeShape& filter_shape, + const uint8_t* filter_data, const RuntimeShape& bias_shape, + const int32_t* bias_data, const RuntimeShape& output_shape, + uint8_t* output_data) { + const int stride_width = params.stride_width; + const int stride_height = params.stride_height; + const int dilation_width_factor = params.dilation_width_factor; + const int dilation_height_factor = params.dilation_height_factor; + const int pad_width = params.padding_values.width; + const int pad_height = params.padding_values.height; + const int depth_multiplier = params.depth_multiplier; + const int32_t output_activation_min = params.quantized_activation_min; + const int32_t output_activation_max = params.quantized_activation_max; + const int32_t input_offset = params.input_offset; + const int32_t filter_offset = params.weights_offset; + const int32_t output_offset = params.output_offset; + const int32_t output_multiplier = params.output_multiplier; + const int output_shift = params.output_shift; + TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); + + TFLITE_DCHECK_LE(output_activation_min, output_activation_max); + const int batches = MatchingDim(input_shape, 0, output_shape, 0); + const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3); + const int input_height = input_shape.Dims(1); + const int input_width = input_shape.Dims(2); + const int input_depth = input_shape.Dims(3); + const int filter_height = filter_shape.Dims(1); + const int filter_width = filter_shape.Dims(2); + const int output_height = output_shape.Dims(1); + const int output_width = output_shape.Dims(2); + TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier); + TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth); + + for (int b = 0; b < batches; ++b) { + for (int out_y = 0; out_y < output_height; ++out_y) { + for (int out_x = 0; out_x < output_width; ++out_x) { + for (int ic = 0; ic < input_depth; ++ic) { + for (int m = 0; m < depth_multiplier; m++) { + const int oc = m + ic * depth_multiplier; + const int in_x_origin = (out_x * stride_width) - pad_width; + const int in_y_origin = (out_y * stride_height) - pad_height; + int32_t acc = 0; + for (int filter_y = 0; filter_y < filter_height; ++filter_y) { + for (int filter_x = 0; filter_x < filter_width; ++filter_x) { + const int in_x = + in_x_origin + dilation_width_factor * filter_x; + const int in_y = + in_y_origin + dilation_height_factor * filter_y; + // If the location is outside the bounds of the input image, + // use zero as a default value. 
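+ // With uint8 affine quantization real = scale * (q - zero_point), and the
+ // offsets below being the negated zero points, the expression
+ //   (filter_val + filter_offset) * (input_val + input_offset)
+ // accumulates the product of the two de-biased values in int32. For
+ // illustration (hypothetical values): with input_offset = -128 and
+ // filter_offset = -128, the pair (input_val = 130, filter_val = 131)
+ // contributes (131 - 128) * (130 - 128) = 6 to the accumulator.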
+ if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && + (in_y < input_height)) { + int32_t input_val = + input_data[Offset(input_shape, b, in_y, in_x, ic)]; + int32_t filter_val = filter_data[Offset( + filter_shape, 0, filter_y, filter_x, oc)]; + acc += (filter_val + filter_offset) * + (input_val + input_offset); + } + } + } + if (bias_data) { + acc += bias_data[oc]; + } + acc = DepthwiseConvRound(acc, output_multiplier, + output_shift); + acc += output_offset; + acc = std::max(acc, output_activation_min); + acc = std::min(acc, output_activation_max); + output_data[Offset(output_shape, b, out_y, out_x, oc)] = + static_cast(acc); + } + } + } + } + } + } + + // TODO(b/148596273): Reconcile reference versions, perhaps with common + // MultiplyByQuantizedMultiplier or DepthwiseConvRound function. + static inline void RunPerChannel( + const DepthwiseParams& params, const RuntimeShape& input_shape, + const int8_t* input_data, const RuntimeShape& filter_shape, + const int8_t* filter_data, const RuntimeShape& bias_shape, + const int32_t* bias_data, const RuntimeShape& output_shape, + int8_t* output_data) { + // Get parameters. + // TODO(b/141565753): Re-introduce ScopedProfilingLabel on Micro. + const int stride_width = params.stride_width; + const int stride_height = params.stride_height; + const int dilation_width_factor = params.dilation_width_factor; + const int dilation_height_factor = params.dilation_height_factor; + const int pad_width = params.padding_values.width; + const int pad_height = params.padding_values.height; + const int depth_multiplier = params.depth_multiplier; + const int32_t input_offset = params.input_offset; + const int32_t output_offset = params.output_offset; + const int32_t output_activation_min = params.quantized_activation_min; + const int32_t output_activation_max = params.quantized_activation_max; + const int32_t* output_multiplier = params.output_multiplier_per_channel; + const int32_t* output_shift = params.output_shift_per_channel; + + // Check dimensions of the tensors. 
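+ // Unlike the uint8 kernel above, this int8 path re-scales every output
+ // channel with its own (output_multiplier, output_shift) pair taken from
+ // params.output_multiplier_per_channel / output_shift_per_channel. As an
+ // illustration, a channel whose real re-scale factor is 0.25 can be encoded
+ // as multiplier = 1 << 30 with shift = -1, while neighbouring channels use
+ // different pairs.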
+ TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); + + TFLITE_DCHECK_LE(output_activation_min, output_activation_max); + const int batches = MatchingDim(input_shape, 0, output_shape, 0); + const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3); + const int input_height = input_shape.Dims(1); + const int input_width = input_shape.Dims(2); + const int input_depth = input_shape.Dims(3); + const int filter_height = filter_shape.Dims(1); + const int filter_width = filter_shape.Dims(2); + const int output_height = output_shape.Dims(1); + const int output_width = output_shape.Dims(2); + TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier); + TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth); + + for (int batch = 0; batch < batches; ++batch) { + for (int out_y = 0; out_y < output_height; ++out_y) { + for (int out_x = 0; out_x < output_width; ++out_x) { + for (int in_channel = 0; in_channel < input_depth; ++in_channel) { + for (int m = 0; m < depth_multiplier; ++m) { + const int output_channel = m + in_channel * depth_multiplier; + const int in_x_origin = (out_x * stride_width) - pad_width; + const int in_y_origin = (out_y * stride_height) - pad_height; + int32_t acc = 0; + for (int filter_y = 0; filter_y < filter_height; ++filter_y) { + for (int filter_x = 0; filter_x < filter_width; ++filter_x) { + const int in_x = + in_x_origin + dilation_width_factor * filter_x; + const int in_y = + in_y_origin + dilation_height_factor * filter_y; + // Zero padding by omitting the areas outside the image. + const bool is_point_inside_image = + (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && + (in_y < input_height); + if (is_point_inside_image) { + int32_t input_val = input_data[Offset( + input_shape, batch, in_y, in_x, in_channel)]; + int32_t filter_val = filter_data[Offset( + filter_shape, 0, filter_y, filter_x, output_channel)]; + // Accumulate with 32 bits accumulator. + // In the nudging process during model quantization, we + // force real value of 0.0 be represented by a quantized + // value. This guarantees that the input_offset is a int8_t, + // even though it is represented using int32_t. int32_t += + // int8_t + // * (int8_t - int8_t) so the highest value we can get from + // each accumulation is [-127, 127] * ([-128, 127] - + // [-128, 127]), which is [-32512, 32512]. log2(32512) + // = 14.98, which means we can accumulate at least 2^16 + // multiplications without overflow. The accumulator is + // applied to a filter so the accumulation logic will hold + // as long as the filter size (filter_y * filter_x * + // in_channel) does not exceed 2^16, which is the case in + // all the models we have seen so far. 
+ acc += filter_val * (input_val + input_offset); + } + } + } + if (bias_data) { + acc += bias_data[output_channel]; + } + acc = DepthwiseConvRound( + acc, output_multiplier[output_channel], + output_shift[output_channel]); + acc += output_offset; + acc = std::max(acc, output_activation_min); + acc = std::min(acc, output_activation_max); + output_data[Offset(output_shape, batch, out_y, out_x, + output_channel)] = static_cast(acc); + } + } + } + } + } + } +}; + +} // namespace depthwise_conv + +inline void DepthwiseConv( + const DepthwiseParams& params, const RuntimeShape& input_shape, + const uint8_t* input_data, const RuntimeShape& filter_shape, + const uint8_t* filter_data, const RuntimeShape& bias_shape, + const int32_t* bias_data, const RuntimeShape& output_shape, + uint8_t* output_data) { + return depthwise_conv::DepthwiseConvBasicKernel< + DepthwiseConvOutputRounding::kAwayFromZero>::Run(params, input_shape, + input_data, filter_shape, + filter_data, bias_shape, + bias_data, output_shape, + output_data); +} + +} // namespace reference_ops +} // end namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_UINT8_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/dequantize.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/dequantize.h new file mode 100644 index 0000000..70f6009 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/dequantize.h @@ -0,0 +1,78 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEQUANTIZE_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEQUANTIZE_H_ + +#include + +#include + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" + +namespace tflite { + +namespace reference_ops { + +// Dequantizes into a float without rounding. +template +inline void Dequantize(const tflite::DequantizationParams& op_params, + const RuntimeShape& input_shape, + const InputT* input_data, + const RuntimeShape& output_shape, OutputT* output_data) { + int32_t zero_point = op_params.zero_point; + const double scale = op_params.scale; + const int flat_size = MatchingFlatSize(input_shape, output_shape); + + for (int i = 0; i < flat_size; i++) { + const int32_t val = input_data[i]; + const OutputT result = static_cast(scale * (val - zero_point)); + output_data[i] = result; + } +} + +// Dequantizes per-channel quantized tensor to float. +template +inline void PerChannelDequantize( + const tflite::PerChannelDequantizationParams& op_params, + const RuntimeShape& input_shape, const T* input_data, + const RuntimeShape& output_shape, float* output_data) { + // Ensure flat size is same. 
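+ // Each element is de-quantized with the scale and zero point of its channel
+ // along quantized_dimension:
+ //   output[i] = scale[c] * (input[i] - zero_point[c])
+ // For illustration (hypothetical values): scale[2] = 0.1f and
+ // zero_point[2] = 5 turn a stored value of 25 into 0.1f * (25 - 5) = 2.0f.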
+ MatchingFlatSize(input_shape, output_shape); + + const int32_t* zero_point = op_params.zero_point; + const float* scale = op_params.scale; + const int32_t quantized_dimension = op_params.quantized_dimension; + const int32_t num_dims = input_shape.DimensionsCount(); + const int32_t* dims_data = input_shape.DimsData(); + std::vector current_dim(num_dims, 0); + + do { + size_t offset = + ReducedOutputOffset(num_dims, reinterpret_cast(dims_data), + current_dim.data(), 0, nullptr); + const int channel = current_dim[quantized_dimension]; + const int32_t val = input_data[offset]; + const float result = + static_cast(scale[channel] * (val - zero_point[channel])); + output_data[offset] = result; + } while (NextIndex(num_dims, reinterpret_cast(dims_data), + current_dim.data())); +} + +} // namespace reference_ops + +} // namespace tflite +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEQUANTIZE_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/div.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/div.h new file mode 100644 index 0000000..71bbeaf --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/div.h @@ -0,0 +1,247 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DIV_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DIV_H_ + +#include + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" + +namespace tflite { + +namespace reference_ops { + +template +inline void DivCheckArithmeticParams(const ArithmeticParams& params) { + TFLITE_DCHECK_LE(params.quantized_activation_min, + params.quantized_activation_max); + // Input offset is negative input zero point. Activation tensors are + // asymmetric quantized so they span the full int8 range. + constexpr int32_t max_value = + static_cast(std::numeric_limits::max()); + TFLITE_DCHECK_GE(params.input1_offset, -max_value); + TFLITE_DCHECK_LE(params.input1_offset, max_value); + TFLITE_DCHECK_GE(params.input2_offset, -max_value); + TFLITE_DCHECK_LE(params.input2_offset, max_value); + TFLITE_DCHECK_GE(params.output_offset, -max_value); + TFLITE_DCHECK_LE(params.output_offset, max_value); +} + +// Element-wise div that can often be used for inner loop of broadcast Div as +// well as the non-broadcast Div. +template +inline void DivElementwise(int size, const ArithmeticParams& params, + const T* input1_data, const T* input2_data, + T* output_data) { + DivCheckArithmeticParams(params); + + for (int i = 0; i < size; ++i) { + int32_t input1_val = params.input1_offset + input1_data[i]; + int32_t input2_val = params.input2_offset + input2_data[i]; + TFLITE_DCHECK_NE(input2_val, 0); + if (input2_val < 0) { + // Invert signs to avoid a negative input2_val as input2_inv needs to be + // positive to be used as multiplier of MultiplyByQuantizedMultiplier. 
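+ // The quotient is evaluated as a fixed-point multiplication by a reciprocal:
+ // GetReciprocal returns an approximation of 1 / input2_val together with a
+ // shift, the numerator is normalised by its headroom, and output_multiplier,
+ // output_shift and output_offset then map the result onto the quantized
+ // output scale before clamping to the activation range. Sketch with
+ // hypothetical values: 64 / 4 becomes 64 multiplied by a fixed-point 1/4,
+ // then re-scaled and clamped to
+ // [quantized_activation_min, quantized_activation_max].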
+ input1_val = -input1_val; + input2_val = -input2_val; + } + int recip_shift; + const int32_t input2_inv = GetReciprocal(input2_val, 31, &recip_shift); + const int headroom = CountLeadingSignBits(input1_val); + const int32_t unscaled_quotient = + MultiplyByQuantizedMultiplierGreaterThanOne(input1_val, input2_inv, + headroom); + const int total_shift = params.output_shift - recip_shift - headroom; + const int32_t unclamped_result = + params.output_offset + + MultiplyByQuantizedMultiplierSmallerThanOneExp( + unscaled_quotient, params.output_multiplier, total_shift); + const int32_t clamped_output = + std::min(params.quantized_activation_max, + std::max(params.quantized_activation_min, unclamped_result)); + output_data[i] = static_cast(clamped_output); + } +} + +inline void Div(const ArithmeticParams& params, + const RuntimeShape& input1_shape, const uint8_t* input1_data, + const RuntimeShape& input2_shape, const uint8_t* input2_data, + const RuntimeShape& output_shape, uint8_t* output_data) { + TFLITE_DCHECK_LE(params.quantized_activation_min, + params.quantized_activation_max); + const int flat_size = + MatchingElementsSize(input1_shape, input2_shape, output_shape); + + DivElementwise(flat_size, params, input1_data, input2_data, output_data); +} + +inline void Div(const ArithmeticParams& params, + const RuntimeShape& input1_shape, const int8_t* input1_data, + const RuntimeShape& input2_shape, const int8_t* input2_data, + const RuntimeShape& output_shape, int8_t* output_data) { + TFLITE_DCHECK_LE(params.quantized_activation_min, + params.quantized_activation_max); + const int flat_size = + MatchingElementsSize(input1_shape, input2_shape, output_shape); + + DivElementwise(flat_size, params, input1_data, input2_data, output_data); +} + +template +inline void BroadcastDivSlowQuantized( + const ArithmeticParams& params, const RuntimeShape& unextended_input1_shape, + const T* input1_data, const RuntimeShape& unextended_input2_shape, + const T* input2_data, const RuntimeShape& unextended_output_shape, + T* output_data) { + TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), N); + TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), N); + TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), N); + + NdArrayDesc desc1; + NdArrayDesc desc2; + NdArrayDesc output_desc; + NdArrayDescsForElementwiseBroadcast(unextended_input1_shape, + unextended_input2_shape, &desc1, &desc2); + CopyDimsToDesc(RuntimeShape::ExtendedShape(N, unextended_output_shape), + &output_desc); + + DivCheckArithmeticParams(params); + + auto div_func = [&](int indexes[N]) { + int32_t input1_val = + params.input1_offset + input1_data[SubscriptToIndex(desc1, indexes)]; + int32_t input2_val = + params.input2_offset + input2_data[SubscriptToIndex(desc2, indexes)]; + TFLITE_DCHECK_NE(input2_val, 0); + if (input2_val < 0) { + // Invert signs to avoid a negative input2_val as input2_inv needs to be + // positive to be used as multiplier of MultiplyByQuantizedMultiplier. 
+ input1_val = -input1_val; + input2_val = -input2_val; + } + int recip_shift; + const int32_t input2_inv = GetReciprocal(input2_val, 31, &recip_shift); + const int headroom = CountLeadingSignBits(input1_val); + const int32_t unscaled_quotient = + MultiplyByQuantizedMultiplierGreaterThanOne(input1_val, input2_inv, + headroom); + const int total_shift = params.output_shift - recip_shift - headroom; + const int32_t unclamped_result = + params.output_offset + + MultiplyByQuantizedMultiplierSmallerThanOneExp( + unscaled_quotient, params.output_multiplier, total_shift); + const int32_t clamped_output = + std::min(params.quantized_activation_max, + std::max(params.quantized_activation_min, unclamped_result)); + output_data[SubscriptToIndex(output_desc, indexes)] = + static_cast(clamped_output); + }; + NDOpsHelper(output_desc, div_func); +} + +template +inline void BroadcastDivSlow(const ArithmeticParams& params, + const RuntimeShape& unextended_input1_shape, + const uint8_t* input1_data, + const RuntimeShape& unextended_input2_shape, + const uint8_t* input2_data, + const RuntimeShape& unextended_output_shape, + uint8_t* output_data) { + BroadcastDivSlowQuantized( + params, unextended_input1_shape, input1_data, unextended_input2_shape, + input2_data, unextended_output_shape, output_data); +} + +template +inline void BroadcastDivSlow(const ArithmeticParams& params, + const RuntimeShape& unextended_input1_shape, + const int8_t* input1_data, + const RuntimeShape& unextended_input2_shape, + const int8_t* input2_data, + const RuntimeShape& unextended_output_shape, + int8_t* output_data) { + BroadcastDivSlowQuantized( + params, unextended_input1_shape, input1_data, unextended_input2_shape, + input2_data, unextended_output_shape, output_data); +} + +// TODO(jiawen): We can implement BroadcastDiv on buffers of arbitrary +// dimensionality if the runtime code does a single loop over one dimension +// that handles broadcasting as the base case. The code generator would then +// generate max(D1, D2) nested for loops. +template +void BroadcastDivSlow(const ArithmeticParams& params, + const RuntimeShape& unextended_input1_shape, + const T* input1_data, + const RuntimeShape& unextended_input2_shape, + const T* input2_data, + const RuntimeShape& unextended_output_shape, + T* output_data) { + T output_activation_min; + T output_activation_max; + GetActivationParams(params, &output_activation_min, &output_activation_max); + + TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), N); + TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), N); + TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), N); + + NdArrayDesc desc1; + NdArrayDesc desc2; + NdArrayDesc output_desc; + NdArrayDescsForElementwiseBroadcast(unextended_input1_shape, + unextended_input2_shape, &desc1, &desc2); + CopyDimsToDesc(RuntimeShape::ExtendedShape(N, unextended_output_shape), + &output_desc); + + // In Tensorflow, the dimensions are canonically named (batch_number, row, + // col, channel), with extents (batches, height, width, depth), with the + // trailing dimension changing most rapidly (channels has the smallest + // stride, typically 1 element). + // + // In generated C code, we store arrays with the dimensions reversed. The + // first dimension has smallest stride. 
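+ // Illustrative broadcast (hypothetical shapes): dividing a [2, 3] tensor by
+ // a [1, 3] tensor runs div_func over the extended output shape [2, 3];
+ // SubscriptToIndex maps every output index back into each operand, so the
+ // single row of input2 is reused for both rows of input1.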
+ + auto div_func = [&](int indexes[N]) { + output_data[SubscriptToIndex(output_desc, indexes)] = + ActivationFunctionWithMinMax( + input1_data[SubscriptToIndex(desc1, indexes)] / + input2_data[SubscriptToIndex(desc2, indexes)], + output_activation_min, output_activation_max); + }; + NDOpsHelper(output_desc, div_func); +} + +template +inline void Div(const ArithmeticParams& params, + const RuntimeShape& input1_shape, const T* input1_data, + const RuntimeShape& input2_shape, const T* input2_data, + const RuntimeShape& output_shape, T* output_data) { + T output_activation_min; + T output_activation_max; + GetActivationParams(params, &output_activation_min, &output_activation_max); + + const int flat_size = + MatchingElementsSize(input1_shape, input2_shape, output_shape); + for (int i = 0; i < flat_size; ++i) { + output_data[i] = ActivationFunctionWithMinMax( + input1_data[i] / input2_data[i], output_activation_min, + output_activation_max); + } +} + +} // namespace reference_ops +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DIV_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/elu.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/elu.h new file mode 100644 index 0000000..e1d50ab --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/elu.h @@ -0,0 +1,37 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ELU_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ELU_H_ + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/cppmath.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" + +namespace tflite { + +namespace reference_ops { + +inline void Elu(const RuntimeShape& input_shape, const float* input_data, + const RuntimeShape& output_shape, float* output_data) { + const int flat_size = MatchingFlatSize(input_shape, output_shape); + for (int i = 0; i < flat_size; ++i) { + const float val = input_data[i]; + output_data[i] = val < 0.0f ? TfLiteExpm1(val) : val; + } +} + +} // namespace reference_ops +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ELU_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/exp.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/exp.h new file mode 100644 index 0000000..a9a6ccc --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/exp.h @@ -0,0 +1,38 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_EXP_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_EXP_H_ + +#include + +#include "edge-impulse-sdk/third_party/ruy/ruy/profiler/instrumentation.h" // from @ruy +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" + +namespace tflite { +namespace reference_ops { + +template +inline void Exp(const T* input_data, const size_t num_elements, + T* output_data) { + ruy::profiler::ScopeLabel label("Exp"); + for (size_t idx = 0; idx < num_elements; ++idx) { + output_data[idx] = std::exp(input_data[idx]); + } +} + +} // namespace reference_ops +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_EXP_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/fill.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/fill.h new file mode 100644 index 0000000..1f140e2 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/fill.h @@ -0,0 +1,38 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FILL_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FILL_H_ + +#include + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" + +namespace tflite { +namespace reference_ops { + +template +void Fill(const RuntimeShape& value_shape, const T* value_data, + const RuntimeShape& output_shape, T* output_data) { + TFLITE_DCHECK_EQ(value_shape.DimensionsCount(), 0); + const int flat_size = output_shape.FlatSize(); + for (int i = 0; i < flat_size; ++i) { + output_data[i] = *value_data; + } +} + +} // namespace reference_ops +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FILL_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/floor.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/floor.h new file mode 100644 index 0000000..d1e0421 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/floor.h @@ -0,0 +1,39 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_H_ + +#include + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" + +namespace tflite { + +namespace reference_ops { + +inline void Floor(const RuntimeShape& input_shape, const float* input_data, + const RuntimeShape& output_shape, float* output_data) { + const int flat_size = MatchingFlatSize(input_shape, output_shape); + + for (int i = 0; i < flat_size; i++) { + int offset = i; + output_data[offset] = std::floor(input_data[offset]); + } +} + +} // namespace reference_ops +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/floor_div.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/floor_div.h new file mode 100644 index 0000000..dbda3f8 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/floor_div.h @@ -0,0 +1,35 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_DIV_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_DIV_H_ + +#include +#include + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" + +namespace tflite { +namespace reference_ops { + +template +T FloorDiv(T input1, T input2) { + return std::floor(std::divides()(static_cast(input1), + static_cast(input2))); +} + +} // namespace reference_ops +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_DIV_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/floor_mod.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/floor_mod.h new file mode 100644 index 0000000..20ce18b --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/floor_mod.h @@ -0,0 +1,44 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_MOD_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_MOD_H_ + +#include +#include + +namespace tflite { + +namespace reference_ops { + +template +T FloorMod(T input1, T input2) { + struct FloatMod { + float operator()(const float lhs, const float rhs) const { + return std::fmod(lhs, rhs); + } + }; + using ModFunc = typename std::conditional::value, + std::modulus, FloatMod>::type; + ModFunc mod_func; + T trunc_mod = mod_func(input1, input2); + return (trunc_mod != 0) && ((input2 < 0) != (trunc_mod < 0)) + ? (trunc_mod + input2) + : trunc_mod; +} + +} // namespace reference_ops +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_MOD_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/fully_connected.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/fully_connected.h new file mode 100644 index 0000000..6cd8f66 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/fully_connected.h @@ -0,0 +1,323 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
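+ // (Note on the FloorDiv / FloorMod helpers above, illustrative values:
+ // FloorDiv(-7, 3) is std::floor(-7.0 / 3.0) = -3, and FloorMod(-7, 3)
+ // starts from the truncated remainder -1 and, because -1 and 3 have
+ // different signs, returns -1 + 3 = 2, i.e. Python-style modulo.)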
+==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FULLY_CONNECTED_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FULLY_CONNECTED_H_ + +#include + +#include "edge-impulse-sdk/third_party/ruy/ruy/profiler/instrumentation.h" // from @ruy +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/cppmath.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" + +namespace tflite { +namespace reference_ops { + +inline void FullyConnected( + const FullyConnectedParams& params, const RuntimeShape& input_shape, + const float* input_data, const RuntimeShape& weights_shape, + const float* weights_data, const RuntimeShape& bias_shape, + const float* bias_data, const RuntimeShape& output_shape, + float* output_data) { + const float output_activation_min = params.float_activation_min; + const float output_activation_max = params.float_activation_max; + // TODO(b/62193649): This really should be: + // const int batches = ArraySize(output_dims, 1); + // but the current --variable_batch hack consists in overwriting the 3rd + // dimension with the runtime batch size, as we don't keep track for each + // array of which dimension is the batch dimension in it. + const int output_dims_count = output_shape.DimensionsCount(); + const int weights_dims_count = weights_shape.DimensionsCount(); + const int batches = FlatSizeSkipDim(output_shape, output_dims_count - 1); + const int output_depth = MatchingDim(weights_shape, weights_dims_count - 2, + output_shape, output_dims_count - 1); + const int accum_depth = weights_shape.Dims(weights_dims_count - 1); + for (int b = 0; b < batches; ++b) { + for (int out_c = 0; out_c < output_depth; ++out_c) { + float total = 0.f; + for (int d = 0; d < accum_depth; ++d) { + total += input_data[b * accum_depth + d] * + weights_data[out_c * accum_depth + d]; + } + float bias_value = 0.0f; + if (bias_data) { + bias_value = bias_data[out_c]; + } + output_data[out_c + output_depth * b] = ActivationFunctionWithMinMax( + total + bias_value, output_activation_min, output_activation_max); + } + } +} + +inline void FullyConnected( + const FullyConnectedParams& params, const RuntimeShape& input_shape, + const uint8_t* input_data, const RuntimeShape& filter_shape, + const uint8_t* filter_data, const RuntimeShape& bias_shape, + const int32_t* bias_data, const RuntimeShape& output_shape, + uint8_t* output_data) { + const int32_t input_offset = params.input_offset; + const int32_t filter_offset = params.weights_offset; + const int32_t output_offset = params.output_offset; + const int32_t output_multiplier = params.output_multiplier; + const int output_shift = params.output_shift; + const int32_t output_activation_min = params.quantized_activation_min; + const int32_t output_activation_max = params.quantized_activation_max; + TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2); + TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1); + + TFLITE_DCHECK_LE(output_activation_min, output_activation_max); + // TODO(b/62193649): This really should be: + // const int batches = ArraySize(output_dims, 1); + // but the current --variable_batch hack consists in overwriting the 3rd + // dimension with the runtime batch size, as we don't keep track for each + // array of which dimension is the batch dimension in it. 
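+ // Each output element is an int32 dot product of offset-corrected operands,
+ //   acc = sum_d (filter[out_c][d] + filter_offset) * (input[b][d] + input_offset),
+ // which is then re-scaled by output_multiplier / output_shift, shifted by
+ // output_offset and clamped to the activation range. Illustrative numbers
+ // (hypothetical): with accum_depth = 2, input row {10, 20}, filter row
+ // {3, 4} and both offsets 0, acc = 3 * 10 + 4 * 20 = 110 before re-scaling.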
+ const int output_dim_count = output_shape.DimensionsCount(); + const int filter_dim_count = filter_shape.DimensionsCount(); + const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1); + const int output_depth = MatchingDim(filter_shape, filter_dim_count - 2, + output_shape, output_dim_count - 1); + const int accum_depth = filter_shape.Dims(filter_dim_count - 1); + for (int b = 0; b < batches; ++b) { + for (int out_c = 0; out_c < output_depth; ++out_c) { + int32_t acc = 0; + for (int d = 0; d < accum_depth; ++d) { + int32_t input_val = input_data[b * accum_depth + d]; + int32_t filter_val = filter_data[out_c * accum_depth + d]; + acc += (filter_val + filter_offset) * (input_val + input_offset); + } + if (bias_data) { + acc += bias_data[out_c]; + } + acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift); + acc += output_offset; + acc = std::max(acc, output_activation_min); + acc = std::min(acc, output_activation_max); + output_data[out_c + output_depth * b] = static_cast(acc); + } + } +} + +inline void FullyConnected( + const FullyConnectedParams& params, const RuntimeShape& input_shape, + const uint8_t* input_data, const RuntimeShape& filter_shape, + const uint8_t* filter_data, const RuntimeShape& bias_shape, + const int32_t* bias_data, const RuntimeShape& output_shape, + int16_t* output_data) { + const int32_t input_offset = params.input_offset; + const int32_t filter_offset = params.weights_offset; + const int32_t output_offset = params.output_offset; + const int32_t output_multiplier = params.output_multiplier; + const int output_shift = params.output_shift; + const int32_t output_activation_min = params.quantized_activation_min; + const int32_t output_activation_max = params.quantized_activation_max; + + TFLITE_DCHECK_LE(output_activation_min, output_activation_max); + TFLITE_DCHECK_EQ(output_offset, 0); + // TODO(b/62193649): This really should be: + // const int batches = ArraySize(output_dims, 1); + // but the current --variable_batch hack consists in overwriting the 3rd + // dimension with the runtime batch size, as we don't keep track for each + // array of which dimension is the batch dimension in it. + const int output_dim_count = output_shape.DimensionsCount(); + const int filter_dim_count = filter_shape.DimensionsCount(); + const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1); + const int output_depth = MatchingDim(filter_shape, filter_dim_count - 2, + output_shape, output_dim_count - 1); + const int accum_depth = filter_shape.Dims(filter_dim_count - 1); + for (int b = 0; b < batches; ++b) { + for (int out_c = 0; out_c < output_depth; ++out_c) { + // Internal accumulation. + // Initialize accumulator with the bias-value. + int32_t accum = bias_data[out_c]; + // Accumulation loop. + for (int d = 0; d < accum_depth; ++d) { + int16_t input_val = input_data[b * accum_depth + d] + input_offset; + int16_t filter_val = + filter_data[out_c * accum_depth + d] + filter_offset; + accum += filter_val * input_val; + } + // Down-scale the final int32_t accumulator to the scale used by our + // (16-bit, typically 3 integer bits) fixed-point format. The quantized + // multiplier and shift here have been pre-computed offline + // (e.g. by toco). + accum = + MultiplyByQuantizedMultiplier(accum, output_multiplier, output_shift); + // Saturate, cast to int16_t, and store to output array. 
+ accum = std::max(accum, output_activation_min - output_offset); + accum = std::min(accum, output_activation_max - output_offset); + accum += output_offset; + output_data[out_c + output_depth * b] = accum; + } + } +} + +inline void ShuffledFullyConnected( + const FullyConnectedParams& params, const RuntimeShape& input_shape, + const uint8_t* input_data, const RuntimeShape& weights_shape, + const uint8_t* shuffled_weights_data, const RuntimeShape& bias_shape, + const int32_t* bias_data, const RuntimeShape& output_shape, + int16_t* output_data, uint8_t* shuffled_input_workspace_data) { + const int32_t output_multiplier = params.output_multiplier; + const int output_shift = params.output_shift; + const int32_t output_activation_min = params.quantized_activation_min; + const int32_t output_activation_max = params.quantized_activation_max; + TFLITE_DCHECK_LE(output_activation_min, output_activation_max); + + TFLITE_DCHECK_GE(input_shape.DimensionsCount(), 1); + TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2); + TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1); + // TODO(b/62193649): This really should be: + // const int batches = ArraySize(output_dims, 1); + // but the current --variable_batch hack consists in overwriting the 3rd + // dimension with the runtime batch size, as we don't keep track for each + // array of which dimension is the batch dimension in it. + const int output_dim_count = output_shape.DimensionsCount(); + const int weights_dim_count = weights_shape.DimensionsCount(); + const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1); + const int output_depth = MatchingDim(weights_shape, weights_dim_count - 2, + output_shape, output_dim_count - 1); + const int accum_depth = weights_shape.Dims(weights_dim_count - 1); + TFLITE_DCHECK((accum_depth % 16) == 0); + TFLITE_DCHECK((output_depth % 4) == 0); + + // Shuffling and xoring of input activations into the workspace buffer + uint8_t* shuffled_input_workspace_ptr = shuffled_input_workspace_data; + if (batches == 1) { + for (int i = 0; i < accum_depth; i++) { + shuffled_input_workspace_data[i] = input_data[i] ^ 0x80; + } + } else if (batches == 4) { + for (int c = 0; c < accum_depth; c += 16) { + for (int b = 0; b < 4; b++) { + const uint8_t* src_data_ptr = input_data + b * accum_depth + c; + for (int j = 0; j < 16; j++) { + uint8_t src_val = *src_data_ptr++; + // Flip the sign bit, so that the kernel will only need to + // reinterpret these uint8_t values as int8_t, getting for free the + // subtraction of the zero_point value 128. + uint8_t dst_val = src_val ^ 0x80; + *shuffled_input_workspace_ptr++ = dst_val; + } + } + } + } else { + TFLITE_DCHECK(false); + return; + } + + // Actual computation + if (batches == 1) { + int16_t* output_ptr = output_data; + // Shuffled weights have had their sign bit (0x80) pre-flipped (xor'd) + // so that just reinterpreting them as int8_t values is equivalent to + // subtracting 128 from them, thus implementing for free the subtraction of + // the zero_point value 128. + const int8_t* shuffled_weights_ptr = + reinterpret_cast(shuffled_weights_data); + // Likewise, we preshuffled and pre-xored the input data above. + const int8_t* shuffled_input_data = + reinterpret_cast(shuffled_input_workspace_data); + for (int c = 0; c < output_depth; c += 4) { + // Internal accumulation. + // Initialize accumulator with the bias-value. + int32_t accum[4] = {0}; + // Accumulation loop. 
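+      // Side note (illustrative, added in this copy): the XOR with 0x80 used
+      // for both the weights and the workspace maps e.g. the uint8_t value 0
+      // to 0x80, which reinterpreted as int8_t is -128 == 0 - 128, and 255 to
+      // 0x7F == 127 == 255 - 128, i.e. it subtracts the zero point 128 for
+      // free.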
+ for (int d = 0; d < accum_depth; d += 16) { + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 16; j++) { + int8_t input_val = shuffled_input_data[d + j]; + int8_t weights_val = *shuffled_weights_ptr++; + accum[i] += weights_val * input_val; + } + } + } + for (int i = 0; i < 4; i++) { + // Add bias value + int32_t acc = accum[i] + bias_data[c + i]; + // Down-scale the final int32_t accumulator to the scale used by our + // (16-bit, typically 3 integer bits) fixed-point format. The quantized + // multiplier and shift here have been pre-computed offline + // (e.g. by toco). + acc = + MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift); + // Saturate, cast to int16_t, and store to output array. + acc = std::max(acc, output_activation_min); + acc = std::min(acc, output_activation_max); + output_ptr[c + i] = acc; + } + } + } else if (batches == 4) { + int16_t* output_ptr = output_data; + // Shuffled weights have had their sign bit (0x80) pre-flipped (xor'd) + // so that just reinterpreting them as int8_t values is equivalent to + // subtracting 128 from them, thus implementing for free the subtraction of + // the zero_point value 128. + const int8_t* shuffled_weights_ptr = + reinterpret_cast(shuffled_weights_data); + // Likewise, we preshuffled and pre-xored the input data above. + const int8_t* shuffled_input_data = + reinterpret_cast(shuffled_input_workspace_data); + for (int c = 0; c < output_depth; c += 4) { + const int8_t* shuffled_input_ptr = shuffled_input_data; + // Accumulation loop. + // Internal accumulation. + // Initialize accumulator with the bias-value. + int32_t accum[4][4]; + for (int i = 0; i < 4; i++) { + for (int b = 0; b < 4; b++) { + accum[i][b] = 0; + } + } + for (int d = 0; d < accum_depth; d += 16) { + for (int i = 0; i < 4; i++) { + for (int b = 0; b < 4; b++) { + for (int j = 0; j < 16; j++) { + int8_t input_val = shuffled_input_ptr[16 * b + j]; + int8_t weights_val = shuffled_weights_ptr[16 * i + j]; + accum[i][b] += weights_val * input_val; + } + } + } + shuffled_input_ptr += 64; + shuffled_weights_ptr += 64; + } + for (int i = 0; i < 4; i++) { + for (int b = 0; b < 4; b++) { + // Add bias value + int32_t acc = accum[i][b] + bias_data[c + i]; + // Down-scale the final int32_t accumulator to the scale used by our + // (16-bit, typically 3 integer bits) fixed-point format. The + // quantized multiplier and shift here have been pre-computed offline + // (e.g. by toco). + acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, + output_shift); + // Saturate, cast to int16_t, and store to output array. + acc = std::max(acc, output_activation_min); + acc = std::min(acc, output_activation_max); + output_ptr[b * output_depth + c + i] = acc; + } + } + } + } else { + TFLITE_DCHECK(false); + return; + } +} + +} // namespace reference_ops +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FULLY_CONNECTED_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/hard_swish.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/hard_swish.h new file mode 100644 index 0000000..c427205 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/hard_swish.h @@ -0,0 +1,168 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_HARD_SWISH_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_HARD_SWISH_H_ + +#include + +#include "edge-impulse-sdk/third_party/ruy/ruy/profiler/instrumentation.h" // from @ruy +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" + +namespace tflite { +namespace reference_ops { + +inline int16_t SaturatingLeftShift(int16_t value, int amount) { + int64_t result = static_cast(value) * (1 << amount); + result = std::min(result, std::numeric_limits::max()); + result = std::max(result, std::numeric_limits::min()); + return result; +} + +// Similar to ARM instruction SQDMULH. +// Similar to gemmlowp::SaturatingRoundingDoublingHighMul except +// rounding to zero instead of to nearest (SQRDMULH). +inline std::int16_t SaturatingDoublingHighMul(std::int16_t a, std::int16_t b) { + bool overflow = a == b && a == std::numeric_limits::min(); + std::int32_t a_32(a); + std::int32_t b_32(b); + std::int32_t ab_32 = a_32 * b_32; + std::int16_t ab_x2_high16 = static_cast((ab_32) / (1 << 15)); + return overflow ? std::numeric_limits::max() : ab_x2_high16; +} + +template +inline void HardSwish(const RuntimeShape& input_shape, const T* input_data, + const RuntimeShape& output_shape, T* output_data) { + ruy::profiler::ScopeLabel label("ReferenceHardSwish/Float"); + auto matching_size = MatchingFlatSize(input_shape, output_shape); + const T* in_end = input_data + matching_size; + for (; input_data < in_end; input_data++, output_data++) { + const float in = *input_data; + *output_data = + in * std::min(static_cast(6), std::max(static_cast(0), in + 3)) / + 6; + } +} + +template +inline void HardSwish(const HardSwishParams& params, + const RuntimeShape& input_shape, const T* input_data, + const RuntimeShape& output_shape, T* output_data) { + ruy::profiler::ScopeLabel label("ReferenceHardSwish/Quantized"); + + const int flat_size = MatchingFlatSize(input_shape, output_shape); + + for (int i = 0; i < flat_size; i++) { + const int16_t input_value = input_data[i] - params.input_zero_point; + // Left-shift as much as we can without overflow/saturation to put + // significant bits in the high bits of our 16-bit fixedpoint values, so + // that fixed-point approximate computations below are as accurate as + // possible. + const int16_t input_value_on_hires_input_scale = input_value * (1 << 7); + // Compute the input value on essentially the output scale, just not + // right-shifted yet. This is the value that we'll use in the (x >= +3) + // case, and that in the general case we'll multiply against the "relu-ish" + // fixed-point multiplier in [0, 1]. + const int16_t input_value_on_preshift_output_scale = + gemmlowp::SaturatingRoundingDoublingHighMul( + input_value_on_hires_input_scale, + params.output_multiplier_fixedpoint_int16); + // Now compute the "relu-ish multiplier". In the (-3 <= x <= +3) case, that + // is just an affine rescaling of x from [-3, 3] to [0, 1]. 
In the general + // case, it is just that plus saturation at the boundaries of [-3, 3]. + // First, we rescale from [-3, 3] to [-1, 1], saturating. + // That is done by rescaling the input value with a fixed-point multiplier + // (reluish_multiplier_fixedpoint) and bit-shift such that we represent + // that input value on the scale where the real value 3.0f is represented + // by the quantized value 32768. (+32768 is actually not representable as + // int16_t, so this saturates at +32767, and that is seen empirically to be + // a negligible contribution to numerical error/bias). + // + // This code is careful to correctly implement any magnitude of multiplier, + // involving either a right shift or a left shift, with correct saturation + // behavior in the left-shift case. This forces this code to be more + // complicated, but is necessary for real applications: a partially + // trained quantized MobileNet v3-small model that motivated this code + // exhibits some large [min, max] range boundaries, of the order of + // magnitude of 10 or 100 depending on layers. + // + // The next few lines are basically just an ordinary + // MultiplyByQuantizedMultiplier, except that we are more careful here + // about the fine details of saturation when left-shifting, because here + // overflow in left-shift is a common case, not an anomaly as + // MultiplyByQuantizedMultiplier assumes. + int16_t reluish_value = input_value_on_hires_input_scale; + // Shift left, saturating, as much as we can while ensuring that this + // saturation will not contribute to the result. That is, left shift amount + // reduced by 1. + if (params.reluish_multiplier_exponent > 0) { + reluish_value = SaturatingLeftShift( + reluish_value, params.reluish_multiplier_exponent - 1); + } + // Apply the fixed-point multiplier, dividing the value by a divisor + // ranging in [1, 2]. + reluish_value = gemmlowp::SaturatingRoundingDoublingHighMul( + reluish_value, params.reluish_multiplier_fixedpoint_int16); + // Apply the last bit of left-shift. Thus, in the left-shifting case, if + // any saturation affects the result, it is happening here --- any + // saturation having occurred above is overwritten here, not affecting the + // result. + if (params.reluish_multiplier_exponent > 0) { + reluish_value = SaturatingLeftShift(reluish_value, 1); + } + // Shift right, in the right-shifting case. + if (params.reluish_multiplier_exponent < 0) { + reluish_value = gemmlowp::RoundingDivideByPOT( + reluish_value, -params.reluish_multiplier_exponent); + } + // At this point we have rescaled the value into a 16bit fixedpoint + // reluish_value in [-1, 1]. + // We now convert that to a 16bit fixedpoint value in [0, 1]. + reluish_value = (reluish_value + (1 << 15)) >> 1; + // Use of SaturatingDoublingHighMul here is important to cancel the biases + // from the above SaturatingRoundingDoublingHighMul. + // + // On a partially trained MobileNet-v3-small, + // + // | bias on | ImageNet + // | quantized | Top-1 + // Operation used here | values | accuracy (50k) + // --------------------------------------+------------+----------- + // SaturatingDoublingHighMul | -0.0024 | 58.920 + // SaturatingRoundingDoublingHighMul | -0.0067 | 58.064 + // + // In activations_test, this is covered by this testcase: + // QuantizedActivationsOpTest.HardSwishBias + // + const int16_t preshift_output_value = SaturatingDoublingHighMul( + reluish_value, input_value_on_preshift_output_scale); + // We were so far operating on the pre-shift output scale. 
Now we finally + // apply that output shift, arriving at the final output scale. + int16_t output_value = gemmlowp::RoundingDivideByPOT( + preshift_output_value, -params.output_multiplier_exponent); + output_value += params.output_zero_point; + output_value = + std::min(output_value, std::numeric_limits::max()); + output_value = + std::max(output_value, std::numeric_limits::min()); + output_data[i] = output_value; + } +} + +} // namespace reference_ops +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_HARD_SWISH_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/add.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/add.h new file mode 100644 index 0000000..12064e3 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/add.h @@ -0,0 +1,145 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_ADD_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_ADD_H_ + +#include +#include + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" + +namespace tflite { +namespace reference_integer_ops { + +inline void CheckArithmeticParams(const ArithmeticParams& params) { + TFLITE_DCHECK_LE(params.quantized_activation_min, + params.quantized_activation_max); + // Input offset is negative input zero point. Activation tensors are + // asymmetric quantized so they span the full int8 range. 
+ TFLITE_DCHECK_GE(-params.input1_offset, std::numeric_limits::min()); + TFLITE_DCHECK_GE(-params.input2_offset, std::numeric_limits::min()); + TFLITE_DCHECK_LE(-params.input1_offset, std::numeric_limits::max()); + TFLITE_DCHECK_LE(-params.input2_offset, std::numeric_limits::max()); +} + +inline void ElementWise( + int size, const ArithmeticParams& params, const int8_t* input1_data, + const int8_t* input2_data, int8_t* output_data, + void (*check_arithmetic_params)(const ArithmeticParams&), + int8_t (*binary_func)(int8_t, int8_t, const ArithmeticParams&)) { + CheckArithmeticParams(params); + for (int i = 0; i < size; ++i) { + output_data[i] = binary_func(input1_data[i], input2_data[i], params); + } +} + +inline void BroadcastBinaryFunction4DSlow( + const ArithmeticParams& params, const RuntimeShape& input1_shape, + const int8_t* input1_data, const RuntimeShape& input2_shape, + const int8_t* input2_data, const RuntimeShape& output_shape, + int8_t* output_data, + void (*check_arithmetic_params)(const ArithmeticParams&), + int8_t (*binary_func)(int8_t, int8_t, const ArithmeticParams&)) { + NdArrayDesc<4> desc1; + NdArrayDesc<4> desc2; + NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, + &desc2); + const RuntimeShape extended_output_shape = + RuntimeShape::ExtendedShape(4, output_shape); + + // In Tensorflow, the dimensions are canonically named (batch_number, row, + // col, channel), with extents (batches, height, width, depth), with the + // trailing dimension changing most rapidly (channels has the smallest stride, + // typically 1 element). + // + // In generated C code, we store arrays with the dimensions reversed. The + // first dimension has smallest stride. + // + // We name our variables by their Tensorflow convention, but generate C code + // nesting loops such that the innermost loop has the smallest stride for the + // best cache behavior. + for (int b = 0; b < extended_output_shape.Dims(0); ++b) { + for (int y = 0; y < extended_output_shape.Dims(1); ++y) { + for (int x = 0; x < extended_output_shape.Dims(2); ++x) { + for (int c = 0; c < extended_output_shape.Dims(3); ++c) { + output_data[Offset(extended_output_shape, b, y, x, c)] = binary_func( + input1_data[SubscriptToIndex(desc1, b, y, x, c)], + input2_data[SubscriptToIndex(desc2, b, y, x, c)], params); + } + } + } + } +} + +inline int8_t AddFunc(int8_t x, int8_t y, const ArithmeticParams& params) { + const int32_t input1_val = params.input1_offset + x; + const int32_t input2_val = params.input2_offset + y; + const int32_t shifted_input1_val = input1_val * (1 << params.left_shift); + const int32_t shifted_input2_val = input2_val * (1 << params.left_shift); + const int32_t scaled_input1_val = + MultiplyByQuantizedMultiplierSmallerThanOneExp( + shifted_input1_val, params.input1_multiplier, params.input1_shift); + const int32_t scaled_input2_val = + MultiplyByQuantizedMultiplierSmallerThanOneExp( + shifted_input2_val, params.input2_multiplier, params.input2_shift); + const int32_t raw_sum = scaled_input1_val + scaled_input2_val; + const int32_t raw_output = + MultiplyByQuantizedMultiplierSmallerThanOneExp( + raw_sum, params.output_multiplier, params.output_shift) + + params.output_offset; + const int32_t clamped_output = + std::min(params.quantized_activation_max, + std::max(params.quantized_activation_min, raw_output)); + return static_cast(clamped_output); +} + +// Element-wise add that can often be used for inner loop of broadcast add as +// well as the non-broadcast add. 
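+// Illustrative note (added in this copy): AddFunc first rescales both
+// offset-corrected inputs onto a shared higher-precision scale (the left
+// shift plus the per-input multipliers), sums them, and then requantizes the
+// sum with the output multiplier and offset, so inputs with different scales
+// can be added directly in the integer domain.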
+inline void AddElementwise(int size, const ArithmeticParams& params, + const int8_t* input1_data, const int8_t* input2_data, + int8_t* output_data) { + ElementWise(size, params, input1_data, input2_data, output_data, + CheckArithmeticParams, AddFunc); +} + +inline void Add(const ArithmeticParams& params, + const RuntimeShape& input1_shape, const int8_t* input1_data, + const RuntimeShape& input2_shape, const int8_t* input2_data, + const RuntimeShape& output_shape, int8_t* output_data) { + CheckArithmeticParams(params); + + const int flat_size = + MatchingElementsSize(input1_shape, input2_shape, output_shape); + + AddElementwise(flat_size, params, input1_data, input2_data, output_data); +} + +inline void BroadcastAdd4DSlow(const ArithmeticParams& params, + const RuntimeShape& input1_shape, + const int8_t* input1_data, + const RuntimeShape& input2_shape, + const int8_t* input2_data, + const RuntimeShape& output_shape, + int8_t* output_data) { + BroadcastBinaryFunction4DSlow(params, input1_shape, input1_data, input2_shape, + input2_data, output_shape, output_data, + CheckArithmeticParams, AddFunc); +} + +} // namespace reference_integer_ops +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_ADD_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/conv.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/conv.h new file mode 100644 index 0000000..3b9adcb --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/conv.h @@ -0,0 +1,239 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_CONV_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_CONV_H_ + +#include + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" + +namespace tflite { +namespace reference_integer_ops { + +// Fixed-point per-channel-quantization convolution reference kernel. +inline void ConvPerChannel( + const ConvParams& params, const int32_t* output_multiplier, + const int32_t* output_shift, const RuntimeShape& input_shape, + const int8_t* input_data, const RuntimeShape& filter_shape, + const int8_t* filter_data, const RuntimeShape& bias_shape, + const int32_t* bias_data, const RuntimeShape& output_shape, + int8_t* output_data) { + // Get parameters. + const int32_t input_offset = params.input_offset; // r = s(q - Z) + const int stride_width = params.stride_width; + const int stride_height = params.stride_height; + const int dilation_width_factor = params.dilation_width_factor; + const int dilation_height_factor = params.dilation_height_factor; + const int pad_width = params.padding_values.width; + const int pad_height = params.padding_values.height; + const int32_t output_offset = params.output_offset; + + // Set min and max value of the output. 
+ const int32_t output_activation_min = params.quantized_activation_min; + const int32_t output_activation_max = params.quantized_activation_max; + + // Consistency check. + TFLITE_DCHECK_LE(output_activation_min, output_activation_max); + TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); + const int batches = MatchingDim(input_shape, 0, output_shape, 0); + const int input_depth = input_shape.Dims(3); + const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3); + if (bias_data) { + TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth); + } + + // Check dimensions of the tensors. + const int input_height = input_shape.Dims(1); + const int input_width = input_shape.Dims(2); + const int filter_height = filter_shape.Dims(1); + const int filter_width = filter_shape.Dims(2); + const int filter_input_depth = filter_shape.Dims(3); + const int groups = input_depth / filter_input_depth; + TFLITE_DCHECK_EQ(input_depth % filter_input_depth, 0); + const int filters_per_group = output_depth / groups; + const int output_height = output_shape.Dims(1); + const int output_width = output_shape.Dims(2); + for (int batch = 0; batch < batches; ++batch) { + for (int out_y = 0; out_y < output_height; ++out_y) { + const int in_y_origin = (out_y * stride_height) - pad_height; + for (int out_x = 0; out_x < output_width; ++out_x) { + const int in_x_origin = (out_x * stride_width) - pad_width; + for (int out_channel = 0; out_channel < output_depth; ++out_channel) { + auto group = out_channel / filters_per_group; + int32_t acc = 0; + for (int filter_y = 0; filter_y < filter_height; ++filter_y) { + const int in_y = in_y_origin + dilation_height_factor * filter_y; + for (int filter_x = 0; filter_x < filter_width; ++filter_x) { + const int in_x = in_x_origin + dilation_width_factor * filter_x; + + // Zero padding by omitting the areas outside the image. + const bool is_point_inside_image = + (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && + (in_y < input_height); + + if (!is_point_inside_image) { + continue; + } + + for (int in_channel = 0; in_channel < filter_input_depth; + ++in_channel) { + int32_t input_val = + input_data[Offset(input_shape, batch, in_y, in_x, + in_channel + group * filter_input_depth)]; + int32_t filter_val = filter_data[Offset( + filter_shape, out_channel, filter_y, filter_x, in_channel)]; + // Accumulate with 32 bits accumulator. + // In the nudging process during model quantization, we force + // real value of 0.0 be represented by a quantized value. This + // guarantees that the input_offset is a int8_t, even though + // it is represented using int32_t. int32_t += int8_t * + // (int8_t - int8_t) so the highest value we can get from each + // accumulation is [-127, 127] * ([-128, 127] - + // [-128, 127]), which is [-32512, 32512]. log2(32512) + // = 14.98, which means we can accumulate at least 2^16 + // multiplications without overflow. The accumulator is + // applied to a filter so the accumulation logic will hold as + // long as the filter size (filter_y * filter_x * in_channel) + // does not exceed 2^16, which is the case in all the models + // we have seen so far. + // TODO(b/174275578): Add a check to make sure the + // accumulator depth is smaller than 2^16. 
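+                // Quick sanity check of the bound above (editorial addition):
+                // 32512 * 2^16 == 2,130,706,432, which is still below
+                // INT32_MAX == 2,147,483,647, so 2^16 such accumulations
+                // cannot overflow the int32_t accumulator.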
+ acc += filter_val * (input_val + input_offset); + } + } + } + + if (bias_data) { + acc += bias_data[out_channel]; + } + acc = MultiplyByQuantizedMultiplier( + acc, output_multiplier[out_channel], output_shift[out_channel]); + acc += output_offset; + acc = std::max(acc, output_activation_min); + acc = std::min(acc, output_activation_max); + output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] = + static_cast(acc); + } + } + } + } +} + + +// Fixed-point per-channel-quantization convolution reference kernel. +// 16-bit data and 8-bit filter +template +inline void ConvPerChannel( + const ConvParams& params, const int32_t* output_multiplier, + const int32_t* output_shift, const RuntimeShape& input_shape, + const int16_t* input_data, const RuntimeShape& filter_shape, + const int8_t* filter_data, const RuntimeShape& bias_shape, + const AccumScalar* bias_data, const RuntimeShape& output_shape, + int16_t* output_data) { + // Get parameters. + const int stride_width = params.stride_width; + const int stride_height = params.stride_height; + const int dilation_width_factor = params.dilation_width_factor; + const int dilation_height_factor = params.dilation_height_factor; + const int pad_width = params.padding_values.width; + const int pad_height = params.padding_values.height; + + // Set min and max value of the output. + const int32_t output_activation_min = params.quantized_activation_min; + const int32_t output_activation_max = params.quantized_activation_max; + + // Consistency check. + TFLITE_DCHECK_LE(output_activation_min, output_activation_max); + TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); + const int batches = MatchingDim(input_shape, 0, output_shape, 0); + const int input_depth = input_shape.Dims(3); + const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3); + if (bias_data) { + TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth); + } + + // Check dimensions of the tensors. + const int input_height = input_shape.Dims(1); + const int input_width = input_shape.Dims(2); + const int filter_height = filter_shape.Dims(1); + const int filter_width = filter_shape.Dims(2); + const int filter_input_depth = filter_shape.Dims(3); + const int groups = input_depth / filter_input_depth; + TFLITE_DCHECK_EQ(input_depth % filter_input_depth, 0); + const int filters_per_group = output_depth / groups; + const int output_height = output_shape.Dims(1); + const int output_width = output_shape.Dims(2); + for (int batch = 0; batch < batches; ++batch) { + for (int out_y = 0; out_y < output_height; ++out_y) { + const int in_y_origin = (out_y * stride_height) - pad_height; + for (int out_x = 0; out_x < output_width; ++out_x) { + const int in_x_origin = (out_x * stride_width) - pad_width; + for (int out_channel = 0; out_channel < output_depth; ++out_channel) { + auto group = out_channel / filters_per_group; + AccumScalar acc = 0; + for (int filter_y = 0; filter_y < filter_height; ++filter_y) { + const int in_y = in_y_origin + dilation_height_factor * filter_y; + for (int filter_x = 0; filter_x < filter_width; ++filter_x) { + const int in_x = in_x_origin + dilation_width_factor * filter_x; + + // Zero padding by omitting the areas outside the image. 
+ const bool is_point_inside_image = + (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && + (in_y < input_height); + + if (!is_point_inside_image) { + continue; + } + + for (int in_channel = 0; in_channel < filter_input_depth; + ++in_channel) { + int32_t input_val = + input_data[Offset(input_shape, batch, in_y, in_x, + in_channel + group * filter_input_depth)]; + int32_t filter_val = filter_data[Offset( + filter_shape, out_channel, filter_y, filter_x, in_channel)]; + // Accumulate with 64 bits accumulator. + // int64_t += int8_t * int16_t so the highest value we can + // get from each accumulation is [-127, 127] * ([-32768, + // 32767] - + // [-32768, 32767]), which is [-8322945, 8322945]. + // log2(8322945) = 22.99. + acc += filter_val * input_val; + } + } + } + if (bias_data) { + acc += bias_data[out_channel]; + } + int32_t scaled_acc = MultiplyByQuantizedMultiplier( + acc, output_multiplier[out_channel], output_shift[out_channel]); + scaled_acc = std::max(scaled_acc, output_activation_min); + scaled_acc = std::min(scaled_acc, output_activation_max); + output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] = + static_cast(scaled_acc); + } + } + } + } +} + +} // namespace reference_integer_ops +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_CONV_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h new file mode 100644 index 0000000..95e7337 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h @@ -0,0 +1,291 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_DEPTHWISE_CONV_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_DEPTHWISE_CONV_H_ + +#include + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" + +namespace tflite { +namespace reference_integer_ops { +inline void DepthwiseConvPerChannel( + const DepthwiseParams& params, const int32_t* output_multiplier, + const int32_t* output_shift, const RuntimeShape& input_shape, + const int8_t* input_data, const RuntimeShape& filter_shape, + const int8_t* filter_data, const RuntimeShape& bias_shape, + const int32_t* bias_data, const RuntimeShape& output_shape, + int8_t* output_data) { + // Get parameters. + // TODO(b/141565753): Re-introduce ScopedProfilingLabel on Micro. 
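+  // Illustrative note (added in this copy): in a depthwise convolution every
+  // input channel is filtered independently. With depth_multiplier == 2 and
+  // input_depth == 3 the loops below produce output_depth == 6, and output
+  // channel (in_channel * 2 + m) only ever reads input channel in_channel.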
+ const int stride_width = params.stride_width; + const int stride_height = params.stride_height; + const int dilation_width_factor = params.dilation_width_factor; + const int dilation_height_factor = params.dilation_height_factor; + const int pad_width = params.padding_values.width; + const int pad_height = params.padding_values.height; + const int depth_multiplier = params.depth_multiplier; + const int32_t input_offset = params.input_offset; + const int32_t output_offset = params.output_offset; + const int32_t output_activation_min = params.quantized_activation_min; + const int32_t output_activation_max = params.quantized_activation_max; + + // Check dimensions of the tensors. + TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); + + TFLITE_DCHECK_LE(output_activation_min, output_activation_max); + const int batches = MatchingDim(input_shape, 0, output_shape, 0); + const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3); + const int input_height = input_shape.Dims(1); + const int input_width = input_shape.Dims(2); + const int input_depth = input_shape.Dims(3); + const int filter_height = filter_shape.Dims(1); + const int filter_width = filter_shape.Dims(2); + const int output_height = output_shape.Dims(1); + const int output_width = output_shape.Dims(2); + TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier); + TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth); + + for (int batch = 0; batch < batches; ++batch) { + for (int out_y = 0; out_y < output_height; ++out_y) { + for (int out_x = 0; out_x < output_width; ++out_x) { + for (int in_channel = 0; in_channel < input_depth; ++in_channel) { + for (int m = 0; m < depth_multiplier; ++m) { + const int output_channel = m + in_channel * depth_multiplier; + const int in_x_origin = (out_x * stride_width) - pad_width; + const int in_y_origin = (out_y * stride_height) - pad_height; + int32_t acc = 0; + for (int filter_y = 0; filter_y < filter_height; ++filter_y) { + for (int filter_x = 0; filter_x < filter_width; ++filter_x) { + const int in_x = in_x_origin + dilation_width_factor * filter_x; + const int in_y = + in_y_origin + dilation_height_factor * filter_y; + // Zero padding by omitting the areas outside the image. + const bool is_point_inside_image = + (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && + (in_y < input_height); + if (is_point_inside_image) { + int32_t input_val = input_data[Offset( + input_shape, batch, in_y, in_x, in_channel)]; + int32_t filter_val = filter_data[Offset( + filter_shape, 0, filter_y, filter_x, output_channel)]; + // Accumulate with 32 bits accumulator. + // In the nudging process during model quantization, we force + // real value of 0.0 be represented by a quantized value. This + // guarantees that the input_offset is a int8_t, even though + // it is represented using int32_t. int32_t += int8_t * + // (int8_t - int8_t) so the highest value we can get from each + // accumulation is [-127, 127] * ([-128, 127] - + // [-128, 127]), which is [-32512, 32512]. log2(32512) + // = 14.98, which means we can accumulate at least 2^16 + // multiplications without overflow. The accumulator is + // applied to a filter so the accumulation logic will hold as + // long as the filter size (filter_y * filter_x * in_channel) + // does not exceed 2^16, which is the case in all the models + // we have seen so far. 
+ // TODO(b/174275578): Add a check to make sure the + // accumulator depth is smaller than 2^16. + acc += filter_val * (input_val + input_offset); + } + } + } + if (bias_data) { + acc += bias_data[output_channel]; + } + acc = MultiplyByQuantizedMultiplier( + acc, output_multiplier[output_channel], + output_shift[output_channel]); + acc += output_offset; + acc = std::max(acc, output_activation_min); + acc = std::min(acc, output_activation_max); + output_data[Offset(output_shape, batch, out_y, out_x, + output_channel)] = static_cast(acc); + } + } + } + } + } +} + +inline void DepthwiseConvPerChannel( + const DepthwiseParams& params, const int32_t* output_multiplier, + const int32_t* output_shift, const RuntimeShape& input_shape, + const int16_t* input_data, const RuntimeShape& filter_shape, + const int8_t* filter_data, const RuntimeShape& bias_shape, + const std::int64_t* bias_data, const RuntimeShape& output_shape, + int16_t* output_data) { + // Get parameters. + const int stride_width = params.stride_width; + const int stride_height = params.stride_height; + const int dilation_width_factor = params.dilation_width_factor; + const int dilation_height_factor = params.dilation_height_factor; + const int pad_width = params.padding_values.width; + const int pad_height = params.padding_values.height; + const int depth_multiplier = params.depth_multiplier; + const int32_t output_activation_min = params.quantized_activation_min; + const int32_t output_activation_max = params.quantized_activation_max; + + // Check dimensions of the tensors. + TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); + + TFLITE_DCHECK_LE(output_activation_min, output_activation_max); + const int batches = MatchingDim(input_shape, 0, output_shape, 0); + const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3); + const int input_height = input_shape.Dims(1); + const int input_width = input_shape.Dims(2); + const int input_depth = input_shape.Dims(3); + const int filter_height = filter_shape.Dims(1); + const int filter_width = filter_shape.Dims(2); + const int output_height = output_shape.Dims(1); + const int output_width = output_shape.Dims(2); + TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier); + TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth); + + for (int batch = 0; batch < batches; ++batch) { + for (int out_y = 0; out_y < output_height; ++out_y) { + for (int out_x = 0; out_x < output_width; ++out_x) { + for (int in_channel = 0; in_channel < input_depth; ++in_channel) { + for (int m = 0; m < depth_multiplier; ++m) { + const int output_channel = m + in_channel * depth_multiplier; + const int in_x_origin = (out_x * stride_width) - pad_width; + const int in_y_origin = (out_y * stride_height) - pad_height; + std::int64_t acc = 0; + for (int filter_y = 0; filter_y < filter_height; ++filter_y) { + for (int filter_x = 0; filter_x < filter_width; ++filter_x) { + const int in_x = in_x_origin + dilation_width_factor * filter_x; + const int in_y = + in_y_origin + dilation_height_factor * filter_y; + // Zero padding by omitting the areas outside the image. 
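+                // Note (editorial addition): unlike the int8_t variant above,
+                // 16-bit activations are quantized symmetrically (zero point
+                // 0), which is why no input or output offset terms appear in
+                // this kernel.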
+ const bool is_point_inside_image = + (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && + (in_y < input_height); + if (is_point_inside_image) { + int32_t input_val = input_data[Offset( + input_shape, batch, in_y, in_x, in_channel)]; + int32_t filter_val = filter_data[Offset( + filter_shape, 0, filter_y, filter_x, output_channel)]; + // Accumulate with 64 bits accumulator. + // We assume maximum of 2^16 accumulations as with the 8-bit + // case so actually the value in the accumulator should not + // exceed 40 bits + acc += static_cast(filter_val) * + static_cast(input_val); + } + } + } + if (bias_data) { + acc += bias_data[output_channel]; + } + int32_t scaled_acc = MultiplyByQuantizedMultiplier( + acc, output_multiplier[output_channel], + output_shift[output_channel]); + scaled_acc = std::max(scaled_acc, output_activation_min); + scaled_acc = std::min(scaled_acc, output_activation_max); + output_data[Offset(output_shape, batch, out_y, out_x, + output_channel)] = + static_cast(scaled_acc); + } + } + } + } + } +} + +inline void DepthwiseConvHybridPerChannel( + const DepthwiseParams& params, float* scaling_factors_ptr, + const RuntimeShape& input_shape, const int8_t* input_data, + const RuntimeShape& filter_shape, const int8_t* filter_data, + const RuntimeShape& bias_shape, const float* bias_data, + const RuntimeShape& output_shape, float* output_data, + const float* per_channel_scale, int32_t* input_offset) { + const int stride_width = params.stride_width; + const int stride_height = params.stride_height; + const int dilation_width_factor = params.dilation_width_factor; + const int dilation_height_factor = params.dilation_height_factor; + const int pad_width = params.padding_values.width; + const int pad_height = params.padding_values.height; + const int depth_multiplier = params.depth_multiplier; + const float output_activation_min = params.float_activation_min; + const float output_activation_max = params.float_activation_max; + // Check dimensions of the tensors. 
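+  // Illustrative note (added in this copy): "hybrid" means the inner products
+  // are computed on int8_t inputs and weights, and the int32_t accumulator is
+  // then converted back to float with
+  // per_channel_scale[output_channel] * scaling_factors_ptr[batch] before the
+  // float bias and activation are applied.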
+ TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); + + const int batches = MatchingDim(input_shape, 0, output_shape, 0); + const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3); + const int input_height = input_shape.Dims(1); + const int input_width = input_shape.Dims(2); + const int input_depth = input_shape.Dims(3); + const int filter_height = filter_shape.Dims(1); + const int filter_width = filter_shape.Dims(2); + const int output_height = output_shape.Dims(1); + const int output_width = output_shape.Dims(2); + const int bias_depth = bias_shape.FlatSize(); + TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier); + TFLITE_DCHECK_EQ(bias_depth, output_depth); + + for (int batch = 0; batch < batches; ++batch) { + for (int out_y = 0; out_y < output_height; ++out_y) { + for (int out_x = 0; out_x < output_width; ++out_x) { + for (int in_channel = 0; in_channel < input_depth; ++in_channel) { + for (int m = 0; m < depth_multiplier; ++m) { + const int output_channel = m + in_channel * depth_multiplier; + const int in_x_origin = (out_x * stride_width) - pad_width; + const int in_y_origin = (out_y * stride_height) - pad_height; + int32_t acc = 0; + for (int filter_y = 0; filter_y < filter_height; ++filter_y) { + for (int filter_x = 0; filter_x < filter_width; ++filter_x) { + const int in_x = in_x_origin + dilation_width_factor * filter_x; + const int in_y = + in_y_origin + dilation_height_factor * filter_y; + // Zero padding by omitting the areas outside the image. + const bool is_point_inside_image = + (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && + (in_y < input_height); + if (is_point_inside_image) { + int32_t input_val = input_data[Offset( + input_shape, batch, in_y, in_x, in_channel)]; + int32_t filter_val = filter_data[Offset( + filter_shape, 0, filter_y, filter_x, output_channel)]; + acc += filter_val * (input_val - input_offset[batch]); + } + } + } + float acc_float = static_cast(acc); + acc_float *= + per_channel_scale[output_channel] * scaling_factors_ptr[batch]; + if (bias_data && output_channel < bias_depth) { + acc_float += bias_data[output_channel]; + } + output_data[Offset(output_shape, batch, out_y, out_x, + output_channel)] = + ActivationFunctionWithMinMax(acc_float, output_activation_min, + output_activation_max); + } + } + } + } + } +} + +} // namespace reference_integer_ops +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_DEPTHWISE_CONV_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h new file mode 100644 index 0000000..4be7987 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h @@ -0,0 +1,126 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_FULLY_CONNECTED_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_FULLY_CONNECTED_H_ + +#include + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" + +namespace tflite { +namespace reference_integer_ops { + +// For per-channel functions, since it is defined in quantization spec that +// weights are symmetric +// (https://www.tensorflow.org/lite/performance/quantization_spec#symmetric_vs_asymmetric), +// zero_point (params.weights_offset) is always 0. +// However, for per-tensor functions, params.weights_offset is still applied for +// backward compatibility. +template +void FullyConnectedPerChannel( + const FullyConnectedParams& params, const int32_t* output_multiplier, + const int* output_shift, const RuntimeShape& input_shape, + const InputType* input_data, const RuntimeShape& filter_shape, + const WeightType* filter_data, const RuntimeShape& bias_shape, + const BiasType* bias_data, const RuntimeShape& output_shape, + OutputType* output_data) { + const int32_t input_offset = params.input_offset; + const int32_t output_offset = params.output_offset; + const int32_t output_activation_min = params.quantized_activation_min; + const int32_t output_activation_max = params.quantized_activation_max; + TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2); + TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 2); + + TFLITE_DCHECK_LE(output_activation_min, output_activation_max); + const int filter_dim_count = filter_shape.DimensionsCount(); + const int batches = output_shape.Dims(0); + const int output_depth = output_shape.Dims(1); + TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2)); + const int accum_depth = filter_shape.Dims(filter_dim_count - 1); + for (int b = 0; b < batches; ++b) { + for (int out_c = 0; out_c < output_depth; ++out_c) { + BiasType acc = 0; + for (int d = 0; d < accum_depth; ++d) { + int32_t input_val = input_data[b * accum_depth + d]; + int32_t filter_val = filter_data[out_c * accum_depth + d]; + acc += filter_val * (input_val + input_offset); + } + if (bias_data) { + acc += bias_data[out_c]; + } + int32_t acc_scaled = MultiplyByQuantizedMultiplier( + acc, output_multiplier[out_c], output_shift[out_c]); + acc_scaled += output_offset; + acc_scaled = std::max(acc_scaled, output_activation_min); + acc_scaled = std::min(acc_scaled, output_activation_max); + output_data[out_c + output_depth * b] = + static_cast(acc_scaled); + } + } +} + +template +void FullyConnected(const FullyConnectedParams& params, + const RuntimeShape& input_shape, + const InputType* input_data, + const RuntimeShape& filter_shape, + const WeightType* filter_data, + const RuntimeShape& bias_shape, const BiasType* bias_data, + const RuntimeShape& output_shape, OutputType* output_data) { + const int32_t input_offset = params.input_offset; + const int32_t filter_offset = params.weights_offset; + const int32_t output_offset = params.output_offset; + const int32_t output_multiplier = params.output_multiplier; + const int output_shift = params.output_shift; + const int32_t output_activation_min = params.quantized_activation_min; + const int32_t output_activation_max = params.quantized_activation_max; + TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2); + TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1); + + 
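+  // Note (editorial addition): unlike FullyConnectedPerChannel above, this
+  // per-tensor variant uses a single output_multiplier/output_shift pair and
+  // still applies filter_offset, since only the per-channel path may assume
+  // symmetric (zero-point 0) weights.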
TFLITE_DCHECK_LE(output_activation_min, output_activation_max); + const int filter_dim_count = filter_shape.DimensionsCount(); + const int output_dim_count = output_shape.DimensionsCount(); + const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1); + const int output_depth = output_shape.Dims(output_dim_count - 1); + TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2)); + const int accum_depth = filter_shape.Dims(filter_dim_count - 1); + for (int b = 0; b < batches; ++b) { + for (int out_c = 0; out_c < output_depth; ++out_c) { + BiasType acc = 0; + for (int d = 0; d < accum_depth; ++d) { + int32_t input_val = input_data[b * accum_depth + d]; + int32_t filter_val = filter_data[out_c * accum_depth + d]; + acc += (filter_val + filter_offset) * (input_val + input_offset); + } + if (bias_data) { + acc += bias_data[out_c]; + } + int32_t acc_scaled = + MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift); + acc_scaled += output_offset; + acc_scaled = std::max(acc_scaled, output_activation_min); + acc_scaled = std::min(acc_scaled, output_activation_max); + output_data[out_c + output_depth * b] = + static_cast(acc_scaled); + } + } +} + +} // namespace reference_integer_ops +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_FULLY_CONNECTED_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/l2normalization.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/l2normalization.h new file mode 100644 index 0000000..582713b --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/l2normalization.h @@ -0,0 +1,67 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_L2NORMALIZATION_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_L2NORMALIZATION_H_ + +#include + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" + +namespace tflite { +namespace reference_integer_ops { + +inline void L2Normalization(int32_t input_zero_point, int32_t outer_size, + int32_t depth, const int8_t* input_data, + int8_t* output_data) { + static constexpr int8_t kMinInt8 = std::numeric_limits::min(); + static constexpr int8_t kMaxInt8 = std::numeric_limits::max(); + // The output scale must be in sync with Prepare(). + // Output is in 1/128 scale so the actual output range is nudged from [-1, 1] + // to [-1, 127/128]. + static constexpr int32_t kOutputScale = 7; + for (int outer_index = 0; outer_index < outer_size; ++outer_index) { + // int32_t = (int8_t - int8_t) ^ 2. + // ([-128, 127] - [-128, 127]) ^ 2 = [0, (2^8 - 1)^2] so the accumulator is + // safe from overflowing in at least 2^16 steps. 
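+    // Illustrative note (added in this copy): the inverse-sqrt multiplier
+    // computed below approximates 1 / sqrt(acc), so each output lane ends up
+    // as roughly 128 * input / l2_norm(input), matching the 1/128 output
+    // scale mentioned above.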
+    int32_t acc = 0;
+    for (int inner_index = 0; inner_index < depth; ++inner_index) {
+      int32_t input =
+          input_data[depth * outer_index + inner_index] - input_zero_point;
+      acc += input * input;
+    }
+    int32_t inv_l2norm_multiplier;
+    int inv_l2norm_shift;
+    GetInvSqrtQuantizedMultiplierExp(acc, kReverseShift, &inv_l2norm_multiplier,
+                                     &inv_l2norm_shift);
+
+    for (int inner_index = 0; inner_index < depth; ++inner_index) {
+      int32_t input =
+          input_data[depth * outer_index + inner_index] - input_zero_point;
+
+      // Rescale and downcast. Rescale is folded into the division.
+      int32_t output_in_q24 = MultiplyByQuantizedMultiplier(
+          input, inv_l2norm_multiplier, inv_l2norm_shift + kOutputScale);
+      output_in_q24 =
+          std::min(static_cast<int32_t>(kMaxInt8),
+                   std::max(static_cast<int32_t>(kMinInt8), output_in_q24));
+      output_data[depth * outer_index + inner_index] =
+          static_cast<int8_t>(output_in_q24);
+    }
+  }
+}
+}  // namespace reference_integer_ops
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_L2NORMALIZATION_H_
diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h
new file mode 100644
index 0000000..2119103
--- /dev/null
+++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h
@@ -0,0 +1,121 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_LOGISTIC_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_LOGISTIC_H_
+
+#include <algorithm>
+#include <limits>
+
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h"
+
+namespace tflite {
+namespace reference_integer_ops {
+
+inline void Logistic(int32_t input_zero_point, int32_t input_range_radius,
+                     int32_t input_multiplier, int32_t input_left_shift,
+                     int32_t input_size, const int8_t* input_data,
+                     int8_t* output_data) {
+  // Integer bits must be in sync with Prepare() function.
+  static constexpr int32_t kInputIntegerBits = 4;
+  static constexpr int32_t kOutputIntegerBits = 8;
+  static constexpr int8_t kMinInt8 = std::numeric_limits<int8_t>::min();
+  static constexpr int8_t kMaxInt8 = std::numeric_limits<int8_t>::max();
+  static constexpr int32_t kOutputZeroPoint = -128;
+
+  for (int i = 0; i < input_size; ++i) {
+    const int32_t input =
+        static_cast<int32_t>(input_data[i]) - input_zero_point;
+    if (input <= -input_range_radius) {
+      output_data[i] = kMinInt8;
+    } else if (input >= input_range_radius) {
+      output_data[i] = kMaxInt8;
+    } else {
+      const int32_t input_in_q4 = MultiplyByQuantizedMultiplier(
+          input, input_multiplier, input_left_shift);
+      using FixedPoint4 = gemmlowp::FixedPoint<int32_t, kInputIntegerBits>;
+      const int32_t output_in_q0 =
+          gemmlowp::logistic(FixedPoint4::FromRaw(input_in_q4)).raw();
+
+      // Rescale and downcast.
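+      // Worked example (editorial addition): the int8_t output uses scale
+      // 1/256 with zero point -128, so sigmoid(0) == 0.5 maps to
+      // 0.5 * 256 - 128 == 0 after the rescale below.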
+ using gemmlowp::RoundingDivideByPOT; + int32_t output_in_q23 = + RoundingDivideByPOT(output_in_q0, 31 - kOutputIntegerBits); + output_in_q23 = std::min(std::max(output_in_q23 + kOutputZeroPoint, + static_cast(kMinInt8)), + static_cast(kMaxInt8)); + output_data[i] = static_cast(output_in_q23); + } + } +} + +inline void Logistic(int32_t input_multiplier, int32_t input_left_shift, + int32_t input_size, const int16_t* ptr_input_data, + int16_t* ptr_output_data) { + // We use the LUT for sigmoid and take into account, that + // tanh(x) = 2*sigmoid(2*x) - 1 + + // We scale by 3/4 to expand range [-8,8]->[-10.7,10.7]. + // In case of general parameter scale, multiplier 3 is taken into account + // in TanhPrepare function and it is included in + // input_multiplier already. + + TFLITE_DCHECK_GE(input_left_shift, 0); + if (input_multiplier == 0) { // power of two case + input_multiplier = 3 << input_left_shift; + input_left_shift = 0; + } + + int32_t round = (input_left_shift > 0) ? 1 << (input_left_shift - 1) : 0; + + for (int i = 0; i < input_size; ++i, ptr_input_data++, ptr_output_data++) { + int32_t input_data = + ((*ptr_input_data) * input_multiplier + round) >> input_left_shift; + + // We do interpolation on unsigned values. + uint32_t abs_input_data = abs(input_data); + + // We divide by 2 power of 9, because + // we need to divide by 2 in power of 7 for + // the input conversion + 1/4 from the scale above. + + // Define uh as uint32_t type not to make this function overflow. + uint32_t uh = abs_input_data >> 9; + uint32_t result; + + if (uh >= 255) { + // Saturate to maximum. + result = 0x7FFF << 10; + } else { + uint32_t ua = sigmoid_table_uint16[uh]; + uint32_t ub = sigmoid_table_uint16[uh + 1]; + uint32_t ut = abs_input_data & 0x1ff; + // Interpolation is done using the fractional bit. + result = (ua << 9) + ut * (ub - ua); + } + + result = (input_data >= 0) ? (result + (1 << 9)) + : ((1 << (16 + 9)) - result + (1 << 9) - 1); + + // Back to 16-bit. + result >>= 10; + + *ptr_output_data = result; + } +} + +} // namespace reference_integer_ops +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_LOGISTIC_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/mean.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/mean.h new file mode 100644 index 0000000..0ba0f66 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/mean.h @@ -0,0 +1,79 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MEAN_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MEAN_H_ + +#include + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" + +namespace tflite { +namespace reference_integer_ops { + +template +inline void Mean(const tflite::MeanParams& op_params, int32_t multiplier, + int32_t shift, const RuntimeShape& unextended_input_shape, + const integer_type* input_data, int32_t input_zero_point, + const RuntimeShape& unextended_output_shape, + integer_type* output_data, int32_t output_zero_point) { + // Current implementation only supports dimension equals 4 and simultaneous + // reduction over width and height. + TFLITE_CHECK_EQ(unextended_input_shape.DimensionsCount(), 4); + TFLITE_CHECK_LE(unextended_output_shape.DimensionsCount(), 4); + const RuntimeShape input_shape = + RuntimeShape::ExtendedShape(4, unextended_input_shape); + const RuntimeShape output_shape = + RuntimeShape::ExtendedShape(4, unextended_output_shape); + const int output_batch = output_shape.Dims(0); + const int output_height = output_shape.Dims(1); + const int output_width = output_shape.Dims(2); + const int output_depth = output_shape.Dims(3); + const int input_height = input_shape.Dims(1); + const int input_width = input_shape.Dims(2); + const int num_elements_in_axis = input_width * input_height; + + TFLITE_CHECK_EQ(op_params.axis_count, 2); + TFLITE_CHECK((op_params.axis[0] == 1 && op_params.axis[1] == 2) || + (op_params.axis[0] == 2 && op_params.axis[1] == 1)); + TFLITE_CHECK_EQ(output_height, 1); + TFLITE_CHECK_EQ(output_width, 1); + + static constexpr int32_t kMinInt = std::numeric_limits::min(); + static constexpr int32_t kMaxInt = std::numeric_limits::max(); + + for (int out_b = 0; out_b < output_batch; ++out_b) { + for (int out_d = 0; out_d < output_depth; ++out_d) { + int32_t acc = 0; + for (int in_h = 0; in_h < input_height; ++in_h) { + for (int in_w = 0; in_w < input_width; ++in_w) { + acc += input_data[Offset(input_shape, out_b, in_h, in_w, out_d)] - + input_zero_point; + } + } + acc = MultiplyByQuantizedMultiplier(acc, multiplier, shift); + acc = acc > 0 ? (acc + num_elements_in_axis / 2) / num_elements_in_axis + : (acc - num_elements_in_axis / 2) / num_elements_in_axis; + acc += output_zero_point; + acc = std::min(std::max(acc, kMinInt), kMaxInt); + output_data[Offset(output_shape, out_b, 0, 0, out_d)] = + static_cast(acc); + } + } +} + +} // namespace reference_integer_ops +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MEAN_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/mul.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/mul.h new file mode 100644 index 0000000..168e3ae --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/mul.h @@ -0,0 +1,133 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MUL_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MUL_H_ + +#include + +#include "edge-impulse-sdk/third_party/gemmlowp/fixedpoint/fixedpoint.h" +#include "edge-impulse-sdk/third_party/ruy/ruy/profiler/instrumentation.h" // from @ruy +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" + +namespace tflite { +namespace reference_integer_ops { + +template +void MulElementwise(int size, const ArithmeticParams& params, + const InputType* input1_data, const InputType* input2_data, + OutputType* output_data) { + for (int i = 0; i < size; ++i) { + const int32_t input1_val = params.input1_offset + input1_data[i]; + const int32_t input2_val = params.input2_offset + input2_data[i]; + const int32_t unclamped_result = + params.output_offset + + MultiplyByQuantizedMultiplier(input1_val * input2_val, + params.output_multiplier, + params.output_shift); + const int32_t clamped_output = + std::min(params.quantized_activation_max, + std::max(params.quantized_activation_min, unclamped_result)); + output_data[i] = static_cast(clamped_output); + } +} + +template +inline void Mul(const ArithmeticParams& params, + const RuntimeShape& input1_shape, const T* input1_data, + const RuntimeShape& input2_shape, const T* input2_data, + const RuntimeShape& output_shape, T* output_data) { + TFLITE_DCHECK_LE(params.quantized_activation_min, + params.quantized_activation_max); + ruy::profiler::ScopeLabel label("Mul/8bit"); + const int flat_size = + MatchingElementsSize(input1_shape, input2_shape, output_shape); + + MulElementwise(flat_size, params, input1_data, input2_data, output_data); +} + +// Mul with 16 bit inputs and int8_t outputs. +inline void Mul(const ArithmeticParams& params, + const RuntimeShape& input1_shape, const int16_t* input1_data, + const RuntimeShape& input2_shape, const int16_t* input2_data, + const RuntimeShape& output_shape, int8_t* output_data) { + ruy::profiler::ScopeLabel label("Mul/Int16Int8"); + int32_t output_offset = params.output_offset; + int32_t output_activation_min = params.quantized_activation_min; + int32_t output_activation_max = params.quantized_activation_max; + TFLITE_DCHECK_LE(output_activation_min, output_activation_max); + + const int flat_size = + MatchingElementsSize(input1_shape, input2_shape, output_shape); + + for (int i = 0; i < flat_size; i++) { + // F0 uses 0 integer bits, range [-1, 1]. + using F0 = gemmlowp::FixedPoint; + + F0 unclamped_result = + F0::FromRaw(input1_data[i]) * F0::FromRaw(input2_data[i]); + int16_t rescaled_result = + gemmlowp::RoundingDivideByPOT(unclamped_result.raw(), 8); + int16_t clamped_result = std::min( + output_activation_max - output_offset, rescaled_result); + clamped_result = std::max(output_activation_min - output_offset, + clamped_result); + output_data[i] = output_offset + clamped_result; + } +} + +template +inline void BroadcastMul4DSlow( + const ArithmeticParams& params, const RuntimeShape& input1_shape, + const T* input1_data, const RuntimeShape& input2_shape, + const T* input2_data, const RuntimeShape& output_shape, T* output_data) { + ruy::profiler::ScopeLabel label("BroadcastMul4DSlow"); + + NdArrayDesc<4> desc1; + NdArrayDesc<4> desc2; + // The input shapes are extended as part of NdArrayDesc initialization. 
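+  // Broadcast dimensions (size 1 in one of the inputs) get a zero stride in
+  // the corresponding NdArrayDesc, so SubscriptToIndex() below re-reads the
+  // same input element for every output coordinate along that dimension.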
+ NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, + &desc2); + const RuntimeShape extended_output_shape = + RuntimeShape::ExtendedShape(4, output_shape); + + for (int b = 0; b < extended_output_shape.Dims(0); ++b) { + for (int y = 0; y < extended_output_shape.Dims(1); ++y) { + for (int x = 0; x < extended_output_shape.Dims(2); ++x) { + for (int c = 0; c < extended_output_shape.Dims(3); ++c) { + const int32_t input1_val = + params.input1_offset + + input1_data[SubscriptToIndex(desc1, b, y, x, c)]; + const int32_t input2_val = + params.input2_offset + + input2_data[SubscriptToIndex(desc2, b, y, x, c)]; + const int32_t unclamped_result = + params.output_offset + + MultiplyByQuantizedMultiplier(input1_val * input2_val, + params.output_multiplier, + params.output_shift); + const int32_t clamped_output = std::min( + params.quantized_activation_max, + std::max(params.quantized_activation_min, unclamped_result)); + output_data[Offset(extended_output_shape, b, y, x, c)] = + static_cast(clamped_output); + } + } + } + } +} + +} // namespace reference_integer_ops +} // namespace tflite +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MUL_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h new file mode 100644 index 0000000..ee026fd --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h @@ -0,0 +1,264 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_POOLING_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_POOLING_H_ + +#include +#include + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" + +namespace tflite { +namespace reference_integer_ops { + +inline bool AveragePool(const PoolParams& params, + const RuntimeShape& input_shape, + const int8_t* input_data, + const RuntimeShape& output_shape, int8_t* output_data) { + TFLITE_DCHECK_LE(params.quantized_activation_min, + params.quantized_activation_max); + TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); + const int batches = MatchingDim(input_shape, 0, output_shape, 0); + const int depth = MatchingDim(input_shape, 3, output_shape, 3); + const int input_height = input_shape.Dims(1); + const int input_width = input_shape.Dims(2); + const int output_height = output_shape.Dims(1); + const int output_width = output_shape.Dims(2); + const int stride_height = params.stride_height; + const int stride_width = params.stride_width; + for (int batch = 0; batch < batches; ++batch) { + for (int out_y = 0; out_y < output_height; ++out_y) { + for (int out_x = 0; out_x < output_width; ++out_x) { + for (int channel = 0; channel < depth; ++channel) { + const int in_x_origin = + (out_x * stride_width) - params.padding_values.width; + const int in_y_origin = + (out_y * stride_height) - params.padding_values.height; + // Compute the boundaries of the filter region clamped so as to + // ensure that the filter window fits in the input array. + const int filter_x_start = std::max(0, -in_x_origin); + const int filter_x_end = + std::min(params.filter_width, input_width - in_x_origin); + const int filter_y_start = std::max(0, -in_y_origin); + const int filter_y_end = + std::min(params.filter_height, input_height - in_y_origin); + int32_t acc = 0; + int filter_count = 0; + for (int filter_y = filter_y_start; filter_y < filter_y_end; + ++filter_y) { + for (int filter_x = filter_x_start; filter_x < filter_x_end; + ++filter_x) { + const int in_x = in_x_origin + filter_x; + const int in_y = in_y_origin + filter_y; + acc += + input_data[Offset(input_shape, batch, in_y, in_x, channel)]; + filter_count++; + } + } + if (filter_count == 0) return false; + // Round to the closest integer value. + acc = acc > 0 ? 
(acc + filter_count / 2) / filter_count + : (acc - filter_count / 2) / filter_count; + acc = std::max(acc, params.quantized_activation_min); + acc = std::min(acc, params.quantized_activation_max); + output_data[Offset(output_shape, batch, out_y, out_x, channel)] = + static_cast(acc); + } + } + } + } + return true; +} + +inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape, + const int8_t* input_data, const RuntimeShape& output_shape, + int8_t* output_data) { + TFLITE_DCHECK_LE(params.quantized_activation_min, + params.quantized_activation_max); + TFLITE_DCHECK_GE(params.quantized_activation_min, + std::numeric_limits::min()); + TFLITE_DCHECK_LE(params.quantized_activation_max, + std::numeric_limits::max()); + TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); + const int batches = MatchingDim(input_shape, 0, output_shape, 0); + const int depth = MatchingDim(input_shape, 3, output_shape, 3); + const int input_height = input_shape.Dims(1); + const int input_width = input_shape.Dims(2); + const int output_height = output_shape.Dims(1); + const int output_width = output_shape.Dims(2); + const int stride_height = params.stride_height; + const int stride_width = params.stride_width; + for (int batch = 0; batch < batches; ++batch) { + for (int out_y = 0; out_y < output_height; ++out_y) { + for (int out_x = 0; out_x < output_width; ++out_x) { + for (int channel = 0; channel < depth; ++channel) { + const int in_x_origin = + (out_x * stride_width) - params.padding_values.width; + const int in_y_origin = + (out_y * stride_height) - params.padding_values.height; + // Compute the boundaries of the filter region clamped so as to + // ensure that the filter window fits in the input array. 
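+          // For example, if in_x_origin is -2, the first two filter taps fall
+          // outside the input, so filter_x_start becomes 2 and the loop only
+          // visits in-bounds positions.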
+ const int filter_x_start = std::max(0, -in_x_origin); + const int filter_x_end = + std::min(params.filter_width, input_width - in_x_origin); + const int filter_y_start = std::max(0, -in_y_origin); + const int filter_y_end = + std::min(params.filter_height, input_height - in_y_origin); + int8_t max = std::numeric_limits::lowest(); + for (int filter_y = filter_y_start; filter_y < filter_y_end; + ++filter_y) { + for (int filter_x = filter_x_start; filter_x < filter_x_end; + ++filter_x) { + const int in_x = in_x_origin + filter_x; + const int in_y = in_y_origin + filter_y; + max = std::max( + max, + input_data[Offset(input_shape, batch, in_y, in_x, channel)]); + } + } + max = std::max(max, params.quantized_activation_min); + max = std::min(max, params.quantized_activation_max); + output_data[Offset(output_shape, batch, out_y, out_x, channel)] = + static_cast(max); + } + } + } + } +} + +inline bool AveragePool(const PoolParams& params, + const RuntimeShape& input_shape, + const int16_t* input_data, + const RuntimeShape& output_shape, + int16_t* output_data) { + TFLITE_DCHECK_LE(params.quantized_activation_min, + params.quantized_activation_max); + TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); + const int batches = MatchingDim(input_shape, 0, output_shape, 0); + const int depth = MatchingDim(input_shape, 3, output_shape, 3); + const int input_height = input_shape.Dims(1); + const int input_width = input_shape.Dims(2); + const int output_height = output_shape.Dims(1); + const int output_width = output_shape.Dims(2); + const int stride_height = params.stride_height; + const int stride_width = params.stride_width; + for (int batch = 0; batch < batches; ++batch) { + for (int out_y = 0; out_y < output_height; ++out_y) { + for (int out_x = 0; out_x < output_width; ++out_x) { + for (int channel = 0; channel < depth; ++channel) { + const int in_x_origin = + (out_x * stride_width) - params.padding_values.width; + const int in_y_origin = + (out_y * stride_height) - params.padding_values.height; + // Compute the boundaries of the filter region clamped so as to + // ensure that the filter window fits in the input array. + const int filter_x_start = std::max(0, -in_x_origin); + const int filter_x_end = + std::min(params.filter_width, input_width - in_x_origin); + const int filter_y_start = std::max(0, -in_y_origin); + const int filter_y_end = + std::min(params.filter_height, input_height - in_y_origin); + int32_t acc = 0; + int filter_count = 0; + for (int filter_y = filter_y_start; filter_y < filter_y_end; + ++filter_y) { + for (int filter_x = filter_x_start; filter_x < filter_x_end; + ++filter_x) { + const int in_x = in_x_origin + filter_x; + const int in_y = in_y_origin + filter_y; + acc += + input_data[Offset(input_shape, batch, in_y, in_x, channel)]; + filter_count++; + } + } + if (filter_count == 0) return false; + // Round to the closest integer value. + acc = acc > 0 ? 
(acc + filter_count / 2) / filter_count + : (acc - filter_count / 2) / filter_count; + acc = std::max(acc, params.quantized_activation_min); + acc = std::min(acc, params.quantized_activation_max); + output_data[Offset(output_shape, batch, out_y, out_x, channel)] = + static_cast(acc); + } + } + } + } + return true; +} + +inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape, + const int16_t* input_data, const RuntimeShape& output_shape, + int16_t* output_data) { + TFLITE_DCHECK_LE(params.quantized_activation_min, + params.quantized_activation_max); + TFLITE_DCHECK_GE(params.quantized_activation_min, + std::numeric_limits::min()); + TFLITE_DCHECK_LE(params.quantized_activation_max, + std::numeric_limits::max()); + TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); + const int batches = MatchingDim(input_shape, 0, output_shape, 0); + const int depth = MatchingDim(input_shape, 3, output_shape, 3); + const int input_height = input_shape.Dims(1); + const int input_width = input_shape.Dims(2); + const int output_height = output_shape.Dims(1); + const int output_width = output_shape.Dims(2); + const int stride_height = params.stride_height; + const int stride_width = params.stride_width; + for (int batch = 0; batch < batches; ++batch) { + for (int out_y = 0; out_y < output_height; ++out_y) { + for (int out_x = 0; out_x < output_width; ++out_x) { + for (int channel = 0; channel < depth; ++channel) { + const int in_x_origin = + (out_x * stride_width) - params.padding_values.width; + const int in_y_origin = + (out_y * stride_height) - params.padding_values.height; + // Compute the boundaries of the filter region clamped so as to + // ensure that the filter window fits in the input array. + const int filter_x_start = std::max(0, -in_x_origin); + const int filter_x_end = + std::min(params.filter_width, input_width - in_x_origin); + const int filter_y_start = std::max(0, -in_y_origin); + const int filter_y_end = + std::min(params.filter_height, input_height - in_y_origin); + int16_t max = std::numeric_limits::lowest(); + for (int filter_y = filter_y_start; filter_y < filter_y_end; + ++filter_y) { + for (int filter_x = filter_x_start; filter_x < filter_x_end; + ++filter_x) { + const int in_x = in_x_origin + filter_x; + const int in_y = in_y_origin + filter_y; + max = std::max( + max, + input_data[Offset(input_shape, batch, in_y, in_x, channel)]); + } + } + max = std::max(max, params.quantized_activation_min); + max = std::min(max, params.quantized_activation_max); + output_data[Offset(output_shape, batch, out_y, out_x, channel)] = + static_cast(max); + } + } + } + } +} + +} // namespace reference_integer_ops +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_POOLING_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h new file mode 100644 index 0000000..d7feb45 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h @@ -0,0 +1,117 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TANH_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TANH_H_ + +#include +#include + +#include "edge-impulse-sdk/third_party/gemmlowp/fixedpoint/fixedpoint.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" + +namespace tflite { +namespace reference_integer_ops { + +inline void Tanh(int32_t input_zero_point, int32_t input_range_radius, + int32_t input_multiplier, int32_t input_shift, + const RuntimeShape& input_shape, const int8_t* input_data, + const RuntimeShape& output_shape, int8_t* output_data) { + // Integer bits must be in sync with Prepare() function. + static constexpr int32_t kInputIntegerBits = 4; + static constexpr int32_t kOutputScale = 7; + static constexpr int32_t kMinInt8 = std::numeric_limits::min(); + static constexpr int32_t kMaxInt8 = std::numeric_limits::max(); + using F4 = gemmlowp::FixedPoint; + + const int flat_size = MatchingFlatSize(input_shape, output_shape); + + for (int i = 0; i < flat_size; ++i) { + const int32_t input = + static_cast(input_data[i]) - input_zero_point; + if (input <= -input_range_radius) { + output_data[i] = kMinInt8; + } else if (input >= input_range_radius) { + output_data[i] = kMaxInt8; + } else { + const int32_t input_in_q4 = + MultiplyByQuantizedMultiplier(input, input_multiplier, input_shift); + const int32_t output_in_q0 = + gemmlowp::tanh(F4::FromRaw(input_in_q4)).raw(); + + // Rescale and downcast. + using gemmlowp::RoundingDivideByPOT; + int32_t output_in_q24 = + RoundingDivideByPOT(output_in_q0, 31 - kOutputScale); + output_in_q24 = std::min(std::max(output_in_q24, kMinInt8), kMaxInt8); + output_data[i] = static_cast(output_in_q24); + } + } +} + +inline void Tanh(int32_t input_multiplier, int32_t input_left_shift, + const RuntimeShape& input_shape, const int16_t* ptr_input_data, + const RuntimeShape& output_shape, int16_t* ptr_output_data) { + // We use the LUT for sigmoid and take into account, that + // tanh(x) = 2*sigmoid(2*x) - 1 + + // We scale by 3/4 to expand range [-8,8]->[-10.7,10.7]. + // In case of general parameter scale, multiplier 3 is taken into account + // in TanhPrepare function and it is included in + // input_multiplier already. + + if (input_multiplier == 0) { // power of two case + input_multiplier = 3 << input_left_shift; + input_left_shift = 0; + } + + int32_t round = (input_left_shift > 0) ? 1 << (input_left_shift - 1) : 0; + + int flat_size = MatchingFlatSize(input_shape, output_shape); + + for (int i = 0; i < flat_size; ++i, ptr_input_data++, ptr_output_data++) { + int32_t input_data = + ((*ptr_input_data) * input_multiplier + round) >> input_left_shift; + + uint32_t abs_input_data = abs(input_data); + uint32_t uh = abs_input_data >> 8; + int32_t result; + + if (uh >= 255) { + // Saturate to maximum. 
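+      // The interpolated branch below carries 8 extra fractional bits (from
+      // ut), so the saturated value is shifted up by 8 to stay on the same
+      // scale.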
+ result = 0xFFFF << 8; + } else { + uint32_t ua = sigmoid_table_uint16[uh]; + uint32_t ub = sigmoid_table_uint16[uh + 1]; + + uint8_t ut = abs_input_data & 0xFF; + + result = (ua << 8) + ut * (ub - ua); + } + + result = (input_data >= 0) + ? (result - (1 << (14 + 9)) + (1 << (9 - 2))) + : (-result + (1 << (14 + 9)) + (1 << (9 - 2)) - 1); + + // Convert back to 16-bit. + result >>= (9 - 1); + + *ptr_output_data = result; + } +} + +} // namespace reference_integer_ops +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TANH_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/transpose_conv.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/transpose_conv.h new file mode 100644 index 0000000..8ce1cb7 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/transpose_conv.h @@ -0,0 +1,224 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TRANSPOSE_CONV_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TRANSPOSE_CONV_H_ + +#include + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" + +namespace tflite { +namespace reference_integer_ops { + +// Fixed-point per-channel-quantization transpose convolution reference kernel. +inline void TransposeConv( + const ConvParams& params, const int32_t* output_multiplier, + const int32_t* output_shift, const RuntimeShape& input_shape, + const int8_t* input_data, const RuntimeShape& filter_shape, + const int8_t* filter_data, const RuntimeShape& bias_shape, + const int32_t* bias_data, const RuntimeShape& output_shape, + int8_t* output_data, const RuntimeShape& im2col_shape, int8_t* im2col_data, + int32_t* scratch_buffer) { + const int stride_width = params.stride_width; + const int stride_height = params.stride_height; + const int pad_width = params.padding_values.width; + const int pad_height = params.padding_values.height; + TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); + (void)im2col_data; // only used in optimized code. + (void)im2col_shape; // only used in optimized code. 
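+  // output_multiplier and output_shift hold one requantization scale per
+  // output channel (per-channel quantization of the filter).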
+ + const int batches = MatchingDim(input_shape, 0, output_shape, 0); + const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3); + const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3); + if (bias_data) { + TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth); + } + const int input_height = input_shape.Dims(1); + const int input_width = input_shape.Dims(2); + const int filter_height = filter_shape.Dims(1); + const int filter_width = filter_shape.Dims(2); + const int output_height = output_shape.Dims(1); + const int output_width = output_shape.Dims(2); + const int32_t input_offset = params.input_offset; + const int32_t output_offset = params.output_offset; + const int32_t output_activation_min = params.quantized_activation_min; + const int32_t output_activation_max = params.quantized_activation_max; + TFLITE_DCHECK_LE(output_activation_min, output_activation_max); + + const int num_elements = output_shape.FlatSize(); + // We need to initialize scratch_buffer to all 0s, as we apply the same + // 'scatter' based trick as in float version. + memset(scratch_buffer, 0, num_elements * sizeof(int32_t)); + + // Loop through input elements one at a time. + for (int batch = 0; batch < batches; ++batch) { + for (int in_y = 0; in_y < input_height; ++in_y) { + for (int in_x = 0; in_x < input_width; ++in_x) { + for (int in_channel = 0; in_channel < input_depth; ++in_channel) { + // Loop through the output elements it will influence. + const int out_x_origin = (in_x * stride_width) - pad_width; + const int out_y_origin = (in_y * stride_height) - pad_height; + for (int filter_y = 0; filter_y < filter_height; ++filter_y) { + for (int filter_x = 0; filter_x < filter_width; ++filter_x) { + for (int out_channel = 0; out_channel < output_depth; + ++out_channel) { + // Compute output element location. + const int out_x = out_x_origin + filter_x; + const int out_y = out_y_origin + filter_y; + // We cannot accumulate out of bounds. 
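+                  // With padding, the scattered out_x / out_y can land
+                  // outside the output, so such taps are simply skipped.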
+ if ((out_x >= 0) && (out_x < output_width) && (out_y >= 0) && + (out_y < output_height)) { + const int8_t input_value = input_data[Offset( + input_shape, batch, in_y, in_x, in_channel)]; + const int8_t filter_value = + filter_data[Offset(filter_shape, out_channel, filter_y, + filter_x, in_channel)]; + scratch_buffer[Offset(output_shape, batch, out_y, out_x, + out_channel)] += + (input_value + input_offset) * filter_value; + } + } + } + } + } + } + } + } + + for (int batch = 0; batch < batches; ++batch) { + for (int out_y = 0; out_y < output_height; ++out_y) { + for (int out_x = 0; out_x < output_width; ++out_x) { + for (int out_channel = 0; out_channel < output_depth; ++out_channel) { + int32_t acc = scratch_buffer[Offset(output_shape, batch, out_y, out_x, + out_channel)]; + if (bias_data) { + acc += bias_data[out_channel]; + } + acc = MultiplyByQuantizedMultiplier( + acc, output_multiplier[out_channel], output_shift[out_channel]); + acc += output_offset; + acc = std::max(acc, output_activation_min); + acc = std::min(acc, output_activation_max); + output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] = + static_cast(acc); + } + } + } + } +} + +// int16_t input (zero_point=0), int8_t filter, int32 or int64 accumulator +template +inline void TransposeConv( + const ConvParams& params, const int32_t* output_multiplier, + const int32_t* output_shift, const RuntimeShape& input_shape, + const int16_t* input_data, const RuntimeShape& filter_shape, + const int8_t* filter_data, const RuntimeShape& bias_shape, + const Scalar* bias_data, const RuntimeShape& output_shape, + int16_t* output_data, const RuntimeShape& im2col_shape, int8_t* im2col_data, + Scalar* scratch_buffer) { + const int stride_width = params.stride_width; + const int stride_height = params.stride_height; + const int pad_width = params.padding_values.width; + const int pad_height = params.padding_values.height; + TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); + (void)im2col_data; // only used in optimized code. + (void)im2col_shape; // only used in optimized code. + + const int batches = MatchingDim(input_shape, 0, output_shape, 0); + const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3); + const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3); + if (bias_data) { + TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth); + } + const int input_height = input_shape.Dims(1); + const int input_width = input_shape.Dims(2); + const int filter_height = filter_shape.Dims(1); + const int filter_width = filter_shape.Dims(2); + const int output_height = output_shape.Dims(1); + const int output_width = output_shape.Dims(2); + const int32_t output_activation_min = params.quantized_activation_min; + const int32_t output_activation_max = params.quantized_activation_max; + TFLITE_DCHECK_LE(output_activation_min, output_activation_max); + + const int num_elements = output_shape.FlatSize(); + // We need to initialize scratch_buffer to all 0s, as we apply the same + // 'scatter' based trick as in float version. + memset(scratch_buffer, 0, num_elements * sizeof(Scalar)); + + // Loop through input elements one at a time. + for (int batch = 0; batch < batches; ++batch) { + for (int in_y = 0; in_y < input_height; ++in_y) { + for (int in_x = 0; in_x < input_width; ++in_x) { + for (int in_channel = 0; in_channel < input_depth; ++in_channel) { + // Loop through the output elements it will influence. 
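+          // Each input element at (in_y, in_x) contributes to output
+          // positions (in_y * stride_height - pad_height + filter_y,
+          // in_x * stride_width - pad_width + filter_x) for every filter tap.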
+ const int out_x_origin = (in_x * stride_width) - pad_width; + const int out_y_origin = (in_y * stride_height) - pad_height; + for (int filter_y = 0; filter_y < filter_height; ++filter_y) { + for (int filter_x = 0; filter_x < filter_width; ++filter_x) { + for (int out_channel = 0; out_channel < output_depth; + ++out_channel) { + // Compute output element location. + const int out_x = out_x_origin + filter_x; + const int out_y = out_y_origin + filter_y; + // We cannot accumulate out of bounds. + if ((out_x >= 0) && (out_x < output_width) && (out_y >= 0) && + (out_y < output_height)) { + const int32_t input_value = input_data[Offset( + input_shape, batch, in_y, in_x, in_channel)]; + const int32_t filter_value = + filter_data[Offset(filter_shape, out_channel, filter_y, + filter_x, in_channel)]; + scratch_buffer[Offset(output_shape, batch, out_y, out_x, + out_channel)] += + input_value * filter_value; + } + } + } + } + } + } + } + } + + for (int batch = 0; batch < batches; ++batch) { + for (int out_y = 0; out_y < output_height; ++out_y) { + for (int out_x = 0; out_x < output_width; ++out_x) { + for (int out_channel = 0; out_channel < output_depth; ++out_channel) { + Scalar acc = scratch_buffer[Offset(output_shape, batch, out_y, out_x, + out_channel)]; + if (bias_data) { + acc += bias_data[out_channel]; + } + int32_t scaled_acc = MultiplyByQuantizedMultiplier( + acc, output_multiplier[out_channel], output_shift[out_channel]); + scaled_acc = std::max(scaled_acc, output_activation_min); + scaled_acc = std::min(scaled_acc, output_activation_max); + output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] = + static_cast(scaled_acc); + } + } + } + } +} + +} // namespace reference_integer_ops +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TRANSPOSE_CONV_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/l2normalization.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/l2normalization.h new file mode 100644 index 0000000..cf32ea5 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/l2normalization.h @@ -0,0 +1,90 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_L2NORMALIZATION_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_L2NORMALIZATION_H_ + +#include +#include + +#include "edge-impulse-sdk/tensorflow/lite/core/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" + +namespace tflite { + +namespace reference_ops { + +inline void L2Normalization(const tflite::L2NormalizationParams& op_params, + const RuntimeShape& input_shape, + const float* input_data, + const RuntimeShape& output_shape, + float* output_data, float epsilon = 1e-6) { + const int trailing_dim = input_shape.DimensionsCount() - 1; + const int outer_size = + MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape); + const int depth = + MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim); + for (int i = 0; i < outer_size; ++i) { + float squared_l2_norm = 0; + for (int c = 0; c < depth; ++c) { + const float val = input_data[depth * i + c]; + squared_l2_norm += val * val; + } + float l2_norm = std::sqrt(squared_l2_norm); + l2_norm = std::max(l2_norm, epsilon); + for (int c = 0; c < depth; ++c) { + output_data[depth * i + c] = input_data[depth * i + c] / l2_norm; + } + } +} + +inline void L2Normalization(const tflite::L2NormalizationParams& op_params, + const RuntimeShape& input_shape, + const uint8_t* input_data, + const RuntimeShape& output_shape, + uint8_t* output_data) { + const int trailing_dim = input_shape.DimensionsCount() - 1; + const int depth = + MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim); + const int outer_size = + MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape); + const int32_t input_zero_point = op_params.input_zero_point; + + for (int i = 0; i < outer_size; ++i) { + int32_t square_l2_norm = 0; + for (int c = 0; c < depth; c++) { + int32_t diff = input_data[depth * i + c] - input_zero_point; + square_l2_norm += diff * diff; + } + int32_t inv_l2norm_multiplier; + int inv_l2norm_shift; + GetInvSqrtQuantizedMultiplierExp(square_l2_norm, kReverseShift, + &inv_l2norm_multiplier, &inv_l2norm_shift); + for (int c = 0; c < depth; c++) { + int32_t diff = input_data[depth * i + c] - input_zero_point; + int32_t rescaled_diff = MultiplyByQuantizedMultiplierSmallerThanOneExp( + 128 * diff, inv_l2norm_multiplier, inv_l2norm_shift); + int32_t unclamped_output_val = 128 + rescaled_diff; + int32_t output_val = + std::min(static_cast(255), + std::max(static_cast(0), unclamped_output_val)); + output_data[depth * i + c] = static_cast(output_val); + } + } +} + +} // namespace reference_ops +} // namespace tflite +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_L2NORMALIZATION_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/leaky_relu.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/leaky_relu.h new file mode 100644 index 0000000..5c05b15 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/leaky_relu.h @@ -0,0 +1,69 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LEAKY_RELU_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LEAKY_RELU_H_ + +#include +#include + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" + +namespace tflite { +namespace reference_ops { + +inline void LeakyRelu(const tflite::LeakyReluParams& params, + const RuntimeShape& input_shape, const float* input_data, + const RuntimeShape& output_shape, float* output_data) { + const int flat_size = MatchingFlatSize(input_shape, output_shape); + for (int i = 0; i < flat_size; ++i) { + const float val = input_data[i]; + // Note that alpha might be > 1 or < 0, so we don't use std::max here. + output_data[i] = val > 0 ? val : val * params.alpha; + } +} + +template +inline void QuantizeLeakyRelu(const LeakyReluParams& params, + const RuntimeShape& input_shape, + const T* input_data, + const RuntimeShape& output_shape, + T* output_data) { + const int flat_size = MatchingFlatSize(input_shape, output_shape); + static const int32_t quantized_min = std::numeric_limits::min(); + static const int32_t quantized_max = std::numeric_limits::max(); + for (int i = 0; i < flat_size; ++i) { + const int32_t input_value = input_data[i] - params.input_offset; + int32_t unclamped_output; + if (input_value >= 0) { + unclamped_output = params.output_offset + + MultiplyByQuantizedMultiplier( + input_value, params.output_multiplier_identity, + params.output_shift_identity); + } else { + unclamped_output = params.output_offset + + MultiplyByQuantizedMultiplier( + input_value, params.output_multiplier_alpha, + params.output_shift_alpha); + } + const T clamped_output = + std::min(quantized_max, std::max(quantized_min, unclamped_output)); + output_data[i] = static_cast(clamped_output); + } +} + +} // namespace reference_ops +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LEAKY_RELU_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/log_softmax.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/log_softmax.h new file mode 100644 index 0000000..af55755 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/log_softmax.h @@ -0,0 +1,256 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LOG_SOFTMAX_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LOG_SOFTMAX_H_ + +#include +#include +#include + +#include "edge-impulse-sdk/third_party/gemmlowp/fixedpoint/fixedpoint.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" + +namespace tflite { +namespace reference_ops { + +inline void LogSoftmax(const SoftmaxParams& params, + const RuntimeShape& input_shape, const float* input_data, + const RuntimeShape& output_shape, float* output_data) { + const int trailing_dim = input_shape.DimensionsCount() - 1; + const int outer_size = + MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape); + const int depth = + MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim); + + for (int i = 0; i < outer_size; ++i) { + // Find max element value which we'll use to ensure numerical stability + // taking advantage of the following equality: + // log(exp(x[i])/sum(exp(x[i]))) == log(exp(x[i]+C)/sum(exp(x[i]+C))) + float max = std::numeric_limits::lowest(); + for (int c = 0; c < depth; ++c) { + max = std::max(max, input_data[i * depth + c]); + } + + // Compute sum. + float sum = 0.f; + for (int c = 0; c < depth; ++c) { + sum += std::exp(input_data[i * depth + c] - max); + } + + // Compute result. + const float log_sum = std::log(sum); + for (int c = 0; c < depth; ++c) { + output_data[i * depth + c] = input_data[i * depth + c] - max - log_sum; + } + } +} + +inline void LogSoftmax(const SoftmaxParams& params, + const RuntimeShape& input_shape, + const uint8_t* input_data, + const RuntimeShape& output_shape, uint8_t* output_data) { + const int32_t input_multiplier = params.input_multiplier; + const int32_t input_left_shift = params.input_left_shift; + const int32_t reverse_scaling_divisor = params.reverse_scaling_divisor; + const int32_t reverse_scaling_right_shift = + params.reverse_scaling_right_shift; + const int diff_min = params.diff_min; + // The representation chosen for the input to the exp() function is Q5.26. + // We need to leave extra space since values that we skip might be as large + // as -32 before multiplying by input_beta_multiplier, and therefore as + // large as -16 afterwards. Note that exp(-8) is definitely not + // insignificant to accumulation, but exp(-16) definitely is. 
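+  // With kScaledDiffIntegerBits = 5, a raw int32 in Q5.26 spans roughly
+  // [-32, 32) with a resolution of 2^-26, which comfortably covers the
+  // [-16, 0] rescaled diffs that still matter for the sum of exponentials.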
+ static constexpr int kScaledDiffIntegerBits = 5; + static constexpr int kAccumulationIntegerBits = 12; + static constexpr int kOutputIntegerBits = 4; + using FixedPointScaledDiff = + gemmlowp::FixedPoint; + using FixedPointAccum = + gemmlowp::FixedPoint; + + const int trailing_dim = input_shape.DimensionsCount() - 1; + const int outer_size = + MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape); + const int depth = + MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim); + + for (int i = 0; i < outer_size; ++i) { + uint8_t max_in_row = 0; + for (int c = 0; c < depth; ++c) { + max_in_row = std::max(max_in_row, input_data[i * depth + c]); + } + + FixedPointAccum sum_of_exps = FixedPointAccum::Zero(); + for (int c = 0; c < depth; ++c) { + int32_t input_diff = + static_cast(input_data[i * depth + c]) - max_in_row; + if (input_diff >= diff_min) { + const int32_t input_diff_rescaled = + MultiplyByQuantizedMultiplierGreaterThanOne( + input_diff, input_multiplier, input_left_shift); + const FixedPointScaledDiff scaled_diff_f8 = + FixedPointScaledDiff::FromRaw(input_diff_rescaled); + sum_of_exps = sum_of_exps + gemmlowp::Rescale( + exp_on_negative_values(scaled_diff_f8)); + } + } + + const int32_t fixed_log_sum_of_exps = + log_x_for_x_greater_than_or_equal_to_1( + sum_of_exps) + .raw(); + + // rescaled_diff_min is smallest representable in + // Q(kScaledDiffIntegerBits).(31-kScaledDiffIntegerBits) plus the + // log-sub-exps that will be subtracted in the loop. + // + // The thresholds diff_min, etc are negative. + const int rescaled_diff_min = + fixed_log_sum_of_exps + std::numeric_limits::lowest(); + const int adjusted_diff_min = + std::max(static_cast( + diff_min - 1), // Note use of > below instead of >= above. + MultiplyByQuantizedMultiplierSmallerThanOneExp( + rescaled_diff_min, reverse_scaling_divisor, + -reverse_scaling_right_shift)); + + for (int c = 0; c < depth; ++c) { + int32_t input_diff = + static_cast(input_data[i * depth + c]) - max_in_row; + if (input_diff > adjusted_diff_min) { + const int32_t input_diff_rescaled = + MultiplyByQuantizedMultiplierGreaterThanOne( + input_diff, input_multiplier, input_left_shift); + int32_t unsat_output = + gemmlowp::RoundingDivideByPOT( + (input_diff_rescaled - fixed_log_sum_of_exps), + 31 - kScaledDiffIntegerBits - kOutputIntegerBits) + + 255; + + output_data[i * depth + c] = static_cast( + std::max(std::min(unsat_output, static_cast(255)), + static_cast(0))); + } else { + // Set output to smallest value. + output_data[i * depth + c] = 0; + } + } + } +} + +template +inline void LogSoftmaxQuantized(const SoftmaxParams& params, + const size_t outer_size, const size_t depth, + const RuntimeShape& input_shape, + const T* input_data, + const RuntimeShape& output_shape, + T* output_data) { + const int32_t input_multiplier = params.input_multiplier; + const int32_t input_left_shift = params.input_left_shift; + const int32_t reverse_scaling_divisor = params.reverse_scaling_divisor; + const int32_t reverse_scaling_right_shift = + params.reverse_scaling_right_shift; + const int diff_min = params.diff_min; + + static constexpr T kMinT8 = std::numeric_limits::min(); + static constexpr T kMaxT8 = std::numeric_limits::max(); + static constexpr int32_t kMinInt32 = std::numeric_limits::min(); + + // All IntegerBits must agree with Prepare function. + // Input is chosen as Q5.26 so exp(-1 * 2^5 * 2^-1) = exp(-16) is negligible. 
+ static constexpr int kInputIntegerBits = 5; + static constexpr int kAccumulationIntegerBits = 12; + static constexpr int kOutputIntegerBits = 4; + using F5 = gemmlowp::FixedPoint; + using F12 = gemmlowp::FixedPoint; + + for (size_t outer_index = 0; outer_index < outer_size; ++outer_index) { + T max_in_row = kMinT8; + for (size_t inner_index = 0; inner_index < depth; ++inner_index) { + max_in_row = + std::max(max_in_row, input_data[outer_index * depth + inner_index]); + } + + // Accumulator "sum_of_exps_in_q12" is safe from overflowing in 2^12 steps. + F12 sum_of_exps_in_q12 = F12::FromRaw(0); + for (size_t inner_index = 0; inner_index < depth; ++inner_index) { + int32_t input_diff = + static_cast(input_data[outer_index * depth + inner_index]) - + max_in_row; + if (input_diff >= diff_min) { + const int32_t input_diff_in_q5 = MultiplyByQuantizedMultiplier( + input_diff, input_multiplier, input_left_shift); + sum_of_exps_in_q12 = + sum_of_exps_in_q12 + + gemmlowp::Rescale( + exp_on_negative_values(F5::FromRaw(input_diff_in_q5))); + } + } + + const int32_t log_sum_of_exps_in_q5 = + log_x_for_x_greater_than_or_equal_to_1( + sum_of_exps_in_q12) + .raw(); + + // Potentially reduced the valid range. shifted_log_sum_of_exps_in_q5 is + // smallest representable in Q5.26 plus the log_sum_of_exps. + const int32_t shifted_log_sum_of_exps_in_q5 = + log_sum_of_exps_in_q5 + kMinInt32; + const int32_t adjusted_diff_min = + std::max(static_cast(diff_min - 1), + MultiplyByQuantizedMultiplier(shifted_log_sum_of_exps_in_q5, + reverse_scaling_divisor, + -reverse_scaling_right_shift)); + + for (size_t inner_index = 0; inner_index < depth; ++inner_index) { + int32_t input_diff = + static_cast(input_data[outer_index * depth + inner_index]) - + max_in_row; + // Note use of > below instead of >= above. + if (input_diff > adjusted_diff_min) { + const int32_t input_diff_in_q5 = MultiplyByQuantizedMultiplier( + input_diff, input_multiplier, input_left_shift); + + // Rescale and downcast. + int32_t output_in_q27 = + gemmlowp::RoundingDivideByPOT( + (input_diff_in_q5 - log_sum_of_exps_in_q5), + 31 - kInputIntegerBits - kOutputIntegerBits) + + kMaxT8; + + output_in_q27 = + std::max(std::min(output_in_q27, static_cast(kMaxT8)), + static_cast(kMinT8)); + output_data[outer_index * depth + inner_index] = + static_cast(output_in_q27); + } else { + output_data[outer_index * depth + inner_index] = kMinT8; + } + } + } +} + +inline void LogSoftmax(const SoftmaxParams& params, const size_t outer_size, + const size_t depth, const RuntimeShape& input_shape, + const int8_t* input_data, + const RuntimeShape& output_shape, int8_t* output_data) { + LogSoftmaxQuantized(params, outer_size, depth, input_shape, input_data, + output_shape, output_data); +} + +} // namespace reference_ops +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LOG_SOFTMAX_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/logistic.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/logistic.h new file mode 100644 index 0000000..5a46190 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/logistic.h @@ -0,0 +1,132 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LOGISTIC_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LOGISTIC_H_ + +#include + +#include "edge-impulse-sdk/third_party/gemmlowp/fixedpoint/fixedpoint.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/cppmath.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h" + +namespace tflite { +namespace reference_ops { + +inline void Logistic(const RuntimeShape& input_shape, const float* input_data, + const RuntimeShape& output_shape, float* output_data) { + const float cutoff_upper = 16.619047164916992188f; + const float cutoff_lower = -9.f; + + const int flat_size = MatchingFlatSize(input_shape, output_shape); + + // Rational for using approximation in reference kernel. + // 0. This approximation gives enough precision for float. + // 1. This works around an issue on an embedded chipset where exp() does not + // return correctly as expected - exp(x) should return inf when overflown + // not 1.701417 IEEE 754 defines representation for inf. + // 2. This will speed up calculation and is matching the behavior in the + // optimized kernels. (check the definition of scalar_logistic_op) + + for (int i = 0; i < flat_size; i++) { + float val = input_data[i]; + float result; + if (val > cutoff_upper) { + result = 1.0f; + } else if (val < cutoff_lower) { + result = std::exp(val); + } else { + result = 1.f / (1.f + std::exp(-val)); + } + output_data[i] = result; + } +} + +// Convenience version that allows, for example, generated-code calls to be +// uniform between data types. +inline void Logistic(const LogisticParams&, const RuntimeShape& input_shape, + const float* input_data, const RuntimeShape& output_shape, + float* output_data) { + // Drop params: not needed. + Logistic(input_shape, input_data, output_shape, output_data); +} + +inline void Logistic(const LogisticParams& params, + const RuntimeShape& input_shape, const int16_t* input_data, + const RuntimeShape& output_shape, int16_t* output_data) { + const int flat_size = MatchingFlatSize(input_shape, output_shape); + + for (int i = 0; i < flat_size; i++) { + // F0 uses 0 integer bits, range [-1, 1]. + // This is the return type of math functions such as tanh, logistic, + // whose range is in [-1, 1]. + using F0 = gemmlowp::FixedPoint; + // F3 uses 3 integer bits, range [-8, 8], the input range expected here. + using F3 = gemmlowp::FixedPoint; + + const F3 input = F3::FromRaw(input_data[i]); + F0 output = gemmlowp::logistic(input); + output_data[i] = output.raw(); + } +} + +// Quantized int8_t logistic activation. Cheats by dequantizing and +// requantizing around the floating point logistic method. This implementation +// is slow on platforms without a floating point unit. 
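+//
+// Concretely, each element is dequantized as
+//   real = input_scale * (q_in - input_zero_point),
+// passed through the float logistic below, and requantized as
+//   q_out = result / output_scale + output_zero_point
+// (truncated toward zero by the static_cast in the loop).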
+ +// TODO(b/141211002): Delete this int8_t implementation once we can reuse the +// approach used in TFLite for int8_t Logistic. +inline void Logistic(const RuntimeShape& input_shape, const int8_t* input_data, + float input_scale, int input_zero_point, + const RuntimeShape& output_shape, int8_t* output_data, + float output_scale, int output_zero_point) { + const float cutoff_upper = 16.619047164916992188f; + const float cutoff_lower = -9.f; + + const int flat_size = MatchingFlatSize(input_shape, output_shape); + + // Rational for using approximation in reference kernel. + // 0. This approximation gives enough precision for float. + // 1. This works around an issue on an embedded chipset where exp() does not + // return correctly as expected - exp(x) should return inf when overflown + // not 1.701417 IEEE 754 defines representation for inf. + // 2. This will speed up calculation and is matching the behavior in the + // optimized kernels. (check the definition of scalar_logistic_op) + + for (int i = 0; i < flat_size; i++) { + // Dequantize. + float val = + static_cast((input_data[i] - input_zero_point) * input_scale); + float result; + if (val > cutoff_upper) { + result = 1.0f; + } else if (val < cutoff_lower) { + result = std::exp(val); + } else { + result = 1.f / (1.f + std::exp(-val)); + } + // Requantize + int8_t output = + static_cast(result / output_scale + output_zero_point); + output_data[i] = output; + } +} + +} // namespace reference_ops +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LOGISTIC_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/lstm_cell.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/lstm_cell.h new file mode 100644 index 0000000..de1c485 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/lstm_cell.h @@ -0,0 +1,422 @@ +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LSTM_CELL_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LSTM_CELL_H_ + +#include +#include +#include + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/concatenation.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/fully_connected.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" + +namespace tflite { +namespace reference_ops { + +inline void LstmCell( + const LstmCellParams& params, const RuntimeShape& unextended_input_shape, + const float* input_data, const RuntimeShape& unextended_prev_activ_shape, + const float* prev_activ_data, const RuntimeShape& weights_shape, + const float* weights_data, const RuntimeShape& unextended_bias_shape, + const float* bias_data, const RuntimeShape& unextended_prev_state_shape, + const float* prev_state_data, + const RuntimeShape& unextended_output_state_shape, float* output_state_data, + const RuntimeShape& unextended_output_activ_shape, float* output_activ_data, + const RuntimeShape& unextended_concat_temp_shape, float* concat_temp_data, + const RuntimeShape& unextended_activ_temp_shape, float* activ_temp_data) { + TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_prev_activ_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_bias_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_prev_state_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_output_state_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_output_activ_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_concat_temp_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_activ_temp_shape.DimensionsCount(), 4); + const RuntimeShape input_shape = + RuntimeShape::ExtendedShape(4, unextended_input_shape); + const RuntimeShape prev_activ_shape = + RuntimeShape::ExtendedShape(4, unextended_prev_activ_shape); + const RuntimeShape bias_shape = + RuntimeShape::ExtendedShape(4, unextended_bias_shape); + const RuntimeShape prev_state_shape = + RuntimeShape::ExtendedShape(4, unextended_prev_state_shape); + const RuntimeShape output_state_shape = + RuntimeShape::ExtendedShape(4, unextended_output_state_shape); + const RuntimeShape output_activ_shape = + RuntimeShape::ExtendedShape(4, unextended_output_activ_shape); + const RuntimeShape concat_temp_shape = + RuntimeShape::ExtendedShape(4, unextended_concat_temp_shape); + const RuntimeShape activ_temp_shape = + RuntimeShape::ExtendedShape(4, unextended_activ_temp_shape); + TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2); + + const int weights_dim_count = weights_shape.DimensionsCount(); + const int batches = + MatchingDim(input_shape, 0, prev_activ_shape, 0, prev_state_shape, 0, + output_state_shape, 0, output_activ_shape, 0); + const int height = + MatchingDim(input_shape, 1, prev_activ_shape, 1, prev_state_shape, 1, + output_state_shape, 1, output_activ_shape, 1); + const int width = + MatchingDim(input_shape, 2, prev_activ_shape, 2, prev_state_shape, 2, + output_state_shape, 2, output_activ_shape, 2); + const int input_depth = input_shape.Dims(3); + const int prev_activ_depth = prev_activ_shape.Dims(3); + const int total_input_depth = prev_activ_depth + input_depth; + TFLITE_DCHECK_EQ(weights_shape.Dims(weights_dim_count - 1), + total_input_depth); + 
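+  // The weights pack the four gate matrices: their last dimension is
+  // input_depth + prev_activ_depth, and (as checked below) the other
+  // dimension is 4 * output_depth, covering the input, new-input, forget and
+  // output gates.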
TFLITE_DCHECK_EQ(FlatSizeSkipDim(bias_shape, 3), 1); + const int intern_activ_depth = + MatchingDim(weights_shape, weights_dim_count - 2, bias_shape, 3); + TFLITE_DCHECK_EQ(weights_shape.FlatSize(), + intern_activ_depth * total_input_depth); + TFLITE_DCHECK_EQ(intern_activ_depth % 4, 0); + const int output_depth = + MatchingDim(prev_state_shape, 3, prev_activ_shape, 3, output_state_shape, + 3, output_activ_shape, 3); + TFLITE_DCHECK_EQ(output_depth, intern_activ_depth / 4); + + // Concatenate prev_activ and input data together + float const* concat_input_arrays_data[2] = {input_data, prev_activ_data}; + const RuntimeShape* concat_input_arrays_shapes[2] = {&input_shape, + &prev_activ_shape}; + tflite::ConcatenationParams concat_params; + concat_params.axis = 3; + concat_params.inputs_count = 2; + Concatenation(concat_params, concat_input_arrays_shapes, + concat_input_arrays_data, concat_temp_shape, concat_temp_data); + + // Fully connected + tflite::FullyConnectedParams fc_params; + fc_params.float_activation_min = std::numeric_limits::lowest(); + fc_params.float_activation_max = std::numeric_limits::max(); + FullyConnected(fc_params, concat_temp_shape, concat_temp_data, weights_shape, + weights_data, bias_shape, bias_data, activ_temp_shape, + activ_temp_data); + + // Memory state update (the LSTM "guts") + for (int b = 0; b < batches; ++b) { + for (int w = 0; w < width; ++w) { + for (int h = 0; h < height; ++h) { + for (int c = 0; c < output_depth; ++c) { + const float input_gate = + 1.f / + (1.f + std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w, + 0 * output_depth + c)])); + const float new_input = std::tanh(activ_temp_data[Offset( + activ_temp_shape, b, h, w, 1 * output_depth + c)]); + const float forget_gate = + 1.f / + (1.f + std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w, + 2 * output_depth + c)])); + const float output_gate = + 1.f / + (1.f + std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w, + 3 * output_depth + c)])); + const float new_state = + input_gate * new_input + + forget_gate * + prev_state_data[Offset(prev_state_shape, b, h, w, c)]; + output_state_data[Offset(output_state_shape, b, h, w, c)] = new_state; + output_activ_data[Offset(output_activ_shape, b, h, w, c)] = + output_gate * std::tanh(new_state); + } + } + } + } +} + +// Quantized LSTM cell implementation. +// The quantization of the input, output arrays is as follows: +// - The input activations are quantized as uint8 on the interval +// [-1, 127/128]. +// The rationale for that is that is the natural interval for output +// activations (see next point) and these need to be concatenated together. +// We could accommodate different ranges by re-scaling, but we empirically +// found that setting the input activations range to be [-1, 127/128] in the +// first place, removing the need for re-scaling, greatly improves accuracy. +// - The output activations are quantized as uint8 on the interval +// [-1, 127/128]. +// The rationale for that is that the definition of a LSTM cell makes them +// intrinsically constrained in [-1, 1]; tweaking that to [-1, 127/128] +// makes for simpler, more accurate fixed-point arithmetic. +// - The output-at-previous-timestep state array is obviously quantized as +// the output activations. +// - The internal LSTM memory (not the output-at-previous-timestep, the other +// internal state array) is int16-quantized and may use any power-of-two, +// symmetric range i.e. 
[-2^N, 2^N * 32767/32768] for any N, which we call +// StateIntegerBits below, see the below discussion of that template +// parameter ("The StateIntegerBits template parameter"). +// - The output of the internal fully-connected node is int16-quantized +// on the interval [-8, 8 * 32767/32768], the rationale for which is +// explained just below ("Why [-8, 8] for fully-connected output?"). +// +// +// === The StateIntegerBits template parameter === +// +// The StateIntegerBits template parameter controls the fixed-point format used +// to represent the internal memory of the LSTM cell (not the +// output-at-previous-timestep, the other internal state array). It's currently +// a template parameter so that the model can control that. The most typical +// value for StateIntegerBits is 4. Other plausible values are anywhere between +// 3 and 5. We might eventually standardize on a single supported value, e.g. 4, +// and drop that template parameter. The reason why it can't be a runtime +// parameter is that this controls the fixed-point format used, i.e. we need to +// generate actually different code based on it. In particular, we generate code +// for a fixed-point tanh() implementation for that format, which internally +// uses a fixed-point exp() implementation, which internally uses a +// barrel-shifter with a number of steps that depends on StateIntegerBits. +// Another consequence of that is that a higher value of StateIntegerBits +// results in a more expensive implementation (more barrel shifter steps +// needed). +// +// +// === Why [-8, 8] for fully-connected output? === +// +// This array is only fed to Logistic and Tanh functions, for which +// the quantized implementation will want to use fixed-point arithmetic, +// requiring a power-of-two representation interval. Thus, we should right +// away quantize this array to a power-of-two interval; otherwise, +// implementation will need to rescale that, losing any benefit that a tighter +// representation interval might otherwise yield, while introducing some +// numerical error and computational overhead. +// +// Now, Logistic and Tanh +// are nearly constant (nearly equal to their horizontal asymptotes) +// outside of a small bounded interval around 0: +// +// Logistic(4) = 1 - 1.8e-2 Tanh(4) = 1 - 6.7e-4 +// Logistic(8) = 1 - 3.4e-4 Tanh(8) = 1 - 2.3e-7 +// Logistic(16) = 1 - 1.1e-7 Tanh(16) = 1 - 2.5e-14 +// +// From this, we see that clamping to [-4, 4] would be too inaccurate +// (the error of 1.8e-2 on Logistic would be felt even in 8bit precision) +// while clamping to [-16, 16] would make no difference even in float32. +// However, for a fixed-point implementation in 16-bit integers, using 5 +// integer bits to represent the [-16, 16] range would leave only 11 +// fractional bits, giving an increment of 2^-11 = 4.9e-4 between consecutive +// representable values. Notice that is higher than the +// worst-case clamping error with clamping to [-8, 8]: 3.4e-4 for Logistic. +// Using [-8, 8] thus seems like the better compromise overall, enjoying +// an increment of 2.4e-4 between representable values and a worst-case +// clamping error of 3.4e-4, both better than the increment of 4.9e-4 with +// [-16, 16]. +// +// Moreover, all other things being equal, it is nice to choose the narrower +// representation range, as that makes the implementation of fixed-point +// math functions a little cheaper (each integer bit requires an additional +// barrel-shifter atep in the implementation of exp(-x)). 
That is further +// reason to prefer [-8, 8] over [-16, 16]. The choice of [-16, 16] would make +// sense for 32-bit float or 32-bit fixed-point quantization, but we are +// aiming for 16-bit fixed-point quantization of these internal nodes here. +// +template +inline void LstmCell(const LstmCellParams& params, + const RuntimeShape& unextended_input_shape, + const uint8_t* input_data_uint8, + const RuntimeShape& unextended_prev_activ_shape, + const uint8_t* prev_activ_data_uint8, + const RuntimeShape& weights_shape, + const uint8_t* weights_data_uint8, + const RuntimeShape& unextended_bias_shape, + const int32_t* bias_data_int32, + const RuntimeShape& unextended_prev_state_shape, + const int16_t* prev_state_data_int16, + const RuntimeShape& unextended_output_state_shape, + int16_t* output_state_data_int16, + const RuntimeShape& unextended_output_activ_shape, + uint8_t* output_activ_data_uint8, + const RuntimeShape& unextended_concat_temp_shape, + uint8_t* concat_temp_data_uint8, + const RuntimeShape& unextended_activ_temp_shape, + int16_t* activ_temp_data_int16, void* gemmlowp_context) { + (void)gemmlowp_context; // only used in optimized code. + int32_t weights_zero_point = params.weights_zero_point; + int32_t accum_multiplier = params.accum_multiplier; + int accum_shift = params.accum_shift; + TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_prev_activ_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_bias_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_prev_state_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_output_state_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_output_activ_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_concat_temp_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_activ_temp_shape.DimensionsCount(), 4); + const RuntimeShape input_shape = + RuntimeShape::ExtendedShape(4, unextended_input_shape); + const RuntimeShape prev_activ_shape = + RuntimeShape::ExtendedShape(4, unextended_prev_activ_shape); + const RuntimeShape bias_shape = + RuntimeShape::ExtendedShape(4, unextended_bias_shape); + const RuntimeShape prev_state_shape = + RuntimeShape::ExtendedShape(4, unextended_prev_state_shape); + const RuntimeShape output_state_shape = + RuntimeShape::ExtendedShape(4, unextended_output_state_shape); + const RuntimeShape output_activ_shape = + RuntimeShape::ExtendedShape(4, unextended_output_activ_shape); + const RuntimeShape concat_temp_shape = + RuntimeShape::ExtendedShape(4, unextended_concat_temp_shape); + const RuntimeShape activ_temp_shape = + RuntimeShape::ExtendedShape(4, unextended_activ_temp_shape); + TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2); + + // Gather dimensions information, and perform consistency checks. 
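Before the dimension bookkeeping that follows, a brief aside on the [-8, 8] rationale above: the resolution and clamping-error figures quoted in that comment can be reproduced with a few lines of standalone C++. This is an illustrative check only and is not part of the patched header.

// Reproduces the comparison from the comment above: a 16-bit fixed-point
// format with N integer bits has a step of 2^(N-15), while clamping the
// logistic input to [-2^N, 2^N] costs at most 1 - Logistic(2^N).
#include <cmath>
#include <cstdio>

int main() {
  const int integer_bits_cases[] = {3, 4};  // [-8, 8] vs. [-16, 16]
  for (int n : integer_bits_cases) {
    const double range = std::ldexp(1.0, n);      // 2^N
    const double step = std::ldexp(1.0, n - 15);  // 2^(N-15)
    const double clamp_err = 1.0 - 1.0 / (1.0 + std::exp(-range));
    std::printf("[-%.0f, %.0f]: step %.1e, logistic clamping error %.1e\n",
                range, range, step, clamp_err);
  }
  return 0;  // Prints ~2.4e-4 / 3.4e-4 for N=3 and ~4.9e-4 / 1.1e-7 for N=4.
}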
+ const int weights_dim_count = weights_shape.DimensionsCount(); + const int outer_size = MatchingFlatSizeSkipDim( + input_shape, 3, prev_activ_shape, prev_state_shape, output_state_shape, + output_activ_shape); + const int input_depth = input_shape.Dims(3); + const int prev_activ_depth = prev_activ_shape.Dims(3); + const int total_input_depth = prev_activ_depth + input_depth; + TFLITE_DCHECK_EQ(weights_shape.Dims(weights_dim_count - 1), + total_input_depth); + const int intern_activ_depth = + MatchingDim(weights_shape, weights_dim_count - 2, bias_shape, 3); + TFLITE_DCHECK_EQ(weights_shape.FlatSize(), + intern_activ_depth * total_input_depth); + TFLITE_DCHECK_EQ(FlatSizeSkipDim(bias_shape, 3), 1); + TFLITE_DCHECK_EQ(intern_activ_depth % 4, 0); + const int output_depth = + MatchingDim(prev_state_shape, 3, prev_activ_shape, 3, output_state_shape, + 3, output_activ_shape, 3); + TFLITE_DCHECK_EQ(output_depth, intern_activ_depth / 4); + const int fc_batches = FlatSizeSkipDim(activ_temp_shape, 3); + const int fc_output_depth = + MatchingDim(weights_shape, weights_dim_count - 2, activ_temp_shape, 3); + const int fc_accum_depth = total_input_depth; + TFLITE_DCHECK_EQ(fc_output_depth, 4 * output_depth); + + // Depth-concatenate prev_activ and input data together. + uint8_t const* concat_input_arrays_data[2] = {input_data_uint8, + prev_activ_data_uint8}; + const RuntimeShape* concat_input_arrays_shapes[2] = {&input_shape, + &prev_activ_shape}; + tflite::ConcatenationParams concat_params; + concat_params.axis = 3; + concat_params.inputs_count = 2; + Concatenation(concat_params, concat_input_arrays_shapes, + concat_input_arrays_data, concat_temp_shape, + concat_temp_data_uint8); + + // Implementation of the fully connected node inside the LSTM cell. + // The operands are 8-bit integers, the accumulators are internally 32bit + // integers, and the output is 16-bit fixed-point with 3 integer bits so + // the output range is [-2^3, 2^3] == [-8, 8]. The rationale for that + // is explained in the function comment above. + for (int b = 0; b < fc_batches; ++b) { + for (int out_c = 0; out_c < fc_output_depth; ++out_c) { + // Internal accumulation. + // Initialize accumulator with the bias-value. + int32_t accum = bias_data_int32[out_c]; + // Accumulation loop. + for (int d = 0; d < fc_accum_depth; ++d) { + int16_t input_val = + concat_temp_data_uint8[b * fc_accum_depth + d] - 128; + int16_t weights_val = + weights_data_uint8[out_c * fc_accum_depth + d] - weights_zero_point; + accum += input_val * weights_val; + } + // Down-scale the final int32 accumulator to the scale used by our + // (16-bit, using 3 integer bits) fixed-point format. The quantized + // multiplier and shift here have been pre-computed offline + // (e.g. by toco). + accum = + MultiplyByQuantizedMultiplier(accum, accum_multiplier, accum_shift); + // Saturate, cast to int16, and store to the temporary activations array. + accum = std::max(-32768, std::min(32767, accum)); + activ_temp_data_int16[out_c + fc_output_depth * b] = accum; + } + } + + // Rest of the LSTM cell: tanh and logistic math functions, and some adds + // and muls, all done in 16-bit fixed-point. + for (int b = 0; b < outer_size; ++b) { + for (int c = 0; c < output_depth; ++c) { + // Define the fixed-point data types that we will use here. All use + // int16 as the underlying integer type i.e. all are 16-bit fixed-point. + // They only differ by the number of integral vs. fractional bits, + // determining the range of values that they can represent. 
+ // + // F0 uses 0 integer bits, range [-1, 1]. + // This is the return type of math functions such as tanh, logistic, + // whose range is in [-1, 1]. + using F0 = gemmlowp::FixedPoint; + // F3 uses 3 integer bits, range [-8, 8]. + // This is the range of the previous fully-connected node's output, + // which is our input here. + using F3 = gemmlowp::FixedPoint; + // FS uses StateIntegerBits integer bits, range [-2^StateIntegerBits, + // 2^StateIntegerBits]. It's used to represent the internal state, whose + // number of integer bits is currently dictated by the model. See comment + // on the StateIntegerBits template parameter above. + using FS = gemmlowp::FixedPoint; + // Implementation of input gate, using fixed-point logistic function. + F3 input_gate_input = F3::FromRaw( + activ_temp_data_int16[b * fc_output_depth + 0 * output_depth + c]); + F0 input_gate_output = gemmlowp::logistic(input_gate_input); + // Implementation of input modulation gate, using fixed-point tanh + // function. + F3 input_modulation_gate_input = F3::FromRaw( + activ_temp_data_int16[b * fc_output_depth + 1 * output_depth + c]); + F0 input_modulation_gate_output = + gemmlowp::tanh(input_modulation_gate_input); + // Implementation of forget gate, using fixed-point logistic function. + F3 forget_gate_input = F3::FromRaw( + activ_temp_data_int16[b * fc_output_depth + 2 * output_depth + c]); + F0 forget_gate_output = gemmlowp::logistic(forget_gate_input); + // Implementation of output gate, using fixed-point logistic function. + F3 output_gate_input = F3::FromRaw( + activ_temp_data_int16[b * fc_output_depth + 3 * output_depth + c]); + F0 output_gate_output = gemmlowp::logistic(output_gate_input); + // Implementation of internal multiplication nodes, still in fixed-point. + F0 input_times_input_modulation = + input_gate_output * input_modulation_gate_output; + FS prev_state = FS::FromRaw(prev_state_data_int16[b * output_depth + c]); + FS prev_state_times_forget_state = forget_gate_output * prev_state; + // Implementation of internal addition node, saturating. + FS new_state = gemmlowp::SaturatingAdd( + gemmlowp::Rescale(input_times_input_modulation), + prev_state_times_forget_state); + // Implementation of last internal Tanh node, still in fixed-point. + // Since a Tanh fixed-point implementation is specialized for a given + // number or integer bits, and each specialization can have a substantial + // code size, and we already used above a Tanh on an input with 3 integer + // bits, and per the table in the above function comment there is no + // significant accuracy to be lost by clamping to [-8, +8] for a + // 3-integer-bits representation, let us just do that. This helps people + // porting this to targets where code footprint must be minimized. + F3 new_state_f3 = gemmlowp::Rescale<3>(new_state); + F0 output_activ_int16 = output_gate_output * gemmlowp::tanh(new_state_f3); + // Store the new internal state back to memory, as 16-bit integers. + // Note: here we store the original value with StateIntegerBits, not + // the rescaled 3-integer-bits value fed to tanh. + output_state_data_int16[b * output_depth + c] = new_state.raw(); + // Down-scale the output activations to 8-bit integers, saturating, + // and store back to memory. 
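The two statements that follow implement the down-scale announced in the comment above using gemmlowp::RoundingDivideByPOT. As a plain-float restatement of the same mapping (the helper name below is illustrative, and the exact tie-breaking of the gemmlowp rounding may differ):

#include <algorithm>
#include <cmath>
#include <cstdint>

// A raw int16_t with 0 integer bits represents real = raw / 32768, and the
// uint8 output format [-1, 127/128] has scale 1/128 and zero point 128, so
// the conversion is a divide by 2^8, a clamp, and an offset of 128.
inline uint8_t DownscaleActivationSketch(int16_t raw_f0) {
  const float real = static_cast<float>(raw_f0) / 32768.0f;  // in [-1, 1)
  const int rescaled = static_cast<int>(std::lround(real * 128.0f));
  const int clamped = std::min(127, std::max(-128, rescaled));
  return static_cast<uint8_t>(128 + clamped);
}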
+ int16_t rescaled_output_activ = + gemmlowp::RoundingDivideByPOT(output_activ_int16.raw(), 8); + int16_t clamped_output_activ = std::max( + -128, std::min(127, rescaled_output_activ)); + output_activ_data_uint8[b * output_depth + c] = + 128 + clamped_output_activ; + } + } +} + +} // namespace reference_ops +} // namespace tflite +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LSTM_CELL_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/maximum_minimum.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/maximum_minimum.h new file mode 100644 index 0000000..b005336 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/maximum_minimum.h @@ -0,0 +1,64 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_MAXIMUM_MINIMUM_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_MAXIMUM_MINIMUM_H_ + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" + +namespace tflite { +namespace reference_ops { + +template +void MaximumMinimumBroadcastSlow(const RuntimeShape& unextended_input1_shape, + const T* input1_data, + const RuntimeShape& unextended_input2_shape, + const T* input2_data, + const RuntimeShape& unextended_output_shape, + T* output_data, Op op) { + // Uses element-wise calculation if broadcast is not required. 
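The dispatch below uses the plain element-wise loop when the shapes match and falls back to NdArrayDesc-based broadcasting otherwise. For orientation, a call site might look roughly like the sketch below; it assumes the usual TFLite template parameters <typename T, typename Op, int N = 5> on this helper, and the wrapper name is illustrative.

#include <algorithm>
#include <cstdint>

#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/maximum_minimum.h"

// The same helper serves both Maximum and Minimum; only the functor changes.
void EvalMaximumSketch(const tflite::RuntimeShape& in1_shape, const int8_t* in1,
                       const tflite::RuntimeShape& in2_shape, const int8_t* in2,
                       const tflite::RuntimeShape& out_shape, int8_t* out) {
  tflite::reference_ops::MaximumMinimumBroadcastSlow(
      in1_shape, in1, in2_shape, in2, out_shape, out,
      [](int8_t a, int8_t b) { return std::max(a, b); });
}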
+ if (unextended_input1_shape == unextended_input2_shape) { + const int flat_size = + MatchingElementsSize(unextended_input1_shape, unextended_input2_shape, + unextended_output_shape); + for (int i = 0; i < flat_size; ++i) { + output_data[i] = op(input1_data[i], input2_data[i]); + } + } else { + TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), N); + TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), N); + TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), N); + + NdArrayDesc desc1; + NdArrayDesc desc2; + NdArrayDesc output_desc; + NdArrayDescsForElementwiseBroadcast( + unextended_input1_shape, unextended_input2_shape, &desc1, &desc2); + CopyDimsToDesc(RuntimeShape::ExtendedShape(N, unextended_output_shape), + &output_desc); + + auto maxmin_func = [&](int indexes[N]) { + output_data[SubscriptToIndex(output_desc, indexes)] = + op(input1_data[SubscriptToIndex(desc1, indexes)], + input2_data[SubscriptToIndex(desc2, indexes)]); + }; + NDOpsHelper(output_desc, maxmin_func); + } +} + +} // namespace reference_ops +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_MAXIMUM_MINIMUM_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/mul.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/mul.h new file mode 100644 index 0000000..63ece01 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/mul.h @@ -0,0 +1,214 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_MUL_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_MUL_H_ + +#include +#include + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" + +namespace tflite { + +namespace reference_ops { + +// Element-wise mul that can often be used for inner loop of broadcast Mul as +// well as the non-broadcast Mul. 
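The offsets and the output_multiplier/output_shift pair consumed by MulElementwise and BroadcastMul4DSlow below are precomputed at prepare time from the tensor quantization parameters (the input offsets are the negated input zero points; the output offset is the output zero point). A rough, self-contained sketch of the multiplier derivation follows; the production kernels use QuantizeMultiplier() from quantization_util.h, and the function name here is illustrative.

#include <cmath>
#include <cstdint>

// Turn the real-valued rescale factor s1 * s2 / s_out into a Q31 multiplier
// plus a power-of-two shift, the form MultiplyByQuantizedMultiplier() expects.
inline void QuantizeMulScaleSketch(double input1_scale, double input2_scale,
                                   double output_scale,
                                   int32_t* quantized_multiplier, int* shift) {
  const double real_multiplier = input1_scale * input2_scale / output_scale;
  int exponent = 0;
  const double q = std::frexp(real_multiplier, &exponent);  // q in [0.5, 1)
  int64_t q_fixed = static_cast<int64_t>(std::llround(q * (1LL << 31)));
  if (q_fixed == (1LL << 31)) {  // rounding pushed q up to exactly 1.0
    q_fixed /= 2;
    ++exponent;
  }
  *quantized_multiplier = static_cast<int32_t>(q_fixed);
  *shift = exponent;  // negative values mean a right shift in the kernel
}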
+inline void MulElementwise(int size, const ArithmeticParams& params, + const uint8_t* input1_data, + const uint8_t* input2_data, uint8_t* output_data) { + for (int i = 0; i < size; ++i) { + const int32_t input1_val = params.input1_offset + input1_data[i]; + const int32_t input2_val = params.input2_offset + input2_data[i]; + const int32_t unclamped_result = + params.output_offset + + MultiplyByQuantizedMultiplier(input1_val * input2_val, + params.output_multiplier, + params.output_shift); + const int32_t clamped_output = + std::min(params.quantized_activation_max, + std::max(params.quantized_activation_min, unclamped_result)); + output_data[i] = static_cast(clamped_output); + } +} + +template +inline void Mul(const ArithmeticParams& params, + const RuntimeShape& input1_shape, const T* input1_data, + const RuntimeShape& input2_shape, const T* input2_data, + const RuntimeShape& output_shape, T* output_data) { + T output_activation_min; + T output_activation_max; + GetActivationParams(params, &output_activation_min, &output_activation_max); + + const int flat_size = + MatchingExtendedShapeFlatSize(input1_shape, input2_shape, output_shape); + for (int i = 0; i < flat_size; ++i) { + output_data[i] = ActivationFunctionWithMinMax( + input1_data[i] * input2_data[i], output_activation_min, + output_activation_max); + } +} + +inline void Mul(const ArithmeticParams& params, + const RuntimeShape& input1_shape, + const std::complex* input1_data, + const RuntimeShape& input2_shape, + const std::complex* input2_data, + const RuntimeShape& output_shape, + std::complex* output_data) { + const int flat_size = + MatchingExtendedShapeFlatSize(input1_shape, input2_shape, output_shape); + for (int i = 0; i < flat_size; ++i) { + output_data[i] = input1_data[i] * input2_data[i]; + } +} + +inline void Mul(const ArithmeticParams& params, + const RuntimeShape& input1_shape, const uint8_t* input1_data, + const RuntimeShape& input2_shape, const uint8_t* input2_data, + const RuntimeShape& output_shape, uint8_t* output_data) { + TFLITE_DCHECK_LE(params.quantized_activation_min, + params.quantized_activation_max); + const int flat_size = + MatchingExtendedShapeFlatSize(input1_shape, input2_shape, output_shape); + + MulElementwise(flat_size, params, input1_data, input2_data, output_data); +} + +inline void BroadcastMul4DSlow(const ArithmeticParams& params, + const RuntimeShape& input1_shape, + const uint8_t* input1_data, + const RuntimeShape& input2_shape, + const uint8_t* input2_data, + const RuntimeShape& output_shape, + uint8_t* output_data) { + NdArrayDesc<4> desc1; + NdArrayDesc<4> desc2; + NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, + &desc2); + const RuntimeShape extended_output_shape = + RuntimeShape::ExtendedShape(4, output_shape); + + for (int b = 0; b < extended_output_shape.Dims(0); ++b) { + for (int y = 0; y < extended_output_shape.Dims(1); ++y) { + for (int x = 0; x < extended_output_shape.Dims(2); ++x) { + for (int c = 0; c < extended_output_shape.Dims(3); ++c) { + const int32_t input1_val = + params.input1_offset + + input1_data[SubscriptToIndex(desc1, b, y, x, c)]; + const int32_t input2_val = + params.input2_offset + + input2_data[SubscriptToIndex(desc2, b, y, x, c)]; + const int32_t unclamped_result = + params.output_offset + + MultiplyByQuantizedMultiplier(input1_val * input2_val, + params.output_multiplier, + params.output_shift); + const int32_t clamped_output = std::min( + params.quantized_activation_max, + std::max(params.quantized_activation_min, unclamped_result)); + 
output_data[Offset(extended_output_shape, b, y, x, c)] = + static_cast(clamped_output); + } + } + } + } +} + +template +void BroadcastMul4DSlow(const ArithmeticParams& params, + const RuntimeShape& unextended_input1_shape, + const T* input1_data, + const RuntimeShape& unextended_input2_shape, + const T* input2_data, + const RuntimeShape& unextended_output_shape, + T* output_data) { + T output_activation_min; + T output_activation_max; + GetActivationParams(params, &output_activation_min, &output_activation_max); + + TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4); + const RuntimeShape output_shape = + RuntimeShape::ExtendedShape(4, unextended_output_shape); + + NdArrayDesc<4> desc1; + NdArrayDesc<4> desc2; + NdArrayDescsForElementwiseBroadcast(unextended_input1_shape, + unextended_input2_shape, &desc1, &desc2); + + // In Tensorflow, the dimensions are canonically named (batch_number, row, + // col, channel), with extents (batches, height, width, depth), with the + // trailing dimension changing most rapidly (channels has the smallest stride, + // typically 1 element). + // + // In generated C code, we store arrays with the dimensions reversed. The + // first dimension has smallest stride. + // + // We name our variables by their Tensorflow convention, but generate C code + // nesting loops such that the innermost loop has the smallest stride for the + // best cache behavior. + for (int b = 0; b < output_shape.Dims(0); ++b) { + for (int y = 0; y < output_shape.Dims(1); ++y) { + for (int x = 0; x < output_shape.Dims(2); ++x) { + for (int c = 0; c < output_shape.Dims(3); ++c) { + output_data[Offset(output_shape, b, y, x, c)] = + ActivationFunctionWithMinMax( + input1_data[SubscriptToIndex(desc1, b, y, x, c)] * + input2_data[SubscriptToIndex(desc2, b, y, x, c)], + output_activation_min, output_activation_max); + } + } + } + } +} + +inline void BroadcastMul4DSlow(const ArithmeticParams& params, + const RuntimeShape& unextended_input1_shape, + const std::complex* input1_data, + const RuntimeShape& unextended_input2_shape, + const std::complex* input2_data, + const RuntimeShape& unextended_output_shape, + std::complex* output_data) { + TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4); + const RuntimeShape output_shape = + RuntimeShape::ExtendedShape(4, unextended_output_shape); + + NdArrayDesc<4> desc1; + NdArrayDesc<4> desc2; + NdArrayDescsForElementwiseBroadcast(unextended_input1_shape, + unextended_input2_shape, &desc1, &desc2); + + for (int b = 0; b < output_shape.Dims(0); ++b) { + for (int y = 0; y < output_shape.Dims(1); ++y) { + for (int x = 0; x < output_shape.Dims(2); ++x) { + for (int c = 0; c < output_shape.Dims(3); ++c) { + output_data[Offset(output_shape, b, y, x, c)] = + input1_data[SubscriptToIndex(desc1, b, y, x, c)] * + input2_data[SubscriptToIndex(desc2, b, y, x, c)]; + } + } + } + } +} + +} // namespace reference_ops +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_MUL_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/neg.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/neg.h new file mode 100644 index 0000000..9fc6f39 --- /dev/null +++ 
b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/neg.h @@ -0,0 +1,37 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_NEG_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_NEG_H_ + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" + +namespace tflite { + +namespace reference_ops { + +template +inline void Negate(const RuntimeShape& input_shape, const T* input_data, + const RuntimeShape& output_shape, T* output_data) { + const int flat_size = MatchingFlatSize(input_shape, output_shape); + + for (int i = 0; i < flat_size; ++i) { + output_data[i] = -input_data[i]; + } +} + +} // namespace reference_ops +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_NEG_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/pad.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/pad.h new file mode 100644 index 0000000..b4b2a75 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/pad.h @@ -0,0 +1,169 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PAD_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PAD_H_ + +#include + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" + +namespace tflite { + +namespace reference_ops { + +// TFLite Pad supports activation tensors with up to 5 dimensions. +constexpr int PadKernelMaxDimensionCount() { return 5; } + +// There are two versions of pad: Pad and PadV2. In PadV2 there is a second +// scalar input that provides the padding value. Therefore pad_value_ptr can be +// equivalent to a simple input1_data. For Pad, it should point to a zero +// value. +// +// Note that two typenames are required, so that T=P=int32_t is considered a +// specialization distinct from P=int32_t. 
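PadImpl below right-aligns the caller's padding spec into fixed five-entry arrays (the "padding the padding" step) and then expects output_shape to satisfy out_dim = left_pad + in_dim + right_pad on every axis. A minimal standalone illustration of the right-alignment, with made-up values:

#include <cstdio>

int main() {
  constexpr int kMaxDims = 5;            // PadKernelMaxDimensionCount()
  const int left_padding[] = {1, 0, 2};  // hypothetical rank-3 spec
  const int left_padding_count = 3;
  int left_padding_copy[kMaxDims] = {0, 0, 0, 0, 0};
  for (int i = 0; i < left_padding_count; ++i) {
    left_padding_copy[i + kMaxDims - left_padding_count] = left_padding[i];
  }
  for (int i = 0; i < kMaxDims; ++i) std::printf("%d ", left_padding_copy[i]);
  std::printf("\n");  // prints: 0 0 1 0 2
  return 0;
}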
+template +inline void PadImpl(const tflite::PadParams& op_params, + const RuntimeShape& input_shape, const T* input_data, + const P* pad_value_ptr, const RuntimeShape& output_shape, + T* output_data) { + const RuntimeShape ext_input_shape = + RuntimeShape::ExtendedShape(PadKernelMaxDimensionCount(), input_shape); + const RuntimeShape ext_output_shape = + RuntimeShape::ExtendedShape(PadKernelMaxDimensionCount(), output_shape); + TFLITE_DCHECK_LE(op_params.left_padding_count, PadKernelMaxDimensionCount()); + TFLITE_DCHECK_LE(op_params.right_padding_count, PadKernelMaxDimensionCount()); + + // Runtime calls are currently fixed at 5 dimensions. Copy inputs so we can + // pad them to 5 dims (yes, we are "padding the padding"). + int left_padding_copy[PadKernelMaxDimensionCount()]; + for (int i = 0; i < PadKernelMaxDimensionCount(); i++) { + left_padding_copy[i] = 0; + } + for (int i = 0; i < op_params.left_padding_count; ++i) { + left_padding_copy[i + PadKernelMaxDimensionCount() - + op_params.left_padding_count] = op_params.left_padding[i]; + } + int right_padding_copy[PadKernelMaxDimensionCount()]; + for (int i = 0; i < PadKernelMaxDimensionCount(); i++) { + right_padding_copy[i] = 0; + } + for (int i = 0; i < op_params.right_padding_count; ++i) { + right_padding_copy[i + PadKernelMaxDimensionCount() - + op_params.right_padding_count] = + op_params.right_padding[i]; + } + + const int output_batch = ext_output_shape.Dims(0); + const int output_plane = ext_output_shape.Dims(1); + const int output_height = ext_output_shape.Dims(2); + const int output_width = ext_output_shape.Dims(3); + const int output_depth = ext_output_shape.Dims(4); + + const int left_b_padding = left_padding_copy[0]; + const int left_p_padding = left_padding_copy[1]; + const int left_h_padding = left_padding_copy[2]; + const int left_w_padding = left_padding_copy[3]; + const int left_d_padding = left_padding_copy[4]; + + const int right_b_padding = right_padding_copy[0]; + const int right_p_padding = right_padding_copy[1]; + const int right_h_padding = right_padding_copy[2]; + const int right_w_padding = right_padding_copy[3]; + const int right_d_padding = right_padding_copy[4]; + + const T pad_value = *pad_value_ptr; + + const T* in_ptr = input_data; + T* out_ptr = output_data; + for (int out_b = 0; out_b < output_batch; ++out_b) { + for (int out_p = 0; out_p < output_plane; ++out_p) { + for (int out_h = 0; out_h < output_height; ++out_h) { + for (int out_w = 0; out_w < output_width; ++out_w) { + for (int out_d = 0; out_d < output_depth; ++out_d) { + if (out_b < left_b_padding || + out_b >= output_batch - right_b_padding || + out_p < left_p_padding || + out_p >= output_plane - right_p_padding || + out_h < left_h_padding || + out_h >= output_height - right_h_padding || + out_w < left_w_padding || + out_w >= output_width - right_w_padding || + out_d < left_d_padding || + out_d >= output_depth - right_d_padding) { + *out_ptr++ = pad_value; + } else { + *out_ptr++ = *in_ptr++; + } + } + } + } + } + } +} + +template +inline void Pad(const tflite::PadParams& op_params, + const RuntimeShape& input_shape, const T* input_data, + const P* pad_value_ptr, const RuntimeShape& output_shape, + T* output_data) { + PadImpl(op_params, input_shape, input_data, pad_value_ptr, output_shape, + output_data); +} + +// The second (pad-value) input can be int32_t when, say, the first is uint8_t. 
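A hypothetical call site for the overload that follows: uint8_t tensor data with the pad value delivered as int32_t (as it is when it comes from a quantized scalar tensor). The wrapper name is illustrative; the overload converts the value to T before delegating to PadImpl.

#include <cstdint>

#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/pad.h"

void PadUint8Sketch(const tflite::PadParams& op_params,
                    const tflite::RuntimeShape& input_shape,
                    const uint8_t* input_data, int32_t pad_value,
                    const tflite::RuntimeShape& output_shape,
                    uint8_t* output_data) {
  tflite::reference_ops::Pad(op_params, input_shape, input_data, &pad_value,
                             output_shape, output_data);
}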
+template +inline void Pad(const tflite::PadParams& op_params, + const RuntimeShape& input_shape, const T* input_data, + const int32_t* pad_value_ptr, const RuntimeShape& output_shape, + T* output_data) { + const T converted_pad_value = static_cast(*pad_value_ptr); + PadImpl(op_params, input_shape, input_data, &converted_pad_value, + output_shape, output_data); +} + +// This version avoids conflicting template matching. +template <> +inline void Pad(const tflite::PadParams& op_params, + const RuntimeShape& input_shape, const int32_t* input_data, + const int32_t* pad_value_ptr, const RuntimeShape& output_shape, + int32_t* output_data) { + PadImpl(op_params, input_shape, input_data, pad_value_ptr, output_shape, + output_data); +} + +template +inline void PadImageStyle(const tflite::PadParams& op_params, + const RuntimeShape& input_shape, const T* input_data, + const P* pad_value_ptr, + const RuntimeShape& output_shape, T* output_data) { + Pad(op_params, input_shape, input_data, pad_value_ptr, output_shape, + output_data); +} + +template +inline void PadImageStyle(const tflite::PadParams& op_params, + const RuntimeShape& input_shape, + const float* input_data, const P* pad_value_ptr, + const RuntimeShape& output_shape, + float* output_data) { + Pad(op_params, input_shape, input_data, pad_value_ptr, output_shape, + output_data); +} + +} // namespace reference_ops +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PAD_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/pooling.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/pooling.h new file mode 100644 index 0000000..3657ffd --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/pooling.h @@ -0,0 +1,303 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_POOLING_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_POOLING_H_ + +#include + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/cppmath.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" + +namespace tflite { +namespace reference_ops { + +inline bool AveragePool(const PoolParams& params, + const RuntimeShape& input_shape, + const float* input_data, + const RuntimeShape& output_shape, float* output_data) { + TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); + const int batches = MatchingDim(input_shape, 0, output_shape, 0); + const int depth = MatchingDim(input_shape, 3, output_shape, 3); + const int input_height = input_shape.Dims(1); + const int input_width = input_shape.Dims(2); + const int output_height = output_shape.Dims(1); + const int output_width = output_shape.Dims(2); + const int stride_height = params.stride_height; + const int stride_width = params.stride_width; + for (int batch = 0; batch < batches; ++batch) { + for (int out_y = 0; out_y < output_height; ++out_y) { + for (int out_x = 0; out_x < output_width; ++out_x) { + for (int channel = 0; channel < depth; ++channel) { + const int in_x_origin = + (out_x * stride_width) - params.padding_values.width; + const int in_y_origin = + (out_y * stride_height) - params.padding_values.height; + // Compute the boundaries of the filter region clamped so as to + // ensure that the filter window fits in the input array. 
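The boundary clamp computed just below (and repeated in the other pooling kernels in this file) can be sanity-checked with a small worked example; values are illustrative.

#include <algorithm>
#include <cstdio>

// For output positions near an edge, part of the filter window falls outside
// the input and is simply excluded from the pooled region.
int main() {
  const int input_width = 8, filter_width = 3, stride_width = 2;
  const int pad_width = 1;  // params.padding_values.width
  for (int out_x = 0; out_x < 4; ++out_x) {
    const int in_x_origin = out_x * stride_width - pad_width;
    const int start = std::max(0, -in_x_origin);
    const int end = std::min(filter_width, input_width - in_x_origin);
    std::printf("out_x=%d covers input x in [%d, %d)\n", out_x,
                in_x_origin + start, in_x_origin + end);
  }
  return 0;  // out_x=0 -> [0, 2), out_x=1 -> [1, 4), out_x=2 -> [3, 6), ...
}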
+ const int filter_x_start = std::max(0, -in_x_origin); + const int filter_x_end = + std::min(params.filter_width, input_width - in_x_origin); + const int filter_y_start = std::max(0, -in_y_origin); + const int filter_y_end = + std::min(params.filter_height, input_height - in_y_origin); + float total = 0.f; + float filter_count = 0; + for (int filter_y = filter_y_start; filter_y < filter_y_end; + ++filter_y) { + for (int filter_x = filter_x_start; filter_x < filter_x_end; + ++filter_x) { + const int in_x = in_x_origin + filter_x; + const int in_y = in_y_origin + filter_y; + total += + input_data[Offset(input_shape, batch, in_y, in_x, channel)]; + filter_count++; + } + } + if (filter_count == 0) return false; + const float average = total / filter_count; + output_data[Offset(output_shape, batch, out_y, out_x, channel)] = + ActivationFunctionWithMinMax(average, params.float_activation_min, + params.float_activation_max); + } + } + } + } + return true; +} + +inline bool AveragePool(const PoolParams& params, + const RuntimeShape& input_shape, + const uint8_t* input_data, + const RuntimeShape& output_shape, + uint8_t* output_data) { + TFLITE_DCHECK_LE(params.quantized_activation_min, + params.quantized_activation_max); + TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); + const int batches = MatchingDim(input_shape, 0, output_shape, 0); + const int depth = MatchingDim(input_shape, 3, output_shape, 3); + const int input_height = input_shape.Dims(1); + const int input_width = input_shape.Dims(2); + const int output_height = output_shape.Dims(1); + const int output_width = output_shape.Dims(2); + const int stride_height = params.stride_height; + const int stride_width = params.stride_width; + for (int batch = 0; batch < batches; ++batch) { + for (int out_y = 0; out_y < output_height; ++out_y) { + for (int out_x = 0; out_x < output_width; ++out_x) { + for (int channel = 0; channel < depth; ++channel) { + const int in_x_origin = + (out_x * stride_width) - params.padding_values.width; + const int in_y_origin = + (out_y * stride_height) - params.padding_values.height; + // Compute the boundaries of the filter region clamped so as to + // ensure that the filter window fits in the input array. 
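The boundary clamp continues below exactly as in the float version; what is specific to this quantized variant is the averaging further down, acc = (acc + filter_count / 2) / filter_count, which is a rounded rather than truncating integer division. A minimal check of that idiom (valid for the non-negative sums that occur here):

#include <cstdio>

int main() {
  const int count = 9;
  const int sums[] = {35, 36, 40, 41};
  for (int acc : sums) {
    std::printf("acc=%d truncated=%d rounded=%d\n", acc, acc / count,
                (acc + count / 2) / count);
  }
  return 0;  // e.g. 35/9 = 3.9: truncation gives 3, the rounded form gives 4.
}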
+ const int filter_x_start = std::max(0, -in_x_origin); + const int filter_x_end = + std::min(params.filter_width, input_width - in_x_origin); + const int filter_y_start = std::max(0, -in_y_origin); + const int filter_y_end = + std::min(params.filter_height, input_height - in_y_origin); + int32_t acc = 0; + int filter_count = 0; + for (int filter_y = filter_y_start; filter_y < filter_y_end; + ++filter_y) { + for (int filter_x = filter_x_start; filter_x < filter_x_end; + ++filter_x) { + const int in_x = in_x_origin + filter_x; + const int in_y = in_y_origin + filter_y; + acc += + input_data[Offset(input_shape, batch, in_y, in_x, channel)]; + filter_count++; + } + } + if (filter_count == 0) return false; + acc = (acc + filter_count / 2) / filter_count; + acc = std::max(acc, params.quantized_activation_min); + acc = std::min(acc, params.quantized_activation_max); + output_data[Offset(output_shape, batch, out_y, out_x, channel)] = + static_cast(acc); + } + } + } + } + return true; +} + +inline void L2Pool(const PoolParams& params, const RuntimeShape& input_shape, + const float* input_data, const RuntimeShape& output_shape, + float* output_data) { + TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); + const int batches = MatchingDim(input_shape, 0, output_shape, 0); + const int depth = MatchingDim(input_shape, 3, output_shape, 3); + const int input_height = input_shape.Dims(1); + const int input_width = input_shape.Dims(2); + const int output_height = output_shape.Dims(1); + const int output_width = output_shape.Dims(2); + const int stride_height = params.stride_height; + const int stride_width = params.stride_width; + for (int batch = 0; batch < batches; ++batch) { + for (int out_y = 0; out_y < output_height; ++out_y) { + for (int out_x = 0; out_x < output_width; ++out_x) { + for (int channel = 0; channel < depth; ++channel) { + const int in_x_origin = + (out_x * stride_width) - params.padding_values.width; + const int in_y_origin = + (out_y * stride_height) - params.padding_values.height; + // Compute the boundaries of the filter region clamped so as to + // ensure that the filter window fits in the input array. 
+ const int filter_x_start = std::max(0, -in_x_origin); + const int filter_x_end = + std::min(params.filter_width, input_width - in_x_origin); + const int filter_y_start = std::max(0, -in_y_origin); + const int filter_y_end = + std::min(params.filter_height, input_height - in_y_origin); + float sum_squares = 0.f; + int filter_count = 0; + for (int filter_y = filter_y_start; filter_y < filter_y_end; + ++filter_y) { + for (int filter_x = filter_x_start; filter_x < filter_x_end; + ++filter_x) { + const int in_x = in_x_origin + filter_x; + const int in_y = in_y_origin + filter_y; + const float val = + input_data[Offset(input_shape, batch, in_y, in_x, channel)]; + sum_squares += val * val; + filter_count++; + } + } + const float l2pool_result = std::sqrt(sum_squares / filter_count); + output_data[Offset(output_shape, batch, out_y, out_x, channel)] = + ActivationFunctionWithMinMax(l2pool_result, + params.float_activation_min, + params.float_activation_max); + } + } + } + } +} + +inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape, + const float* input_data, const RuntimeShape& output_shape, + float* output_data) { + TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); + const int batches = MatchingDim(input_shape, 0, output_shape, 0); + const int depth = MatchingDim(input_shape, 3, output_shape, 3); + const int input_height = input_shape.Dims(1); + const int input_width = input_shape.Dims(2); + const int output_height = output_shape.Dims(1); + const int output_width = output_shape.Dims(2); + const int stride_height = params.stride_height; + const int stride_width = params.stride_width; + for (int batch = 0; batch < batches; ++batch) { + for (int out_y = 0; out_y < output_height; ++out_y) { + for (int out_x = 0; out_x < output_width; ++out_x) { + for (int channel = 0; channel < depth; ++channel) { + const int in_x_origin = + (out_x * stride_width) - params.padding_values.width; + const int in_y_origin = + (out_y * stride_height) - params.padding_values.height; + // Compute the boundaries of the filter region clamped so as to + // ensure that the filter window fits in the input array. 
+ const int filter_x_start = std::max(0, -in_x_origin); + const int filter_x_end = + std::min(params.filter_width, input_width - in_x_origin); + const int filter_y_start = std::max(0, -in_y_origin); + const int filter_y_end = + std::min(params.filter_height, input_height - in_y_origin); + float max = std::numeric_limits::lowest(); + for (int filter_y = filter_y_start; filter_y < filter_y_end; + ++filter_y) { + for (int filter_x = filter_x_start; filter_x < filter_x_end; + ++filter_x) { + const int in_x = in_x_origin + filter_x; + const int in_y = in_y_origin + filter_y; + max = std::max( + max, + input_data[Offset(input_shape, batch, in_y, in_x, channel)]); + } + } + output_data[Offset(output_shape, batch, out_y, out_x, channel)] = + ActivationFunctionWithMinMax(max, params.float_activation_min, + params.float_activation_max); + } + } + } + } +} + +inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape, + const uint8_t* input_data, const RuntimeShape& output_shape, + uint8_t* output_data) { + TFLITE_DCHECK_LE(params.quantized_activation_min, + params.quantized_activation_max); + TFLITE_DCHECK_GE(params.quantized_activation_min, 0); + TFLITE_DCHECK_LE(params.quantized_activation_max, 255); + TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); + const int batches = MatchingDim(input_shape, 0, output_shape, 0); + const int depth = MatchingDim(input_shape, 3, output_shape, 3); + const int input_height = input_shape.Dims(1); + const int input_width = input_shape.Dims(2); + const int output_height = output_shape.Dims(1); + const int output_width = output_shape.Dims(2); + const int stride_height = params.stride_height; + const int stride_width = params.stride_width; + for (int batch = 0; batch < batches; ++batch) { + for (int out_y = 0; out_y < output_height; ++out_y) { + for (int out_x = 0; out_x < output_width; ++out_x) { + for (int channel = 0; channel < depth; ++channel) { + const int in_x_origin = + (out_x * stride_width) - params.padding_values.width; + const int in_y_origin = + (out_y * stride_height) - params.padding_values.height; + // Compute the boundaries of the filter region clamped so as to + // ensure that the filter window fits in the input array. 
+ const int filter_x_start = std::max(0, -in_x_origin); + const int filter_x_end = + std::min(params.filter_width, input_width - in_x_origin); + const int filter_y_start = std::max(0, -in_y_origin); + const int filter_y_end = + std::min(params.filter_height, input_height - in_y_origin); + uint8_t max = 0; + for (int filter_y = filter_y_start; filter_y < filter_y_end; + ++filter_y) { + for (int filter_x = filter_x_start; filter_x < filter_x_end; + ++filter_x) { + const int in_x = in_x_origin + filter_x; + const int in_y = in_y_origin + filter_y; + max = std::max( + max, + input_data[Offset(input_shape, batch, in_y, in_x, channel)]); + } + } + max = std::max(max, params.quantized_activation_min); + max = std::min(max, params.quantized_activation_max); + output_data[Offset(output_shape, batch, out_y, out_x, channel)] = + static_cast(max); + } + } + } + } +} +} // namespace reference_ops +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_POOLING_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/prelu.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/prelu.h new file mode 100644 index 0000000..6d1dbe0 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/prelu.h @@ -0,0 +1,111 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PRELU_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PRELU_H_ + +#include + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" + +namespace tflite { + +namespace reference_ops { + +// Broadcast prelu to output_shape for quantized uint8_t/int8_t data. 
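The two multiplier/shift pairs consumed by BroadcastPrelu4DSlow and Prelu below are prepared ahead of time in the op implementation, not in this header. As a hedged sketch of where they typically come from (names here are illustrative): the identity branch rescales by s_in / s_out and the alpha branch by s_in * s_alpha / s_out, and each real ratio is then quantized into an (int32 multiplier, shift) pair for MultiplyByQuantizedMultiplier().

// Illustrative only.
struct PreluRealMultipliersSketch {
  double identity_branch;  // feeds output_multiplier_1 / output_shift_1
  double alpha_branch;     // feeds output_multiplier_2 / output_shift_2
};

inline PreluRealMultipliersSketch ComputePreluMultipliersSketch(
    double input_scale, double alpha_scale, double output_scale) {
  return {input_scale / output_scale,
          input_scale * alpha_scale / output_scale};
}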
+template +inline void BroadcastPrelu4DSlow( + const PreluParams& params, const RuntimeShape& input_shape, + const T* input_data, const RuntimeShape& alpha_shape, const T* alpha_data, + const RuntimeShape& output_shape, T* output_data) { + TFLITE_DCHECK_LE(input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(alpha_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(output_shape.DimensionsCount(), 4); + const RuntimeShape extended_output_shape = + RuntimeShape::ExtendedShape(4, output_shape); + NdArrayDesc<4> desc1; + NdArrayDesc<4> desc2; + NdArrayDescsForElementwiseBroadcast(input_shape, alpha_shape, &desc1, &desc2); + + for (int b = 0; b < extended_output_shape.Dims(0); ++b) { + for (int y = 0; y < extended_output_shape.Dims(1); ++y) { + for (int x = 0; x < extended_output_shape.Dims(2); ++x) { + for (int c = 0; c < extended_output_shape.Dims(3); ++c) { + int output_index = Offset(extended_output_shape, b, y, x, c); + int input_index = SubscriptToIndex(desc1, b, y, x, c); + const int32_t input_value = + params.input_offset + input_data[input_index]; + int32_t output_value; + if (input_value >= 0) { + output_value = MultiplyByQuantizedMultiplier( + input_value, params.output_multiplier_1, params.output_shift_1); + } else { + auto alpha_index = SubscriptToIndex(desc2, b, y, x, c); + const int32_t alpha_value = + params.alpha_offset + alpha_data[alpha_index]; + + output_value = MultiplyByQuantizedMultiplier( + input_value * alpha_value, params.output_multiplier_2, + params.output_shift_2); + } + output_value += params.output_offset; + + const int32_t quantized_min = std::numeric_limits::min(); + const int32_t quantized_max = std::numeric_limits::max(); + const int32_t clamped_output = + std::min(quantized_max, std::max(quantized_min, output_value)); + output_data[output_index] = static_cast(clamped_output); + } + } + } + } +} + +template +inline void Prelu(const PreluParams& params, const RuntimeShape& input_shape, + const T* input_data, const RuntimeShape& alpha_shape, + const T* alpha_data, const RuntimeShape& output_shape, + T* output_data) { + const int32_t quantized_min = std::numeric_limits::min(); + const int32_t quantized_max = std::numeric_limits::max(); + + const int flat_size = + MatchingElementsSize(input_shape, alpha_shape, output_shape); + for (int i = 0; i < flat_size; ++i) { + const int32_t input_value = params.input_offset + input_data[i]; + int32_t output_value; + if (input_value >= 0) { + output_value = MultiplyByQuantizedMultiplier( + input_value, params.output_multiplier_1, params.output_shift_1); + } else { + const int32_t alpha_value = params.alpha_offset + alpha_data[i]; + + output_value = MultiplyByQuantizedMultiplier(input_value * alpha_value, + params.output_multiplier_2, + params.output_shift_2); + } + output_value += params.output_offset; + + const int32_t clamped_output = + std::min(quantized_max, std::max(quantized_min, output_value)); + output_data[i] = static_cast(clamped_output); + } +} + +} // namespace reference_ops +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PRELU_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h new file mode 100644 index 0000000..760f54d --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h @@ -0,0 +1,140 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PROCESS_BROADCAST_SHAPES_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PROCESS_BROADCAST_SHAPES_H_ + +#include + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" + +namespace tflite { + +namespace reference_ops { + +// Consolidates dimensions in broadcast inputs, checks for five-fold pattern. +// +// For example, if sequence of dimensions of one input is +// ..., 1, 3, 1, 7, 9, 5,... and the other is ..., 2, 3, 1, 7, 1, 1, ... +// we can consolidate these as +// ..., 1, 3*7, 9*5, ... and 2, 3*7, 1. +// +// The category is updated in the less-frequent case of shapes that are +// not suited to a fivefold-loop broadcast. +// +// Falls back to generic pattern when it does not know how to process properly. +// +// Returns true iff there is some sort of broadcast, which includes five-fold +// patterns and falling back to generic broadcast. +inline bool ProcessBroadcastShapes(const RuntimeShape& shape0, + const RuntimeShape& shape1, + tflite::ArithmeticParams* params) { + const int dims_count = + std::max(shape0.DimensionsCount(), shape1.DimensionsCount()); + + params->broadcast_category = BroadcastableOpCategory::kGenericBroadcast; + RuntimeShape scalar_shape(dims_count, 1); + + auto extended_shape0 = RuntimeShape::ExtendedShape(dims_count, shape0); + auto extended_shape1 = RuntimeShape::ExtendedShape(dims_count, shape1); + + // Check for "exact" match, implicitly accepting any scalar shapes. + if (extended_shape0 == extended_shape1) { + params->broadcast_category = BroadcastableOpCategory::kNonBroadcast; + return false; + } + + for (int i = dims_count - 1; i >= 0; --i) { + if (extended_shape0.Dims(i) == extended_shape1.Dims(i)) { + continue; + } else if (extended_shape0.Dims(i) == 1) { + params->broadcast_category = + BroadcastableOpCategory::kFirstInputBroadcastsFast; + break; + } else if (extended_shape1.Dims(i) == 1) { + params->broadcast_category = + BroadcastableOpCategory::kSecondInputBroadcastsFast; + break; + } else { + // This case is erroneous: there is a dimension that does not match and + // is not a broadcast from one shape to the other. + params->broadcast_category = BroadcastableOpCategory::kGenericBroadcast; + return true; + } + } + + if (params->broadcast_category != + BroadcastableOpCategory::kFirstInputBroadcastsFast && + params->broadcast_category != + BroadcastableOpCategory::kSecondInputBroadcastsFast) { + // This is unreachable because at least one else clause in the above loop + // must be reached. + TFLITE_DCHECK(false); + params->broadcast_category = BroadcastableOpCategory::kNonBroadcast; + return false; + } + + // From this point it is assumed contractually that corresponding dimensions + // in shape0 and shape1 are either (a) equal or (b) one or other equals 1. 
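Before the consolidation loops that follow, a concrete example may help. The call below is hypothetical (it assumes RuntimeShape's initializer-list constructor) and only illustrates what the fivefold broadcast_shape ends up holding for a simple middle-dimension broadcast.

#include <cstdio>

#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"

// For {8, 1, 6} vs {8, 5, 6} the loops below produce broadcast_shape
// {1, 1, 8, 5, 6}, with the first input flagged as broadcasting fast.
int main() {
  tflite::RuntimeShape shape0({8, 1, 6});
  tflite::RuntimeShape shape1({8, 5, 6});
  tflite::ArithmeticParams params;
  const bool is_broadcast =
      tflite::reference_ops::ProcessBroadcastShapes(shape0, shape1, &params);
  std::printf("broadcast=%d category=%d shape=[%d %d %d %d %d]\n", is_broadcast,
              static_cast<int>(params.broadcast_category),
              params.broadcast_shape[0], params.broadcast_shape[1],
              params.broadcast_shape[2], params.broadcast_shape[3],
              params.broadcast_shape[4]);
  return 0;
}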
+ const bool swap_inputs = params->broadcast_category == + BroadcastableOpCategory::kSecondInputBroadcastsFast; + const RuntimeShape* shape_a = + swap_inputs ? &extended_shape1 : &extended_shape0; + const RuntimeShape* shape_b = + swap_inputs ? &extended_shape0 : &extended_shape1; + + int i = dims_count - 1; + params->broadcast_shape[0] = 1; + params->broadcast_shape[1] = 1; + params->broadcast_shape[2] = 1; + params->broadcast_shape[3] = 1; + params->broadcast_shape[4] = 1; + // y_0 is greedy: include dims if both or neither equal 1: in other words, + // test for equality rather than (shape_a->Dims(i) != 1). + while (i >= 0 && shape_a->Dims(i) == shape_b->Dims(i)) { + params->broadcast_shape[4] *= shape_b->Dims(i); + --i; + } + // Here either input_a or input_b has dim of 1 (if i >= 0). If it is input_b + // that has the unit dimension, the next two loops are not entered. + while (i >= 0 && shape_a->Dims(i) == 1) { + params->broadcast_shape[3] *= shape_b->Dims(i); + --i; + } + while (i >= 0 && shape_a->Dims(i) == shape_b->Dims(i)) { + params->broadcast_shape[2] *= shape_a->Dims(i); + --i; + } + // Here either input_a or input_b has dim of 1 (if i >= 0). + while (i >= 0 && shape_b->Dims(i) == 1) { + params->broadcast_shape[1] *= shape_a->Dims(i); + --i; + } + while (i >= 0 && shape_a->Dims(i) == shape_b->Dims(i)) { + params->broadcast_shape[0] *= shape_b->Dims(i); + --i; + } + + // Rarer case is when the broadcast dimensions cannot be handled by a fivefold + // loop. + if (i >= 0) { + params->broadcast_category = BroadcastableOpCategory::kGenericBroadcast; + } + return true; +} + +} // namespace reference_ops +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PROCESS_BROADCAST_SHAPES_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/quantize.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/quantize.h new file mode 100644 index 0000000..b791413 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/quantize.h @@ -0,0 +1,89 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
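To make the broadcast handling above concrete: two right-aligned shapes are broadcastable when each pair of dimensions either matches or has a 1 on one side, and ProcessBroadcastShapes additionally collapses runs of matching or broadcast dimensions into the fivefold pattern stored in broadcast_shape. A minimal standalone check of the compatibility rule, using plain std::vector shapes rather than RuntimeShape (the function name is made up for illustration):

// Standalone sketch: the basic rule ProcessBroadcastShapes relies on.
#include <cstdio>
#include <vector>

bool BroadcastCompatible(const std::vector<int>& a, const std::vector<int>& b) {
  // Assumes both shapes were already right-aligned to the same rank, as
  // RuntimeShape::ExtendedShape does in the kernel above.
  if (a.size() != b.size()) return false;
  for (size_t i = 0; i < a.size(); ++i) {
    if (a[i] != b[i] && a[i] != 1 && b[i] != 1) return false;
  }
  return true;
}

int main() {
  // The example from the header comment, (1,3,1,7,9,5) vs (2,3,1,7,1,1), is
  // broadcastable; the fivefold walk then collapses the matching and
  // broadcast runs into products such as 3*7 and 9*5.
  std::printf("%d\n", BroadcastCompatible({1, 3, 1, 7, 9, 5}, {2, 3, 1, 7, 1, 1}));  // 1
  std::printf("%d\n", BroadcastCompatible({4, 3}, {2, 3}));                          // 0
  return 0;
}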
+==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_QUANTIZE_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_QUANTIZE_H_ + +#include +#include +#include + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/cppmath.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" + +namespace tflite { + +namespace reference_ops { + +template +inline void AffineQuantize(const tflite::QuantizationParams& op_params, + const RuntimeShape& input_shape, + const InputT* input_data, + const RuntimeShape& output_shape, + OutputT* output_data) { + const int32_t zero_point = op_params.zero_point; + const double scale = op_params.scale; + const int flat_size = MatchingFlatSize(input_shape, output_shape); + static constexpr int32_t min_val = std::numeric_limits::min(); + static constexpr int32_t max_val = std::numeric_limits::max(); + + for (int i = 0; i < flat_size; i++) { + const InputT val = input_data[i]; + int32_t unclamped = + static_cast(TfLiteRound(val / static_cast(scale))) + + zero_point; + int32_t clamped = std::min(std::max(unclamped, min_val), max_val); + output_data[i] = clamped; + } +} + +// Quantizes per-channel. +template +inline void PerChannelQuantize( + const tflite::PerChannelQuantizationParams& op_params, + const RuntimeShape& input_shape, const InputT* input_data, + const RuntimeShape& output_shape, OutputT* output_data) { + // Ensure flat size is same. + MatchingFlatSize(input_shape, output_shape); + + const int32_t* zero_point = op_params.zero_point; + const float* scale = op_params.scale; + const int32_t quantized_dimension = op_params.quantized_dimension; + const int32_t num_dims = input_shape.DimensionsCount(); + const int32_t* dims_data = input_shape.DimsData(); + std::vector current_dim(num_dims, 0); + static constexpr int32_t min_val = std::numeric_limits::min(); + static constexpr int32_t max_val = std::numeric_limits::max(); + + do { + size_t offset = + ReducedOutputOffset(num_dims, reinterpret_cast(dims_data), + current_dim.data(), 0, nullptr); + const InputT val = input_data[offset]; + const int channel = current_dim[quantized_dimension]; + int32_t unclamped = static_cast(TfLiteRound( + val / static_cast(scale[channel]))) + + zero_point[channel]; + int32_t clamped = std::min(std::max(unclamped, min_val), max_val); + output_data[offset] = static_cast(clamped); + } while (NextIndex(num_dims, reinterpret_cast(dims_data), + current_dim.data())); +} + +} // namespace reference_ops + +} // namespace tflite +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_QUANTIZE_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/reduce.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/reduce.h new file mode 100644 index 0000000..54f24f4 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/reduce.h @@ -0,0 +1,542 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
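As a small illustration of AffineQuantize above: each float is divided by the scale, rounded, offset by the zero point, and clamped to the output type's range. The sketch below is standalone and not part of the SDK; its scale and zero point are arbitrary example values.

// Standalone sketch of the affine mapping q = round(x / scale) + zero_point.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

int8_t AffineQuantizeOne(float x, float scale, int32_t zero_point) {
  const int32_t unclamped =
      static_cast<int32_t>(std::round(x / scale)) + zero_point;
  const int32_t clamped =
      std::min<int32_t>(127, std::max<int32_t>(-128, unclamped));
  return static_cast<int8_t>(clamped);
}

int main() {
  // With scale 0.1 and zero point -5: 1.23 -> round(12.3) - 5 = 7.
  std::printf("%d\n", AffineQuantizeOne(1.23f, 0.1f, -5));
  // Values far outside the representable range saturate at the int8 limits.
  std::printf("%d\n", AffineQuantizeOne(100.f, 0.1f, -5));  // 127
  return 0;
}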
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_REDUCE_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_REDUCE_H_ + +#include + +#include "edge-impulse-sdk/third_party/ruy/ruy/profiler/instrumentation.h" // from @ruy +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/cppmath.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/max.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/min.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" + +// Check if the reduction at index is the first one along the dimensions given +// in axis. +inline bool IsFirstReduction(const int* index, const int num_axis, + const int* axis) { + if (num_axis == 0) { + return true; + } + + TFLITE_DCHECK(index != nullptr); + TFLITE_DCHECK(axis != nullptr); + for (int axis_idx = 0; axis_idx < num_axis; ++axis_idx) { + if (index[axis[axis_idx]] != 0) { + return false; + } + } + + return true; +} + +namespace tflite { + +namespace reference_ops { + +// A generic reduce method that can be used for reduce_sum, reduce_mean, etc. +// This method iterates through input data and reduce elements along the +// dimensions given in axis. +template +inline bool Reduce(const In* input_data, const int* input_dims, + const int* output_dims, const int input_num_dims, + const int output_num_dims, const int* axis, + const int num_axis, int* input_iter, + Out reducer(Out current, const In in), Out* output_data) { + // Reset input iterator. + for (int idx = 0; idx < input_num_dims; ++idx) { + input_iter[idx] = 0; + } + // Iterate through input_data. + do { + size_t input_offset = + ReducedOutputOffset(input_num_dims, input_dims, input_iter, 0, nullptr); + size_t output_offset = ReducedOutputOffset(input_num_dims, input_dims, + input_iter, num_axis, axis); + output_data[output_offset] = + reducer(output_data[output_offset], input_data[input_offset]); + } while (NextIndex(input_num_dims, input_dims, input_iter)); + return true; +} + +// Similar to above Reduce function but takes two reducer functions. +// The 'reducer_first' is called with the first value of the reduction, +// 'reducer_next' is then called for all the others. +template +inline bool Reduce(const In* input_data, const int* input_dims, + const int* output_dims, const int input_num_dims, + const int output_num_dims, const int* axis, + const int num_axis, int* input_iter, + const std::function& reducer_first, + const std::function& reducer_next, + Out* output_data) { + // Reset input iterator. + for (int idx = 0; idx < input_num_dims; ++idx) { + input_iter[idx] = 0; + } + // Iterate through input_data. 
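For intuition, the generic Reduce above walks every input element, maps it to an output offset with the reduced axes stripped, and folds it in with the reducer callback. A hand-rolled equivalent for a 2x3 sum over axis 1, with plain arrays standing in for the SDK's iterator and offset helpers:

// Standalone sketch: what Reduce computes for a 2x3 input summed over axis 1.
#include <cstdio>

int main() {
  const int input[2][3] = {{1, 2, 3}, {4, 5, 6}};
  int output[2] = {0, 0};  // output keeps only the non-reduced dimension
  for (int r = 0; r < 2; ++r) {
    for (int c = 0; c < 3; ++c) {
      // reducer(current, in) == current + in for a sum reduction
      output[r] += input[r][c];
    }
  }
  std::printf("%d %d\n", output[0], output[1]);  // 6 15
  return 0;
}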
+ do { + size_t input_offset = + ReducedOutputOffset(input_num_dims, input_dims, input_iter, 0, nullptr); + size_t output_offset = ReducedOutputOffset(input_num_dims, input_dims, + input_iter, num_axis, axis); + if (IsFirstReduction(input_iter, num_axis, axis)) { + output_data[output_offset] = reducer_first(input_data[input_offset]); + } else { + output_data[output_offset] = + reducer_next(output_data[output_offset], input_data[input_offset]); + } + } while (NextIndex(input_num_dims, input_dims, input_iter)); + return true; +} + +// This method parses the input 'axis' to remove duplicates and handle negative +// values, and returns a valid 'out_axis' +inline bool ResolveAxis(const int num_dims, const int* axis, + const int64_t num_axis, int* out_axis, + int* out_num_axis) { + *out_num_axis = 0; // Just in case. + // Short-circuit axis resolution for scalars; the axis will go unused. + if (num_dims == 0) { + return true; + } + // o(n^2) is fine since out_num_axis should be really small, mostly <= 4 + for (int64_t idx = 0; idx < num_axis; ++idx) { + // Handle negative index. A positive index 'p_idx' can be represented as a + // negative index 'n_idx' as: n_idx = p_idx-num_dims + // eg: For num_dims=3, [0, 1, 2] is the same as [-3, -2, -1] */ + int current = axis[idx] < 0 ? (axis[idx] + num_dims) : axis[idx]; + TFLITE_DCHECK(current >= 0 && current < num_dims); + if (current < 0 || current >= num_dims) { + return false; + } + bool is_dup = false; + for (int j = 0; j < *out_num_axis; ++j) { + if (out_axis[j] == current) { + is_dup = true; + break; + } + } + if (!is_dup) { + out_axis[*out_num_axis] = current; + *out_num_axis += 1; + } + } + return true; +} + +// This method expects that output_data has been initialized. +template +inline bool ReduceSumImpl(const In* input_data, const int* input_dims, + const int* output_dims, const int input_num_dims, + const int output_num_dims, const int* axis, + const int num_axis, int* input_iter, + Out* output_data) { + auto reducer = [](const Out current, const In in) -> Out { + const Out actual_in = static_cast(in); + return current + actual_in; + }; + return Reduce(input_data, input_dims, output_dims, input_num_dims, + output_num_dims, axis, num_axis, input_iter, reducer, + output_data); +} + +template +inline bool InitTensorDataForReduce(const int* dims, const int num_dims, + const T init_value, T* data) { + size_t num_elements = 1; + for (int idx = 0; idx < num_dims; ++idx) { + size_t current = static_cast(dims[idx]); + // Overflow prevention. + if (current > 0 && + num_elements > std::numeric_limits::max() / current) { + return false; + } + num_elements *= current; + } + for (size_t idx = 0; idx < num_elements; ++idx) { + data[idx] = init_value; + } + return true; +} + +// Computes the generic value (i.e., sum/max/min/prod) of elements across +// dimensions given in axis. It needs to pass in init_value and reducer. +template +inline bool ReduceGeneric(const T* input_data, const int* input_dims, + const int input_num_dims, T* output_data, + const int* output_dims, const int output_num_dims, + const int* axis, const int64_t num_axis_dimensions, + bool keep_dims, int* temp_index, int* resolved_axis, + T init_value, + T reducer(const T current, const T in)) { + // Reset output data. + if (!InitTensorDataForReduce(output_dims, output_num_dims, init_value, + output_data)) { + return false; + } + + // Return early when input shape has zero dim. 
This is done after initializing + // data for output tensor because there are cases that the input tensor is + // empty but output tensor is not. In that case, output tensor should be + // filled with init_value. + for (int i = 0; i < input_num_dims; ++i) { + if (input_dims[i] == 0) return true; + } + + // Resolve axis. + int num_resolved_axis = 0; + if (!ResolveAxis(input_num_dims, axis, num_axis_dimensions, resolved_axis, + &num_resolved_axis)) { + return false; + } + + return Reduce(input_data, input_dims, output_dims, input_num_dims, + output_num_dims, resolved_axis, num_resolved_axis, + temp_index, reducer, output_data); +} + +// Computes the mean of elements across dimensions given in axis. +// It does so in two stages, first calculates the sum of elements along the axis +// then divides it by the number of element in axis. +template +inline bool Mean(const T* input_data, const int* input_dims, + const int input_num_dims, T* output_data, + const int* output_dims, const int output_num_dims, + const int* axis, const int num_axis_dimensions, bool keep_dims, + int* temp_index, int* resolved_axis, U* temp_sum) { + ruy::profiler::ScopeLabel label("Mean"); + // Reset output data. + size_t num_outputs = 1; + for (int idx = 0; idx < output_num_dims; ++idx) { + size_t current = static_cast(output_dims[idx]); + // Overflow prevention. + if (num_outputs > std::numeric_limits::max() / current) { + return false; + } + num_outputs *= current; + } + for (size_t idx = 0; idx < num_outputs; ++idx) { + output_data[idx] = T(); + temp_sum[idx] = U(); + } + + // Resolve axis. + int num_resolved_axis = 0; + if (!ResolveAxis(input_num_dims, axis, num_axis_dimensions, resolved_axis, + &num_resolved_axis)) { + return false; + } + + if (!ReduceSumImpl(input_data, input_dims, output_dims, input_num_dims, + output_num_dims, resolved_axis, num_resolved_axis, + temp_index, temp_sum)) { + return false; + } + + // Calculate mean by dividing output_data by num of aggregated element. + size_t num_elements_in_axis = 1; + for (int idx = 0; idx < num_resolved_axis; ++idx) { + size_t current = static_cast(input_dims[resolved_axis[idx]]); + // Overflow prevention. + if (current > (std::numeric_limits::max() / num_elements_in_axis)) { + return false; + } + num_elements_in_axis *= current; + } + + if (num_elements_in_axis > 0) { + for (size_t idx = 0; idx < num_outputs; ++idx) { + output_data[idx] = + static_cast(temp_sum[idx] / static_cast(num_elements_in_axis)); + } + } + return true; +} + +template +inline void Mean(const tflite::MeanParams& op_params, + const RuntimeShape& unextended_input_shape, + const T* input_data, + const RuntimeShape& unextended_output_shape, T* output_data) { + ruy::profiler::ScopeLabel label("Mean4D"); + + // Current implementation only supports dimension equals 4 and simultaneous + // reduction over width and height. 
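The Mean above is deliberately two-stage: it accumulates into the wider temp_sum buffer and only divides at the end, which keeps the per-element type small and avoids repeated rounding. A standalone sketch of the same idea on a 2x4 uint8 input (all names and values below are illustrative):

// Standalone sketch: two-stage mean over the last axis, accumulating in
// int32_t the way the kernel uses a wider temp_sum buffer.
#include <cstdint>
#include <cstdio>

int main() {
  const uint8_t input[2][4] = {{10, 20, 30, 40}, {1, 3, 5, 7}};
  const int num_elements_in_axis = 4;
  for (int r = 0; r < 2; ++r) {
    int32_t temp_sum = 0;  // stage 1: sum in a wider type
    for (int c = 0; c < num_elements_in_axis; ++c) temp_sum += input[r][c];
    const uint8_t mean =
        static_cast<uint8_t>(temp_sum / num_elements_in_axis);  // stage 2: divide
    std::printf("row %d mean = %d\n", r, mean);  // 25 and 4
  }
  return 0;
}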
+ TFLITE_CHECK_EQ(unextended_input_shape.DimensionsCount(), 4); + TFLITE_CHECK_LE(unextended_output_shape.DimensionsCount(), 4); + const RuntimeShape input_shape = + RuntimeShape::ExtendedShape(4, unextended_input_shape); + const RuntimeShape output_shape = + RuntimeShape::ExtendedShape(4, unextended_output_shape); + + const int output_batch = output_shape.Dims(0); + const int output_height = output_shape.Dims(1); + const int output_width = output_shape.Dims(2); + const int output_depth = output_shape.Dims(3); + + const int input_height = input_shape.Dims(1); + const int input_width = input_shape.Dims(2); + + TFLITE_CHECK_EQ(op_params.axis_count, 2); + TFLITE_CHECK((op_params.axis[0] == 1 && op_params.axis[1] == 2) || + (op_params.axis[0] == 2 && op_params.axis[1] == 1)); + TFLITE_CHECK_EQ(output_height, 1); + TFLITE_CHECK_EQ(output_width, 1); + + for (int out_b = 0; out_b < output_batch; ++out_b) { + for (int out_d = 0; out_d < output_depth; ++out_d) { + float value = 0; + for (int in_h = 0; in_h < input_height; ++in_h) { + for (int in_w = 0; in_w < input_width; ++in_w) { + value += input_data[Offset(input_shape, out_b, in_h, in_w, out_d)]; + } + } + output_data[Offset(output_shape, out_b, 0, 0, out_d)] = + value / (input_width * input_height); + } + } +} + +inline void Mean(const tflite::MeanParams& op_params, + const RuntimeShape& unextended_input_shape, + const uint8_t* input_data, int32_t input_zero_point, + float input_scale, const RuntimeShape& unextended_output_shape, + uint8_t* output_data, int32_t output_zero_point, + float output_scale) { + ruy::profiler::ScopeLabel label("Mean4D/Uint8"); + + // Current implementation only supports dimension equals 4 and simultaneous + // reduction over width and height. + TFLITE_CHECK_EQ(unextended_input_shape.DimensionsCount(), 4); + TFLITE_CHECK_LE(unextended_output_shape.DimensionsCount(), 4); + const RuntimeShape input_shape = + RuntimeShape::ExtendedShape(4, unextended_input_shape); + const RuntimeShape output_shape = + RuntimeShape::ExtendedShape(4, unextended_output_shape); + const int output_batch = output_shape.Dims(0); + const int output_height = output_shape.Dims(1); + const int output_width = output_shape.Dims(2); + const int output_depth = output_shape.Dims(3); + const int input_height = input_shape.Dims(1); + const int input_width = input_shape.Dims(2); + const float num_elements_in_axis = input_width * input_height; + + TFLITE_CHECK_EQ(op_params.axis_count, 2); + TFLITE_CHECK((op_params.axis[0] == 1 && op_params.axis[1] == 2) || + (op_params.axis[0] == 2 && op_params.axis[1] == 1)); + TFLITE_CHECK_EQ(output_height, 1); + TFLITE_CHECK_EQ(output_width, 1); + + constexpr int32_t kMinValue = std::numeric_limits::min(); + constexpr int32_t kMaxValue = std::numeric_limits::max(); + + float temp = input_zero_point * input_scale / output_scale; + temp = temp > 0 ? 
temp + 0.5f : temp - 0.5f; + int32_t bias = output_zero_point - static_cast(temp); + double real_scale = + static_cast(input_scale / (num_elements_in_axis * output_scale)); + + int32_t multiplier; + int shift; + QuantizeMultiplier(real_scale, &multiplier, &shift); + for (int out_b = 0; out_b < output_batch; ++out_b) { + for (int out_d = 0; out_d < output_depth; ++out_d) { + int32_t acc = 0; + for (int in_h = 0; in_h < input_height; ++in_h) { + for (int in_w = 0; in_w < input_width; ++in_w) { + acc += input_data[Offset(input_shape, out_b, in_h, in_w, out_d)]; + } + } + acc = MultiplyByQuantizedMultiplier(acc, multiplier, shift); + acc += bias; + acc = std::min(std::max(acc, kMinValue), kMaxValue); + output_data[Offset(output_shape, out_b, 0, 0, out_d)] = + static_cast(acc); + } + } +} + +// Computes the mean of elements across dimensions given in axis. +// It does so in two stages, first calculates the sum of elements along the axis +// then divides it by the number of element in axis for quantized values. +template +inline bool QuantizedMeanOrSum(const T* input_data, int32_t input_zero_point, + float input_scale, const int* input_dims, + const int input_num_dims, T* output_data, + int32_t output_zero_point, float output_scale, + const int* output_dims, + const int output_num_dims, const int* axis, + const int num_axis_dimensions, bool keep_dims, + int* temp_index, int* resolved_axis, U* temp_sum, + bool compute_sum) { + const bool uint8_case = std::is_same::value; + const bool int16_case = std::is_same::value; + if (uint8_case) { + ruy::profiler::ScopeLabel label(compute_sum ? "Sum/Uint8" : "Mean/Uint8"); + } else if (int16_case) { + ruy::profiler::ScopeLabel label(compute_sum ? "Sum/Int16" : "Mean/Int16"); + } else { + ruy::profiler::ScopeLabel label(compute_sum ? "Sum/Int8" : "Mean/Int8"); + } + // Reset output data. + size_t num_outputs = 1; + for (int idx = 0; idx < output_num_dims; ++idx) { + size_t current = static_cast(output_dims[idx]); + // Overflow prevention. + if (num_outputs > std::numeric_limits::max() / current) { + return false; + } + num_outputs *= current; + } + for (size_t idx = 0; idx < num_outputs; ++idx) { + output_data[idx] = T(); + temp_sum[idx] = U(); + } + + // Return early when input shape has zero dim. This is done after initializing + // data for output tensor because there are cases that the input tensor is + // empty but output tensor is not. In that case, output tensor should be + // filled with init_value. + for (int i = 0; i < input_num_dims; ++i) { + if (input_dims[i] == 0) return true; + } + + // Resolve axis. + int num_resolved_axis = 0; + if (!ResolveAxis(input_num_dims, axis, num_axis_dimensions, resolved_axis, + &num_resolved_axis)) { + return false; + } + + if (!ReduceSumImpl(input_data, input_dims, output_dims, input_num_dims, + output_num_dims, resolved_axis, num_resolved_axis, + temp_index, temp_sum)) { + return false; + } + + // Calculate mean by dividing output_data by num of aggregated element. + size_t num_elements_in_axis = 1; + for (int idx = 0; idx < num_resolved_axis; ++idx) { + size_t current = static_cast(input_dims[resolved_axis[idx]]); + // Overflow prevention. + if (current > (std::numeric_limits::max() / num_elements_in_axis)) { + return false; + } + num_elements_in_axis *= current; + } + + if (num_elements_in_axis > 0) { + const float scale = input_scale / output_scale; + if (compute_sum) { + // TODO(b/116341117): Eliminate float and do this completely in 8bit. 
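The quantized Mean above folds dequantize, average, and requantize into a single multiplier plus bias applied to the raw accumulator. The sketch below spells out the equivalent real-valued computation with made-up quantization parameters; it is a reference for the intent, not the kernel's fixed-point path.

// Standalone sketch: real-valued view of the quantized mean over one window.
#include <cmath>
#include <cstdint>
#include <cstdio>

int main() {
  const uint8_t window[4] = {120, 130, 140, 150};  // one (batch, channel) slice
  const float input_scale = 0.02f;    // illustrative
  const int32_t input_zero_point = 128;
  const float output_scale = 0.02f;   // illustrative
  const int32_t output_zero_point = 128;

  // Dequantize, average, requantize; the kernel folds all of this into
  // `acc * real_scale + bias`.
  float real_sum = 0.f;
  for (uint8_t v : window) real_sum += input_scale * (v - input_zero_point);
  const float real_mean = real_sum / 4.f;
  const int32_t q =
      static_cast<int32_t>(std::round(real_mean / output_scale)) +
      output_zero_point;
  std::printf("%d\n", q);  // 135, the plain average of the quantized values here
  return 0;
}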
+ const float bias = -input_zero_point * scale * num_elements_in_axis; + for (size_t idx = 0; idx < num_outputs; ++idx) { + const U value = + static_cast(TfLiteRound(temp_sum[idx] * scale + bias)) + + output_zero_point; + output_data[idx] = static_cast(value); + } + } else { + const float bias = -input_zero_point * scale; + for (size_t idx = 0; idx < num_outputs; ++idx) { + float float_mean = static_cast(temp_sum[idx]) / + static_cast(num_elements_in_axis); + float result = TfLiteMin( + TfLiteRound(float_mean * scale + bias) + output_zero_point, + static_cast(std::numeric_limits::max())); + result = TfLiteMax(result, + static_cast(std::numeric_limits::min())); + output_data[idx] = static_cast(result); + } + } + } + return true; +} + +template +inline bool QuantizedMeanOrSumExtraArgs( + const T* input_data, int32_t input_zero_point, float input_scale, + const int* input_dims, const int input_num_dims, T* output_data, + float output_scale, int32_t output_multiplier, int output_shift, + int32_t output_zero_point, const int* output_dims, + const int output_num_dims, const int* axis, const int num_axis_dimensions, + bool keep_dims, int* temp_index, int* resolved_axis, U* temp_sum, + bool compute_sum) { + return QuantizedMeanOrSum( + input_data, input_zero_point, input_scale, input_dims, input_num_dims, + output_data, output_zero_point, output_scale, output_dims, + output_num_dims, axis, num_axis_dimensions, keep_dims, temp_index, + resolved_axis, temp_sum, compute_sum); +} + +template +inline bool QuantizedReduceProd(const T* input_data, int32_t input_zero_point, + const RuntimeShape& input_shape, T* output_data, + int32_t output_zero_point, + const RuntimeShape& output_shape, + const int* axis, + const int64_t num_axis_dimensions, + bool keep_dims, int* temp_index, + int* resolved_axis, int32_t* temp_prod, + int32_t scaling_multiplier, int scaling_shift) { + const int32_t kMinValue = std::numeric_limits::min(); + const int32_t kMaxValue = std::numeric_limits::max(); + + // Resolve axis. + int num_resolved_axis = 0; + if (!ResolveAxis(input_shape.DimensionsCount(), axis, num_axis_dimensions, + resolved_axis, &num_resolved_axis)) { + return false; + } + + // Calculate the reduced product by rescaling each multiplication step to + // avoid an overflow. 
+ auto reducer_first = [&](T in) -> int32_t { return in - input_zero_point; }; + + auto reducer_next = [&](int32_t current, T in) -> int32_t { + const int64_t result = + static_cast(current) * (in - input_zero_point); + return MultiplyByQuantizedMultiplier(result, scaling_multiplier, + scaling_shift); + }; + + if (!Reduce( + input_data, input_shape.DimsData(), output_shape.DimsData(), + input_shape.DimensionsCount(), output_shape.DimensionsCount(), + resolved_axis, num_resolved_axis, temp_index, reducer_first, + reducer_next, temp_prod)) { + return false; + } + + for (int i = 0; i < output_shape.FlatSize(); i++) { + int32_t result = + MultiplyByQuantizedMultiplier(static_cast(temp_prod[i]), + scaling_multiplier, scaling_shift) + + output_zero_point; + result = std::min(std::max(result, kMinValue), kMaxValue); + output_data[i] = static_cast(result); + } + + return true; +} + +} // namespace reference_ops + +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_REDUCE_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/requantize.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/requantize.h new file mode 100644 index 0000000..662046f --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/requantize.h @@ -0,0 +1,70 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_REQUANTIZE_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_REQUANTIZE_H_ + +#include + +#include "edge-impulse-sdk/third_party/ruy/ruy/profiler/instrumentation.h" // from @ruy +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" + +namespace tflite { +namespace reference_ops { + +template +inline void Requantize(const input_type* input_data, int32_t size, + int32_t effective_scale_multiplier, + int32_t effective_scale_shift, int32_t input_zeropoint, + int32_t output_zeropoint, output_type* output_data) { + ruy::profiler::ScopeLabel label("Requantize"); + const bool same_scale = + (effective_scale_multiplier == 1 << 30 && effective_scale_shift == 1); + if (same_scale) { + const bool mixed_type_int8_uint8 = + std::is_same::value && + std::is_same::value; + const bool mixed_type_uint8_int8 = + std::is_same::value && + std::is_same::value; + const int32_t zero_point_diff = input_zeropoint - output_zeropoint; + // Fast path to do requantization for the case when just a shift of 128 is + // needed. 
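The per-step rescaling in QuantizedReduceProd above exists because a raw running product of zero-point-adjusted values overflows 32 bits after only a few factors; scaling the partial product back down after each multiply keeps it bounded. A rough standalone illustration in doubles (the step scale here is arbitrary):

// Standalone sketch: unbounded vs. per-step-rescaled running product.
#include <cstdio>

int main() {
  const int q[4] = {40, 50, 60, 70};  // zero-point-adjusted quantized values
  const double step_scale = 0.01;     // illustrative per-step rescale factor

  double naive = 1.0;     // grows like 40*50*60*70 = 8.4e6 and would overflow
                          // a 32-bit accumulator for longer reductions
  double rescaled = 1.0;  // stays small because it is scaled down each step
  for (int v : q) {
    naive *= v;
    rescaled = rescaled * v * step_scale;
  }
  std::printf("naive=%.0f rescaled=%.3f\n", naive, rescaled);  // 8400000 0.084
  return 0;
}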
+ if ((mixed_type_int8_uint8 && zero_point_diff == -128) || + (mixed_type_uint8_int8 && zero_point_diff == 128)) { + for (int i = 0; i < size; ++i) { + output_data[i] = input_data[i] ^ 0x80; + } + return; + } + } + static constexpr int32_t kMinOutput = std::numeric_limits::min(); + static constexpr int32_t kMaxOutput = std::numeric_limits::max(); + for (int i = 0; i < size; ++i) { + const int32_t input = input_data[i] - input_zeropoint; + const int32_t output = + MultiplyByQuantizedMultiplier(input, effective_scale_multiplier, + effective_scale_shift) + + output_zeropoint; + const int32_t clamped_output = + std::max(std::min(output, kMaxOutput), kMinOutput); + output_data[i] = static_cast(clamped_output); + } +} + +} // namespace reference_ops +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_REQUANTIZE_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/resize_bilinear.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/resize_bilinear.h new file mode 100644 index 0000000..ec8ec26 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/resize_bilinear.h @@ -0,0 +1,228 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_RESIZE_BILINEAR_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_RESIZE_BILINEAR_H_ + +#include +#include +#include +#include + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/cppmath.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" + +namespace tflite { +namespace reference_ops { + +inline void ComputeInterpolationValues(const float value, const float scale, + const bool half_pixel_centers, + int32_t input_size, float* scaled_value, + int32_t* lower_bound, + int32_t* upper_bound) { + if (half_pixel_centers) { + *scaled_value = (value + 0.5f) * scale - 0.5f; + } else { + *scaled_value = value * scale; + } + float scaled_value_floor = std::floor(*scaled_value); + *lower_bound = std::max(static_cast(scaled_value_floor), + static_cast(0)); + *upper_bound = + std::min(static_cast(std::ceil(*scaled_value)), input_size - 1); +} + +template +inline void ResizeBilinear(const tflite::ResizeBilinearParams& op_params, + const RuntimeShape& unextended_input_shape, + const T* input_data, + const RuntimeShape& unextended_output_size_shape, + const int32_t* output_size_data, + const RuntimeShape& unextended_output_shape, + T* output_data) { + // If half_pixel_centers is True, align_corners must be False. 
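The Requantize fast path above relies on the fact that flipping the top bit of an 8-bit value converts between two's-complement int8 and uint8 with a zero-point shift of exactly 128, so no multiplier is needed when the scales match. A small standalone demonstration:

// Standalone sketch: the int8 <-> uint8 fast path (x ^ 0x80).
#include <cstdint>
#include <cstdio>

int main() {
  const int8_t in[3] = {-128, 0, 127};
  for (int8_t v : in) {
    const uint8_t out = static_cast<uint8_t>(v) ^ 0x80;  // same bits, bit 7 flipped
    // out equals v + 128 for every int8 value.
    std::printf("%4d -> %3u (v + 128 = %d)\n", v, static_cast<unsigned>(out),
                v + 128);
  }
  return 0;
}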
+ TFLITE_DCHECK(!op_params.half_pixel_centers || !op_params.align_corners); + TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_output_size_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4); + const RuntimeShape input_shape = + RuntimeShape::ExtendedShape(4, unextended_input_shape); + const RuntimeShape output_size_shape = + RuntimeShape::ExtendedShape(4, unextended_output_size_shape); + const RuntimeShape output_shape = + RuntimeShape::ExtendedShape(4, unextended_output_shape); + + int32_t batches = MatchingDim(input_shape, 0, output_shape, 0); + int32_t input_height = input_shape.Dims(1); + int32_t input_width = input_shape.Dims(2); + int32_t depth = MatchingDim(input_shape, 3, output_shape, 3); + + TFLITE_DCHECK_EQ(output_size_shape.Dims(0), 1); + TFLITE_DCHECK_EQ(output_size_shape.Dims(1), 1); + TFLITE_DCHECK_EQ(output_size_shape.Dims(2), 1); + TFLITE_DCHECK_EQ(output_size_shape.Dims(3), 2); + int32_t output_height = + output_size_data[Offset(output_size_shape, 0, 0, 0, 0)]; + int32_t output_width = + output_size_data[Offset(output_size_shape, 0, 0, 0, 1)]; + + float height_scale = static_cast(input_height) / output_height; + float width_scale = static_cast(input_width) / output_width; + if (op_params.align_corners && output_height > 1) { + height_scale = static_cast(input_height - 1) / (output_height - 1); + } + if (op_params.align_corners && output_width > 1) { + width_scale = static_cast(input_width - 1) / (output_width - 1); + } + const float rounding_offset = std::numeric_limits::is_integer ? .5f : .0f; + + for (int b = 0; b < batches; ++b) { + for (int y = 0; y < output_height; ++y) { + float input_y; + int32_t y0, y1; + ComputeInterpolationValues(y, height_scale, op_params.half_pixel_centers, + input_height, &input_y, &y0, &y1); + for (int x = 0; x < output_width; ++x) { + float input_x; + int32_t x0, x1; + ComputeInterpolationValues(x, width_scale, op_params.half_pixel_centers, + input_width, &input_x, &x0, &x1); + for (int c = 0; c < depth; ++c) { + T interpolation = + static_cast(input_data[Offset(input_shape, b, y0, x0, c)] * + (1 - (input_y - y0)) * (1 - (input_x - x0)) + + input_data[Offset(input_shape, b, y1, x0, c)] * + (input_y - y0) * (1 - (input_x - x0)) + + input_data[Offset(input_shape, b, y0, x1, c)] * + (1 - (input_y - y0)) * (input_x - x0) + + input_data[Offset(input_shape, b, y1, x1, c)] * + (input_y - y0) * (input_x - x0) + + rounding_offset); + output_data[Offset(output_shape, b, y, x, c)] = interpolation; + } + } + } + } +} + +inline void ComputeInterpolationValuesInteger( + const int32_t value, const int32_t scale_10, const bool half_pixel_centers, + int32_t input_size, int32_t* scaled_value, int32_t* lower_bound, + int32_t* upper_bound) { + if (half_pixel_centers) { + *scaled_value = value * scale_10 + scale_10 / 2 - (1 << 9); + } else { + *scaled_value = value * scale_10; + } + constexpr int32_t zero = 0; + *lower_bound = std::max(*scaled_value / (1 << 10), zero); + *upper_bound = + std::min((*scaled_value + (1 << 10) - 1) / (1 << 10), input_size - 1); +} + +// Same as above but doesn't use any floating-point for the resize +template +inline void ResizeBilinearInteger( + const tflite::ResizeBilinearParams& op_params, + const RuntimeShape& unextended_input_shape, const T* input_data, + const RuntimeShape& unextended_output_size_shape, + const int32_t* output_size_data, + const RuntimeShape& unextended_output_shape, T* output_data) { + // If half_pixel_centers is True, 
align_corners must be False. + TFLITE_DCHECK(!op_params.half_pixel_centers || !op_params.align_corners); + TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_output_size_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4); + const RuntimeShape input_shape = + RuntimeShape::ExtendedShape(4, unextended_input_shape); + const RuntimeShape output_size_shape = + RuntimeShape::ExtendedShape(4, unextended_output_size_shape); + const RuntimeShape output_shape = + RuntimeShape::ExtendedShape(4, unextended_output_shape); + + const int32_t batches = MatchingDim(input_shape, 0, output_shape, 0); + const int32_t input_height = input_shape.Dims(1); + const int32_t input_width = input_shape.Dims(2); + const int32_t depth = MatchingDim(input_shape, 3, output_shape, 3); + + TFLITE_DCHECK_EQ(output_size_shape.Dims(0), 1); + TFLITE_DCHECK_EQ(output_size_shape.Dims(1), 1); + TFLITE_DCHECK_EQ(output_size_shape.Dims(2), 1); + TFLITE_DCHECK_EQ(output_size_shape.Dims(3), 2); + const int32_t output_height = + output_size_data[Offset(output_size_shape, 0, 0, 0, 0)]; + const int32_t output_width = + output_size_data[Offset(output_size_shape, 0, 0, 0, 1)]; + + int32_t height_scale_10 = + ((1 << 10) * input_height + output_height / 2) / output_height; + int32_t width_scale_10 = + ((1 << 10) * input_width + output_width / 2) / output_width; + if (op_params.align_corners && output_height > 1) { + height_scale_10 = + ((1 << 10) * (input_height - 1) + (output_height - 1) / 2) / + (output_height - 1); + } + if (op_params.align_corners && output_width > 1) { + width_scale_10 = ((1 << 10) * (input_width - 1) + (output_width - 1) / 2) / + (output_width - 1); + } + + for (int b = 0; b < batches; ++b) { + for (int y = 0; y < output_height; ++y) { + int32_t input_y, y0, y1; + ComputeInterpolationValuesInteger(y, height_scale_10, + op_params.half_pixel_centers, + input_height, &input_y, &y0, &y1); + for (int x = 0; x < output_width; ++x) { + int32_t input_x, x0, x1; + ComputeInterpolationValuesInteger(x, width_scale_10, + op_params.half_pixel_centers, + input_width, &input_x, &x0, &x1); + for (int c = 0; c < depth; ++c) { + const int64_t output_20_ll = + static_cast( + input_data[Offset(input_shape, b, y0, x0, c)]) * + ((1 << 10) - (input_y - (1 << 10) * y0)) * + ((1 << 10) - (input_x - (1 << 10) * x0)); + const int64_t output_20_lu = + static_cast( + input_data[Offset(input_shape, b, y1, x0, c)]) * + (input_y - (1 << 10) * y0) * + ((1 << 10) - (input_x - (1 << 10) * x0)); + const int64_t output_20_rl = + static_cast( + input_data[Offset(input_shape, b, y0, x1, c)]) * + ((1 << 10) - (input_y - (1 << 10) * y0)) * + (input_x - (1 << 10) * x0); + const int64_t output_20_ru = + static_cast( + input_data[Offset(input_shape, b, y1, x1, c)]) * + (input_y - (1 << 10) * y0) * (input_x - (1 << 10) * x0); + const int64_t output_20 = + output_20_ll + output_20_lu + output_20_rl + output_20_ru; + const int64_t round = (output_20 > 0) ? 
(1 << 19) : -(1 << 19); + const T interpolation = + static_cast((output_20 + round) / (1 << 20)); + output_data[Offset(output_shape, b, y, x, c)] = interpolation; + } + } + } + } +} + +} // namespace reference_ops +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_RESIZE_BILINEAR_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h new file mode 100644 index 0000000..bbed46a --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h @@ -0,0 +1,102 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_RESIZE_NEAREST_NEIGHBOR_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_RESIZE_NEAREST_NEIGHBOR_H_ + +#include +#include + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/cppmath.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" + +namespace tflite { + +namespace reference_ops { + +inline int32_t GetNearestNeighbor(const int input_value, + const int32_t input_size, + const int32_t output_size, + const bool align_corners, + const bool half_pixel_centers) { + const float scale = + (align_corners && output_size > 1) + ? (input_size - 1) / static_cast(output_size - 1) + : input_size / static_cast(output_size); + const float offset = half_pixel_centers ? 0.5f : 0.0f; + int32_t output_value = std::min( + align_corners + ? static_cast(TfLiteRound((input_value + offset) * scale)) + : static_cast(std::floor((input_value + offset) * scale)), + input_size - 1); + if (half_pixel_centers) { + output_value = std::max(static_cast(0), output_value); + } + return output_value; +} + +template +inline void ResizeNearestNeighbor( + const tflite::ResizeNearestNeighborParams& op_params, + const RuntimeShape& unextended_input_shape, const T* input_data, + const RuntimeShape& output_size_shape, const int32_t* output_size_data, + const RuntimeShape& unextended_output_shape, T* output_data) { + TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4); + + const RuntimeShape input_shape = + RuntimeShape::ExtendedShape(4, unextended_input_shape); + const RuntimeShape output_shape = + RuntimeShape::ExtendedShape(4, unextended_output_shape); + + int32_t batches = MatchingDim(input_shape, 0, output_shape, 0); + int32_t input_height = input_shape.Dims(1); + int32_t input_width = input_shape.Dims(2); + int32_t depth = MatchingDim(input_shape, 3, output_shape, 3); + + // The Tensorflow version of this op allows resize on the width and height + // axis only. 
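GetNearestNeighbor above picks the source pixel for each output coordinate by scaling and flooring (or rounding, when align_corners is set). A standalone sketch of the default case, a 4-pixel row upscaled to 8 pixels with align_corners and half_pixel_centers both false:

// Standalone sketch: nearest-neighbor source indices for a 4 -> 8 upscale.
#include <cmath>
#include <cstdio>

int main() {
  const int input_size = 4;
  const int output_size = 8;
  const float scale = input_size / static_cast<float>(output_size);  // 0.5
  for (int x = 0; x < output_size; ++x) {
    // The kernel also clamps to input_size - 1; not needed for these values.
    const int in_x = static_cast<int>(std::floor(x * scale));
    std::printf("%d ", in_x);  // prints: 0 0 1 1 2 2 3 3
  }
  std::printf("\n");
  return 0;
}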
+ TFLITE_DCHECK_EQ(output_size_shape.FlatSize(), 2); + int32_t output_height = output_size_data[0]; + int32_t output_width = output_size_data[1]; + + const int col_offset = input_shape.Dims(3); + const int row_offset = input_shape.Dims(2) * col_offset; + const int batch_offset = input_shape.Dims(1) * row_offset; + + const T* input_ptr = input_data; + T* output_ptr = output_data; + for (int b = 0; b < batches; ++b) { + for (int y = 0; y < output_height; ++y) { + int32_t in_y = GetNearestNeighbor(y, input_height, output_height, + op_params.align_corners, + op_params.half_pixel_centers); + const T* y_input_ptr = input_ptr + in_y * row_offset; + for (int x = 0; x < output_width; ++x) { + int32_t in_x = GetNearestNeighbor(x, input_width, output_width, + op_params.align_corners, + op_params.half_pixel_centers); + const T* x_input_ptr = y_input_ptr + in_x * col_offset; + memcpy(output_ptr, x_input_ptr, depth * sizeof(T)); + output_ptr += depth; + } + } + input_ptr += batch_offset; + } +} + +} // namespace reference_ops +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_RESIZE_NEAREST_NEIGHBOR_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/round.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/round.h new file mode 100644 index 0000000..6999d1e --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/round.h @@ -0,0 +1,51 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ROUND_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ROUND_H_ + +#include + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" + +namespace tflite { + +namespace reference_ops { + +inline float RoundToNearest(float value) { + auto floor_val = std::floor(value); + auto diff = value - floor_val; + if ((diff < 0.5f) || + ((diff == 0.5f) && (static_cast(floor_val) % 2 == 0))) { + return floor_val; + } else { + return floor_val = floor_val + 1.0f; + } +} + +inline void Round(const RuntimeShape& input_shape, const float* input_data, + const RuntimeShape& output_shape, float* output_data) { + const int flat_size = MatchingFlatSize(input_shape, output_shape); + for (int i = 0; i < flat_size; ++i) { + // Note that this implementation matches that of tensorFlow tf.round + // and corresponds to the bankers rounding method. + // cfenv (for fesetround) is not yet supported universally on Android, so + // using a work around. 
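RoundToNearest above implements round-half-to-even (banker's rounding) to match tf.round: exact halves go to the nearest even integer rather than always away from zero. A standalone restatement of that rule with a few sample values (the helper name is made up):

// Standalone sketch of round-half-to-even.
#include <cmath>
#include <cstdio>

float RoundHalfToEven(float value) {
  const float floor_val = std::floor(value);
  const float diff = value - floor_val;
  if (diff < 0.5f || (diff == 0.5f && static_cast<int>(floor_val) % 2 == 0)) {
    return floor_val;  // already closest, or the half lands on an even floor
  }
  return floor_val + 1.0f;
}

int main() {
  const float samples[] = {0.5f, 1.5f, 2.5f, -1.5f, 2.3f};
  for (float s : samples) {
    std::printf("%.1f -> %.1f\n", s, RoundHalfToEven(s));  // 0, 2, 2, -2, 2
  }
  return 0;
}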
+ output_data[i] = RoundToNearest(input_data[i]); + } +} + +} // namespace reference_ops +} // namespace tflite +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ROUND_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/select.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/select.h new file mode 100644 index 0000000..2230c96 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/select.h @@ -0,0 +1,151 @@ +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SELECT_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SELECT_H_ + +#include + +#include "edge-impulse-sdk/third_party/ruy/ruy/profiler/instrumentation.h" // from @ruy +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" + +namespace tflite { +namespace reference_ops { + +template +void Select(const RuntimeShape& input_condition_shape, + const D* input_condition_data, const RuntimeShape& input_x_shape, + const T* input_x_data, const RuntimeShape& input_y_shape, + const T* input_y_data, const RuntimeShape& output_shape, + T* output_data) { + ruy::profiler::ScopeLabel label("Select"); + int64_t flatsize; + // Allow select operator executions on mixed scalar tensors and one element + // tensors. + if (input_condition_shape.FlatSize() == 1 && input_x_shape.FlatSize() == 1 && + input_y_shape.FlatSize() == 1 && output_shape.FlatSize() == 1) { + flatsize = 1; + } else { + flatsize = MatchingFlatSize(input_condition_shape, input_x_shape, + input_y_shape, output_shape); + } + for (int64_t i = 0; i < flatsize; ++i) { + output_data[i] = + input_condition_data[i] ? input_x_data[i] : input_y_data[i]; + } +} + +template +void RankOneSelect(const RuntimeShape& input_condition_shape, + const D* input_condition_data, + const RuntimeShape& input_x_shape, const T* input_x_data, + const RuntimeShape& input_y_shape, const T* input_y_data, + const RuntimeShape& output_shape, T* output_data) { + ruy::profiler::ScopeLabel label("Select/RankOneSelect"); + const int64_t outer_size = input_condition_shape.FlatSize(); + int64_t inner_size; + if (input_condition_shape.DimensionsCount() == 0) { + inner_size = MatchingFlatSize(input_x_shape, input_y_shape, output_shape); + } else { + TFLITE_DCHECK_EQ( + MatchingDim(input_x_shape, 0, input_y_shape, 0, output_shape, 0), + outer_size); + inner_size = + MatchingFlatSizeSkipDim(input_x_shape, 0, input_y_shape, output_shape); + } + + int64_t offset = 0; + for (int64_t i = 0; i < outer_size; i++) { + const T* input_data = input_condition_data[i] ? 
input_x_data : input_y_data; + memcpy(output_data + offset, input_data + offset, inner_size * sizeof(T)); + offset += inner_size; + } +} + +template +void BroadcastSelect5DSlow(const RuntimeShape& input_condition_shape, + const D* input_condition_data, + const RuntimeShape& input_x_shape, + const T* input_x_data, + const RuntimeShape& input_y_shape, + const T* input_y_data, + const RuntimeShape& output_shape, T* output_data) { + ruy::profiler::ScopeLabel label("Select/BroadcastSelectSlow"); + TFLITE_DCHECK_LE(input_condition_shape.DimensionsCount(), 5); + TFLITE_DCHECK_LE(input_x_shape.DimensionsCount(), 5); + TFLITE_DCHECK_LE(input_y_shape.DimensionsCount(), 5); + TFLITE_DCHECK_LE(output_shape.DimensionsCount(), 5); + + NdArrayDesc<5> desc_condition; + NdArrayDesc<5> desc_x; + NdArrayDesc<5> desc_y; + NdArrayDesc<5> desc_output; + const RuntimeShape extended_output_shape = + RuntimeShape::ExtendedShape(5, output_shape); + CopyDimsToDesc(extended_output_shape, &desc_output); + NdArrayDescsForElementwiseBroadcast(input_condition_shape, input_x_shape, + input_y_shape, &desc_condition, &desc_x, + &desc_y); + + // In Tensorflow, the dimensions are canonically named (batch_number, row, + // col, channel), with extents (batches, height, width, depth), with the + // trailing dimension changing most rapidly (channels has the smallest + // stride, typically 1 element). + // + // In generated C code, we store arrays with the dimensions reversed. The + // first dimension has smallest stride. + // + // We name our variables by their Tensorflow convention, but generate C code + // nesting loops such that the innermost loop has the smallest stride for + // the best cache behavior. + for (int n = 0; n < desc_output.extents[0]; ++n) { + int out_idx_n = desc_output.extents[1] * n; + int cond_idx_n = desc_condition.strides[0] * n; + int in_idx1_n = desc_x.strides[0] * n; + int in_idx2_n = desc_y.strides[0] * n; + for (int b = 0; b < desc_output.extents[1]; ++b) { + int out_idx_b = (out_idx_n + b) * desc_output.extents[2]; + int cond_idx_b = cond_idx_n + desc_condition.strides[1] * b; + int in_idx1_b = in_idx1_n + desc_x.strides[1] * b; + int in_idx2_b = in_idx2_n + desc_y.strides[1] * b; + for (int y = 0; y < desc_output.extents[2]; ++y) { + int out_idx_y = (out_idx_b + y) * desc_output.extents[3]; + int cond_idx_y = cond_idx_b + desc_condition.strides[2] * y; + int in_idx1_y = in_idx1_b + desc_x.strides[2] * y; + int in_idx2_y = in_idx2_b + desc_y.strides[2] * y; + for (int x = 0; x < desc_output.extents[3]; ++x) { + int out_idx = (out_idx_y + x) * desc_output.extents[4]; + int cond_idx = cond_idx_y + desc_condition.strides[3] * x; + int in_idx1 = in_idx1_y + desc_x.strides[3] * x; + int in_idx2 = in_idx2_y + desc_y.strides[3] * x; + for (int c = 0; c < desc_output.extents[4]; ++c) { + output_data[out_idx] = input_condition_data[cond_idx] + ? 
input_x_data[in_idx1] + : input_y_data[in_idx2]; + out_idx++; + cond_idx += desc_condition.strides[4]; + in_idx1 += desc_x.strides[4]; + in_idx2 += desc_y.strides[4]; + } + } + } + } + } +} + +} // namespace reference_ops +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SELECT_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/slice.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/slice.h new file mode 100644 index 0000000..8214269 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/slice.h @@ -0,0 +1,80 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SLICE_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SLICE_H_ + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/portable_tensor.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" + +namespace tflite { + +namespace reference_ops { + +template +inline void Slice(const tflite::SliceParams& op_params, + const RuntimeShape& input_shape, + const RuntimeShape& output_shape, + SequentialTensorWriter* writer) { + const RuntimeShape ext_shape = RuntimeShape::ExtendedShape(5, input_shape); + TFLITE_DCHECK_LE(op_params.begin_count, 5); + TFLITE_DCHECK_LE(op_params.size_count, 5); + const int begin_count = op_params.begin_count; + const int size_count = op_params.size_count; + // We front-pad the begin and size vectors. + int start[5]; + int stop[5]; + for (int i = 0; i < 5; ++i) { + int padded_i = 5 - i; + start[i] = + begin_count < padded_i ? 0 : op_params.begin[begin_count - padded_i]; + stop[i] = + (size_count < padded_i || op_params.size[size_count - padded_i] == -1) + ? 
ext_shape.Dims(i) + : start[i] + op_params.size[size_count - padded_i]; + } + + for (int i0 = start[0]; i0 < stop[0]; ++i0) { + for (int i1 = start[1]; i1 < stop[1]; ++i1) { + for (int i2 = start[2]; i2 < stop[2]; ++i2) { + for (int i3 = start[3]; i3 < stop[3]; ++i3) { + for (int i4 = start[4]; i4 < stop[4]; ++i4) { + writer->Write(Offset(ext_shape, i0, i1, i2, i3, i4)); + } + } + } + } + } +} + +template +inline void Slice(const tflite::SliceParams& op_params, + const RuntimeShape& input_shape, const T* input_data, + const RuntimeShape& output_shape, T* output_data) { + SequentialTensorWriter writer(input_data, output_data); + return Slice(op_params, input_shape, output_shape, &writer); +} + +template +inline void Slice(const tflite::SliceParams& op_params, + const RuntimeShape& input_shape, const TfLiteTensor* input, + const RuntimeShape& output_shape, TfLiteTensor* output) { + SequentialTensorWriter writer(input, output); + return Slice(op_params, input_shape, output_shape, &writer); +} + +} // namespace reference_ops +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SLICE_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/softmax.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/softmax.h new file mode 100644 index 0000000..1c6c0b9 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/softmax.h @@ -0,0 +1,233 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
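The Slice above front-pads begin and size to five dimensions and treats a size of -1 as "through the end of that dimension". The same start/stop convention on a single dimension, as a standalone sketch:

// Standalone sketch of the begin/size convention used by Slice.
#include <cstdio>

int main() {
  const int data[6] = {10, 11, 12, 13, 14, 15};
  const int dim = 6;
  const int begin = 2;
  const int size = -1;  // -1: take everything from `begin` to the end

  const int start = begin;
  const int stop = (size == -1) ? dim : start + size;
  for (int i = start; i < stop; ++i) {
    std::printf("%d ", data[i]);  // 12 13 14 15
  }
  std::printf("\n");
  return 0;
}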
+==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SOFTMAX_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SOFTMAX_H_ + +#include +#include + +#include "edge-impulse-sdk/third_party/gemmlowp/fixedpoint/fixedpoint.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/cppmath.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h" + +namespace tflite { +namespace reference_ops { + +inline void Softmax(const SoftmaxParams& params, + const RuntimeShape& input_shape, const float* input_data, + const RuntimeShape& output_shape, float* output_data) { + const int trailing_dim = input_shape.DimensionsCount() - 1; + const int outer_size = + MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape); + const int depth = + MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim); + + for (int i = 0; i < outer_size; ++i) { + // Find max element value which we'll use to ensure numerical stability + // taking advantage of the following equality: + // exp(x[i])/sum(exp(x[i])) == exp(x[i]+C)/sum(exp(x[i]+C)) + float max = std::numeric_limits::lowest(); + for (int c = 0; c < depth; ++c) { + max = std::max(max, input_data[i * depth + c]); + } + + // Compute sum. + float sum = 0.f; + for (int c = 0; c < depth; ++c) { + const float exp_c = std::exp((input_data[i * depth + c] - max) * + static_cast(params.beta)); + output_data[i * depth + c] = exp_c; + sum += exp_c; + } + + // Compute result. + for (int c = 0; c < depth; ++c) { + output_data[i * depth + c] = output_data[i * depth + c] / sum; + } + } +} + +// Quantized softmax with int8_t/uint8_t input and int8_t/uint8_t/int16_t +// output. +template +inline void Softmax(const SoftmaxParams& params, + const RuntimeShape& input_shape, const InputT* input_data, + const RuntimeShape& output_shape, OutputT* output_data) { + const int32_t input_beta_multiplier = params.input_multiplier; + const int32_t input_beta_left_shift = params.input_left_shift; + const int diff_min = params.diff_min; + // The representation chosen for the input to the exp() function is Q5.26. + // We need to leave extra space since values that we skip might be as large as + // -32 before multiplying by input_beta_multiplier, and therefore as large as + // -16 afterwards. Note that exp(-8) is definitely not insignificant to + // accumulation, but exp(-16) definitely is. 
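The float Softmax above subtracts the row maximum before exponentiating; since exp(x - C) / sum(exp(x - C)) equals exp(x) / sum(exp(x)), the probabilities are unchanged while exp() stays in range. A standalone sketch with beta fixed at 1:

// Standalone sketch: numerically stable softmax via max subtraction.
#include <algorithm>
#include <cmath>
#include <cstdio>

int main() {
  const float logits[3] = {1000.f, 1001.f, 1002.f};  // exp(1000) alone would overflow
  const float max_logit = *std::max_element(logits, logits + 3);

  float exps[3];
  float sum = 0.f;
  for (int i = 0; i < 3; ++i) {
    exps[i] = std::exp(logits[i] - max_logit);  // exp(-2), exp(-1), exp(0)
    sum += exps[i];
  }
  for (int i = 0; i < 3; ++i) {
    std::printf("%.4f ", exps[i] / sum);  // ~0.0900 0.2447 0.6652
  }
  std::printf("\n");
  return 0;
}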
+ static const int kScaledDiffIntegerBits = 5; + static const int kAccumulationIntegerBits = 12; + using FixedPointScaledDiff = + gemmlowp::FixedPoint; + using FixedPointAccum = + gemmlowp::FixedPoint; + using FixedPoint0 = gemmlowp::FixedPoint; + + const int trailing_dim = input_shape.DimensionsCount() - 1; + const int outer_size = + MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape); + const int depth = + MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim); + + for (int i = 0; i < outer_size; ++i) { + InputT max_in_row = std::numeric_limits::min(); + for (int c = 0; c < depth; ++c) { + max_in_row = std::max(max_in_row, input_data[i * depth + c]); + } + + FixedPointAccum sum_of_exps = FixedPointAccum::Zero(); + for (int c = 0; c < depth; ++c) { + int32_t input_diff = + static_cast(input_data[i * depth + c]) - max_in_row; + if (input_diff >= diff_min) { + const int32_t input_diff_rescaled = + MultiplyByQuantizedMultiplierGreaterThanOne( + input_diff, input_beta_multiplier, input_beta_left_shift); + const FixedPointScaledDiff scaled_diff_f8 = + FixedPointScaledDiff::FromRaw(input_diff_rescaled); + sum_of_exps = sum_of_exps + gemmlowp::Rescale( + exp_on_negative_values(scaled_diff_f8)); + } + } + + int num_bits_over_unit; + FixedPoint0 shifted_scale = FixedPoint0::FromRaw(GetReciprocal( + sum_of_exps.raw(), kAccumulationIntegerBits, &num_bits_over_unit)); + + for (int c = 0; c < depth; ++c) { + int32_t input_diff = + static_cast(input_data[i * depth + c]) - max_in_row; + if (input_diff >= diff_min) { + const int32_t input_diff_rescaled = + MultiplyByQuantizedMultiplierGreaterThanOne( + input_diff, input_beta_multiplier, input_beta_left_shift); + const FixedPointScaledDiff scaled_diff_f8 = + FixedPointScaledDiff::FromRaw(input_diff_rescaled); + + FixedPoint0 exp_in_0 = exp_on_negative_values(scaled_diff_f8); + int32_t unsat_output = gemmlowp::RoundingDivideByPOT( + (shifted_scale * exp_in_0).raw(), + num_bits_over_unit + 31 - (sizeof(OutputT) * 8)); + + const int32_t shifted_output = + unsat_output + + static_cast(std::numeric_limits::min()); + + output_data[i * depth + c] = static_cast(std::max( + std::min(shifted_output, + static_cast(std::numeric_limits::max())), + static_cast(std::numeric_limits::min()))); + } else { + output_data[i * depth + c] = std::numeric_limits::min(); + } + } + } +} + +// Computes exp(input - max_input) +inline int16_t SoftMaxCalculateExp(const SoftmaxParams& params, + const int16_t* input_data, const int depth, + int16_t max_in_row, int i, int c) { + int32_t input_diff = input_data[i * depth + c] - max_in_row; + // scale the input_diff such that [-65535, 0] correspond to [-10.0, 0.0] + // exp lut generated with range [-10, 0], as exp(-10) is negligible. + int32_t scaled_diff = MultiplyByQuantizedMultiplier( + input_diff, params.input_multiplier, params.input_left_shift); + // recenter to [-32768, 32767] + int32_t sym_scaled_diff = scaled_diff + 32767; + int16_t sat_sym_scaled_diff = + std::min(std::max(sym_scaled_diff, static_cast(-32768)), + static_cast(32767)); + // apply the exp() LUT activation function + return LUTLookup(sat_sym_scaled_diff, params.exp_lut); +} +// Quantized softmax with int16_t input and int16_t output. 
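+// The implementation assumes both lookup tables in `params` (exp_lut and
+// one_over_one_plus_x_lut) were generated during the op's prepare step. It
+// caches exp(x - max) for each element in the output buffer, sums those
+// values, takes the reciprocal of the sum via the second LUT, and finally
+// rescales each cached value by that reciprocal.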
+inline void SoftmaxInt16(const SoftmaxParams& params, + const RuntimeShape& input_shape, + const int16_t* input_data, + const RuntimeShape& output_shape, + int16_t* output_data) { + const int trailing_dim = input_shape.DimensionsCount() - 1; + const int outer_size = + MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape); + const int depth = + MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim); + + for (int i = 0; i < outer_size; ++i) { + // Find the largest element + int16_t max_in_row = std::numeric_limits::min(); + for (int c = 0; c < depth; ++c) { + max_in_row = std::max(max_in_row, input_data[i * depth + c]); + } + + // This loops computes the exp values and their sum. We will need the exp + // values later on in the function so we cache them in the output_data + // buffer. This is an optimization done to avoid calculating the exp values + // twice making use of the output_data buffer as scratch memory. + int32_t sum_of_exps = 0; // Q16.15 fixed point format. + int16_t* exp_results_Q015 = output_data + i * depth; + for (int c = 0; c < depth; ++c) { + exp_results_Q015[c] = + SoftMaxCalculateExp(params, input_data, depth, max_in_row, i, c); + sum_of_exps += exp_results_Q015[c]; + } + + // Compute the reciprocal 1/sum_of_exps + uint8_t headroom_plus_one = + CountLeadingZeros(static_cast(sum_of_exps)); + int32_t shifted_sum = + ((static_cast(sum_of_exps) << (headroom_plus_one - 1)) + + (1 << 13)) >> + 14; + // since the LUT computes 1/(1 + x) we need to first compute x = (sum - 1). + // also, the LUT expects a symmetrical input, so we must also recenter x + // from [0, 65535] to [-32768, 32767]. + int32_t sym_shifted_sum = shifted_sum + (-((1 << 15) + (1 << 16))); + int16_t sat_sym_shifted_sum = static_cast( + std::min(std::max(sym_shifted_sum, static_cast(-32768)), + static_cast(32767))); + // apply 1/(1 + x) LUT activation function + int16_t reciprocal_scale_Q015 = + LUTLookup(sat_sym_shifted_sum, params.one_over_one_plus_x_lut); + + // Rescale the exp_result with reciprocal + // range of output is [0, 32767] correspond to [0.0, 1.0] + for (int c = 0; c < depth; ++c) { + uint8_t right_shift = 31 - headroom_plus_one; + int64_t round = 1 << (right_shift - 1); + int32_t result = (static_cast(exp_results_Q015[c]) * + static_cast(reciprocal_scale_Q015) + + round) >> + right_shift; + output_data[i * depth + c] = static_cast( + std::min(std::max(result, static_cast(0)), + static_cast(32767))); + } + } +} + +} // namespace reference_ops +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SOFTMAX_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/space_to_batch_nd.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/space_to_batch_nd.h new file mode 100644 index 0000000..f21a3a0 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/space_to_batch_nd.h @@ -0,0 +1,109 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SPACE_TO_BATCH_ND_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SPACE_TO_BATCH_ND_H_ + +#include + +#include "edge-impulse-sdk/third_party/ruy/ruy/profiler/instrumentation.h" // from @ruy +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" + +namespace tflite { +namespace reference_ops { + +// TODO(b/135760455): Move this method anonymous namespace in a cc file. +inline RuntimeShape ExtendShapeSpaceToBatch(const RuntimeShape& shape) { + if (shape.DimensionsCount() == 4) { + return shape; + } + RuntimeShape new_shape(4, 1); + new_shape.SetDim(0, shape.Dims(0)); + new_shape.SetDim(1, shape.Dims(1)); + new_shape.SetDim(3, shape.Dims(2)); + return new_shape; +} + +template +inline void SpaceToBatchND(const SpaceToBatchParams& params, + const RuntimeShape& unextended_input1_shape, + const T* input1_data, + const RuntimeShape& unextended_input2_shape, + const int32_t* block_shape_data, + const RuntimeShape& unextended_input3_shape, + const int32_t* paddings_data, + const RuntimeShape& unextended_output_shape, + T* output_data) { + ruy::profiler::ScopeLabel label("SpaceToBatchND"); + TFLITE_DCHECK_GE(unextended_input1_shape.DimensionsCount(), 3); + TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(unextended_input1_shape.DimensionsCount(), + unextended_output_shape.DimensionsCount()); + + // Extends the input/output shape from 3D to 4D if needed, NHC -> NH1C. + const RuntimeShape input1_shape = + ExtendShapeSpaceToBatch(unextended_input1_shape); + const RuntimeShape output_shape = + ExtendShapeSpaceToBatch(unextended_output_shape); + + const int depth = input1_shape.Dims(3); + const int input_width = input1_shape.Dims(2); + const int input_height = input1_shape.Dims(1); + const int input_batch_size = input1_shape.Dims(0); + + const int output_width = output_shape.Dims(2); + const int output_height = output_shape.Dims(1); + const int output_batch_size = output_shape.Dims(0); + + const int block_shape_height = block_shape_data[0]; + const int block_shape_width = + unextended_input1_shape.DimensionsCount() == 4 ? block_shape_data[1] : 1; + const int padding_top = paddings_data[0]; + const int padding_left = + unextended_input1_shape.DimensionsCount() == 4 ? paddings_data[2] : 0; + + // For uint8 quantized, the correct padding "zero value" is the output offset. + const int32_t pad_value = params.output_offset; + for (int out_b = 0; out_b < output_batch_size; ++out_b) { + int input_batch = out_b % input_batch_size; + int shift_w = (out_b / input_batch_size) % block_shape_width; + int shift_h = (out_b / input_batch_size) / block_shape_width; + for (int out_h = 0; out_h < output_height; ++out_h) { + for (int out_w = 0; out_w < output_width; ++out_w) { + T* out = output_data + Offset(output_shape, out_b, out_h, out_w, 0); + if (out_h * block_shape_height + shift_h < padding_top || + out_h * block_shape_height + shift_h >= + padding_top + input_height || + out_w * block_shape_width + shift_w < padding_left || + out_w * block_shape_width + shift_w >= padding_left + input_width) { + // This may not execute correctly when pad_value != 0 and T != uint8. 
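+          // memset fills the region one byte at a time, so a non-zero
+          // pad_value is only reproduced exactly for single-byte element
+          // types; wider types would need an explicit per-element fill.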
+ memset(out, pad_value, depth * sizeof(T)); + } else { + const T* in = + input1_data + + Offset(input1_shape, input_batch, + (out_h * block_shape_height + shift_h) - padding_top, + (out_w * block_shape_width + shift_w) - padding_left, 0); + memcpy(out, in, depth * sizeof(T)); + } + } + } + } +} + +} // namespace reference_ops +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SPACE_TO_BATCH_ND_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/space_to_depth.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/space_to_depth.h new file mode 100644 index 0000000..53260ae --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/space_to_depth.h @@ -0,0 +1,80 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SPACE_TO_DEPTH_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SPACE_TO_DEPTH_H_ + +#include + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" + +namespace tflite { +namespace reference_ops { + +template +inline void SpaceToDepth(const tflite::SpaceToDepthParams& op_params, + const RuntimeShape& unextended_input_shape, + const T* input_data, + const RuntimeShape& unextended_output_shape, + T* output_data) { + TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4); + const RuntimeShape input_shape = + RuntimeShape::ExtendedShape(4, unextended_input_shape); + const RuntimeShape output_shape = + RuntimeShape::ExtendedShape(4, unextended_output_shape); + + const int input_depth = input_shape.Dims(3); + const int input_width = input_shape.Dims(2); + const int input_height = input_shape.Dims(1); + const int input_batch = input_shape.Dims(0); + + const int output_depth = output_shape.Dims(3); + const int output_width = output_shape.Dims(2); + const int output_height = output_shape.Dims(1); + const int output_batch = output_shape.Dims(0); + + const int32_t block_size = op_params.block_size; + + TFLITE_DCHECK_EQ(input_width, output_width * block_size); + TFLITE_DCHECK_EQ(input_height, output_height * block_size); + TFLITE_DCHECK_EQ(input_depth * block_size * block_size, output_depth); + TFLITE_DCHECK_EQ(input_batch, output_batch); + + for (int in_b = 0; in_b < input_batch; ++in_b) { + for (int in_h = 0; in_h < input_height; ++in_h) { + for (int in_w = 0; in_w < input_width; ++in_w) { + for (int in_d = 0; in_d < input_depth; ++in_d) { + const int out_d = + in_d + ((in_h % block_size) * block_size + in_w % block_size) * + input_depth; + const int out_w = in_w / block_size; + const int out_h = in_h / block_size; + const int out_b = in_b; + + const int input_index = Offset(input_shape, in_b, in_h, in_w, in_d); + const int output_index = + Offset(output_shape, out_b, out_h, out_w, out_d); + + 
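+          // Worked example (illustrative values): with block_size = 2 and
+          // input_depth = 3, the input element at (h=3, w=5, d=1) is written
+          // to output (h=1, w=2, d = 1 + (1*2 + 1)*3 = 10).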
output_data[output_index] = input_data[input_index]; + } + } + } + } +} + +} // namespace reference_ops +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SPACE_TO_DEPTH_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/strided_slice.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/strided_slice.h new file mode 100644 index 0000000..493d8f3 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/strided_slice.h @@ -0,0 +1,147 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_STRIDED_SLICE_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_STRIDED_SLICE_H_ + +#include "edge-impulse-sdk/third_party/ruy/ruy/profiler/instrumentation.h" // from @ruy +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/portable_tensor.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/strided_slice_logic.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" + +namespace tflite { + +namespace reference_ops { + +template +inline void StridedSlice(const tflite::StridedSliceParams& op_params, + const RuntimeShape& unextended_input_shape, + const RuntimeShape& unextended_output_shape, + SequentialTensorWriter* writer) { + ruy::profiler::ScopeLabel label("StridedSlice"); + + // Note that the output_shape is not used herein. + tflite::StridedSliceParams params_copy = op_params; + + TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 5); + TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 5); + const RuntimeShape input_shape = + RuntimeShape::ExtendedShape(5, unextended_input_shape); + const RuntimeShape output_shape = + RuntimeShape::ExtendedShape(5, unextended_output_shape); + + // Reverse and pad to 5 dimensions because that is what the runtime code + // requires (ie. all shapes must be 5D and are given backwards). 
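+  // For example, a rank-3 slice spec is handled as if it were rank-5, with
+  // the padded axes given trivial (full-range, stride-1) begin/end/stride
+  // values, so the five nested loops below cover every supported rank.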
+ strided_slice::StridedSlicePadIndices(¶ms_copy, 5); + + const int start_0 = + strided_slice::StridedSliceStartForAxis(params_copy, input_shape, 0); + const int stop_0 = strided_slice::StridedSliceEndForAxis( + params_copy, input_shape, 0, start_0); + const int start_1 = + strided_slice::StridedSliceStartForAxis(params_copy, input_shape, 1); + const int stop_1 = strided_slice::StridedSliceEndForAxis( + params_copy, input_shape, 1, start_1); + const int start_2 = + strided_slice::StridedSliceStartForAxis(params_copy, input_shape, 2); + const int stop_2 = strided_slice::StridedSliceEndForAxis( + params_copy, input_shape, 2, start_2); + const int start_3 = + strided_slice::StridedSliceStartForAxis(params_copy, input_shape, 3); + const int stop_3 = strided_slice::StridedSliceEndForAxis( + params_copy, input_shape, 3, start_3); + const int start_4 = + strided_slice::StridedSliceStartForAxis(params_copy, input_shape, 4); + const int stop_4 = strided_slice::StridedSliceEndForAxis( + params_copy, input_shape, 4, start_4); + + auto lc = [&](int end, int stride, int index) { + if (stride < 0) { + return index > end; + } else { + return index < end; + } + }; + // With a static_cast it is not possible to initialize + // a variable of type 'const int *' + // with an rvalue of type 'const int32_t *' (aka 'const long *'). + // reinterpret_cast is required to handle this casting. + const int* shape = reinterpret_cast(input_shape.DimsData()); + const int* stride = reinterpret_cast(params_copy.strides); + const bool inner_stride_is_1 = params_copy.strides[4] == 1; + + for (int offset_0 = start_0; lc(stop_0, stride[0], offset_0); + offset_0 += stride[0]) { + for (int offset_1 = start_1; lc(stop_1, stride[1], offset_1); + offset_1 += stride[1]) { + for (int offset_2 = start_2; lc(stop_2, stride[2], offset_2); + offset_2 += stride[2]) { + for (int offset_3 = start_3; lc(stop_3, stride[3], offset_3); + offset_3 += stride[3]) { + // When the stride is 1, the inner loop is equivalent to the + // optimized slice inner loop. Otherwise, it is identical to the + // strided_slice reference implementation inner loop. 
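+          // In the stride-1 case the innermost run is contiguous, so the
+          // writer copies stop_4 - start_4 elements with a single WriteN call
+          // instead of one Write per element.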
+ if (inner_stride_is_1) { + const int len = stop_4 - start_4; + int index = start_4 + offset_3 * shape[4] + + offset_2 * shape[3] * shape[4] + + offset_1 * shape[2] * shape[3] * shape[4] + + offset_0 * shape[1] * shape[2] * shape[3] * shape[4]; + if (len > 0) { + writer->WriteN(index, len); + } + } else { + for (int offset_4 = start_4; lc(stop_4, stride[4], offset_4); + offset_4 += stride[4]) { + int index = offset_4 + offset_3 * shape[4] + + offset_2 * shape[3] * shape[4] + + offset_1 * shape[2] * shape[3] * shape[4] + + offset_0 * shape[1] * shape[2] * shape[3] * shape[4]; + writer->Write(index); + } + } + } + } + } + } +} + +template +inline void StridedSlice(const tflite::StridedSliceParams& op_params, + const RuntimeShape& unextended_input_shape, + const T* input_data, + const RuntimeShape& unextended_output_shape, + T* output_data) { + SequentialTensorWriter writer(input_data, output_data); + StridedSlice(op_params, unextended_input_shape, unextended_output_shape, + &writer); +} + +template +inline void StridedSlice(const tflite::StridedSliceParams& op_params, + const RuntimeShape& unextended_input_shape, + const TfLiteTensor* input, + const RuntimeShape& unextended_output_shape, + TfLiteTensor* output) { + SequentialTensorWriter writer(input, output); + StridedSlice(op_params, unextended_input_shape, unextended_output_shape, + &writer); +} + +} // namespace reference_ops +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_STRIDED_SLICE_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/sub.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/sub.h new file mode 100644 index 0000000..44718a8 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/sub.h @@ -0,0 +1,479 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SUB_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SUB_H_ + +#include + +#include +#include + +#include "edge-impulse-sdk/third_party/ruy/ruy/profiler/instrumentation.h" // from @ruy +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" + +namespace tflite { + +namespace reference_ops { + +inline void SubNonBroadcast(const ArithmeticParams& params, + const RuntimeShape& input1_shape, + const float* input1_data, + const RuntimeShape& input2_shape, + const float* input2_data, + const RuntimeShape& output_shape, + float* output_data) { + const int flat_size = + MatchingElementsSize(input1_shape, input2_shape, output_shape); + for (int i = 0; i < flat_size; ++i) { + output_data[i] = ActivationFunctionWithMinMax( + input1_data[i] - input2_data[i], params.float_activation_min, + params.float_activation_max); + } +} + +inline void SubNonBroadcast(const ArithmeticParams& params, + const RuntimeShape& input1_shape, + const int32_t* input1_data, + const RuntimeShape& input2_shape, + const int32_t* input2_data, + const RuntimeShape& output_shape, + int32_t* output_data) { + const int flat_size = + MatchingElementsSize(input1_shape, input2_shape, output_shape); + for (int i = 0; i < flat_size; ++i) { + output_data[i] = ActivationFunctionWithMinMax( + input1_data[i] - input2_data[i], params.quantized_activation_min, + params.quantized_activation_max); + } +} + +// TODO(b/151345304): We can implement BroadcastSub on buffers of arbitrary +// dimensionality if the runtime code does a single loop over one dimension +// that handles broadcasting as the base case. The code generator would then +// generate max(D1, D2) nested for loops. +template +inline void BroadcastSubSlow(const ArithmeticParams& params, + const RuntimeShape& input1_shape, + const float* input1_data, + const RuntimeShape& input2_shape, + const float* input2_data, + const RuntimeShape& output_shape, + float* output_data) { + ruy::profiler::ScopeLabel label("BroadcastSubSlow/float"); + TFLITE_DCHECK_LE(input1_shape.DimensionsCount(), N); + TFLITE_DCHECK_LE(input2_shape.DimensionsCount(), N); + TFLITE_DCHECK_LE(output_shape.DimensionsCount(), N); + NdArrayDesc desc1; + NdArrayDesc desc2; + NdArrayDesc output_desc; + NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, + &desc2); + CopyDimsToDesc(RuntimeShape::ExtendedShape(N, output_shape), &output_desc); + + // In Tensorflow, the dimensions are canonically named (batch_number, row, + // col, channel), with extents (batches, height, width, depth), with the + // trailing dimension changing most rapidly (channels has the smallest stride, + // typically 1 element). + // + // In generated C code, we store arrays with the dimensions reversed. The + // first dimension has smallest stride. + // + // We name our variables by their Tensorflow convention, but generate C code + // nesting loops such that the innermost loop has the smallest stride for the + // best cache behavior. 
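+  // NDOpsHelper visits every output index; SubscriptToIndex then maps that
+  // index through each input's broadcast descriptor, so inputs with size-1
+  // dimensions are read repeatedly along the broadcast axes.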
+ auto sub_func = [&](int indexes[N]) { + output_data[SubscriptToIndex(output_desc, indexes)] = + ActivationFunctionWithMinMax( + input1_data[SubscriptToIndex(desc1, indexes)] - + input2_data[SubscriptToIndex(desc2, indexes)], + params.float_activation_min, params.float_activation_max); + }; + NDOpsHelper(output_desc, sub_func); +} + +template +inline void BroadcastSubSlow(const ArithmeticParams& params, + const RuntimeShape& input1_shape, + const int32_t* input1_data, + const RuntimeShape& input2_shape, + const int32_t* input2_data, + const RuntimeShape& output_shape, + int32_t* output_data) { + ruy::profiler::ScopeLabel label("BroadcastSubSlow/int32_t"); + TFLITE_DCHECK_LE(input1_shape.DimensionsCount(), N); + TFLITE_DCHECK_LE(input2_shape.DimensionsCount(), N); + TFLITE_DCHECK_LE(output_shape.DimensionsCount(), N); + NdArrayDesc desc1; + NdArrayDesc desc2; + NdArrayDesc output_desc; + NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, + &desc2); + CopyDimsToDesc(RuntimeShape::ExtendedShape(N, output_shape), &output_desc); + + // In Tensorflow, the dimensions are canonically named (batch_number, row, + // col, channel), with extents (batches, height, width, depth), with the + // trailing dimension changing most rapidly (channels has the smallest stride, + // typically 1 element). + // + // In generated C code, we store arrays with the dimensions reversed. The + // first dimension has smallest stride. + // + // We name our variables by their Tensorflow convention, but generate C code + // nesting loops such that the innermost loop has the smallest stride for the + // best cache behavior. + auto sub_func = [&](int indexes[N]) { + output_data[SubscriptToIndex(output_desc, indexes)] = + ActivationFunctionWithMinMax( + input1_data[SubscriptToIndex(desc1, indexes)] - + input2_data[SubscriptToIndex(desc2, indexes)], + params.quantized_activation_min, params.quantized_activation_max); + }; + NDOpsHelper(output_desc, sub_func); +} + +template +void BroadcastSubSlow(const ArithmeticParams& params, + const RuntimeShape& input1_shape, + const int64_t* input1_data, + const RuntimeShape& input2_shape, + const int64_t* input2_data, + const RuntimeShape& output_shape, int64_t* output_data) { + ruy::profiler::ScopeLabel label("BroadcastSubSlow/int64_t"); + TFLITE_DCHECK_LE(input1_shape.DimensionsCount(), N); + TFLITE_DCHECK_LE(input2_shape.DimensionsCount(), N); + TFLITE_DCHECK_LE(output_shape.DimensionsCount(), N); + NdArrayDesc desc1; + NdArrayDesc desc2; + NdArrayDesc output_desc; + NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, + &desc2); + CopyDimsToDesc(RuntimeShape::ExtendedShape(N, output_shape), &output_desc); + + // In Tensorflow, the dimensions are canonically named (batch_number, row, + // col, channel), with extents (batches, height, width, depth), with the + // trailing dimension changing most rapidly (channels has the smallest stride, + // typically 1 element). + // + // In generated C code, we store arrays with the dimensions reversed. The + // first dimension has smallest stride. + // + // We name our variables by their Tensorflow convention, but generate C code + // nesting loops such that the innermost loop has the smallest stride for the + // best cache behavior. 
+ auto sub_func = [&](int indexes[N]) { + output_data[SubscriptToIndex(output_desc, indexes)] = + ActivationFunctionWithMinMax( + input1_data[SubscriptToIndex(desc1, indexes)] - + input2_data[SubscriptToIndex(desc2, indexes)], + params.int64_activation_min, params.int64_activation_max); + }; + NDOpsHelper(output_desc, sub_func); +} + +template +void BroadcastSubSlow(const ArithmeticParams& params, + const RuntimeShape& input1_shape, const T* input1_data, + const RuntimeShape& input2_shape, const T* input2_data, + const RuntimeShape& output_shape, T* output_data) { + ruy::profiler::ScopeLabel label("BroadcastSubSlow/templated"); + TFLITE_DCHECK_LE(input1_shape.DimensionsCount(), N); + TFLITE_DCHECK_LE(input2_shape.DimensionsCount(), N); + TFLITE_DCHECK_LE(output_shape.DimensionsCount(), N); + NdArrayDesc desc1; + NdArrayDesc desc2; + NdArrayDesc output_desc; + NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, + &desc2); + CopyDimsToDesc(RuntimeShape::ExtendedShape(N, output_shape), &output_desc); + + // In Tensorflow, the dimensions are canonically named (batch_number, row, + // col, channel), with extents (batches, height, width, depth), with the + // trailing dimension changing most rapidly (channels has the smallest stride, + // typically 1 element). + // + // In generated C code, we store arrays with the dimensions reversed. The + // first dimension has smallest stride. + // + // We name our variables by their Tensorflow convention, but generate C code + // nesting loops such that the innermost loop has the smallest stride for the + // best cache behavior. + auto sub_func = [&](int indexes[N]) { + output_data[SubscriptToIndex(output_desc, indexes)] = + ActivationFunctionWithMinMax( + input1_data[SubscriptToIndex(desc1, indexes)] - + input2_data[SubscriptToIndex(desc2, indexes)], + params.quantized_activation_min, params.quantized_activation_max); + }; + NDOpsHelper(output_desc, sub_func); +} + +template +inline void BroadcastSub16POTSlow(const ArithmeticParams& params, + const RuntimeShape& input1_shape, + const int16_t* input1_data, + const RuntimeShape& input2_shape, + const int16_t* input2_data, + const RuntimeShape& output_shape, + int16_t* output_data) { + ruy::profiler::ScopeLabel label("BroadcastSub16POTSlow/int16_t"); + NdArrayDesc desc1; + NdArrayDesc desc2; + NdArrayDesc output_desc; + NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, + &desc2); + CopyDimsToDesc(RuntimeShape::ExtendedShape(N, output_shape), &output_desc); + + // In Tensorflow, the dimensions are canonically named (batch_number, row, + // col, channel), with extents (batches, height, width, depth), with the + // trailing dimension changing most rapidly (channels has the smallest stride, + // typically 1 element). + // + // In generated C code, we store arrays with the dimensions reversed. The + // first dimension has smallest stride. + // + // We name our variables by their Tensorflow convention, but generate C code + // nesting loops such that the innermost loop has the smallest stride for the + // best cache behavior. 
+ auto sub_func = [&](int indexes[N]) { + const int32_t input1_val = input1_data[SubscriptToIndex(desc1, indexes)]; + const int32_t input2_val = input2_data[SubscriptToIndex(desc2, indexes)]; + const int32_t scaled_input1_val = + gemmlowp::RoundingDivideByPOT(input1_val, -params.input1_shift); + const int32_t scaled_input2_val = + gemmlowp::RoundingDivideByPOT(input2_val, -params.input2_shift); + const int32_t raw_output = scaled_input1_val - scaled_input2_val; + const int32_t clamped_output = + std::min(params.quantized_activation_max, + std::max(params.quantized_activation_min, raw_output)); + output_data[SubscriptToIndex(output_desc, indexes)] = + static_cast(clamped_output); + }; + NDOpsHelper(output_desc, sub_func); +} + +template +void BroadcastQuantSubSlow(const ArithmeticParams& params, + const RuntimeShape& input1_shape, + const T* input1_data, + const RuntimeShape& input2_shape, + const T* input2_data, + const RuntimeShape& output_shape, T* output_data) { + ruy::profiler::ScopeLabel label("BroadcastQuantSubSlow/T"); + TFLITE_DCHECK_LE(input1_shape.DimensionsCount(), N); + TFLITE_DCHECK_LE(input2_shape.DimensionsCount(), N); + TFLITE_DCHECK_LE(output_shape.DimensionsCount(), N); + NdArrayDesc desc1; + NdArrayDesc desc2; + NdArrayDesc output_desc; + NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, + &desc2); + CopyDimsToDesc(RuntimeShape::ExtendedShape(N, output_shape), &output_desc); + + // In Tensorflow, the dimensions are canonically named (batch_number, row, + // col, channel), with extents (batches, height, width, depth), with the + // trailing dimension changing most rapidly (channels has the smallest stride, + // typically 1 element). + // + // In generated C code, we store arrays with the dimensions reversed. The + // first dimension has smallest stride. + // + // We name our variables by their Tensorflow convention, but generate C code + // nesting loops such that the innermost loop has the smallest stride for the + // best cache behavior. + auto sub_func = [&](int indexes[N]) { + const int32_t input1_val = + params.input1_offset + input1_data[SubscriptToIndex(desc1, indexes)]; + const int32_t input2_val = + params.input2_offset + input2_data[SubscriptToIndex(desc2, indexes)]; + const int32_t shifted_input1_val = input1_val * (1 << params.left_shift); + const int32_t shifted_input2_val = input2_val * (1 << params.left_shift); + const int32_t scaled_input1_val = + MultiplyByQuantizedMultiplierSmallerThanOneExp( + shifted_input1_val, params.input1_multiplier, params.input1_shift); + const int32_t scaled_input2_val = + MultiplyByQuantizedMultiplierSmallerThanOneExp( + shifted_input2_val, params.input2_multiplier, params.input2_shift); + const int32_t raw_sub = scaled_input1_val - scaled_input2_val; + const int32_t raw_output = + MultiplyByQuantizedMultiplierSmallerThanOneExp( + raw_sub, params.output_multiplier, params.output_shift) + + params.output_offset; + const int32_t clamped_output = + std::min(params.quantized_activation_max, + std::max(params.quantized_activation_min, raw_output)); + output_data[SubscriptToIndex(output_desc, indexes)] = + static_cast(clamped_output); + }; + NDOpsHelper(output_desc, sub_func); +} + +// Element-wise add that can often be used for inner loop of broadcast add as +// well as the non-broadcast add. 
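+// Per element, the quantized path below recenters both inputs by their
+// offsets, shifts them into a common higher-precision range, rescales each by
+// its own quantized multiplier, subtracts, rescales the difference to the
+// output scale, adds the output offset and clamps to the activation range.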
+template +inline void SubElementwise(int size, const ArithmeticParams& params, + const T* input1_data, const T* input2_data, + T* output_data) { + for (int i = 0; i < size; ++i) { + const int32_t input1_val = params.input1_offset + input1_data[i]; + const int32_t input2_val = params.input2_offset + input2_data[i]; + const int32_t shifted_input1_val = input1_val * (1 << params.left_shift); + const int32_t shifted_input2_val = input2_val * (1 << params.left_shift); + const int32_t scaled_input1_val = + MultiplyByQuantizedMultiplierSmallerThanOneExp( + shifted_input1_val, params.input1_multiplier, params.input1_shift); + const int32_t scaled_input2_val = + MultiplyByQuantizedMultiplierSmallerThanOneExp( + shifted_input2_val, params.input2_multiplier, params.input2_shift); + const int32_t raw_sub = scaled_input1_val - scaled_input2_val; + const int32_t raw_output = + MultiplyByQuantizedMultiplierSmallerThanOneExp( + raw_sub, params.output_multiplier, params.output_shift) + + params.output_offset; + const int32_t clamped_output = + std::min(params.quantized_activation_max, + std::max(params.quantized_activation_min, raw_output)); + output_data[i] = static_cast(clamped_output); + } +} + +inline void Sub(const ArithmeticParams& params, + const RuntimeShape& input1_shape, const uint8_t* input1_data, + const RuntimeShape& input2_shape, const uint8_t* input2_data, + const RuntimeShape& output_shape, uint8_t* output_data) { + TFLITE_DCHECK_LE(params.quantized_activation_min, + params.quantized_activation_max); + const int flat_size = + MatchingElementsSize(input1_shape, input2_shape, output_shape); + + TFLITE_DCHECK_GT(params.input1_offset, -256); + TFLITE_DCHECK_GT(params.input2_offset, -256); + TFLITE_DCHECK_LT(params.input1_offset, 256); + TFLITE_DCHECK_LT(params.input2_offset, 256); + SubElementwise(flat_size, params, input1_data, input2_data, output_data); +} + +inline void Sub(const ArithmeticParams& params, + const RuntimeShape& input1_shape, const int8_t* input1_data, + const RuntimeShape& input2_shape, const int8_t* input2_data, + const RuntimeShape& output_shape, int8_t* output_data) { + TFLITE_DCHECK_LE(params.quantized_activation_min, + params.quantized_activation_max); + + const int flat_size = + MatchingElementsSize(input1_shape, input2_shape, output_shape); + + TFLITE_DCHECK_GE(params.input1_offset, -128); + TFLITE_DCHECK_GE(params.input2_offset, -128); + // offset = -quantization_params.zero_point in PrepareGeneralSubOp(). + // So it's maximum can be 128 not 127. 
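+  // (int8_t zero points lie in [-128, 127], so the negated offsets checked
+  // here lie in [-127, 128].)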
+ TFLITE_DCHECK_LE(params.input1_offset, 128); + TFLITE_DCHECK_LE(params.input2_offset, 128); + SubElementwise(flat_size, params, input1_data, input2_data, output_data); +} + +inline void Sub(const ArithmeticParams& params, + const RuntimeShape& input1_shape, const int16_t* input1_data, + const RuntimeShape& input2_shape, const int16_t* input2_data, + const RuntimeShape& output_shape, int16_t* output_data) { + TFLITE_DCHECK_LE(params.quantized_activation_min, + params.quantized_activation_max); + + const int flat_size = + MatchingElementsSize(input1_shape, input2_shape, output_shape); + + TFLITE_DCHECK_EQ(params.input1_offset, 0); + TFLITE_DCHECK_EQ(params.input2_offset, 0); + SubElementwise(flat_size, params, input1_data, input2_data, output_data); +} + +template +void Sub(const ArithmeticParams& params, const RuntimeShape& input1_shape, + const T* input1_data, const RuntimeShape& input2_shape, + const T* input2_data, const RuntimeShape& output_shape, + T* output_data) { + NdArrayDesc<4> desc1; + NdArrayDesc<4> desc2; + NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, + &desc2); + const RuntimeShape extended_output_shape = + RuntimeShape::ExtendedShape(4, output_shape); + + // In Tensorflow, the dimensions are canonically named (batch_number, row, + // col, channel), with extents (batches, height, width, depth), with the + // trailing dimension changing most rapidly (channels has the smallest stride, + // typically 1 element). + // + // In generated C code, we store arrays with the dimensions reversed. The + // first dimension has smallest stride. + // + // We name our variables by their Tensorflow convention, but generate C code + // nesting loops such that the innermost loop has the smallest stride for the + // best cache behavior. 
+ for (int b = 0; b < extended_output_shape.Dims(0); ++b) { + for (int y = 0; y < extended_output_shape.Dims(1); ++y) { + for (int x = 0; x < extended_output_shape.Dims(2); ++x) { + for (int c = 0; c < extended_output_shape.Dims(3); ++c) { + output_data[Offset(extended_output_shape, b, y, x, c)] = + input1_data[SubscriptToIndex(desc1, b, y, x, c)] - + input2_data[SubscriptToIndex(desc2, b, y, x, c)]; + } + } + } + } +} + +inline void SetActivationMinMax(const ArithmeticParams& params, + int32_t* activation_min, + int32_t* activation_max) { + *activation_min = params.quantized_activation_min; + *activation_max = params.quantized_activation_max; +} + +inline void SetActivationMinMax(const ArithmeticParams& params, + float* activation_min, float* activation_max) { + *activation_min = params.float_activation_min; + *activation_max = params.float_activation_max; +} + +inline void SetActivationMinMax(const ArithmeticParams& params, + int64_t* activation_min, + int64_t* activation_max) { + *activation_min = params.int64_activation_min; + *activation_max = params.int64_activation_max; +} + +template +inline void SubWithActivation( + const ArithmeticParams& params, const RuntimeShape& input1_shape, + const T* input1_data, const RuntimeShape& input2_shape, + const T* input2_data, const RuntimeShape& output_shape, T* output_data) { + ruy::profiler::ScopeLabel label("SubWithActivation"); + const int flat_size = + MatchingElementsSize(input1_shape, input2_shape, output_shape); + T activation_min, activation_max; + SetActivationMinMax(params, &activation_min, &activation_max); + + for (int i = 0; i < flat_size; ++i) { + output_data[i] = ActivationFunctionWithMinMax( + input1_data[i] - input2_data[i], activation_min, activation_max); + } +} + +} // namespace reference_ops +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SUB_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/tanh.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/tanh.h new file mode 100644 index 0000000..b0320fd --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/tanh.h @@ -0,0 +1,129 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_TANH_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_TANH_H_ + +#include + +#include "edge-impulse-sdk/third_party/gemmlowp/fixedpoint/fixedpoint.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/cppmath.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h" + +namespace tflite { +namespace reference_ops { + +inline void Tanh(const RuntimeShape& input_shape, const float* input_data, + const RuntimeShape& output_shape, float* output_data) { + const int flat_size = MatchingFlatSize(input_shape, output_shape); + + for (int i = 0; i < flat_size; i++) { + float val = input_data[i]; + float result = std::tanh(val); + output_data[i] = result; + } +} + +// Convenience version that allows, for example, generated-code calls to be +// uniform between data types. +inline void Tanh(const TanhParams&, const RuntimeShape& input_shape, + const float* input_data, const RuntimeShape& output_shape, + float* output_data) { + // Drop params: not needed. + Tanh(input_shape, input_data, output_shape, output_data); +} + +inline void Tanh(const TanhParams& params, const RuntimeShape& input_shape, + const int16_t* input_data, const RuntimeShape& output_shape, + int16_t* output_data) { + const int input_left_shift = params.input_left_shift; + // Support for shifts is limited until we have a parameterized version of + // SaturatingRoundingMultiplyByPOT(). + TFLITE_DCHECK_GE(input_left_shift, 0); + TFLITE_DCHECK_LE(input_left_shift, 1); + + const int flat_size = MatchingFlatSize(input_shape, output_shape); + + // F0 uses 0 integer bits, range [-1, 1]. + // This is the return type of math functions such as tanh, logistic, + // whose range is in [-1, 1]. + using F0 = gemmlowp::FixedPoint; + // F3 uses 3 integer bits, range [-8, 8], the input range expected here. 
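+  // With int16_t storage, F3 behaves as Q3.12 (raw value / 2^12) and F0 as
+  // Q0.15 (raw value / 2^15), which is why the raw tanh output can be stored
+  // into the int16_t output buffer directly.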
+ using F3 = gemmlowp::FixedPoint; + + if (input_left_shift == 0) { + for (int i = 0; i < flat_size; i++) { + F3 input = F3::FromRaw(input_data[i]); + F0 output = gemmlowp::tanh(input); + output_data[i] = output.raw(); + } + } else { + for (int i = 0; i < flat_size; i++) { + F3 input = F3::FromRaw( + gemmlowp::SaturatingRoundingMultiplyByPOT<1>(input_data[i])); + F0 output = gemmlowp::tanh(input); + output_data[i] = output.raw(); + } + } +} + +inline void Tanh(const TanhParams& params, const RuntimeShape& input_shape, + const uint8_t* input_data, const RuntimeShape& output_shape, + uint8_t* output_data) { + const int32_t input_zero_point = params.input_zero_point; + const int32_t input_range_radius = params.input_range_radius; + const int32_t input_multiplier = params.input_multiplier; + const int input_left_shift = params.input_left_shift; + const int32_t output_zero_point = 128; + const int flat_size = MatchingFlatSize(input_shape, output_shape); + + for (int i = 0; i < flat_size; i++) { + const uint8_t input_val_u8 = input_data[i]; + const int32_t input_val_centered = + static_cast(input_val_u8) - input_zero_point; + uint8_t output_val; + if (input_val_centered <= -input_range_radius) { + output_val = 0; + } else if (input_val_centered >= input_range_radius) { + output_val = 255; + } else { + const int32_t input_val_rescaled = + MultiplyByQuantizedMultiplierGreaterThanOne( + input_val_centered, input_multiplier, input_left_shift); + using FixedPoint4 = gemmlowp::FixedPoint; + using FixedPoint0 = gemmlowp::FixedPoint; + const FixedPoint4 input_val_f4 = FixedPoint4::FromRaw(input_val_rescaled); + const FixedPoint0 output_val_f0 = gemmlowp::tanh(input_val_f4); + // Convert from Q0.31 to Q24.7. + using gemmlowp::RoundingDivideByPOT; + int32_t output_val_s32 = RoundingDivideByPOT(output_val_f0.raw(), 24); + output_val_s32 += output_zero_point; + if (output_val_s32 == 256) { + output_val_s32 = 255; + } + // Reinterpret as Q0.7, encoded in uint8_t. + TFLITE_DCHECK_GE(output_val_s32, 0); + TFLITE_DCHECK_LE(output_val_s32, 255); + output_val = static_cast(output_val_s32); + } + output_data[i] = output_val; + } +} + +} // namespace reference_ops +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_TANH_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/transpose.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/transpose.h new file mode 100644 index 0000000..d236420 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/transpose.h @@ -0,0 +1,203 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_TRANSPOSE_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_TRANSPOSE_H_ + +#include + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" + +namespace tflite { + +namespace reference_ops { + +namespace transpose_internal { + +// Recursively explores all the dimensions of the output tensor and writes the +// corresponding input tensor data. +// +// - depth: the current depth of the recursion. +// - dims: tensor dimension count, also `perm` size. +// - perm: permutation array. +// - input_data: Running input data pointer. If depth == num_dims-1, this points +// to the first element of the last dimension to traverse. +// - input_stride: Reverse partial product of input shapes. +// - output_data: Running output data pointer. If depth == num_dims-1, this +// points to the first element of the last dimension to traverse. +// - output_stride: Reverse partial product of output shapes. +// - output_shape: Shape of the output tensor. +// +// ## Algorithm explanation +// +// Assume a 3D tensor T with a shape of [I, J, K] stored in row major order. +// T[i, j, k] is at position `i*J*K + j*K + k` in the tensor buffer. +// +// If we want to go through the whole tensor iteratively, we can use loops. +// +// ``` +// for(i = 0; i < I; ++i) { +// for(j = 0; j < J; ++j) { +// for(k = 0; k < K; ++k) { +// T.data[i*J*K + j*K + k] = ... +// } +// } +// } +// ``` +// +// We can also compute the offset as we go through the loops. +// +// ``` +// stride_i = K * J; +// stride_j = K; +// stride_k = 1; +// for(i = 0; i < I; ++i) { +// offset_i = i * stride_i; +// offset_j = 0; +// for(j = 0; j < J; ++j) { +// offset_j += stride_j; +// offset_k = 0; +// for(k = 0; k < K; ++k) { +// offset_k += stride_k; +// T.data[offset_i + offset_j + offset_k] = ... +// } +// } +// } +// ``` +// +// This nicely extends to a recursive version which is the base of this +// algorithm and supports any number of dimensions. +// +// ``` +// shape = [I, J, K] +// strides = [K*J, K, 1] +// void recurse(T* data, shape, strides, depth = 0) { +// if(depth == shape.size) { +// *data = ... +// } else { +// for(a = 0; a < shape[depth]; ++a) { +// recurse(data, shape, strides, depth+1); +// data += strides[depth]; +// } +// } +// } +// ``` +template +void TransposeImpl(const int depth, const int dims, const int32_t* perm, + const T* input_data, const int* input_stride, T* output_data, + const int* output_stride, const int32_t* output_shape) { + const int dimension_size = output_shape[depth]; + if (depth == dims - 1) { + const int loop_stride = input_stride[perm[depth]]; + for (int i = 0; i < dimension_size; ++i) { + output_data[i] = *input_data; + input_data += loop_stride; + } + } else { + for (int i = 0; i < dimension_size; ++i) { + TransposeImpl(depth + 1, dims, perm, input_data, input_stride, + output_data, output_stride, output_shape); + + input_data += input_stride[perm[depth]]; + output_data += output_stride[depth]; + } + } +} + +// Compile-time switch to get the storage type of the transposition. 
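+// Because a transpose only moves elements, the kernel is instantiated per
+// element size (1, 2, 4 or 8 bytes) rather than per element type; for
+// example, float data is shuffled through the int32_t specialization.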
+template +struct TransposeStorageType; + +template <> +struct TransposeStorageType<1> { + using type = int8_t; +}; + +template <> +struct TransposeStorageType<2> { + using type = int16_t; +}; + +template <> +struct TransposeStorageType<4> { + using type = int32_t; +}; + +template <> +struct TransposeStorageType<8> { + using type = int64_t; +}; + +// Sets up the stride arrays for the recursive transpose algorithm. +// +// Implementation notes: +// +// This is a reverse partial product. We could use standard algorithms to +// implement this but the result is not a readable and is tricky to get right +// because the first element must be set to 1, which leads to offset +// shenanigans: +// +// ``` +// stride[dims - 1] = 1; +// std::partial_sum(std::make_reverse_iterator(shape + dims), +// std::make_reverse_iterator(shape + 1), +// stride.rend() - input_rank + 1, std::multiplies()); +// ``` +// +// Note that Abseil isn't used in kernels implementation. That would make the +// above solution more readable. +inline void SetupTransposeStrides( + std::array& stride, const int32_t* shape, + const int dims) { + stride[dims - 1] = 1; + for (int i = dims - 2; i >= 0; --i) { + stride[i] = stride[i + 1] * shape[i + 1]; + } +} + +} // namespace transpose_internal + +// Copies a tensor to an other buffer and permutes its dimensions. +// +// Note: template parameter N is not used anymore. It is kept for API +// compatibility with TFLite micro. +template +void Transpose(const TransposeParams& params, const RuntimeShape& input_shape, + const T* input_data, const RuntimeShape& output_shape, + T* output_data) { + using transpose_internal::SetupTransposeStrides; + using transpose_internal::TransposeImpl; + using transpose_internal::TransposeStorageType; + // Transpose kernel only does rearranging values not numeric evaluations on + // each cell. It's safe to implement per size of scalar type and this trick + // keeps the total code size in a reasonable range. + using StorageType = typename TransposeStorageType::type; + const StorageType* const input_data_storage = + reinterpret_cast(input_data); + StorageType* const output_data_storage = + reinterpret_cast(output_data); + + const int dims = input_shape.DimensionsCount(); + std::array input_stride, output_stride; + SetupTransposeStrides(input_stride, input_shape.DimsData(), dims); + SetupTransposeStrides(output_stride, output_shape.DimsData(), dims); + TransposeImpl(0, dims, ¶ms.perm[0], input_data_storage, + input_stride.data(), output_data_storage, output_stride.data(), + output_shape.DimsData()); +} + +} // namespace reference_ops +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_TRANSPOSE_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/transpose_conv.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/transpose_conv.h new file mode 100644 index 0000000..55fae7d --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/transpose_conv.h @@ -0,0 +1,225 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_TRANSPOSE_CONV_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_TRANSPOSE_CONV_H_ + +#include + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" + +namespace tflite { + +namespace reference_ops { + +inline void TransposeConv( + const ConvParams& params, const RuntimeShape& input_shape, + const float* input_data, const RuntimeShape& filter_shape, + const float* filter_data, const RuntimeShape& bias_shape, + const float* bias_data, const RuntimeShape& output_shape, + float* output_data, const RuntimeShape& im2col_shape, float* im2col_data) { + const int stride_width = params.stride_width; + const int stride_height = params.stride_height; + const int pad_width = params.padding_values.width; + const int pad_height = params.padding_values.height; + TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); + (void)im2col_data; // only used in optimized code. + (void)im2col_shape; // only used in optimized code. + + const int batches = MatchingDim(input_shape, 0, output_shape, 0); + const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3); + const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3); + const int input_height = input_shape.Dims(1); + const int input_width = input_shape.Dims(2); + const int filter_height = filter_shape.Dims(1); + const int filter_width = filter_shape.Dims(2); + const int output_height = output_shape.Dims(1); + const int output_width = output_shape.Dims(2); + const float output_activation_min = params.float_activation_min; + const float output_activation_max = params.float_activation_max; + if (bias_data) { + TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth); + } + + // Although transpose convolution simplifies to convolution with transposed + // weights for strides of 1, non-unitary striding complicates matters. To + // keep this reference implementation as clear as possible, we use a + // "scatter" access pattern, where we loop through all the input elements, + // computing their influence on the output, rather than looping through the + // output elements in the typical "gather" access pattern of a conv. We + // therefore must initialize the output array to zero. + const int num_elements = output_shape.FlatSize(); + for (int i = 0; i < num_elements; i++) { + output_data[i] = 0.0f; + } + + // Loop through input elements one at a time. 
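+  // Each input element at (in_y, in_x) contributes to the output window whose
+  // top-left corner is (in_y * stride_height - pad_height,
+  // in_x * stride_width - pad_width) and whose extent is the filter size.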
+ for (int batch = 0; batch < batches; ++batch) { + for (int in_y = 0; in_y < input_height; ++in_y) { + for (int in_x = 0; in_x < input_width; ++in_x) { + for (int in_channel = 0; in_channel < input_depth; ++in_channel) { + // Loop through the output elements it will influence + const int out_x_origin = (in_x * stride_width) - pad_width; + const int out_y_origin = (in_y * stride_height) - pad_height; + for (int filter_y = 0; filter_y < filter_height; ++filter_y) { + for (int filter_x = 0; filter_x < filter_width; ++filter_x) { + for (int out_channel = 0; out_channel < output_depth; + ++out_channel) { + // Compute output element location + const int out_x = out_x_origin + filter_x; + const int out_y = out_y_origin + filter_y; + // We cannot accumulate out of bounds + if ((out_x >= 0) && (out_x < output_width) && (out_y >= 0) && + (out_y < output_height)) { + float input_value = input_data[Offset( + input_shape, batch, in_y, in_x, in_channel)]; + float filter_value = + filter_data[Offset(filter_shape, out_channel, filter_y, + filter_x, in_channel)]; + output_data[Offset(output_shape, batch, out_y, out_x, + out_channel)] += + input_value * filter_value; + } + } + } + } + } + } + } + } + + for (int batch = 0; batch < batches; ++batch) { + for (int out_y = 0; out_y < output_height; ++out_y) { + for (int out_x = 0; out_x < output_width; ++out_x) { + for (int out_channel = 0; out_channel < output_depth; ++out_channel) { + float acc = output_data[Offset(output_shape, batch, out_y, out_x, + out_channel)]; + if (bias_data) acc += bias_data[out_channel]; + + output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] = + ActivationFunctionWithMinMax(acc, output_activation_min, + output_activation_max); + } + } + } + } +} + +inline void TransposeConv( + const ConvParams& params, const RuntimeShape& input_shape, + const uint8_t* input_data, const RuntimeShape& filter_shape, + const uint8_t* filter_data, const RuntimeShape& bias_shape, + const int32_t* bias_data, const RuntimeShape& output_shape, + uint8_t* output_data, const RuntimeShape& im2col_shape, + uint8_t* im2col_data, int32_t* scratch_buffer) { + const int stride_width = params.stride_width; + const int stride_height = params.stride_height; + const int pad_width = params.padding_values.width; + const int pad_height = params.padding_values.height; + TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); + (void)im2col_data; // only used in optimized code. + (void)im2col_shape; // only used in optimized code. 
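+  // The quantized path accumulates into the caller-provided int32_t
+  // scratch_buffer and requantizes (multiplier, shift, output offset, clamp)
+  // only after all input contributions have been scattered.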
+ + const int batches = MatchingDim(input_shape, 0, output_shape, 0); + const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3); + const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3); + const int input_height = input_shape.Dims(1); + const int input_width = input_shape.Dims(2); + const int filter_height = filter_shape.Dims(1); + const int filter_width = filter_shape.Dims(2); + const int output_height = output_shape.Dims(1); + const int output_width = output_shape.Dims(2); + const int32_t input_offset = params.input_offset; + const int32_t filter_offset = params.weights_offset; + const int32_t output_offset = params.output_offset; + const int32_t output_multiplier = params.output_multiplier; + const int output_shift = params.output_shift; + const int32_t output_activation_min = params.quantized_activation_min; + const int32_t output_activation_max = params.quantized_activation_max; + TFLITE_DCHECK_LE(output_activation_min, output_activation_max); + if (bias_data) { + TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth); + } + + const int num_elements = output_shape.FlatSize(); + // We need to initialize scratch_buffer to all 0s, as we apply the same + // 'scatter' based trick as in float version. + memset(scratch_buffer, 0, num_elements * sizeof(int32_t)); + + // Loop through input elements one at a time. + for (int batch = 0; batch < batches; ++batch) { + for (int in_y = 0; in_y < input_height; ++in_y) { + for (int in_x = 0; in_x < input_width; ++in_x) { + for (int in_channel = 0; in_channel < input_depth; ++in_channel) { + // Loop through the output elements it will influence. + const int out_x_origin = (in_x * stride_width) - pad_width; + const int out_y_origin = (in_y * stride_height) - pad_height; + for (int filter_y = 0; filter_y < filter_height; ++filter_y) { + for (int filter_x = 0; filter_x < filter_width; ++filter_x) { + for (int out_channel = 0; out_channel < output_depth; + ++out_channel) { + // Compute output element location. + const int out_x = out_x_origin + filter_x; + const int out_y = out_y_origin + filter_y; + // We cannot accumulate out of bounds. 
+ if ((out_x >= 0) && (out_x < output_width) && (out_y >= 0) &&
+ (out_y < output_height)) {
+ uint8_t input_value = input_data[Offset(
+ input_shape, batch, in_y, in_x, in_channel)];
+ uint8_t filter_value =
+ filter_data[Offset(filter_shape, out_channel, filter_y,
+ filter_x, in_channel)];
+ scratch_buffer[Offset(output_shape, batch, out_y, out_x,
+ out_channel)] +=
+ (input_value + input_offset) *
+ (filter_value + filter_offset);
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ for (int batch = 0; batch < batches; ++batch) {
+ for (int out_y = 0; out_y < output_height; ++out_y) {
+ for (int out_x = 0; out_x < output_width; ++out_x) {
+ for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
+ int32_t acc = scratch_buffer[Offset(output_shape, batch, out_y, out_x,
+ out_channel)];
+ if (bias_data) {
+ acc += bias_data[out_channel];
+ }
+ int32_t scaled_acc = MultiplyByQuantizedMultiplier(
+ acc, output_multiplier, output_shift);
+ scaled_acc += output_offset;
+ scaled_acc = std::max(scaled_acc, output_activation_min);
+ scaled_acc = std::min(scaled_acc, output_activation_max);
+ output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
+ static_cast<uint8_t>(scaled_acc);
+ }
+ }
+ }
+ }
+}
+
+} // namespace reference_ops
+} // namespace tflite
+
+#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_TRANSPOSE_CONV_H_
diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference_portable_tensor_utils.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference_portable_tensor_utils.cpp
new file mode 100644
index 0000000..6ae01b8
--- /dev/null
+++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference_portable_tensor_utils.cpp
@@ -0,0 +1,809 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/ +#include +#include +#include +#include +#include +#include + +#include "edge-impulse-sdk/third_party/gemmlowp/fixedpoint/fixedpoint.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/cppmath.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference_portable_tensor_utils_impl.h" + +#if defined(_MSC_VER) +#define __restrict__ __restrict +#endif + +namespace tflite { +namespace tensor_utils { + +namespace { +const int32_t kInt16Max = std::numeric_limits::max(); +const int32_t kInt16Min = std::numeric_limits::min(); +} // namespace + +void PortableSymmetricQuantizeFloats(const float* values, const int size, + int8_t* quantized_values, float* min_value, + float* max_value, float* scaling_factor) { + auto minmax = std::minmax_element(values, values + size); + *min_value = *minmax.first; + *max_value = *minmax.second; + + PortableSymmetricQuantizeFloats(values, size, quantized_values, *min_value, + *max_value, scaling_factor); +} + +void PortableSymmetricQuantizeFloats(const float* values, const int size, + int8_t* quantized_values, float min_value, + float max_value, float* scaling_factor) { + const int32_t kScale = 127; + const float range = std::max(std::abs(min_value), std::abs(max_value)); + if (range == 0) { + memset(quantized_values, 0, size * sizeof(int8_t)); + *scaling_factor = 1; + return; + } + *scaling_factor = range / kScale; + const float scaling_factor_inv = kScale / range; + for (int i = 0; i < size; ++i) { + const int32_t quantized_value = + static_cast(TfLiteRound(values[i] * scaling_factor_inv)); + // Clamp: just in case some odd numeric offset. + quantized_values[i] = static_cast( + std::min(kScale, std::max(-kScale, quantized_value))); + } +} + +void PortableAsymmetricQuantizeFloats(const float* values, const int size, + int8_t* quantized_values, + float* scaling_factor, int32_t* offset) { + const int32_t kMinScale = -128; + const int32_t kMaxScale = 127; + const double qmin_double = kMinScale; + const double qmax_double = kMaxScale; + const auto minmax = std::minmax_element(values, values + size); + const double rmin = static_cast(std::min(0.0f, *minmax.first)); + const double rmax = static_cast(std::max(0.0f, *minmax.second)); + if (rmin == rmax) { + memset(quantized_values, 0, size * sizeof(int8_t)); + *scaling_factor = 1; + *offset = 0; + return; + } else { + double scale = (rmax - rmin) / (qmax_double - qmin_double); + const double zero_point_from_min = qmin_double - rmin / scale; + const double zero_point_from_max = qmax_double - rmax / scale; + const double zero_point_from_min_error = + std::abs(qmin_double) + std::abs(rmin / scale); + const double zero_point_from_max_error = + std::abs(qmax_double) + std::abs(rmax / scale); + const double zero_point_double = + zero_point_from_min_error < zero_point_from_max_error + ? 
zero_point_from_min + : zero_point_from_max; + int8_t nudged_zero_point = 0; + if (zero_point_double <= qmin_double) { + nudged_zero_point = kMinScale; + } else if (zero_point_double >= qmax_double) { + nudged_zero_point = kMaxScale; + } else { + nudged_zero_point = static_cast(round(zero_point_double)); + } + *scaling_factor = scale; + *offset = nudged_zero_point; + } + const float scaling_factor_inv = 1.0f / *scaling_factor; + for (int i = 0; i < size; ++i) { + const int32_t quantized_value = static_cast( + TfLiteRound(*offset + values[i] * scaling_factor_inv)); + quantized_values[i] = + std::min(kMaxScale, std::max(kMinScale, quantized_value)); + } +} + +void PortableMatrixBatchVectorMultiplyAccumulate(const float* matrix, + int m_rows, int m_cols, + const float* vector, + int n_batch, float* result) { + float* result_in_batch = result; + for (int b = 0; b < n_batch; b++) { + const float* matrix_ptr = matrix; + for (int r = 0; r < m_rows; r++) { + float dot_prod = 0.0f; + const float* vector_in_batch = vector + b * m_cols; + for (int c = 0; c < m_cols; c++) { + dot_prod += *matrix_ptr++ * *vector_in_batch++; + } + *result_in_batch += dot_prod; + ++result_in_batch; + } + } +} + +void PortableMatrixBatchVectorMultiplyAccumulate( + const int8_t* __restrict__ matrix, const int m_rows, const int m_cols, + const int8_t* __restrict__ vectors, const float* scaling_factors, + int n_batch, float* __restrict__ result) { + for (int batch = 0; batch < n_batch; ++batch, vectors += m_cols) { + const float batch_scaling_factor = scaling_factors[batch]; + // Get the address of the first row. + const int8_t* row_ptr = matrix; + for (int row = 0; row < m_rows; ++row) { + // Initialize the dot product sum for the row to 0. + int32_t dotprod = 0; +#if defined(__GNUC__) + // Prefetch the row to cache. + __builtin_prefetch(row_ptr, 0 /* prefetch for read */, + 3 /* temporal locality */); +#endif + for (int col = 0; col < m_cols; ++col, ++row_ptr) { + dotprod += (*row_ptr) * (vectors[col]); + } // for col + *result += dotprod * batch_scaling_factor; + ++result; + } // for row + } // for batch +} + +void PortableMatrixBatchVectorMultiplyAccumulate( + const int8_t* __restrict__ matrix, const int m_rows, const int m_cols, + const int8_t* __restrict__ vectors, const float* scaling_factors, + int n_batch, float* __restrict__ result, const float* per_channel_scale, + const int32_t* input_offset, int32_t* scratch, int32_t* row_sums, + bool* compute_row_sums, CpuBackendContext* context) { + if (input_offset == nullptr) { + PortableMatrixBatchVectorMultiplyAccumulate( + matrix, m_rows, m_cols, vectors, scaling_factors, n_batch, result); + return; + } + if (!compute_row_sums || *compute_row_sums) { + PortableReductionSumVector(matrix, row_sums, m_rows, m_cols); + if (compute_row_sums) { + *compute_row_sums = false; + } + } + + for (int batch = 0; batch < n_batch; ++batch, vectors += m_cols) { + const float batch_scaling_factor = scaling_factors[batch]; + const int32_t batch_offset = input_offset[batch]; + const int8_t* row_ptr = matrix; + for (int row = 0; row < m_rows; ++row) { + int32_t dotprod = 0; + float scale = batch_scaling_factor; + if (per_channel_scale) { + scale *= per_channel_scale[row]; + } +#if defined(__GNUC__) + // Prefetch the row to cache. 
+ __builtin_prefetch(row_ptr, 0 /* prefetch for read */, + 3 /* temporal locality */); +#endif + for (int col = 0; col < m_cols; ++col, ++row_ptr) { + dotprod += (*row_ptr) * vectors[col]; + } // for col + dotprod -= row_sums[row] * batch_offset; + *result += dotprod * scale; + ++result; + } // for row + } // for batch +} + +void PortableSparseMatrixBatchVectorMultiplyAccumulate1x4( + const float* __restrict__ matrix, const int32_t* __restrict__ segments, + const int32_t* __restrict__ indices, int m_rows, int m_cols, + const float* __restrict__ vector, int n_batch, float* __restrict__ result) { + const int kBlockSize = 4; + TFLITE_DCHECK_EQ(m_cols % kBlockSize, 0); + for (int batch = 0; batch < n_batch; batch++) { + const float* matrix_ptr = matrix; + for (int row = 0; row < m_rows; row++) { + float dot_prod = 0.0f; + const float* vector_in_batch = vector + batch * m_cols; + for (int i = segments[row]; i < segments[row + 1]; i++) { + const int block_start_index = indices[i] * kBlockSize; + const float* vector_block_in_batch_ptr = + vector_in_batch + block_start_index; + for (int c = 0; c < kBlockSize; c++) { + dot_prod += *matrix_ptr++ * *vector_block_in_batch_ptr++; + } + } + result[batch * m_rows + row] += dot_prod; + } + } +} + +void PortableSparseMatrixBatchVectorMultiplyAccumulate1x16( + const int8_t* __restrict__ matrix, const int32_t* __restrict__ segments, + const int32_t* __restrict__ indices, int m_rows, int m_cols, + const int8_t* __restrict__ vector, const int32_t* __restrict__ bias_vector, + int n_batch, const int32_t input_offset, const int32_t output_multiplier, + const int32_t output_shift, const int32_t output_offset, + const int32_t output_activation_min, const int32_t output_activation_max, + int8_t* __restrict__ result) { + const int kBlockSize = 16; + TFLITE_DCHECK_EQ(m_cols % kBlockSize, 0); + for (int batch = 0; batch < n_batch; ++batch) { + const int8_t* matrix_ptr = matrix; + for (int row = 0; row < m_rows; ++row) { + int32_t dot_prod = 0; + const int8_t* vector_in_batch = vector + batch * m_cols; + for (int i = segments[row]; i < segments[row + 1]; ++i) { + const int block_start_index = indices[i] * kBlockSize; + const int8_t* vector_block_in_batch_ptr = + vector_in_batch + block_start_index; + for (int c = 0; c < kBlockSize; c++) { + dot_prod += *matrix_ptr * *vector_block_in_batch_ptr++; + dot_prod += *matrix_ptr++ * input_offset; + } + } + const int32_t bias_value = bias_vector != nullptr ? 
bias_vector[row] : 0; + dot_prod = MultiplyByQuantizedMultiplier(dot_prod + bias_value, + output_multiplier, output_shift); + dot_prod += output_offset; + result[batch * m_rows + row] = + static_cast(ActivationFunctionWithMinMax( + dot_prod, output_activation_min, output_activation_max)); + } + } +} + +void PortableSparseMatrixBatchVectorMultiplyAccumulate( + const float* __restrict__ matrix, const uint8_t* __restrict__ ledger, + int m_rows, int m_cols, const float* __restrict__ vector, int n_batch, + float* __restrict__ result) { + const int kBlockSize = 16; + TFLITE_DCHECK_EQ( // NOLINT + m_cols % kBlockSize, 0); + for (int batch = 0; batch < n_batch; batch++) { + const float* matrix_ptr = matrix; + const uint8_t* ledger_ptr = ledger; + for (int row = 0; row < m_rows; row++) { + float dot_prod = 0.0f; + int num_nonzero_blocks = *ledger_ptr++; + if (num_nonzero_blocks > 0) { + const float* vector_in_batch = vector + batch * m_cols; + for (int i = 0; i < num_nonzero_blocks; i++) { + const int block_start_index = *ledger_ptr++ * kBlockSize; + const float* vector_block_in_batch_ptr = + vector_in_batch + block_start_index; + for (int c = 0; c < kBlockSize; c++) { + dot_prod += *matrix_ptr++ * *vector_block_in_batch_ptr++; + } + } + } + result[batch * m_rows + row] += dot_prod; + } + } +} + +void PortableSparseMatrixBatchVectorMultiplyAccumulate( + const int8_t* __restrict__ matrix, const uint8_t* ledger, const int m_rows, + const int m_cols, const int8_t* __restrict__ vectors, + const float* scaling_factors, int n_batch, float* __restrict__ result) { + static const int kBlockSize = 16; + TFLITE_DCHECK_EQ( // NOLINT + m_cols % kBlockSize, 0); + for (int batch = 0; batch < n_batch; ++batch, vectors += m_cols) { + const float batch_scaling_factor = scaling_factors[batch]; + const uint8_t* ledger_ptr = ledger; + // Get the address of the first row. + const int8_t* row_ptr = matrix; + for (int row = 0; row < m_rows; ++row) { + // Initialize the dot product sum for the row to 0. + int32_t dotprod = 0; +#if defined(__GNUC__) + // Prefetch the row to cache. 
+ __builtin_prefetch(row_ptr, 0 /* prefetch for read */,
+ 3 /* temporal locality */);
+#endif
+ int num_nonzero_blocks = *ledger_ptr++;
+ for (int i = 0; i < num_nonzero_blocks; i++) {
+ const int block_start_index = *ledger_ptr++ * kBlockSize;
+ const int8_t* vector_block_ptr = vectors + block_start_index;
+ for (int c = 0; c < kBlockSize; c++) {
+ dotprod += (*row_ptr++) * (*vector_block_ptr++);
+ } // for block
+ } // for num_nonzero_blocks
+ result[batch * m_rows + row] += dotprod * batch_scaling_factor;
+ } // for row
+ } // for batch
+}
+
+template <typename T>
+void PortableMatrixBatchVectorMultiplyAccumulateImpl(
+ const int8_t* input, const int32_t* bias,
+ const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift,
+ int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp,
+ T* output) {
+ const int16_t output_max = std::numeric_limits<T>::max();
+ const int16_t output_min = std::numeric_limits<T>::min();
+ for (int batch = 0; batch < n_batch; ++batch) {
+ for (int row = 0; row < n_output; ++row) {
+ int32_t acc = bias[row];
+ for (int col = 0; col < n_input; ++col) {
+ int8_t input_val = input[batch * n_input + col];
+ int8_t weights_val = input_to_gate_weights[row * n_input + col];
+ acc += input_val * weights_val;
+ }
+ acc = MultiplyByQuantizedMultiplier(acc, multiplier, shift);
+ acc += output_zp;
+ acc += output[batch * n_output + row];
+ if (acc > output_max) {
+ acc = output_max;
+ }
+ if (acc < output_min) {
+ acc = output_min;
+ }
+ output[batch * n_output + row] = static_cast<T>(acc);
+ }
+ }
+}
+
+void PortableMatrixBatchVectorMultiplyAccumulate(
+ const int8_t* input, const int32_t* bias,
+ const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift,
+ int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp,
+ int32_t* scratch, int16_t* output, CpuBackendContext* context) {
+ PortableMatrixBatchVectorMultiplyAccumulateImpl(
+ input, bias, input_to_gate_weights, multiplier, shift, n_batch, n_input,
+ n_output, output_zp, output);
+}
+
+void PortableMatrixBatchVectorMultiplyAccumulate(
+ const int8_t* input, const int32_t* bias,
+ const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift,
+ int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp,
+ int32_t* scratch, int8_t* output, CpuBackendContext* context) {
+ PortableMatrixBatchVectorMultiplyAccumulateImpl(
+ input, bias, input_to_gate_weights, multiplier, shift, n_batch, n_input,
+ n_output, output_zp, output);
+}
+
+void PortableMatrixBatchVectorMultiply(const int8_t* input,
+ int32_t input_zeropoint,
+ const int8_t* input_to_gate_weights,
+ int32_t input_to_gate_effective_scale_a,
+ int32_t input_to_gate_effective_scale_b,
+ int32_t n_batch, int32_t n_input,
+ int32_t n_cell, int8_t* gate_output,
+ int8_t gate_output_zp) {
+ const int32_t int8_max = std::numeric_limits<int8_t>::max();
+ const int32_t int8_min = std::numeric_limits<int8_t>::min();
+ for (int batch = 0; batch < n_batch; ++batch) {
+ for (int row = 0; row < n_cell; ++row) {
+ int32_t acc = 0;
+ for (int col = 0; col < n_input; ++col) {
+ int32_t input_val = input[batch * n_input + col];
+ int8_t weights_val = input_to_gate_weights[row * n_input + col];
+ acc += (input_val - input_zeropoint) * weights_val;
+ }
+ acc = MultiplyByQuantizedMultiplier(acc, input_to_gate_effective_scale_a,
+ input_to_gate_effective_scale_b);
+ acc += gate_output_zp;
+ if (acc > int8_max) {
+ acc = int8_max;
+ }
+ if (acc < int8_min) {
+ acc = int8_min;
+ }
+ gate_output[batch * n_cell + row] = static_cast<int8_t>(acc);
+ }
+ }
+}
+
+void
PortableMatrixBatchVectorMultiply( + const int16_t* hidden, const int8_t* hidden_to_output_weights, + int32_t proj_effective_scale_a, int32_t proj_effective_scale_b, + const int32_t* gate_bias, int32_t n_batch, int32_t n_hidden, + int32_t n_output, int32_t output_zp, int8_t* proj_output) { + const int16_t int8_max = std::numeric_limits::max(); + const int16_t int8_min = std::numeric_limits::min(); + for (int batch = 0; batch < n_batch; ++batch) { + for (int row = 0; row < n_output; ++row) { + int64_t acc = gate_bias[row]; + for (int col = 0; col < n_hidden; ++col) { + int16_t input_val = hidden[batch * n_hidden + col]; + int8_t weights_val = hidden_to_output_weights[row * n_hidden + col]; + int64_t curr = acc; + acc += input_val * weights_val; + if (input_val * weights_val > 0 && acc < curr) { + acc = std::numeric_limits::max(); + } + if (input_val * weights_val < 0 && acc > curr) { + acc = std::numeric_limits::min(); + } + } + acc = MultiplyByQuantizedMultiplier(acc, proj_effective_scale_a, + proj_effective_scale_b); + acc += output_zp; + if (acc > int8_max) { + acc = int8_max; + } + if (acc < int8_min) { + acc = int8_min; + } + proj_output[batch * n_output + row] = acc; + } + } +} + +void PortableApplyLayerNorm(const int16_t* input, + const int16_t* layer_norm_weights, + const int32_t* bias, int32_t layer_norm_scale_a, + int32_t layer_norm_scale_b, int32_t variance_limit, + int n_batch, int n_input, int16_t* output) { + // The square of std::pow(2, 10), which is the extra factor that makes sure + // normalized values has enough resolution. + static const int kTwoToPower20 = 1 << 20; + for (int i = 0; i < n_batch; ++i) { + int64_t sum = 0; + int64_t sum_sq = 0; + for (int j = 0; j < n_input; ++j) { + const int32_t index = i * n_input + j; + int32_t val = static_cast(input[index]); + sum += val; + sum_sq += val * val; + } + int32_t mean = + static_cast(static_cast(sum) * 1024 / n_input); + // TODO(b/173994730): Avoids overflow but only works for POT n_input. + int32_t temp = kTwoToPower20 / n_input; + int64_t variance = + sum_sq * temp - static_cast(mean) * static_cast(mean); + int32_t variance2 = static_cast(variance / kTwoToPower20); + if (variance2 < 1) { + variance2 = variance_limit; + } + int32_t stddev_inverse_a; + int stddev_inverse_b; + GetInvSqrtQuantizedMultiplierExp(variance2, /*reverse_shift*/ -1, + &stddev_inverse_a, &stddev_inverse_b); + + for (int j = 0; j < n_input; ++j) { + const int32_t index = i * n_input + j; + int32_t val = static_cast(input[index]); + int32_t shifted = 1024 * val - mean; + int32_t rescaled = MultiplyByQuantizedMultiplier( + shifted, stddev_inverse_a, stddev_inverse_b); + // TODO(jianlijianli): Saturate this. + int64_t val3 = rescaled * layer_norm_weights[j] + bias[j]; + int32_t val4 = + static_cast((val3 > 0 ? 
val3 + 512 : val3 - 512) / 1024); + int32_t val5 = MultiplyByQuantizedMultiplier(val4, layer_norm_scale_a, + layer_norm_scale_b + 12); + val5 = std::min(std::max(kInt16Min, val5), kInt16Max); + output[index] = static_cast(val5); + } + } +} + +void PortableApplyLayerNormFloat(const int16_t* input, + const int16_t* layer_norm_weights, + int32_t layer_norm_scale_a, + int32_t layer_norm_scale_b, + const int32_t* bias, int n_batch, int n_input, + int16_t* output) { + const int32_t int16_max = std::numeric_limits::max(); + const int32_t int16_min = std::numeric_limits::min(); + const float layer_norm_scale = + layer_norm_scale_a * + std::pow(2.0, static_cast(layer_norm_scale_b - 31)); + const float bias_scale = + static_cast(std::pow(2.0, -10)) * layer_norm_scale; + + for (int batch = 0; batch < n_batch; ++batch) { + float sum = 0.0f; + float sum_sq = 0.0f; + for (int i = 0; i < n_input; ++i) { + const int index = batch * n_input + i; + const float value = static_cast(input[index]); + sum += value; + sum_sq += value * value; + } + const float mean = sum / n_input; + float stddev_inv = 0.0f; + const float variance = sum_sq / n_input - mean * mean; + if (variance == 0) { + stddev_inv = 1.0f / std::sqrt(1e-8f); + } else { + stddev_inv = 1.0f / std::sqrt(variance); + } + for (int i = 0; i < n_input; ++i) { + const int index = batch * n_input + i; + const float normalized_value = + (static_cast(input[index]) - mean) * stddev_inv; + const float weighted_normalized_value = + normalized_value * layer_norm_weights[i] * layer_norm_scale + + bias[i] * bias_scale; + const int32_t quant_output = static_cast(round( + weighted_normalized_value * static_cast(std::pow(2, 12)))); + output[index] = std::min(int16_max, std::max(int16_min, quant_output)); + } + } +} + +void PortableMatrixScalarMultiplyAccumulate(const int8_t* matrix, + int32_t scalar, int32_t n_row, + int32_t n_col, int32_t* output) { + for (int i = 0; i < n_row; ++i) { + int32_t row_sum = 0; + for (int j = 0; j < n_col; ++j) { + row_sum += *matrix++; + } + output[i] += row_sum * scalar; + } +} + +void PortableApplySigmoid(const int16_t* input, int32_t n_batch, + int32_t n_input, int16_t* output) { + for (int batch = 0; batch < n_batch; ++batch) { + for (int c = 0; c < n_input; c++) { + using F3 = gemmlowp::FixedPoint; + using F0 = gemmlowp::FixedPoint; + const int index = batch * n_input + c; + F3 sigmoid_input = F3::FromRaw(input[index]); + F0 sigmoid_output = gemmlowp::logistic(sigmoid_input); + output[index] = sigmoid_output.raw(); + } + } +} + +void PortableApplySigmoidFloat(const int16_t* input, int32_t n_batch, + int32_t n_input, int16_t* output) { + const int32_t int16_max = std::numeric_limits::max(); + const int32_t int16_min = std::numeric_limits::min(); + for (int batch = 0; batch < n_batch; ++batch) { + for (int i = 0; i < n_input; ++i) { + const int index = batch * n_input + i; + const float float_input = + input[index] * static_cast(std::pow(2, -12)); + const float float_output = 1.0f / (1.0f + std::exp(-float_input)); + const int32_t quant_output = static_cast( + float_output * static_cast(std::pow(2, 15))); + const int32_t quant_output_clamped = + std::min(int16_max, std::max(int16_min, quant_output)); + output[index] = static_cast(quant_output_clamped); + } + } +} + +template +void PortableApplyTanhImpl(const int16_t* input, int32_t n_batch, + int32_t n_input, int16_t* output) { + using FX = gemmlowp::FixedPoint; + using F0 = gemmlowp::FixedPoint; + for (int batch = 0; batch < n_batch; ++batch) { + for (int i = 0; i < n_input; ++i) 
{
+ const int index = batch * n_input + i;
+ FX tanh_input = FX::FromRaw(input[index]);
+ F0 tanh_output = gemmlowp::tanh(tanh_input);
+ output[index] = tanh_output.raw();
+ }
+ }
+}
+
+void PortableApplyTanh(int32_t integer_bits, const int16_t* input,
+ int32_t n_batch, int32_t n_input, int16_t* output) {
+ assert(integer_bits <= 6);
+#define DISPATCH_TANH(i) \
+ case i: \
+ PortableApplyTanhImpl<i>(input, n_batch, n_input, output); \
+ break;
+ switch (integer_bits) {
+ DISPATCH_TANH(0);
+ DISPATCH_TANH(1);
+ DISPATCH_TANH(2);
+ DISPATCH_TANH(3);
+ DISPATCH_TANH(4);
+ DISPATCH_TANH(5);
+ DISPATCH_TANH(6);
+ default:
+ return;
+ }
+#undef DISPATCH_TANH
+}
+
+void PortableApplyTanhFloat(const int16_t* input, int32_t n_batch,
+ int32_t n_input, int32_t integer_bits,
+ int16_t* output) {
+ const int32_t int16_max = std::numeric_limits<int16_t>::max();
+ const int32_t int16_min = std::numeric_limits<int16_t>::min();
+ const double two = 2.0;
+ for (int batch = 0; batch < n_batch; ++batch) {
+ for (int i = 0; i < n_input; ++i) {
+ const int index = batch * n_input + i;
+ const float float_input =
+ input[index] * std::pow(two, static_cast<double>(integer_bits));
+ const float float_output = std::tanh(float_input);
+ const int32_t quant_output = static_cast<int32_t>(
+ float_output * static_cast<float>(std::pow(2, 15)));
+ const int32_t quant_output_clamped =
+ std::min(int16_max, std::max(int16_min, quant_output));
+ output[index] = static_cast<int16_t>(quant_output_clamped);
+ }
+ }
+}
+
+void PortableCwiseMul(const int16_t* input_1, const int16_t* input_2,
+ int n_batch, int n_input, int shift, int16_t* output) {
+ for (int batch = 0; batch < n_batch; ++batch) {
+ for (int i = 0; i < n_input; ++i) {
+ const int index = batch * n_input + i;
+ const int16_t a = input_1[index];
+ const int16_t b = input_2[index];
+ const int32_t value = static_cast<int32_t>(a) * static_cast<int32_t>(b);
+ output[index] =
+ static_cast<int16_t>(gemmlowp::RoundingDivideByPOT(value, shift));
+ }
+ }
+}
+
+void PortableCwiseMul(const int16_t* input_1, const int16_t* input_2,
+ int32_t multiplier, int32_t shift, int32_t n_batch,
+ int32_t n_input, int32_t output_zp, int8_t* output) {
+ for (int batch = 0; batch < n_batch; ++batch) {
+ for (int i = 0; i < n_input; ++i) {
+ const int index = batch * n_input + i;
+ const int16_t a = input_1[index];
+ const int16_t b = input_2[index];
+ int32_t value = static_cast<int32_t>(a) * static_cast<int32_t>(b);
+ value = MultiplyByQuantizedMultiplier(value, multiplier, shift);
+ value += output_zp;
+ value = std::min(std::max(static_cast<int32_t>(-128), value),
+ static_cast<int32_t>(127));
+
+ output[index] = static_cast<int8_t>(value);
+ }
+ }
+}
+
+void PortableCwiseAdd(const int16_t* input_1, const int16_t* input_2,
+ int n_batch, int n_input, int16_t* output) {
+ for (int batch = 0; batch < n_batch; ++batch) {
+ for (int i = 0; i < n_input; ++i) {
+ const int index = batch * n_input + i;
+ int32_t sum = input_1[index] + input_2[index];
+ const int32_t sum_clamped = std::min(kInt16Max, std::max(kInt16Min, sum));
+ output[index] = static_cast<int16_t>(sum_clamped);
+ }
+ }
+}
+
+float PortableVectorVectorDotProduct(const float* vector1, const float* vector2,
+ int v_size) {
+ float result = 0.0;
+ for (int v = 0; v < v_size; v++) {
+ result += *vector1++ * *vector2++;
+ }
+ return result;
+}
+
+namespace {
+inline int32_t VectorVectorDotProduct(const int16_t* vector1,
+ const int16_t* vector2, int v_size) {
+ int32_t result = 0;
+ for (int v = 0; v < v_size; v++) {
+ result += *vector1++ * *vector2++;
+ }
+ return result;
+}
+} // namespace
+
+void PortableBatchVectorBatchVectorDotProduct(const int16_t*
vector1, + const int16_t* vector2, + int v_size, int n_batch, + int32_t* result) { + for (int b = 0; b < n_batch; b++) { + result[b] = VectorVectorDotProduct(vector1, vector2, v_size); + vector1 += v_size; + vector2 += v_size; + } +} + +void PortableVectorBatchVectorCwiseProductAccumulate( + const int16_t* vector, int v_size, const int16_t* batch_vector, int n_batch, + int32_t multiplier, int shift, int16_t* result) { + for (int b = 0; b < n_batch; b++) { + for (int v = 0; v < v_size; v++) { + int32_t prod = vector[v] * *batch_vector++; + prod = MultiplyByQuantizedMultiplier(prod, multiplier, shift); + int32_t output = prod + *result; + output = std::max(std::min(static_cast(32767), output), + static_cast(-32768)); + *result++ = output; + } + } +} + +void PortableSub1Vector(const float* vector, int v_size, float* result) { + for (int v = 0; v < v_size; v++) { + *result++ = 1.0f - *vector++; + } +} + +void PortableSub1Vector(const int16_t* vector, int v_size, int16_t* result) { + static const int16_t kOne = 32767; + for (int v = 0; v < v_size; v++) { + *result++ = kOne - *vector++; + } +} + +void PortableVectorScalarMultiply(const int8_t* vector, const int v_size, + const float scale, float* result) { + for (int v = 0; v < v_size; ++v) { + *result++ = scale * *vector++; + } +} + +void PortableMeanStddevNormalization(const float* __restrict__ input_vector, + float* __restrict__ output_vector, + int v_size, int n_batch) { + for (int batch = 0; batch < n_batch; ++batch) { + float sum = 0.0f; + for (int i = 0; i < v_size; ++i) { + sum += input_vector[i]; + } + const float mean = sum / v_size; + float sum_diff_sq = 0.0f; + for (int i = 0; i < v_size; ++i) { + const float diff = input_vector[i] - mean; + sum_diff_sq += diff * diff; + } + const float variance = sum_diff_sq / v_size; + constexpr float kNormalizationConstant = 1e-8f; + const float stddev_inv = + 1.0f / std::sqrt(variance + kNormalizationConstant); + for (int i = 0; i < v_size; ++i) { + output_vector[i] = (input_vector[i] - mean) * stddev_inv; + } + input_vector += v_size; + output_vector += v_size; + } +} + +void PortableTwoGateSaturatingAdd(const int8_t* input, int8_t input_zp, + const int8_t* recurrent, int8_t recurrent_zp, + int32_t input_effective_scale_a, + int32_t input_effective_scale_b, + int32_t recurrent_effective_scale_a, + int32_t recurrent_effective_scale_b, + int32_t n_batch, int32_t n_cell, + int16_t* output) { + const int32_t int16_max = std::numeric_limits::max(); + const int32_t int16_min = std::numeric_limits::min(); + for (int i = 0; i < n_batch * n_cell; ++i) { + int32_t x = static_cast(input[i]) - static_cast(input_zp); + int32_t h = + static_cast(recurrent[i]) - static_cast(recurrent_zp); + int32_t x_scaled = MultiplyByQuantizedMultiplier(x, input_effective_scale_a, + input_effective_scale_b); + int32_t h_scaled = MultiplyByQuantizedMultiplier( + h, recurrent_effective_scale_a, recurrent_effective_scale_b); + int32_t y = h_scaled + x_scaled; + if (y > int16_max) { + y = int16_max; + } + if (y < int16_min) { + y = int16_min; + } + output[i] = static_cast(y); + } +} + +} // namespace tensor_utils +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference_portable_tensor_utils.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference_portable_tensor_utils.h new file mode 100644 index 0000000..06c867c --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference_portable_tensor_utils.h @@ -0,0 +1,333 @@ +/* Copyright 
2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PORTABLE_TENSOR_UTILS_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PORTABLE_TENSOR_UTILS_H_ + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference_portable_tensor_utils_impl.h" + +#if defined(_MSC_VER) +#define __restrict__ __restrict +#endif + +namespace tflite { +namespace tensor_utils { + +// Check if all entries of a vector are zero for float. +bool IsZeroVector(const float* vector, int v_size) { + return PortableIsZeroVector(vector, v_size); +} + +// Check if all entries of a vector are zero for int8_t. +bool IsZeroVector(const int8_t* vector, int v_size) { + return PortableIsZeroVector(vector, v_size); +} + +void SymmetricQuantizeFloats(const float* values, const int size, + int8_t* quantized_values, float* min, float* max, + float* scaling_factor) { + PortableSymmetricQuantizeFloats(values, size, quantized_values, min, max, + scaling_factor); +} + +void SymmetricQuantizeFloats(const float* values, const int size, + int8_t* quantized_values, float min_value, + float max_value, float* scaling_factor) { + PortableSymmetricQuantizeFloats(values, size, quantized_values, min_value, + max_value, scaling_factor); +} + +void AsymmetricQuantizeFloats(const float* values, const int size, + int8_t* quantized_values, float* scaling_factor, + int32_t* offset) { + PortableAsymmetricQuantizeFloats(values, size, quantized_values, + scaling_factor, offset); +} + +void MatrixBatchVectorMultiplyAccumulate(const float* matrix, int m_rows, + int m_cols, const float* vector, + int n_batch, float* result) { + PortableMatrixBatchVectorMultiplyAccumulate(matrix, m_rows, m_cols, vector, + n_batch, result); +} + +void MatrixBatchVectorMultiplyAccumulate(const int8_t* __restrict__ matrix, + const int m_rows, const int m_cols, + const int8_t* __restrict__ vector, + const float* scaling_factors, + int n_batch, + float* __restrict__ result) { + PortableMatrixBatchVectorMultiplyAccumulate(matrix, m_rows, m_cols, vector, + scaling_factors, n_batch, result); +} + +void MatrixBatchVectorMultiplyAccumulate( + const int8_t* __restrict__ matrix, const int m_rows, const int m_cols, + const int8_t* __restrict__ vectors, const float* scaling_factors, + int n_batch, float* __restrict__ result, const float* per_channel_scale, + const int32_t* input_offset, int32_t* scratch, int32_t* row_sums, + bool* compute_row_sums, CpuBackendContext* context) { + PortableMatrixBatchVectorMultiplyAccumulate( + matrix, m_rows, m_cols, vectors, scaling_factors, n_batch, result, + per_channel_scale, input_offset, scratch, row_sums, compute_row_sums, + context); +} + +void MatrixBatchVectorMultiplyAccumulate(const int8_t* __restrict__ matrix, + const int m_rows, const int m_cols, + const int8_t* __restrict__ vector, + const float* scaling_factors, + int n_batch, int32_t* scratch, + float* __restrict__ result, + 
CpuBackendContext* context) { + PortableMatrixBatchVectorMultiplyAccumulate(matrix, m_rows, m_cols, vector, + scaling_factors, n_batch, result); +} + +void SparseMatrixBatchVectorMultiplyAccumulate1x4( + const float* __restrict__ matrix, const int32_t* __restrict__ segments, + const int32_t* __restrict__ indices, int m_rows, int m_cols, + const float* __restrict__ vector, int n_batch, float* __restrict__ result) { + PortableSparseMatrixBatchVectorMultiplyAccumulate1x4( + matrix, segments, indices, m_rows, m_cols, vector, n_batch, result); +} + +void SparseMatrixBatchVectorMultiplyAccumulate( + const float* __restrict__ matrix, const uint8_t* __restrict__ ledger, + int m_rows, int m_cols, const float* __restrict__ vector, int n_batch, + float* __restrict__ result) { + PortableSparseMatrixBatchVectorMultiplyAccumulate( + matrix, ledger, m_rows, m_cols, vector, n_batch, result); +} + +void SparseMatrixBatchVectorMultiplyAccumulate1x16( + const int8_t* __restrict__ matrix, const int32_t* __restrict__ segments, + const int32_t* __restrict__ indices, int m_rows, int m_cols, + const int8_t* __restrict__ vector, const int32_t* __restrict__ bias_vector, + int n_batch, const int32_t input_offset, const int32_t output_multiplier, + const int32_t output_shift, const int32_t output_offset, + const int32_t output_activation_min, const int32_t output_activation_max, + + int8_t* __restrict__ result) { + PortableSparseMatrixBatchVectorMultiplyAccumulate1x16( + matrix, segments, indices, m_rows, m_cols, vector, bias_vector, n_batch, + input_offset, output_multiplier, output_shift, output_offset, + output_activation_min, output_activation_max, result); +} + +void SparseMatrixBatchVectorMultiplyAccumulate( + const int8_t* __restrict__ matrix, const uint8_t* ledger, const int m_rows, + const int m_cols, const int8_t* __restrict__ vectors, + const float* scaling_factors, int n_batch, float* __restrict__ result) { + PortableSparseMatrixBatchVectorMultiplyAccumulate( + matrix, ledger, m_rows, m_cols, vectors, scaling_factors, n_batch, + result); +} + +void MatrixBatchVectorMultiplyAccumulate( + const int8_t* input, const int32_t* bias, + const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift, + int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp, + int32_t* scratch, int16_t* output, CpuBackendContext* context) { + PortableMatrixBatchVectorMultiplyAccumulate( + input, bias, input_to_gate_weights, multiplier, shift, n_batch, n_input, + n_output, output_zp, scratch, output, context); +} + +void MatrixBatchVectorMultiplyAccumulate( + const int8_t* input, const int32_t* bias, + const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift, + int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp, + int32_t* scratch, int8_t* output, CpuBackendContext* context) { + PortableMatrixBatchVectorMultiplyAccumulate( + input, bias, input_to_gate_weights, multiplier, shift, n_batch, n_input, + n_output, output_zp, scratch, output, context); +} + +void MatrixScalarMultiplyAccumulate(const int8_t* matrix, int32_t scalar, + int32_t n_row, int32_t n_col, + int32_t* output) { + PortableMatrixScalarMultiplyAccumulate(matrix, scalar, n_row, n_col, output); +} + +void MatrixBatchVectorMultiply(const int8_t* input, int32_t input_zeropoint, + const int8_t* input_to_gate_weights, + int32_t input_to_gate_effective_scale_a, + int32_t input_to_gate_effective_scale_b, + int32_t n_batch, int32_t n_input, int32_t n_cell, + int8_t* gate_output, int8_t gate_output_zp) { + 
PortableMatrixBatchVectorMultiply( + input, input_zeropoint, input_to_gate_weights, + input_to_gate_effective_scale_a, input_to_gate_effective_scale_b, n_batch, + n_input, n_cell, gate_output, gate_output_zp); +} + +void MatrixBatchVectorMultiply(const int16_t* hidden, + const int8_t* hidden_to_output_weights, + int32_t proj_effective_scale_a, + int32_t proj_effective_scale_b, + const int32_t* gate_bias, int32_t n_batch, + int32_t n_hidden, int32_t n_output, + int32_t output_zp, int8_t* proj_output) { + PortableMatrixBatchVectorMultiply(hidden, hidden_to_output_weights, + proj_effective_scale_a, + proj_effective_scale_b, gate_bias, n_batch, + n_hidden, n_output, output_zp, proj_output); +} + +void ApplyLayerNorm(const int16_t* input, const int16_t* layer_norm_weights, + const int32_t* bias, int32_t layer_norm_scale_a, + int32_t layer_norm_scale_b, int32_t variance_limit, + int n_batch, int n_input, int16_t* output) { + PortableApplyLayerNorm(input, layer_norm_weights, bias, layer_norm_scale_a, + layer_norm_scale_b, variance_limit, n_batch, n_input, + output); +} + +void ApplyLayerNormFloat(const int16_t* input, + const int16_t* layer_norm_weights, + int32_t layer_norm_scale_a, int32_t layer_norm_scale_b, + const int32_t* bias, int n_batch, int n_input, + int16_t* output) { + PortableApplyLayerNormFloat(input, layer_norm_weights, layer_norm_scale_a, + layer_norm_scale_b, bias, n_batch, n_input, + output); +} + +void ApplySigmoid(const int16_t* input, int32_t n_batch, int32_t n_input, + int16_t* output) { + PortableApplySigmoid(input, n_batch, n_input, output); +} + +void ApplySigmoidFloat(const int16_t* input, int32_t n_batch, int32_t n_input, + int16_t* output) { + PortableApplySigmoidFloat(input, n_batch, n_input, output); +} + +void ApplyTanh(int32_t integer_bits, const int16_t* input, int32_t n_batch, + int32_t n_input, int16_t* output) { + PortableApplyTanh(integer_bits, input, n_batch, n_input, output); +} + +void ApplyTanhFloat(const int16_t* input, int32_t n_batch, int32_t n_input, + int32_t integer_bits, int16_t* output) { + PortableApplyTanhFloat(input, n_batch, n_input, integer_bits, output); +} + +void CwiseMul(const int16_t* input_1, const int16_t* input_2, int n_batch, + int n_input, int shift, int16_t* output) { + PortableCwiseMul(input_1, input_2, n_batch, n_input, shift, output); +} + +void CwiseMul(const int16_t* input_1, const int16_t* input_2, + int32_t multiplier, int32_t shift, int32_t n_batch, + int32_t n_input, int32_t output_zp, int8_t* output) { + PortableCwiseMul(input_1, input_2, multiplier, shift, n_batch, n_input, + output_zp, output); +} + +void CwiseAdd(const int16_t* input_1, const int16_t* input_2, int n_batch, + int n_input, int16_t* output) { + PortableCwiseAdd(input_1, input_2, n_batch, n_input, output); +} + +void CwiseClipping(float* vector, const int v_size, + const float clipping_value) { + PortableCwiseClipping(vector, v_size, clipping_value); +} + +void CwiseClipping(int16_t* vector, const int v_size, + const int16_t clipping_value) { + PortableCwiseClipping(vector, v_size, clipping_value); +} + +void CwiseClipping(int8_t* vector, const int v_size, + const int8_t clipping_value) { + PortableCwiseClipping(vector, v_size, clipping_value); +} + +void VectorBatchVectorCwiseProductAccumulate(const int16_t* vector, int v_size, + const int16_t* batch_vector, + int n_batch, int32_t multiplier, + int shift, int16_t* result) { + PortableVectorBatchVectorCwiseProductAccumulate( + vector, v_size, batch_vector, n_batch, multiplier, shift, result); +} + +float 
VectorVectorDotProduct(const float* vector1, const float* vector2, + int v_size) { + return PortableVectorVectorDotProduct(vector1, vector2, v_size); +} + +void BatchVectorBatchVectorDotProduct(const int16_t* vector1, + const int16_t* vector2, int v_size, + int n_batch, int32_t* result) { + PortableBatchVectorBatchVectorDotProduct(vector1, vector2, v_size, n_batch, + result); +} + +void Sub1Vector(const float* vector, int v_size, float* result) { + PortableSub1Vector(vector, v_size, result); +} + +void Sub1Vector(const int16_t* vector, int v_size, int16_t* result) { + PortableSub1Vector(vector, v_size, result); +} + +// Multiply all elements of vector with a scalar. +void VectorScalarMultiply(const int8_t* vector, int v_size, float scale, + float* result) { + PortableVectorScalarMultiply(vector, v_size, scale, result); +} + +void ReductionSumVector(const float* input_vector, float* output_vector, + int output_size, int reduction_size) { + PortableReductionSumVector(input_vector, output_vector, output_size, + reduction_size); +} + +void ReductionSumVector(const int32_t* input_vector, int32_t* output_vector, + int output_size, int reduction_size) { + PortableReductionSumVector(input_vector, output_vector, output_size, + reduction_size); +} + +void ReductionSumVector(const int8_t* input_vector, int32_t* output_vector, + int output_size, int reduction_size) { + PortableReductionSumVector(input_vector, output_vector, output_size, + reduction_size); +} + +void MeanStddevNormalization(const float* input_vector, float* output_vector, + int v_size, int n_batch) { + PortableMeanStddevNormalization(input_vector, output_vector, v_size, n_batch); +} + +void TwoGateSaturatingAdd(const int8_t* input, int8_t input_zp, + const int8_t* recurrent, int8_t recurrent_zp, + int32_t input_effective_scale_a, + int32_t input_effective_scale_b, + int32_t recurrent_effective_scale_a, + int32_t recurrent_effective_scale_b, int32_t n_batch, + int32_t n_cell, int16_t* output) { + PortableTwoGateSaturatingAdd( + input, input_zp, recurrent, recurrent_zp, input_effective_scale_a, + input_effective_scale_b, recurrent_effective_scale_a, + recurrent_effective_scale_b, n_batch, n_cell, output); +} + +} // namespace tensor_utils +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PORTABLE_TENSOR_UTILS_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference_portable_tensor_utils_impl.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference_portable_tensor_utils_impl.h new file mode 100644 index 0000000..6c404d5 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference_portable_tensor_utils_impl.h @@ -0,0 +1,244 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PORTABLE_TENSOR_UTILS_IMPL_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PORTABLE_TENSOR_UTILS_IMPL_H_ + +#include +#include + +#if defined(_MSC_VER) +#define __restrict__ __restrict +#endif + +namespace tflite { + +// Not all backends support CpuBackendContext usage, so forward declare to avoid +// pulling in its implementation. +class CpuBackendContext; + +namespace tensor_utils { + +template +bool PortableIsZeroVector(const T* vector, int v_size) { + for (int i = 0; i < v_size; ++i) { + if (vector[i] != 0) { + return false; + } + } + return true; +} + +void PortableSymmetricQuantizeFloats(const float* values, const int size, + int8_t* quantized_values, float* min_value, + float* max_value, float* scaling_factor); + +void PortableSymmetricQuantizeFloats(const float* values, const int size, + int8_t* quantized_values, float min_value, + float max_value, float* scaling_factor); + +void PortableAsymmetricQuantizeFloats(const float* values, const int size, + int8_t* quantized_values, + float* scaling_factor, int32_t* offset); + +// Multiply a matrix by a batch vector, and store results in a batch-size +// vector. +void PortableMatrixBatchVectorMultiplyAccumulate(const float* matrix, + int m_rows, int m_cols, + const float* vector, + int n_batch, float* result); + +void PortableMatrixBatchVectorMultiplyAccumulate( + const int8_t* __restrict__ matrix, const int m_rows, const int m_cols, + const int8_t* __restrict__ vectors, const float* scaling_factors, + int n_batch, float* __restrict__ result); + +void PortableMatrixBatchVectorMultiplyAccumulate( + const int8_t* __restrict__ matrix, const int m_rows, const int m_cols, + const int8_t* __restrict__ vectors, const float* scaling_factors, + int n_batch, float* __restrict__ result, const float* per_channel_scale, + const int32_t* input_offset, int32_t* scratch, int32_t* row_sums, + bool* compute_row_sums, CpuBackendContext* context); + +void PortableMatrixBatchVectorMultiplyAccumulate( + const int8_t* __restrict__ matrix, const int m_rows, const int m_cols, + const int8_t* __restrict__ vector, const float* scaling_factors, + int n_batch, int32_t* scratch, float* __restrict__ result, + CpuBackendContext* context); + +void PortableSparseMatrixBatchVectorMultiplyAccumulate1x4( + const float* __restrict__ matrix, const int32_t* __restrict__ segments, + const int32_t* __restrict__ indices, int m_rows, int m_cols, + const float* __restrict__ vector, int n_batch, float* __restrict__ result); + +void PortableSparseMatrixBatchVectorMultiplyAccumulate( + const float* __restrict__ matrix, const uint8_t* __restrict__ ledger, + int m_rows, int m_cols, const float* __restrict__ vector, int n_batch, + float* __restrict__ result); + +void PortableSparseMatrixBatchVectorMultiplyAccumulate1x16( + const int8_t* __restrict__ matrix, const int32_t* __restrict__ segments, + const int32_t* __restrict__ indices, int m_rows, int m_cols, + const int8_t* __restrict__ vector, const int32_t* __restrict__ bias_vector, + int n_batch, const int32_t input_offset, const int32_t output_multiplier, + const int32_t output_shift, const int32_t output_offset, + const int32_t output_activation_min, const int32_t output_activation_max, + int8_t* __restrict__ result); + +void PortableSparseMatrixBatchVectorMultiplyAccumulate( + const int8_t* __restrict__ matrix, const uint8_t* ledger, const int m_rows, + const int m_cols, const int8_t* 
__restrict__ vectors, + const float* scaling_factors, int n_batch, float* __restrict__ result); + +// Dot product of two vectors. +float PortableVectorVectorDotProduct(const float* vector1, const float* vector2, + int v_size); + +void PortableBatchVectorBatchVectorDotProduct(const int16_t* vector1, + const int16_t* vector2, + int v_size, int n_batch, + int32_t* result); + +void PortableVectorBatchVectorCwiseProductAccumulate( + const int16_t* vector, int v_size, const int16_t* batch_vector, int n_batch, + int32_t multiplier, int shift, int16_t* result); + +void PortableMatrixBatchVectorMultiplyAccumulate( + const int8_t* input, const int32_t* bias, + const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift, + int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp, + int32_t* scratch, int16_t* output, CpuBackendContext* context); + +void PortableMatrixBatchVectorMultiplyAccumulate( + const int8_t* input, const int32_t* bias, + const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift, + int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp, + int32_t* scratch, int8_t* output, CpuBackendContext* context); + +void PortableMatrixBatchVectorMultiply(const int8_t* input, + int32_t input_zeropoint, + const int8_t* input_to_gate_weights, + int32_t input_to_gate_effective_scale_a, + int32_t input_to_gate_effective_scale_b, + int32_t n_batch, int32_t n_input, + int32_t n_cell, int8_t* gate_output, + int8_t gate_output_zp); + +void PortableMatrixBatchVectorMultiply( + const int16_t* hidden, const int8_t* hidden_to_output_weights, + int32_t proj_effective_scale_a, int32_t proj_effective_scale_b, + const int32_t* gate_bias, int32_t n_batch, int32_t n_hidden, + int32_t n_output, int32_t output_zp, int8_t* proj_output); + +void PortableMatrixScalarMultiplyAccumulate(const int8_t* matrix, + int32_t scalar, int32_t n_row, + int32_t n_col, int32_t* output); + +void PortableApplyLayerNorm(const int16_t* input, + const int16_t* layer_norm_weights, + const int32_t* bias, int32_t layer_norm_scale_a, + int32_t layer_norm_scale_b, int32_t variance_limit, + int n_batch, int n_input, int16_t* output); + +void PortableApplyLayerNormFloat(const int16_t* input, + const int16_t* layer_norm_weights, + int32_t layer_norm_scale_a, + int32_t layer_norm_scale_b, + const int32_t* bias, int n_batch, int n_input, + int16_t* output); + +void PortableApplySigmoid(const int16_t* input, int32_t n_batch, + int32_t n_input, int16_t* output); + +void PortableApplySigmoidFloat(const int16_t* input, int32_t n_batch, + int32_t n_input, int16_t* output); + +void PortableApplyTanh(int32_t integer_bits, const int16_t* input, + int32_t n_batch, int32_t n_input, int16_t* output); + +void PortableApplyTanhFloat(const int16_t* input, int32_t n_batch, + int32_t n_input, int32_t integer_bits, + int16_t* output); + +void PortableCwiseMul(const int16_t* input_1, const int16_t* input_2, + int n_batch, int n_input, int shift, int16_t* output); + +void PortableCwiseMul(const int16_t* input_1, const int16_t* input_2, + int32_t multiplier, int32_t shift, int32_t n_batch, + int32_t n_input, int32_t output_zp, int8_t* output); + +void PortableCwiseAdd(const int16_t* input_1, const int16_t* input_2, + int n_batch, int n_input, int16_t* output); + +template +void PortableCwiseClipping(T* vector, const int v_size, + const T& clipping_value) { + for (int i = 0; i < v_size; i++) { + vector[i] = std::max(std::min(clipping_value, vector[i]), + static_cast(-clipping_value)); + } +} + +// Batch vector 
initialization with another vector. +void PortableVectorBatchVectorAssign(const float* vector, int v_size, + int n_batch, float* batch_vector); + +// Compute "1.0f - elements of vector" (used in CIFG). +void PortableSub1Vector(const float* vector, int v_size, float* result); + +void PortableSub1Vector(const int16_t* vector, int v_size, int16_t* result); + +// Multiply all elements of vector with a scalar. +void PortableVectorScalarMultiply(const int8_t* vector, int v_size, float scale, + float* result); + +// Reduce-sum on a vector: +// input_vector: pointer to input vector. +// output_vector: pointer to vector. +// output_size: output vector size. +// reduction_size: number of consecutive elements from input vector which are +// added to get one element of output. +template +void PortableReductionSumVector(const INPUT* input_vector, + OUTPUT* output_vector, int output_size, + int reduction_size) { + for (int o = 0; o < output_size; o++) { + OUTPUT result = 0; + for (int r = 0; r < reduction_size; r++) { + result += input_vector[r]; + } + output_vector[o] = result; + input_vector += reduction_size; + } +} + +// Layer norm for each batch. +void PortableMeanStddevNormalization(const float* __restrict__ input_vector, + float* __restrict__ output_vector, + int v_size, int n_batch); + +// Saturate Add. +void PortableTwoGateSaturatingAdd(const int8_t* input, int8_t input_zp, + const int8_t* recurrent, int8_t recurrent_zp, + int32_t input_effective_scale_a, + int32_t input_effective_scale_b, + int32_t recurrent_effective_scale_a, + int32_t recurrent_effective_scale_b, + int32_t n_batch, int32_t n_cell, + int16_t* output); + +} // namespace tensor_utils +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PORTABLE_TENSOR_UTILS_IMPL_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/runtime_shape.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/runtime_shape.h new file mode 100644 index 0000000..c2678b5 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/runtime_shape.h @@ -0,0 +1,158 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_RUNTIME_SHAPE_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_RUNTIME_SHAPE_H_ + +namespace tflite { + +template +struct Dims { + int sizes[N]; + int strides[N]; +}; + +class RuntimeShape { + public: + RuntimeShape& operator=(RuntimeShape const&) = delete; + + // RuntimeShape in TFLM supports up to 5 dimensions. + // The name kMaxSmallSize comes from the same file of the upstream + // tensorflow lite repo and need to be kept the same for max reuse. 
+ static constexpr int kMaxSmallSize = 5; + + RuntimeShape() : size_(0) {} + + explicit RuntimeShape(int dimensions_count) : size_(dimensions_count) {} + + RuntimeShape(int shape_size, int32_t value) : size_(shape_size) { + for (int i = 0; i < shape_size; ++i) { + SetDim(i, value); + } + } + + RuntimeShape(int dimensions_count, const int32_t* dims_data) + : size_(dimensions_count) { + ReplaceWith(dimensions_count, dims_data); + } + + bool operator==(const RuntimeShape& comp) const { + return this->size_ == comp.size_ && + std::memcmp(DimsData(), comp.DimsData(), size_ * sizeof(int32_t)) == + 0; + } + + ~RuntimeShape() {} + + int32_t DimensionsCount() const { return size_; } + int32_t Dims(int i) const { + TFLITE_DCHECK_GE(i, 0); + TFLITE_DCHECK_LT(i, size_); + return dims_[i]; + } + void SetDim(int i, int32_t val) { + TFLITE_DCHECK_GE(i, 0); + TFLITE_DCHECK_LT(i, size_); + dims_[i] = val; + } + + static RuntimeShape ExtendedShape(int new_shape_size, + const RuntimeShape& shape) { + return RuntimeShape(new_shape_size, shape, 1); + } + int32_t* DimsData() { return dims_; } + const int32_t* DimsData() const { return dims_; } + const int32_t* DimsDataUpTo5D() const { return dims_; } + + void ReplaceWith(int dimensions_count, const int32_t* dims_data) { + size_ = dimensions_count; + int32_t* dst_dims = DimsData(); + std::memcpy(dst_dims, dims_data, dimensions_count * sizeof(int32_t)); + } + + // Returns the total count of elements, that is the size when flattened into a + // vector. + int FlatSize() const { + int buffer_size = 1; + const int* dims_data = reinterpret_cast(DimsData()); + for (int i = 0; i < size_; i++) { + buffer_size *= dims_data[i]; + } + return buffer_size; + } + + private: + // For use only by ExtendedShape(), written to guarantee (return-value) copy + // elision in C++17. + // This creates a shape padded to the desired size with the specified value. + RuntimeShape(int new_shape_size, const RuntimeShape& shape, int pad_value) + : size_(new_shape_size) { + // If the following check fails, it is likely because a 4D-only kernel is + // being used with an array of larger dimension count. + TFLITE_CHECK_GE(new_shape_size, shape.DimensionsCount()); + const int size_increase = new_shape_size - shape.DimensionsCount(); + for (int i = 0; i < size_increase; ++i) { + SetDim(i, pad_value); + } + std::memcpy(DimsData() + size_increase, shape.DimsData(), + sizeof(int32_t) * shape.DimensionsCount()); + } + + int32_t size_; + union { + int32_t dims_[kMaxSmallSize]; + }; +}; + +// Since tensors with '0' in their shape are valid in TF, these offset functions +// allow that as long as the corresponding index is also 0. It is upto the +// calling ops to ensure that they perform verification checks on tensor shapes +// if they don't support a particular behavior. 
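+//
+// Worked example of the row-major flattening used below (shape chosen for
+// illustration only): for a 4-D shape {2, 3, 4, 5}, the index
+// (i0, i1, i2, i3) = (1, 2, 3, 4) maps to
+// ((1 * 3 + 2) * 4 + 3) * 5 + 4 = 119,
+// i.e. the last element of the FlatSize() == 120 buffer.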
+ +inline int Offset(const RuntimeShape& shape, int i0, int i1, int i2, int i3) { + TFLITE_DCHECK_EQ(shape.DimensionsCount(), 4); + const int* dims_data = reinterpret_cast(shape.DimsData()); + TFLITE_DCHECK((dims_data[0] == 0 && i0 == 0) || + (i0 >= 0 && i0 < dims_data[0])); + TFLITE_DCHECK((dims_data[1] == 0 && i1 == 0) || + (i1 >= 0 && i1 < dims_data[1])); + TFLITE_DCHECK((dims_data[2] == 0 && i2 == 0) || + (i2 >= 0 && i2 < dims_data[2])); + TFLITE_DCHECK((dims_data[3] == 0 && i3 == 0) || + (i3 >= 0 && i3 < dims_data[3])); + return ((i0 * dims_data[1] + i1) * dims_data[2] + i2) * dims_data[3] + i3; +} + +inline int Offset(const RuntimeShape& shape, int i0, int i1, int i2, int i3, + int i4) { + TFLITE_DCHECK_EQ(shape.DimensionsCount(), 5); + const int* dims_data = reinterpret_cast(shape.DimsData()); + TFLITE_DCHECK((dims_data[0] == 0 && i0 == 0) || + (i0 >= 0 && i0 < dims_data[0])); + TFLITE_DCHECK((dims_data[1] == 0 && i1 == 0) || + (i1 >= 0 && i1 < dims_data[1])); + TFLITE_DCHECK((dims_data[2] == 0 && i2 == 0) || + (i2 >= 0 && i2 < dims_data[2])); + TFLITE_DCHECK((dims_data[3] == 0 && i3 == 0) || + (i3 >= 0 && i3 < dims_data[3])); + TFLITE_DCHECK((dims_data[4] == 0 && i4 == 0) || + (i4 >= 0 && i4 < dims_data[4])); + return (((i0 * dims_data[1] + i1) * dims_data[2] + i2) * dims_data[3] + i3) * + dims_data[4] + + i4; +} + +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_RUNTIME_SHAPE_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/strided_slice_logic.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/strided_slice_logic.h new file mode 100644 index 0000000..18a7940 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/strided_slice_logic.h @@ -0,0 +1,274 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_STRIDED_SLICE_LOGIC_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_STRIDED_SLICE_LOGIC_H_ + +#include +#include + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" + +namespace tflite { +namespace strided_slice { + +// Use until std::clamp() is available from C++17. +inline int Clamp(const int v, const int lo, const int hi) { + TFLITE_DCHECK(!(hi < lo)); + if (hi < v) return hi; + if (v < lo) return lo; + return v; +} + +inline void StridedSlicePadIndices(tflite::StridedSliceParams* p, + int dim_count) { + // Add indices and mask bits to fully include extra dimensions + TFLITE_CHECK_LE(dim_count, 5); + TFLITE_CHECK_GE(dim_count, p->start_indices_count); + TFLITE_CHECK_EQ(p->start_indices_count, p->stop_indices_count); + TFLITE_CHECK_EQ(p->stop_indices_count, p->strides_count); + + const int pad_count = dim_count - p->start_indices_count; + + // Pad indices at start, so move arrays by pad_count. 
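+  // For example (illustrative, not from upstream): padding 3-D slice params
+  // to dim_count == 5 gives pad_count == 2, so the existing start/stop/stride
+  // entries move to positions 2..4 and the two new leading dimensions get
+  // start 0, stop 1, stride 1, with their begin/end mask bits set below.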
+ for (int i = p->start_indices_count - 1; i >= 0; --i) { + p->strides[i + pad_count] = p->strides[i]; + p->start_indices[i + pad_count] = p->start_indices[i]; + p->stop_indices[i + pad_count] = p->stop_indices[i]; + } + for (int i = 0; i < pad_count; ++i) { + p->start_indices[i] = 0; + p->stop_indices[i] = 1; + p->strides[i] = 1; + } + + // Pad masks with 0s or 1s as required. + p->shrink_axis_mask <<= pad_count; + p->ellipsis_mask <<= pad_count; + p->new_axis_mask <<= pad_count; + p->begin_mask <<= pad_count; + p->end_mask <<= pad_count; + p->begin_mask |= (1 << pad_count) - 1; + p->end_mask |= (1 << pad_count) - 1; + + p->start_indices_count = dim_count; + p->stop_indices_count = dim_count; + p->strides_count = dim_count; +} + +// Return the index for the first element along that axis. This index will be a +// positive integer between [0, axis_size] (or [-1, axis_size -1] if stride < 0) +// that can be used to index directly into the data. +inline int StridedSliceStartForAxis(const tflite::StridedSliceParams& params, + const RuntimeShape& input_shape, + int32_t axis) { + const int32_t axis_size = input_shape.Dims(axis); + int32_t start = params.start_indices[axis]; + const int32_t stride = params.strides[axis]; + const int32_t begin_mask = (params.begin_mask & 1 << axis); + if (start < 0) { + start += axis_size; + } + if (stride > 0) { + start = Clamp(start, 0, axis_size); + } else { + start = Clamp(start, -1, axis_size - 1); + } + if (begin_mask) { + if (stride > 0) { + start = 0; + } else { + start = axis_size - 1; + } + } + return start; +} + +inline int StridedSliceEndForAxis(const tflite::StridedSliceParams& params, + const RuntimeShape& input_shape, int axis, + int start) { + const auto shrink_axis_mask = params.shrink_axis_mask; + const bool shrink_axis = shrink_axis_mask & (1 << axis); + const int axis_size = input_shape.Dims(axis); + if (shrink_axis) { + if (start >= axis_size) { + return start; + } else { + return start + 1; + } + } + const auto* indices = params.stop_indices; + int end = indices[axis]; + const int32_t stride = params.strides[axis]; + const int32_t end_mask = (params.end_mask & 1 << axis); + if (end < 0) { + end += axis_size; + } + if (stride > 0) { + end = Clamp(end, 0, axis_size); + } else { + end = Clamp(end, -1, axis_size - 1); + } + if (end_mask) { + if (stride > 0) { + end = axis_size; + } else { + end = -1; + } + } + return end; +} + +// Return the index for the first element along that axis. This index will be a +// positive integer between [0, axis_size] (or [-1, axis_size -1] if stride < 0) +// that can be used to index directly into the data. +inline int StartForAxis(const tflite::StridedSliceParams& params, + const RuntimeShape& input_shape, int axis) { + const auto begin_mask = params.begin_mask; + const auto* start_indices = params.start_indices; + const auto* strides = params.strides; + const int axis_size = input_shape.Dims(axis); + if (axis_size == 0) { + return 0; + } + // Begin with the specified index. + int start = start_indices[axis]; + + // begin_mask override + if (begin_mask & 1 << axis) { + if (strides[axis] > 0) { + // Forward iteration - use the first element. These values will get + // clamped below (Note: We could have set them to 0 and axis_size-1, but + // use lowest() and max() to maintain symmetry with StopForAxis()) + start = std::numeric_limits::lowest(); + } else { + // Backward iteration - use the last element. 
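+      // (This sentinel is clamped to axis_size - 1 by the Clamp() call below,
+      // which matches the "last element" described above.)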
+ start = std::numeric_limits::max(); + } + } + + // Handle negative indices + if (start < 0) { + start += axis_size; + } + + // Clamping + if (strides[axis] > 0) { + // Forward iteration + start = Clamp(start, 0, axis_size); + } else { + // Backward iteration + start = Clamp(start, -1, axis_size - 1); + } + + return start; +} + +// Return the "real" index for the end of iteration along that axis. This is an +// "end" in the traditional C sense, in that it points to one past the last +// element. ie. So if you were iterating through all elements of a 1D array of +// size 4, this function would return 4 as the stop, because it is one past the +// "real" indices of 0, 1, 2 & 3. +inline int StopForAxis(const tflite::StridedSliceParams& params, + const RuntimeShape& input_shape, int axis, + int start_for_axis) { + const auto end_mask = params.end_mask; + const auto shrink_axis_mask = params.shrink_axis_mask; + const auto* stop_indices = params.stop_indices; + const auto* strides = params.strides; + const int axis_size = input_shape.Dims(axis); + if (axis_size == 0) { + return 0; + } + + // Begin with the specified index + const bool shrink_axis = shrink_axis_mask & (1 << axis); + int stop = stop_indices[axis]; + + // When shrinking an axis, the end position does not matter (and can be + // incorrect when negative indexing is used, see Issue #19260). Always use + // start_for_axis + 1 to generate a length 1 slice, since start_for_axis has + // already been adjusted for negative indices. + if (shrink_axis) { + return start_for_axis + 1; + } + + // end_mask override + if (end_mask & (1 << axis)) { + if (strides[axis] > 0) { + // Forward iteration - use the last element. These values will get + // clamped below + stop = std::numeric_limits::max(); + } else { + // Backward iteration - use the first element. + stop = std::numeric_limits::lowest(); + } + } + + // Handle negative indices + if (stop < 0) { + stop += axis_size; + } + + // Clamping + // Because the end index points one past the last element, we need slightly + // different clamping ranges depending on the direction. + if (strides[axis] > 0) { + // Forward iteration + stop = Clamp(stop, 0, axis_size); + } else { + // Backward iteration + stop = Clamp(stop, -1, axis_size - 1); + } + + return stop; +} + +inline bool LoopCondition(int index, int stop, int stride) { + // True when we have reached the end of an axis and should loop. + return stride > 0 ? 
index >= stop : index <= stop; +} + +inline tflite::StridedSliceParams BuildStridedSliceParams( + int begin_mask, int end_mask, int shrink_axis_mask, + const std::vector& start_indices, const std::vector& stop_indices, + const std::vector& strides) { + tflite::StridedSliceParams op_params; + const int dims_count = start_indices.size(); + + op_params.start_indices_count = dims_count; + op_params.stop_indices_count = dims_count; + op_params.strides_count = dims_count; + for (int i = 0; i < dims_count; ++i) { + op_params.start_indices[i] = start_indices[i]; + op_params.stop_indices[i] = stop_indices[i]; + op_params.strides[i] = strides[i]; + } + + op_params.begin_mask = begin_mask; + op_params.ellipsis_mask = 0; + op_params.end_mask = end_mask; + op_params.new_axis_mask = 0; + op_params.shrink_axis_mask = shrink_axis_mask; + + return op_params; +} + +} // namespace strided_slice + +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_STRIDED_SLICE_LOGIC_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h new file mode 100644 index 0000000..de2d802 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h @@ -0,0 +1,47 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_TENSOR_CTYPES_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_TENSOR_CTYPES_H_ + +#include "edge-impulse-sdk/tensorflow/lite/core/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" + +namespace tflite { + +template +inline T* GetTensorData(TfLiteTensor* tensor) { + return tensor != nullptr ? reinterpret_cast(tensor->data.raw) : nullptr; +} + +template +inline const T* GetTensorData(const TfLiteTensor* tensor) { + return tensor != nullptr ? reinterpret_cast(tensor->data.raw) + : nullptr; +} + +inline RuntimeShape GetTensorShape(const TfLiteTensor* tensor) { + if (tensor == nullptr) { + return RuntimeShape(); + } + + TfLiteIntArray* dims = tensor->dims; + const int dims_size = dims->size; + const int32_t* dims_data = reinterpret_cast(dims->data); + return RuntimeShape(dims_size, dims_data); +} + +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_TENSOR_CTYPES_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_utils.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_utils.cpp new file mode 100644 index 0000000..7527994 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_utils.cpp @@ -0,0 +1,25 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +============================================================================== +*/ + +// internal/reference_portable_tensor_utils.h has the implementation of the +// functions declared in internal/portable_tensor_utils.h. This somewhat +// confusing setup is derived from how the code is organized in TfLite where it +// is used to select between NEON, SSE and portable implementaitons. See +// https://github.com/tensorflow/tensorflow/blob/d76c23975c4a3a0d7987cfe3f45c76566df06180/tensorflow/lite/kernels/internal/tensor_utils.cc +// for how the code is written in TfLite. + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/portable_tensor_utils.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference_portable_tensor_utils.h" diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h new file mode 100644 index 0000000..9e73812 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h @@ -0,0 +1,1067 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_TYPES_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_TYPES_H_ + +#include +#include +#include +#include + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/runtime_shape.h" + +namespace tflite { + +enum class FusedActivationFunctionType : uint8_t { + kNone, + kRelu6, + kRelu1, + kRelu +}; +enum class PaddingType : uint8_t { kNone, kSame, kValid }; + +struct PaddingValues { + int16_t width; + int16_t height; + // offset is used for calculating "remaining" padding, for example, `width` + // is 1 and `width_offset` is 1, so padding_left is 1 while padding_right is + // 1 + 1 = 2. + int16_t width_offset; + // Same as width_offset except it's over the height dimension. + int16_t height_offset; +}; + +struct Padding3DValues { + int16_t width; + int16_t height; + int16_t depth; + // offset is used for calculating "remaining" padding, for example, `width` + // is 1 and `width_offset` is 1, so padding_left is 1 while padding_right is + // 1 + 1 = 2. + int16_t width_offset; + // Same as width_offset except it's over the height dimension. + int16_t height_offset; + // Same as width_offset except it's over the depth dimension. 
+ int16_t depth_offset; +}; + +// This enumeration allows for non-default formats for the weights array +// of a fully-connected operator, allowing the use of special optimized +// runtime paths. +enum class FullyConnectedWeightsFormat : uint8_t { + // Default format (flat 2D layout, the inner contiguous dimension + // is input_depth, the outer non-contiguous dimension is output_depth) + kDefault, + // Summary: optimized layout for fast CPU runtime implementation, + // aimed specifically at ARM CPUs at the moment, and specialized for + // 8-bit quantized layers. + // + // The use case we're concerned with here is: 8-bit quantization, + // large weights matrix that doesn't fit in cache (e.g. 4096x2048 in + // a key application that drove this), very small batch size (e.g. 1 -- 4). + // + // Even with 8-bit quantization of weights, the performance of memory + // accesses to the weights can become the dominant issue when + // the batch size is small, so each weight value is used in only a few + // arithmetic ops, i.e. the fully-connected node has a low arithmetic + // intensity. The specific issues that arise are of three kinds: + // (1) One may, ideally, max out DRAM bandwidth, i.e. be truly memory + // bound. That's the "good" issue to run into. + // (2) One may run into sub-optimal pre-fetching: the data hasn't been + // prefetched into the cache by the time we need it. + // (3) One may run into cache aliasing: multiple values that are + // pre-fetched, alias each other in the L1 cache (which typically + // has only 4-way set associativity in ARM CPUs) and thus evict + // each other before we get to using them. + // + // The point of this shuffling is to avoid issues (2) and (3) so that + // we get as fast as possible given only the hard constraint (1). + // This is achieved by turning the difficulty into a solution: the + // difficulty, that each value loaded from memory is used only in + // one kernel iteration, making this operation memory-intensive, hints at + // the solution, of shuffling the weights so that they are stored in the + // exact order as the kernel needs to load them, so that the memory + // accesses made by the kernel are trivial. This solves (2) because the + // trivial memory access pattern allows the CPU's automatic prefetching + // to perform very well (no need even for preload instructions), and this + // solves (3) because the values being loaded concurrently are now + // contiguous in the address space, thus don't alias each other in the cache. + // + // On ARM, we typically want our kernel to process a 4x16 block of weights + // at a time, because: + // - 16 is the number of bytes in a NEON register. + // - 4 is how many rows we need to handle concurrently in the kernel in + // order to have sufficient mutual independence of instructions to + // maximize arithmetic throughput. + // + // Finally, the 'Int8' part in the name refers to the fact that this + // weights format has each weights value encoded as a signed int8_t value, + // even if the data type of the weights buffer is uint8_t. This is intended + // to save runtime kernels the effort to have to XOR the top bit of these + // bytes before using them in signed arithmetic, see this file for more + // explanations on the 'signed int8_t trick' in matrix multiplication kernels: + // + // tensorflow/lite/toco/graph_transformations/ensure_uint8_weights_safe_for_fast_int8_kernels.cc + // + kShuffled4x16Int8, +}; + +// Quantization parameters, determining the mapping of quantized values +// to real values (i.e. 
determining how quantized values are mathematically +// interpreted). +// +// The correspondence is as follows: +// +// real_value = scale * (quantized_value - zero_point); +// +// In other words, zero_point designates which quantized value corresponds to +// the real 0 value, and scale designates the difference between the real values +// corresponding to consecutive quantized values differing by 1. +struct QuantizationParams { + int32_t zero_point = 0; + double scale = 0.0; +}; + +inline bool operator==(const QuantizationParams& qp1, + const QuantizationParams& qp2) { + return qp1.zero_point == qp2.zero_point && qp1.scale == qp2.scale; +} + +// Quantization parameters for each channel, determining the mapping of +// quantized values to real values. See QuantizationParams for a single set of +// parameters per tensor. This has one parameters set per each channel. +// +// The correspondence is as follows: +// +// real_value = scale[channel] * (quantized_value - zero_point[channel]); +// +struct PerChannelQuantizationParams { + // The following members typically point to the corresponding members of a + // TfLiteAffineQuantization struct. + const float* scale; + const int32_t* zero_point; + int32_t quantized_dimension; +}; + +// Gets next index to iterate through a multidimensional array. +inline bool NextIndex(const int num_dims, const int* dims, int* current) { + if (num_dims == 0) { + return false; + } + TFLITE_DCHECK(dims != nullptr); + TFLITE_DCHECK(current != nullptr); + int carry = 1; + for (int idx = num_dims - 1; idx >= 0; --idx) { + int current_val = current[idx] + carry; + TFLITE_DCHECK_GE(dims[idx], current_val); + if (dims[idx] == current_val) { + current[idx] = 0; + } else { + current[idx] = current_val; + carry = 0; + break; + } + } + return (carry == 0); +} + +// Gets offset of index if reducing on axis. When reducing, the flattened offset +// will not change, if the input index changes on the given axis. For example, +// if you have a 3D tensor and you are reducing to 2D by eliminating axis 0, +// then index (0, 1, 2) and index (1, 1, 2) will map to the same flattened +// offset. +// TODO(kanlig): uses Dims to represent dimensions. +inline size_t ReducedOutputOffset(const int num_dims, const int* dims, + const int* index, const int num_axis, + const int* axis) { + if (num_dims == 0) { + return 0; + } + TFLITE_DCHECK(dims != nullptr); + TFLITE_DCHECK(index != nullptr); + size_t offset = 0; + for (int idx = 0; idx < num_dims; ++idx) { + // if we need to skip this axis + bool is_axis = false; + if (axis != nullptr) { + for (int axis_idx = 0; axis_idx < num_axis; ++axis_idx) { + if (idx == axis[axis_idx]) { + is_axis = true; + break; + } + } + } + if (!is_axis) { + offset = offset * static_cast(dims[idx]) + + static_cast(index[idx]); + } + } + return offset; +} + +// Since tensors with '0' in their shape are valid in TF, these offset functions +// allow that as long as the corresponding index is also 0. It is upto the +// calling ops to ensure that they perform verification checks on tensor shapes +// if they don't support a particular behavior. 
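+
+// Worked example (illustrative, not from the upstream sources): for a packed
+// Dims<4> with sizes {s0, s1, s2, s3}, ComputeStrides() further below yields
+// strides {1, s0, s0 * s1, s0 * s1 * s2}, so Offset(dims, i0, i1, i2, i3)
+// equals i0 + s0 * (i1 + s1 * (i2 + s2 * i3)).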
+ +inline int Offset(const Dims<4>& dims, int i0, int i1, int i2, int i3) { + TFLITE_DCHECK((i0 == 0 && dims.sizes[0] == 0) || + (i0 >= 0 && i0 < dims.sizes[0])); + TFLITE_DCHECK((i1 == 0 && dims.sizes[1] == 0) || + (i1 >= 0 && i1 < dims.sizes[1])); + TFLITE_DCHECK((i2 == 0 && dims.sizes[2] == 0) || + (i2 >= 0 && i2 < dims.sizes[2])); + TFLITE_DCHECK((i3 == 0 && dims.sizes[3] == 0) || + (i3 >= 0 && i3 < dims.sizes[3])); + return i0 * dims.strides[0] + i1 * dims.strides[1] + i2 * dims.strides[2] + + i3 * dims.strides[3]; +} + +inline int Offset(const Dims<4>& dims, int* index) { + return Offset(dims, index[0], index[1], index[2], index[3]); +} + +// Get array size, DCHECKing that the dim index is in range. +// +// Note that this will be phased out with Dims<4>, since RuntimeShape::Dims() +// already performs this check. +template +int ArraySize(const Dims& array, int index) { + TFLITE_DCHECK(index >= 0 && index < N); + return array.sizes[index]; +} + +// Get common array size, DCHECKing that they all agree. +template +int MatchingArraySize(const ArrayType1& array1, int index1, + const ArrayType2& array2, int index2) { + TFLITE_DCHECK_EQ(ArraySize(array1, index1), ArraySize(array2, index2)); + return ArraySize(array1, index1); +} + +template +int MatchingArraySize(const ArrayType1& array1, int index1, + const ArrayType2& array2, int index2, Args... args) { + TFLITE_DCHECK_EQ(ArraySize(array1, index1), ArraySize(array2, index2)); + return MatchingArraySize(array1, index1, args...); +} + +// Get common shape dim, DCHECKing that they all agree. +inline int MatchingDim(const RuntimeShape& shape1, int index1, + const RuntimeShape& shape2, int index2) { + TFLITE_DCHECK_EQ(shape1.Dims(index1), shape2.Dims(index2)); + return std::min(shape1.Dims(index1), shape2.Dims(index2)); +} + +template +int MatchingDim(const RuntimeShape& shape1, int index1, + const RuntimeShape& shape2, int index2, Args... args) { + TFLITE_DCHECK_EQ(shape1.Dims(index1), shape2.Dims(index2)); + return MatchingDim(shape1, index1, args...); +} + +// Will be phased out with Dims<4>, replaced by RuntimeShape::FlatSize(). +template +inline int FlatSize(const Dims& dims) { + int flat_size = 1; + for (int i = 0; i < N; ++i) { + flat_size *= dims.sizes[i]; + } + return flat_size; +} + +TFLITE_DEPRECATED("Prefer FlatSize.") +inline int RequiredBufferSizeForDims(const Dims<4>& dims) { + return FlatSize(dims); +} + +inline int MatchingElementsSize(const RuntimeShape& shape, + const RuntimeShape& check_shape_0) { + const int size_1 = shape.FlatSize(); + const int size_2 = check_shape_0.FlatSize(); + TFLITE_CHECK_EQ(size_1, size_2); + return size_1; +} + +inline int MatchingElementsSize(const RuntimeShape& shape, + const RuntimeShape& check_shape_0, + const RuntimeShape& check_shape_1) { + const int size_1 = shape.FlatSize(); + const int size_2 = check_shape_0.FlatSize(); + const int size_3 = check_shape_1.FlatSize(); + TFLITE_CHECK_EQ(size_1, size_2); + TFLITE_CHECK_EQ(size_2, size_3); + return size_1; +} + +// Flat size calculation, checking that dimensions match with one or more other +// arrays. 
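+
+// Typical usage in an element-wise kernel (a sketch, not from upstream; the
+// tensor names are hypothetical):
+//   const int flat_size = MatchingFlatSize(GetTensorShape(input),
+//                                          GetTensorShape(output));
+// This DCHECKs that the shapes agree dimension by dimension and returns the
+// shared element count.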
+inline int MatchingFlatSize(const RuntimeShape& shape, + const RuntimeShape& check_shape_0) { + TFLITE_DCHECK_EQ(shape.DimensionsCount(), check_shape_0.DimensionsCount()); + const int dims_count = shape.DimensionsCount(); + for (int i = 0; i < dims_count; ++i) { + TFLITE_DCHECK_EQ(shape.Dims(i), check_shape_0.Dims(i)); + } + return shape.FlatSize(); +} + +inline int MatchingFlatSize(const RuntimeShape& shape, + const RuntimeShape& check_shape_0, + const RuntimeShape& check_shape_1) { + TFLITE_DCHECK_EQ(shape.DimensionsCount(), check_shape_0.DimensionsCount()); + const int dims_count = shape.DimensionsCount(); + for (int i = 0; i < dims_count; ++i) { + TFLITE_DCHECK_EQ(shape.Dims(i), check_shape_0.Dims(i)); + } + return MatchingFlatSize(shape, check_shape_1); +} + +inline int MatchingFlatSize(const RuntimeShape& shape, + const RuntimeShape& check_shape_0, + const RuntimeShape& check_shape_1, + const RuntimeShape& check_shape_2) { + TFLITE_DCHECK_EQ(shape.DimensionsCount(), check_shape_0.DimensionsCount()); + const int dims_count = shape.DimensionsCount(); + for (int i = 0; i < dims_count; ++i) { + TFLITE_DCHECK_EQ(shape.Dims(i), check_shape_0.Dims(i)); + } + return MatchingFlatSize(shape, check_shape_1, check_shape_2); +} + +inline int MatchingFlatSize(const RuntimeShape& shape, + const RuntimeShape& check_shape_0, + const RuntimeShape& check_shape_1, + const RuntimeShape& check_shape_2, + const RuntimeShape& check_shape_3) { + TFLITE_DCHECK_EQ(shape.DimensionsCount(), check_shape_0.DimensionsCount()); + const int dims_count = shape.DimensionsCount(); + for (int i = 0; i < dims_count; ++i) { + TFLITE_DCHECK_EQ(shape.Dims(i), check_shape_0.Dims(i)); + } + return MatchingFlatSize(shape, check_shape_1, check_shape_2, check_shape_3); +} + +// Flat size calculation, checking that dimensions match with one or more other +// arrays. +template +inline int MatchingFlatSize(const Dims& dims, const Dims& check_dims_0) { + for (int i = 0; i < N; ++i) { + TFLITE_DCHECK_EQ(ArraySize(dims, i), ArraySize(check_dims_0, i)); + } + return FlatSize(dims); +} + +template +inline int MatchingFlatSize(const Dims& dims, const Dims& check_dims_0, + const Dims& check_dims_1) { + for (int i = 0; i < N; ++i) { + TFLITE_DCHECK_EQ(ArraySize(dims, i), ArraySize(check_dims_0, i)); + } + return MatchingFlatSize(dims, check_dims_1); +} + +template +inline int MatchingFlatSize(const Dims& dims, const Dims& check_dims_0, + const Dims& check_dims_1, + const Dims& check_dims_2) { + for (int i = 0; i < N; ++i) { + TFLITE_DCHECK_EQ(ArraySize(dims, i), ArraySize(check_dims_0, i)); + } + return MatchingFlatSize(dims, check_dims_1, check_dims_2); +} + +template +inline int MatchingFlatSize(const Dims& dims, const Dims& check_dims_0, + const Dims& check_dims_1, + const Dims& check_dims_2, + const Dims& check_dims_3) { + for (int i = 0; i < N; ++i) { + TFLITE_DCHECK_EQ(ArraySize(dims, i), ArraySize(check_dims_0, i)); + } + return MatchingFlatSize(dims, check_dims_1, check_dims_2, check_dims_3); +} + +// Flat size calculation, checking if their extended shapes match. 
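+
+// For example (illustrative, not from upstream): shapes {3, 4} and {1, 3, 4}
+// have matching extended shapes because the extra leading dimension is 1, so
+// MatchingExtendedShapeFlatSize() returns 3 * 4 == 12.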
+inline int MatchingExtendedShapeFlatSize(const RuntimeShape& shape, + const RuntimeShape& check_shape_0) { + const int shape_dims = shape.DimensionsCount(); + const int check_shape_0_dims = check_shape_0.DimensionsCount(); + const int min_dims = std::min(shape_dims, check_shape_0_dims); + + for (int i = 0; i < min_dims; ++i) { + TFLITE_DCHECK_EQ(shape.Dims(shape_dims - 1 - i), + check_shape_0.Dims(check_shape_0_dims - 1 - i)); + } + for (int i = min_dims; i < shape_dims; ++i) { + TFLITE_DCHECK_EQ(shape.Dims(shape_dims - 1 - i), 1); + } + for (int i = min_dims; i < check_shape_0_dims; ++i) { + TFLITE_DCHECK_EQ(check_shape_0.Dims(check_shape_0_dims - 1 - i), 1); + } + return shape.FlatSize(); +} + +inline int MatchingExtendedShapeFlatSize(const RuntimeShape& shape, + const RuntimeShape& check_shape_0, + const RuntimeShape& check_shape_1) { + const int flat_size = MatchingExtendedShapeFlatSize(shape, check_shape_0); + TFLITE_DCHECK_EQ(MatchingExtendedShapeFlatSize(shape, check_shape_1), + flat_size); + return flat_size; +} + +inline int MatchingExtendedShapeFlatSize(const RuntimeShape& shape, + const RuntimeShape& check_shape_0, + const RuntimeShape& check_shape_1, + const RuntimeShape& check_shape_2) { + const int flat_size = MatchingExtendedShapeFlatSize(shape, check_shape_0); + TFLITE_DCHECK_EQ( + MatchingExtendedShapeFlatSize(shape, check_shape_1, check_shape_2), + flat_size); + return flat_size; +} + +inline int MatchingExtendedShapeFlatSize(const RuntimeShape& shape, + const RuntimeShape& check_shape_0, + const RuntimeShape& check_shape_1, + const RuntimeShape& check_shape_2, + const RuntimeShape& check_shape_3) { + const int flat_size = MatchingExtendedShapeFlatSize(shape, check_shape_0); + TFLITE_DCHECK_EQ(MatchingExtendedShapeFlatSize(shape, check_shape_1, + check_shape_2, check_shape_3), + flat_size); + return flat_size; +} + +// Data is required to be contiguous, and so many operators can use either the +// full array flat size or the flat size with one dimension skipped (commonly +// the depth). +template +inline int FlatSizeSkipDim(const Dims& dims, int skip_dim) { + TFLITE_DCHECK(skip_dim >= 0 && skip_dim < N); + int flat_size = 1; + for (int i = 0; i < N; ++i) { + flat_size *= (i == skip_dim) ? 1 : dims.sizes[i]; + } + return flat_size; +} + +// A combination of MatchingFlatSize() and FlatSizeSkipDim(). 
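+
+// For example (illustrative, not from upstream): for sizes {8, 2, 2, 4} and
+// skip_dim == 0, the flat size with the skipped dimension treated as 1 is
+// 2 * 2 * 4 == 16, while the remaining axes are still checked for equality.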
+template +inline int MatchingFlatSizeSkipDim(const Dims& dims, int skip_dim, + const Dims& check_dims_0) { + for (int i = 0; i < N; ++i) { + if (i != skip_dim) { + TFLITE_DCHECK_EQ(ArraySize(dims, i), ArraySize(check_dims_0, i)); + } + } + return FlatSizeSkipDim(dims, skip_dim); +} + +template +inline int MatchingFlatSizeSkipDim(const Dims& dims, int skip_dim, + const Dims& check_dims_0, + const Dims& check_dims_1) { + for (int i = 0; i < N; ++i) { + if (i != skip_dim) { + TFLITE_DCHECK_EQ(ArraySize(dims, i), ArraySize(check_dims_0, i)); + } + } + return MatchingFlatSizeSkipDim(dims, skip_dim, check_dims_1); +} + +template +inline int MatchingFlatSizeSkipDim(const Dims& dims, int skip_dim, + const Dims& check_dims_0, + const Dims& check_dims_1, + const Dims& check_dims_2) { + for (int i = 0; i < N; ++i) { + if (i != skip_dim) { + TFLITE_DCHECK_EQ(ArraySize(dims, i), ArraySize(check_dims_0, i)); + } + } + return MatchingFlatSizeSkipDim(dims, skip_dim, check_dims_1, check_dims_2); +} + +template +inline int MatchingFlatSizeSkipDim(const Dims& dims, int skip_dim, + const Dims& check_dims_0, + const Dims& check_dims_1, + const Dims& check_dims_2, + const Dims& check_dims_3) { + for (int i = 0; i < N; ++i) { + if (i != skip_dim) { + TFLITE_DCHECK_EQ(ArraySize(dims, i), ArraySize(check_dims_0, i)); + } + } + return MatchingFlatSizeSkipDim(dims, skip_dim, check_dims_1, check_dims_2, + check_dims_3); +} + +// Data is required to be contiguous, and so many operators can use either the +// full array flat size or the flat size with one dimension skipped (commonly +// the depth). +inline int FlatSizeSkipDim(const RuntimeShape& shape, int skip_dim) { + const int dims_count = shape.DimensionsCount(); + TFLITE_DCHECK(skip_dim >= 0 && skip_dim < dims_count); + const auto* dims_data = shape.DimsData(); + int flat_size = 1; + for (int i = 0; i < dims_count; ++i) { + flat_size *= (i == skip_dim) ? 1 : dims_data[i]; + } + return flat_size; +} + +// A combination of MatchingFlatSize() and FlatSizeSkipDim(). 
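+
+// Typical usage (a sketch, not from upstream; `shape` and `other_shape` are
+// hypothetical names): splitting a tensor into outer rows and an inner depth
+// for a per-row loop,
+//   const int last_dim = shape.DimensionsCount() - 1;
+//   const int depth = shape.Dims(last_dim);
+//   const int outer_size = MatchingFlatSizeSkipDim(shape, last_dim,
+//                                                  other_shape);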
+inline int MatchingFlatSizeSkipDim(const RuntimeShape& shape, int skip_dim, + const RuntimeShape& check_shape_0) { + const int dims_count = shape.DimensionsCount(); + for (int i = 0; i < dims_count; ++i) { + if (i != skip_dim) { + TFLITE_DCHECK_EQ(shape.Dims(i), check_shape_0.Dims(i)); + } + } + return FlatSizeSkipDim(shape, skip_dim); +} + +inline int MatchingFlatSizeSkipDim(const RuntimeShape& shape, int skip_dim, + const RuntimeShape& check_shape_0, + const RuntimeShape& check_shape_1) { + const int dims_count = shape.DimensionsCount(); + for (int i = 0; i < dims_count; ++i) { + if (i != skip_dim) { + TFLITE_DCHECK_EQ(shape.Dims(i), check_shape_0.Dims(i)); + } + } + return MatchingFlatSizeSkipDim(shape, skip_dim, check_shape_1); +} + +inline int MatchingFlatSizeSkipDim(const RuntimeShape& shape, int skip_dim, + const RuntimeShape& check_shape_0, + const RuntimeShape& check_shape_1, + const RuntimeShape& check_shape_2) { + const int dims_count = shape.DimensionsCount(); + for (int i = 0; i < dims_count; ++i) { + if (i != skip_dim) { + TFLITE_DCHECK_EQ(shape.Dims(i), check_shape_0.Dims(i)); + } + } + return MatchingFlatSizeSkipDim(shape, skip_dim, check_shape_1, check_shape_2); +} + +inline int MatchingFlatSizeSkipDim(const RuntimeShape& shape, int skip_dim, + const RuntimeShape& check_shape_0, + const RuntimeShape& check_shape_1, + const RuntimeShape& check_shape_2, + const RuntimeShape& check_shape_3) { + const int dims_count = shape.DimensionsCount(); + for (int i = 0; i < dims_count; ++i) { + if (i != skip_dim) { + TFLITE_DCHECK_EQ(shape.Dims(i), check_shape_0.Dims(i)); + } + } + return MatchingFlatSizeSkipDim(shape, skip_dim, check_shape_1, check_shape_2, + check_shape_3); +} + +template +bool IsPackedWithoutStrides(const Dims& dims) { + int expected_stride = 1; + for (int d = 0; d < N; d++) { + if (dims.strides[d] != expected_stride) return false; + expected_stride *= dims.sizes[d]; + } + return true; +} + +template +void ComputeStrides(Dims* dims) { + dims->strides[0] = 1; + for (int d = 1; d < N; d++) { + dims->strides[d] = dims->strides[d - 1] * dims->sizes[d - 1]; + } +} + +enum class BroadcastableOpCategory : uint8_t { + kNone, + kNonBroadcast, // Matching input shapes. + kFirstInputBroadcastsFast, // Fivefold nested loops. + kSecondInputBroadcastsFast, // Fivefold nested loops. + kGenericBroadcast, // Fall-back. +}; + +struct MinMax { + float min; + float max; +}; +static_assert(sizeof(MinMax) == 8, ""); + +struct ActivationParams { + FusedActivationFunctionType activation_type; + // uint8_t, etc, activation params. + int32_t quantized_activation_min; + int32_t quantized_activation_max; +}; + +struct ReluParams : public ActivationParams { + int32_t input_offset; + int32_t output_offset; + int32_t output_multiplier; + int output_shift; +}; + +// Styles of resizing op usages. For example, kImageStyle can be used with a Pad +// op for pattern-specific optimization. +enum class ResizingCategory : uint8_t { + kNone, + kImageStyle, // 4D, operating on inner dimensions, say {0, a, b, 0}. + kGenericResize, +}; + +// For Add, Sub, Mul ops. +struct ArithmeticParams { + // Shape dependent / common to data / op types. + BroadcastableOpCategory broadcast_category; + // uint8_t inference params. + int32_t input1_offset; + int32_t input2_offset; + int32_t output_offset; + int32_t output_multiplier; + int output_shift; + // Add / Sub, not Mul, uint8_t inference params. 
+ int left_shift; + int32_t input1_multiplier; + int input1_shift; + int32_t input2_multiplier; + int input2_shift; + + // TODO(b/158622529): Union the following activation params. + // uint8_t, etc, activation params. + int32_t quantized_activation_min; + int32_t quantized_activation_max; + // float activation params. + float float_activation_min; + float float_activation_max; + // int64_t activation params. + int64_t int64_activation_min; + int64_t int64_activation_max; + + // Processed output dimensions. + // Let input "a" be the one that broadcasts in the faster-changing dimension. + // Then, after coalescing, for shapes {a0, a1, a2, a3, a4} and + // {b0, b1, b2, b3, b4}, + // broadcast_shape[4] = b0 = a0. + // broadcast_shape[3] = b1; a1 = 1. + // broadcast_shape[2] = b2 = a2. + // broadcast_shape[1] = a3; b3 = 1. + // broadcast_shape[0] = b4 = a4. + int broadcast_shape[5]; +}; + +struct ConcatenationParams { + int8_t axis; + const int32_t* input_zeropoint; + const float* input_scale; + uint16_t inputs_count; + int32_t output_zeropoint; + float output_scale; +}; + +struct ComparisonParams { + // uint8_t inference params. + int left_shift; + int32_t input1_offset; + int32_t input1_multiplier; + int input1_shift; + int32_t input2_offset; + int32_t input2_multiplier; + int input2_shift; + // Shape dependent / common to inference types. + bool is_broadcast; +}; + +struct ConvParams { + PaddingType padding_type; + PaddingValues padding_values; + // TODO(starka): This was just "stride", so check that width+height is OK. + int16_t stride_width; + int16_t stride_height; + int16_t dilation_width_factor; + int16_t dilation_height_factor; + // uint8_t inference params. + // TODO(b/65838351): Use smaller types if appropriate. + int32_t input_offset; + int32_t weights_offset; + int32_t output_offset; + int32_t output_multiplier; + int output_shift; + // uint8_t, etc, activation params. + int32_t quantized_activation_min; + int32_t quantized_activation_max; + // float activation params. + float float_activation_min; + float float_activation_max; +}; + +struct Conv3DParams { + Padding3DValues padding_values; + int stride_width; + int stride_height; + int stride_depth; + int dilation_width; + int dilation_height; + int dilation_depth; + // float activation params. + float float_activation_min; + float float_activation_max; +}; + +typedef Conv3DParams Conv3DTransposeParams; + +struct DepthToSpaceParams { + int32_t block_size; +}; + +struct DepthwiseParams { + PaddingType padding_type; + PaddingValues padding_values; + int16_t stride_width; + int16_t stride_height; + int16_t dilation_width_factor; + int16_t dilation_height_factor; + int16_t depth_multiplier; + // uint8_t inference params. + // TODO(b/65838351): Use smaller types if appropriate. + int32_t input_offset; + int32_t weights_offset; + int32_t output_offset; + int32_t output_multiplier; + int output_shift; + // uint8_t, etc, activation params. + int32_t quantized_activation_min; + int32_t quantized_activation_max; + // float activation params. + float float_activation_min; + float float_activation_max; + const int32_t* output_multiplier_per_channel; + const int32_t* output_shift_per_channel; +}; + +struct DequantizationParams { + double scale; + int32_t zero_point; +}; + +struct PerChannelDequantizationParams { + const float* scale; + const int32_t* zero_point; + int32_t quantized_dimension; +}; + +struct FakeQuantParams { + MinMax minmax; + int32_t num_bits; +}; + +struct FullyConnectedParams { + // uint8_t inference params. 
+ // TODO(b/65838351): Use smaller types if appropriate. + int32_t input_offset; + int32_t weights_offset; + int32_t output_offset; + int32_t output_multiplier; + int output_shift; + // uint8_t, etc, activation params. + int32_t quantized_activation_min; + int32_t quantized_activation_max; + // float activation params. + float float_activation_min; + float float_activation_max; + // Mark the operands as cacheable if they are unchanging, e.g. weights. + bool lhs_cacheable; + bool rhs_cacheable; + FullyConnectedWeightsFormat weights_format; +}; + +struct GatherParams { + int16_t axis; + int16_t batch_dims; +}; + +struct L2NormalizationParams { + // uint8_t inference params. + int32_t input_zero_point; +}; + +struct LocalResponseNormalizationParams { + int32_t range; + double bias; + double alpha; + double beta; +}; + +struct HardSwishParams { + // zero_point of the input activations. + int16_t input_zero_point; + // zero_point of the output activations. + int16_t output_zero_point; + // 16bit fixed-point component of the multiplier to apply to go from the + // "high-res input scale", which is the input scale multiplied by 2^7, to the + // "relu-ish scale", which 3.0/32768. + // See the implementation of HardSwishPrepare. + int16_t reluish_multiplier_fixedpoint_int16; + // exponent/bit-shift component of the aforementioned multiplier. + int reluish_multiplier_exponent; + // 16bit fixed-point component of the multiplier to apply to go from the + // "high-res input scale", which is the input scale multiplied by 2^7, to the + // output scale. + // See the implementation of HardSwishPrepare. + int16_t output_multiplier_fixedpoint_int16; + // exponent/bit-shift component of the aforementioned multiplier. + int output_multiplier_exponent; +}; + +struct LogisticParams { + // uint8_t inference params. + int32_t input_zero_point; + int32_t input_range_radius; + int32_t input_multiplier; + int input_left_shift; +}; + +struct LstmCellParams { + int32_t weights_zero_point; + int32_t accum_multiplier; + int accum_shift; + int state_integer_bits; +}; + +struct MeanParams { + int8_t axis_count; + int16_t axis[4]; +}; + +struct PackParams { + int8_t axis; + const int32_t* input_zeropoint; + const float* input_scale; + uint16_t inputs_count; + int32_t output_zeropoint; + float output_scale; +}; + +struct PadParams { + int8_t left_padding_count; + int32_t left_padding[5]; + int8_t right_padding_count; + int32_t right_padding[5]; + ResizingCategory resizing_category; +}; + +struct PreluParams { + int32_t input_offset; + int32_t alpha_offset; + int32_t output_offset; + int32_t output_multiplier_1; + int output_shift_1; + int32_t output_multiplier_2; + int output_shift_2; +}; + +struct PoolParams { + FusedActivationFunctionType activation; + PaddingType padding_type; + PaddingValues padding_values; + int stride_height; + int stride_width; + int filter_height; + int filter_width; + // uint8_t, etc, activation params. + int32_t quantized_activation_min; + int32_t quantized_activation_max; + // float activation params. + float float_activation_min; + float float_activation_max; +}; + +struct ReshapeParams { + int8_t shape_count; + int32_t shape[4]; +}; + +struct ResizeBilinearParams { + bool align_corners; + // half_pixel_centers assumes pixels are of half the actual dimensions, and + // yields more accurate resizes. Corresponds to the same argument for the + // original TensorFlow op in TF2.0. 
+ bool half_pixel_centers; +}; + +struct ResizeNearestNeighborParams { + bool align_corners; + bool half_pixel_centers; +}; + +struct SliceParams { + int8_t begin_count; + int32_t begin[5]; + int8_t size_count; + int32_t size[5]; +}; + +struct SoftmaxParams { + // beta is not really used (not a Tensorflow parameter) and not implemented + // for LogSoftmax. + double beta; + // uint8_t inference params. Used even when beta defaults to 1.0. + int32_t input_multiplier; + int32_t input_left_shift; + // Reverse scaling is only used by LogSoftmax. + int32_t reverse_scaling_divisor; + int32_t reverse_scaling_right_shift; + int diff_min; + int32_t zero_point; + float scale; + float* table; + // int16 LUT for exp(x), where x uniform distributed between [-10.0 , 0.0] + int16_t* exp_lut; + // int16 LUT for 1 / (1 + x), where x uniform distributed between [0.0 , 1.0] + int16_t* one_over_one_plus_x_lut; + uint8_t* uint8_table1; + uint8_t* uint8_table2; +}; + +struct SpaceToBatchParams { + // "Zero" padding for uint8_t means padding with the output offset. + int32_t output_offset; +}; + +struct SpaceToDepthParams { + int32_t block_size; +}; + +struct SplitParams { + // Graphs that split into, say, 2000 nodes are encountered. The indices in + // OperatorEdges are of type uint16_t. + uint16_t num_split; + int16_t axis; +}; + +struct SqueezeParams { + int8_t squeeze_dims_count; + int32_t squeeze_dims[4]; +}; + +struct StridedSliceParams { + int8_t start_indices_count; + int32_t start_indices[5]; + int8_t stop_indices_count; + int32_t stop_indices[5]; + int8_t strides_count; + int32_t strides[5]; + + uint16_t begin_mask; + uint16_t ellipsis_mask; + uint16_t end_mask; + uint16_t new_axis_mask; + uint16_t shrink_axis_mask; +}; + +struct TanhParams { + int32_t input_zero_point; + int32_t input_range_radius; + int32_t input_multiplier; + int input_left_shift; +}; + +constexpr int kTransposeMaxDimensions = 6; + +struct TransposeParams { + int8_t perm_count; + int32_t perm[kTransposeMaxDimensions]; +}; + +struct UnpackParams { + uint16_t num_split; + int16_t axis; +}; + +struct LeakyReluParams { + float alpha; + int32_t input_offset; + int32_t output_offset; + int32_t output_multiplier_alpha; + int32_t output_shift_alpha; + int32_t output_multiplier_identity; + int32_t output_shift_identity; +}; + +template +inline void SetActivationParams(float min, float max, P* params) { + params->float_activation_min = min; + params->float_activation_max = max; +} + +template +inline void SetActivationParams(int32_t min, int32_t max, P* params) { + params->quantized_activation_min = min; + params->quantized_activation_max = max; +} + +template +inline void SetActivationParams(int64_t min, int64_t max, P* params) { + params->int64_activation_min = min; + params->int64_activation_max = max; +} + +template +inline void GetActivationParams(const P& params, int32_t* min, int32_t* max) { + *min = params.quantized_activation_min; + *max = params.quantized_activation_max; +} + +template +inline void GetActivationParams(const P& params, float* min, float* max) { + *min = params.float_activation_min; + *max = params.float_activation_max; +} + +template +inline void GetActivationParams(const P& params, int64_t* min, int64_t* max) { + *min = params.int64_activation_min; + *max = params.int64_activation_max; +} + +// Type trait to check of given type has size smaller than 4 bytes. 
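+// For example (illustrative): is_small_integer<int8_t>::value and
+// is_small_integer<int16_t>::value are true, while
+// is_small_integer<int32_t>::value and is_small_integer<float>::value are
+// false.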
+template +struct is_small_integer + : public std::integral_constant::value || + std::is_same::value || + std::is_same::value || + std::is_same::value> {}; + +// Type trait to check of given type is int32 or int64. +template +struct is_int32_or_int64 + : public std::integral_constant::value || + std::is_same::value> { +}; + +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_TYPES_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h new file mode 100644 index 0000000..5f33173 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h @@ -0,0 +1,335 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_KERNEL_UTIL_H_ +#define TENSORFLOW_LITE_KERNELS_KERNEL_UTIL_H_ + +#include + +#include +#ifndef TF_LITE_STATIC_MEMORY +#include +#endif // TF_LITE_STATIC_MEMORY + +#include "edge-impulse-sdk/tensorflow/lite/core/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/core/c/common.h" + +namespace tflite { + +// A fair number of functions in this header have historically been inline. +// It is ok to change functions to not be inline if the latency with +// benchmark_model for MobileNet + MobileBERT is unaffected. If such a change is +// made, move the newly non-inlined function declarations to the top of this +// header file. + +// Note: You must check if result is not null: +// +// TfLiteTensor* my_tensor = GetInput(context, node, kMyTensorIdx); +// TF_LITE_ENSURE(context, my_tensor != nullptr); +// +// This is because the index might point to the optional tensor constant +// (kTfLiteOptionalTensor) in which case there is no tensor to return. +const TfLiteTensor* GetInput(const TfLiteContext* context, + const TfLiteNode* node, int index); + +// Same as `GetInput` but returns boolean and uses output argument for tensor. +// +// TfLiteTensor* my_tensor; +// TF_LITE_ENSURE_OK(context, +// GetInputSafe(context, node, kMyTensorIdx, &my_tensor)); +// // can use my_tensor directly from here onwards, it is not nullptr +// +// Should be used in cases where the binary size is too large. +TfLiteStatus GetInputSafe(const TfLiteContext* context, const TfLiteNode* node, + int index, const TfLiteTensor** tensor); + +// Note: You must check if result is not null: +// +// TfLiteTensor* my_tensor = GetVariableInput(context, node, kMyTensorIdx); +// TF_LITE_ENSURE(context, my_tensor != nullptr); +// +// This is because the index might point to the optional tensor constant +// (kTfLiteOptionalTensor) in which case there is no tensor to return. 
+TfLiteTensor* GetVariableInput(TfLiteContext* context, const TfLiteNode* node, + int index); + +// Note: You must check if result is not null: +// +// TfLiteTensor* my_tensor = GetOutput(context, node, kMyTensorIdx); +// TF_LITE_ENSURE(context, my_tensor != nullptr); +// +// This is because the index might point to the optional tensor constant +// (kTfLiteOptionalTensor) in which case there is no tensor to return. +TfLiteTensor* GetOutput(TfLiteContext* context, const TfLiteNode* node, + int index); + +// Same as `GetOutput` but returns boolean and uses output argument for tensor. +// +// TfLiteTensor* my_tensor; +// TF_LITE_ENSURE_OK(context, +// GetOutputSafe(context, node, kMyTensorIdx, &my_tensor)); +// // can use my_tensor directly from here onwards, it is not nullptr +// +// Should be used in cases where the binary size is too large. +TfLiteStatus GetOutputSafe(const TfLiteContext* context, const TfLiteNode* node, + int index, TfLiteTensor** tensor); + +// Note: You must check if result is not null: +// +// TfLiteTensor* my_tensor = GetOptionalInputTensor(context, node, kIdx); +// TF_LITE_ENSURE(context, my_tensor != nullptr); +// +// This is because the index might point to the optional tensor constant +// (kTfLiteOptionalTensor) in which case there is no tensor to return. +// +// Deprecated. GetInput has the same functionality. +const TfLiteTensor* GetOptionalInputTensor(const TfLiteContext* context, + const TfLiteNode* node, int index); + +#ifndef TF_LITE_STATIC_MEMORY +// Note: You must check if result is not null: +// +// TfLiteTensor* my_tensor = GetTemporary(context, node, kMyTensorIdx); +// TF_LITE_ENSURE(context, my_tensor != nullptr); +// +// This is because the index might point to the optional tensor constant +// (kTfLiteOptionalTensor) in which case there is no tensor to return. +TfLiteTensor* GetTemporary(TfLiteContext* context, const TfLiteNode* node, + int index); + +// Same as `GetTemporary` but returns boolean and uses output argument for +// tensor. +// +// TfLiteTensor* my_tensor; +// TF_LITE_ENSURE_OK(context, +// GetTemporarySafe(context, node, kMyTensorIdx, +// &my_tensor)); +// // can use my_tensor directly from here onwards, it is not nullptr +// +// Should be used in cases where the binary size is too large. +TfLiteStatus GetTemporarySafe(const TfLiteContext* context, + const TfLiteNode* node, int index, + TfLiteTensor** tensor); + +// Note: You must check if result is not null: +// +// TfLiteTensor* my_tensor = GetIntermediates(context, node, kMyTensorIdx); +// TF_LITE_ENSURE(context, my_tensor != nullptr); +// +// This is because the index might point to the optional tensor constant +// (kTfLiteOptionalTensor) in which case there is no tensor to return. +const TfLiteTensor* GetIntermediates(TfLiteContext* context, + const TfLiteNode* node, int index); + +// Same as `GetIntermediates` but returns boolean and uses output argument for +// tensor. +// +// TfLiteTensor* my_tensor; +// TF_LITE_ENSURE_OK(context, +// GetIntermediatesSafe(context, node, kMyTensorIdx, +// &my_tensor)); +// // can use my_tensor directly from here onwards, it is not nullptr +// +// Should be used in cases where the binary size is too large. 
+TfLiteStatus GetIntermediatesSafe(const TfLiteContext* context, + const TfLiteNode* node, int index, + TfLiteTensor** tensor); +#endif // TF_LITE_STATIC_MEMORY + +inline int NumDimensions(const TfLiteTensor* t) { return t->dims->size; } +inline int SizeOfDimension(const TfLiteTensor* t, int dim) { + return t->dims->data[dim]; +} + +inline int NumDimensions(const TfLiteEvalTensor* t) { return t->dims->size; } +inline int SizeOfDimension(const TfLiteEvalTensor* t, int dim) { + return t->dims->data[dim]; +} + +inline int NumInputs(const TfLiteNode* node) { + return node->inputs == nullptr ? 0 : node->inputs->size; +} +inline int NumOutputs(const TfLiteNode* node) { + return node->outputs == nullptr ? 0 : node->outputs->size; +} + +#ifndef TF_LITE_STATIC_MEMORY +inline int NumIntermediates(const TfLiteNode* node) { + return node->intermediates->size; +} +#endif // TF_LITE_STATIC_MEMORY + +inline int64_t NumElements(const TfLiteIntArray* dims) { + int64_t count = 1; + for (int i = 0; i < dims->size; ++i) { + count *= dims->data[i]; + } + return count; +} + +inline int64_t NumElements(const TfLiteTensor* t) { + return NumElements(t->dims); +} + +inline int64_t NumElements(const int* dims, int num_dims) { + int64_t count = 1; + for (int i = 0; i < num_dims; ++i) { + count *= dims[i]; + } + return count; +} + +// Determines whether tensor is constant. +// TODO(b/138199592): Introduce new query which checks for constant OR +// persistent-read-only, which would be useful for most tensor kernels that +// are potentially dynamic based on the input tensor value availability at the +// time of prepare. +inline bool IsConstantTensor(const TfLiteTensor* tensor) { + return tensor->allocation_type == kTfLiteMmapRo; +} + +inline bool IsConstantOrPersistentTensor(const TfLiteTensor* tensor) { + return IsConstantTensor(tensor) || + (tensor->allocation_type == kTfLitePersistentRo); +} + +// Determines whether tensor is dynamic. Note that a tensor can be non-const and +// not dynamic. This function specifically checks for a dynamic tensor. +inline bool IsDynamicTensor(const TfLiteTensor* tensor) { + return tensor->allocation_type == kTfLiteDynamic; +} + +// Sets tensor to dynamic. +inline void SetTensorToDynamic(TfLiteTensor* tensor) { + if (tensor->allocation_type != kTfLiteDynamic) { + tensor->allocation_type = kTfLiteDynamic; + tensor->data.raw = nullptr; + } +} + +// Sets tensor to persistent and read-only. +inline void SetTensorToPersistentRo(TfLiteTensor* tensor) { + if (tensor->allocation_type != kTfLitePersistentRo) { + tensor->allocation_type = kTfLitePersistentRo; + tensor->data.raw = nullptr; + } +} + +// Determines whether it is a hybrid op - one that has float inputs and +// quantized weights. +inline bool IsHybridOp(const TfLiteTensor* input, const TfLiteTensor* weight) { + return ((weight->type == kTfLiteUInt8 || weight->type == kTfLiteInt8) && + input->type == kTfLiteFloat32); +} + +// Check dimensionality match and populate OpData for Conv and DepthwiseConv. 
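+
+// Typically invoked from a convolution kernel's Prepare() step (a sketch, not
+// from upstream; `data` is a hypothetical kernel-specific OpData struct):
+//   TF_LITE_ENSURE_STATUS(PopulateConvolutionQuantizationParams(
+//       context, input, filter, bias, output, params->activation,
+//       &data->output_multiplier, &data->output_shift,
+//       &data->output_activation_min, &data->output_activation_max,
+//       data->per_channel_output_multiplier, data->per_channel_output_shift,
+//       num_channels));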
+TfLiteStatus PopulateConvolutionQuantizationParams( + TfLiteContext* context, const TfLiteTensor* input, + const TfLiteTensor* filter, const TfLiteTensor* bias, TfLiteTensor* output, + const TfLiteFusedActivation& activation, int32_t* multiplier, int* shift, + int32_t* output_activation_min, int32_t* output_activation_max, + int32_t* per_channel_multiplier, int32_t* per_channel_shift); + +TfLiteStatus PopulateConvolutionQuantizationParams( + TfLiteContext* context, const TfLiteTensor* input, + const TfLiteTensor* filter, const TfLiteTensor* bias, TfLiteTensor* output, + const TfLiteFusedActivation& activation, int32_t* multiplier, int* shift, + int32_t* output_activation_min, int32_t* output_activation_max, + int32_t* per_channel_multiplier, int32_t* per_channel_shift, + int num_channels); + +// Calculates the multiplication factor for a quantized convolution (or +// quantized depthwise convolution) involving the given tensors. Returns an +// error if the scales of the tensors are not compatible. +TfLiteStatus GetQuantizedConvolutionMultipler(TfLiteContext* context, + const TfLiteTensor* input, + const TfLiteTensor* filter, + const TfLiteTensor* bias, + TfLiteTensor* output, + double* multiplier); + +TfLiteStatus GetQuantizedConvolutionMultipler(TfLiteContext* context, + const TfLiteTensor* input, + const TfLiteTensor* filter, + TfLiteTensor* output, + double* multiplier); + +// Calculates the useful quantized range of an activation layer given its +// activation tensor. +TfLiteStatus CalculateActivationRangeQuantized(TfLiteContext* context, + TfLiteFusedActivation activation, + TfLiteTensor* output, + int32_t* act_min, + int32_t* act_max); + +// Calculates the useful range of an activation layer given its activation +// tensor.a +template +void CalculateActivationRange(TfLiteFusedActivation activation, + T* activation_min, T* activation_max) { + if (activation == kTfLiteActRelu) { + *activation_min = 0; + *activation_max = std::numeric_limits::max(); + } else if (activation == kTfLiteActRelu6) { + *activation_min = 0; + *activation_max = 6; + } else if (activation == kTfLiteActReluN1To1) { + *activation_min = -1; + *activation_max = 1; + } else { + *activation_min = std::numeric_limits::lowest(); + *activation_max = std::numeric_limits::max(); + } +} + +// Return true if the given tensors have the same shape. +bool HaveSameShapes(const TfLiteTensor* input1, const TfLiteTensor* input2); + +#if !defined(TF_LITE_STATIC_MEMORY) +// Gets the output shape from the input tensor. +TfLiteStatus GetOutputShapeFromInput(TfLiteContext* context, + const TfLiteTensor* input, + TfLiteIntArray** output_shape); + +const std::string GetShapeDebugString(const TfLiteIntArray* shape); + +#endif // !defined(TF_LITE_STATIC_MEMORY) + +// Calculates the output_shape that is necessary for element-wise operations +// with broadcasting involving the two input tensors. +TfLiteStatus CalculateShapeForBroadcast(TfLiteContext* context, + const TfLiteTensor* input1, + const TfLiteTensor* input2, + TfLiteIntArray** output_shape); + +// Calculates the output_shape that is necessary for element-wise operations +// with broadcasting involving the three input tensors. +TfLiteStatus CalculateShapeForBroadcast(TfLiteContext* context, + const TfLiteTensor* input1, + const TfLiteTensor* input2, + const TfLiteTensor* input3, + TfLiteIntArray** output_shape); + +// Return the size of given type in bytes. Return 0 in case of string. 
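+// For example (illustrative): kTfLiteFloat32 and kTfLiteInt32 yield 4,
+// kTfLiteInt8 and kTfLiteUInt8 yield 1, and kTfLiteString yields 0.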
+int TfLiteTypeGetSize(TfLiteType type); + +// Whether the current platform is mobile (Android or iOS). +bool IsMobilePlatform(); + +// Returns whether there is unspecified dimension in the tensor's dim signature. +bool HasUnspecifiedDimension(const TfLiteTensor* tensor); + +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_KERNEL_UTIL_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/kernel_util_lite.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/kernel_util_lite.cpp new file mode 100644 index 0000000..a786b68 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/kernel_util_lite.cpp @@ -0,0 +1,594 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" + +#include +#include + +#include +#include +#include +#include +#ifndef TF_LITE_STATIC_MEMORY +#include +#endif // TF_LITE_STATIC_MEMORY + +#include "edge-impulse-sdk/tensorflow/lite/context_util.h" +#include "edge-impulse-sdk/tensorflow/lite/core/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/core/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/cppmath.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" + +#if defined(__APPLE__) +#include "TargetConditionals.h" +#endif + +namespace tflite { + +namespace { + +// Assumes tensor_index is a valid index (in bounds) +inline TfLiteTensor* GetTensorAtIndex(const TfLiteContext* context, + int tensor_index) { + return context->GetTensor(context, tensor_index); +} + +// Validate in a single place to reduce binary size +inline TfLiteStatus ValidateTensorIndexingSafe(const TfLiteContext* context, + int index, int max_size, + const int* tensor_indices, + int* tensor_index) { + if (index < 0 || index >= max_size) { + TF_LITE_KERNEL_LOG(const_cast(context), + "Invalid tensor index %d (not in [0, %d))\n", index, + max_size); + return kTfLiteError; + } + if (tensor_indices[index] == kTfLiteOptionalTensor) { + TF_LITE_KERNEL_LOG(const_cast(context), + "Tensor at index %d was optional but was expected\n", + index); + return kTfLiteError; + } + + *tensor_index = tensor_indices[index]; + return kTfLiteOk; +} + +// Same as above but returns -1 for invalid inputs instead of status + logging +// error. 
+inline int ValidateTensorIndexing(const TfLiteContext* context, int index, + int max_size, const int* tensor_indices) { + if (index >= 0 && index < max_size) { + const int tensor_index = tensor_indices[index]; + if (tensor_index != kTfLiteOptionalTensor) { + return tensor_index; + } + } + return -1; +} + +inline TfLiteTensor* GetMutableInput(const TfLiteContext* context, + const TfLiteNode* node, int index) { + const int tensor_index = ValidateTensorIndexing( + context, index, node->inputs->size, node->inputs->data); + if (tensor_index < 0) { + return nullptr; + } + return GetTensorAtIndex(context, tensor_index); +} + +inline TfLiteStatus GetMutableInputSafe(const TfLiteContext* context, + const TfLiteNode* node, int index, + const TfLiteTensor** tensor) { + int tensor_index; + TF_LITE_ENSURE_OK( + context, ValidateTensorIndexingSafe(context, index, node->inputs->size, + node->inputs->data, &tensor_index)); + *tensor = GetTensorAtIndex(context, tensor_index); + return kTfLiteOk; +} + +} // anonymous namespace. + +const TfLiteTensor* GetInput(const TfLiteContext* context, + const TfLiteNode* node, int index) { + return GetMutableInput(context, node, index); +} + +TfLiteStatus GetInputSafe(const TfLiteContext* context, const TfLiteNode* node, + int index, const TfLiteTensor** tensor) { + return GetMutableInputSafe(context, node, index, tensor); +} + +TfLiteTensor* GetVariableInput(TfLiteContext* context, const TfLiteNode* node, + int index) { + TfLiteTensor* tensor = GetMutableInput(context, node, index); + if (tensor == nullptr) return nullptr; + return tensor->is_variable ? tensor : nullptr; +} + +TfLiteTensor* GetOutput(TfLiteContext* context, const TfLiteNode* node, + int index) { + const int tensor_index = ValidateTensorIndexing( + context, index, node->outputs->size, node->outputs->data); + if (tensor_index < 0) { + return nullptr; + } + return GetTensorAtIndex(context, tensor_index); +} + +TfLiteStatus GetOutputSafe(const TfLiteContext* context, const TfLiteNode* node, + int index, TfLiteTensor** tensor) { + int tensor_index; + TF_LITE_ENSURE_OK( + context, ValidateTensorIndexingSafe(context, index, node->outputs->size, + node->outputs->data, &tensor_index)); + *tensor = GetTensorAtIndex(context, tensor_index); + return kTfLiteOk; +} + +const TfLiteTensor* GetOptionalInputTensor(const TfLiteContext* context, + const TfLiteNode* node, int index) { + return GetInput(context, node, index); +} + +#ifndef TF_LITE_STATIC_MEMORY +TfLiteTensor* GetTemporary(TfLiteContext* context, const TfLiteNode* node, + int index) { + const int tensor_index = ValidateTensorIndexing( + context, index, node->temporaries->size, node->temporaries->data); + if (tensor_index < 0) { + return nullptr; + } + return GetTensorAtIndex(context, tensor_index); +} + +TfLiteStatus GetTemporarySafe(const TfLiteContext* context, + const TfLiteNode* node, int index, + TfLiteTensor** tensor) { + int tensor_index; + TF_LITE_ENSURE_OK(context, ValidateTensorIndexingSafe( + context, index, node->temporaries->size, + node->temporaries->data, &tensor_index)); + *tensor = GetTensorAtIndex(context, tensor_index); + return kTfLiteOk; +} + +const TfLiteTensor* GetIntermediates(TfLiteContext* context, + const TfLiteNode* node, int index) { + const int tensor_index = ValidateTensorIndexing( + context, index, node->intermediates->size, node->intermediates->data); + if (tensor_index < 0) { + return nullptr; + } + return GetTensorAtIndex(context, tensor_index); +} + +TfLiteStatus GetIntermediatesSafe(const TfLiteContext* context, + const 
TfLiteNode* node, int index, + TfLiteTensor** tensor) { + int tensor_index; + TF_LITE_ENSURE_OK(context, ValidateTensorIndexingSafe( + context, index, node->intermediates->size, + node->intermediates->data, &tensor_index)); + *tensor = GetTensorAtIndex(context, tensor_index); + return kTfLiteOk; +} +#endif // TF_LITE_STATIC_MEMORY + +// Per-axis +TfLiteStatus PopulateConvolutionQuantizationParams( + TfLiteContext* context, const TfLiteTensor* input, + const TfLiteTensor* filter, const TfLiteTensor* bias, TfLiteTensor* output, + const TfLiteFusedActivation& activation, int32_t* multiplier, int* shift, + int32_t* output_activation_min, int32_t* output_activation_max, + int32_t* per_channel_multiplier, int32_t* per_channel_shift) { + const auto* affine_quantization = + reinterpret_cast(filter->quantization.params); + return PopulateConvolutionQuantizationParams( + context, input, filter, bias, output, activation, multiplier, shift, + output_activation_min, output_activation_max, per_channel_multiplier, + per_channel_shift, affine_quantization->scale->size); +} + +// Per-axis & per-tensor +TfLiteStatus PopulateConvolutionQuantizationParams( + TfLiteContext* context, const TfLiteTensor* input, + const TfLiteTensor* filter, const TfLiteTensor* bias, TfLiteTensor* output, + const TfLiteFusedActivation& activation, int32_t* multiplier, int* shift, + int32_t* output_activation_min, int32_t* output_activation_max, + int32_t* per_channel_multiplier, int32_t* per_channel_shift, + int num_channels) { + TF_LITE_ENSURE_EQ(context, input->quantization.type, + kTfLiteAffineQuantization); + TF_LITE_ENSURE_EQ(context, filter->quantization.type, + kTfLiteAffineQuantization); + // TODO(jianlijianli): Enable bias type check and bias scale == input scale + // * filter scale for each channel in affine quantization once bias + // quantization is properly populated. + // TF_LITE_ENSURE_EQ(context, bias->quantization.type, + // kTfLiteAffineQuantization); + + // Check data type. + const auto* affine_quantization = + reinterpret_cast(filter->quantization.params); + TF_LITE_ENSURE(context, affine_quantization); + TF_LITE_ENSURE(context, affine_quantization->scale); + const bool is_per_channel = affine_quantization->scale->size > 1; + if (is_per_channel) { + // Currently only Int8/Int16 is supported for per channel quantization. + TF_LITE_ENSURE(context, + input->type == kTfLiteInt8 || input->type == kTfLiteInt16); + TF_LITE_ENSURE(context, + filter->type == kTfLiteInt8 || filter->type == kTfLiteInt4); + TF_LITE_ENSURE_EQ(context, affine_quantization->scale->size, num_channels); + TF_LITE_ENSURE_EQ( + context, num_channels, + filter->dims->data[affine_quantization->quantized_dimension]); + } + + // Populate multiplier and shift using affine quantization. + const float input_scale = input->params.scale; + const float output_scale = output->params.scale; + const float* filter_scales = affine_quantization->scale->data; + for (int i = 0; i < num_channels; ++i) { + // If per-tensor quantization parameter is specified, broadcast it along the + // quantization dimension (channels_out). + const float scale = is_per_channel ? 
filter_scales[i] : filter_scales[0]; + const double filter_scale = static_cast(scale); + const double effective_output_scale = static_cast(input_scale) * + filter_scale / + static_cast(output_scale); + int32_t significand; + int channel_shift; + QuantizeMultiplier(effective_output_scale, &significand, &channel_shift); + per_channel_multiplier[i] = significand; + per_channel_shift[i] = channel_shift; + } + + // Populate scalar quantization parameters. + // This check on legacy quantization parameters is kept only for backward + // compatibility. + if (input->type == kTfLiteUInt8) { + // Check bias scale == input scale * filter scale. + double real_multiplier = 0.0; + TF_LITE_ENSURE_STATUS(GetQuantizedConvolutionMultipler( + context, input, filter, bias, output, &real_multiplier)); + int exponent; + + // Populate quantization parameters with multiplier and shift. + QuantizeMultiplier(real_multiplier, multiplier, &exponent); + *shift = -exponent; + } + if (input->type == kTfLiteInt8 || input->type == kTfLiteUInt8 || + input->type == kTfLiteInt16) { + TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized( + context, activation, output, output_activation_min, + output_activation_max)); + } + return kTfLiteOk; +} + +TfLiteStatus GetQuantizedConvolutionMultipler(TfLiteContext* context, + const TfLiteTensor* input, + const TfLiteTensor* filter, + const TfLiteTensor* bias, + TfLiteTensor* output, + double* multiplier) { + const double input_product_scale = static_cast(input->params.scale) * + static_cast(filter->params.scale); + // The following conditions must be guaranteed by the training pipeline. + if (bias) { + const double bias_scale = static_cast(bias->params.scale); + // Here we're making sure the input_product_scale & bias_scale are about the + // same. Since we have: + // (output - output_zp) * output_scale = + // input_product_scale * input_product + bias * bias_scale ---- (0) + // + // (0) equals: + // (input_product + bias) * input_product_scale ----- (1) + // + + // bias * (bias_scale - input_product_scale) ------ (2) + // + // For the real kernel computation, we're doing (1), so we really need to + // make sure (2) has minimum impact on the output, so: + // bias * (bias_scale - input_product_scale) / output_scale should be + // a small number for an integer. + // Since normally bias should be within a small range. + // We should expect (bias_scale - input_product_scale) / output_scale to + // be a small number like 0.02. 
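+    // Worked example with hypothetical scales (for illustration only): if
+    // input_scale = 0.5 and filter_scale = 0.02, then input_product_scale is
+    // 0.01; a bias_scale of 0.0101 and an output_scale of 0.05 give
+    // |0.01 - 0.0101| / 0.05 = 0.002, comfortably within the 2% tolerance
+    // enforced below.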
+ const double scale_diff = std::abs(input_product_scale - bias_scale); + const double output_scale = static_cast(output->params.scale); + + TF_LITE_ENSURE(context, scale_diff / output_scale <= 0.02); + } + return GetQuantizedConvolutionMultipler(context, input, filter, output, + multiplier); +} + +TfLiteStatus GetQuantizedConvolutionMultipler(TfLiteContext* context, + const TfLiteTensor* input, + const TfLiteTensor* filter, + TfLiteTensor* output, + double* multiplier) { + const double input_product_scale = + static_cast(input->params.scale * filter->params.scale); + TF_LITE_ENSURE(context, input_product_scale >= 0); + *multiplier = input_product_scale / static_cast(output->params.scale); + + return kTfLiteOk; +} + +namespace { + +inline TfLiteStatus Quantize(TfLiteContext* context, float scale, + int32_t zero_point, float f, int32_t& q) { + const float tmp = TfLiteRound(f / scale); + const bool no_integer_overflow_from_quantization = + (tmp >= static_cast(std::numeric_limits::min()) && + tmp <= static_cast(std::numeric_limits::max())); + TF_LITE_ENSURE(context, no_integer_overflow_from_quantization); + q = zero_point + static_cast(tmp); + return kTfLiteOk; +} + +TfLiteStatus CalculateActivationRangeQuantizedImpl( + TfLiteContext* context, TfLiteFusedActivation activation, int32_t qmin, + int32_t qmax, TfLiteTensor* output, int32_t* act_min, int32_t* act_max) { + const auto scale = output->params.scale; + const auto zero_point = output->params.zero_point; + + int32_t tmp_q; + if (activation == kTfLiteActRelu) { + TF_LITE_ENSURE_OK(context, + Quantize(context, scale, zero_point, 0.0, tmp_q)); + *act_min = std::max(qmin, tmp_q); + *act_max = qmax; + } else if (activation == kTfLiteActRelu6) { + TF_LITE_ENSURE_OK(context, + Quantize(context, scale, zero_point, 0.0, tmp_q)); + *act_min = std::max(qmin, tmp_q); + TF_LITE_ENSURE_OK(context, + Quantize(context, scale, zero_point, 6.0, tmp_q)); + *act_max = std::min(qmax, tmp_q); + } else if (activation == kTfLiteActReluN1To1) { + TF_LITE_ENSURE_OK(context, + Quantize(context, scale, zero_point, -1.0, tmp_q)); + *act_min = std::max(qmin, tmp_q); + TF_LITE_ENSURE_OK(context, + Quantize(context, scale, zero_point, 1.0, tmp_q)); + *act_max = std::min(qmax, tmp_q); + } else { + *act_min = qmin; + *act_max = qmax; + } + return kTfLiteOk; +} +} // namespace + +TfLiteStatus CalculateActivationRangeQuantized(TfLiteContext* context, + TfLiteFusedActivation activation, + TfLiteTensor* output, + int32_t* act_min, + int32_t* act_max) { + int32_t qmin = 0; + int32_t qmax = 0; + if (output->type == kTfLiteUInt8) { + qmin = std::numeric_limits::min(); + qmax = std::numeric_limits::max(); + } else if (output->type == kTfLiteInt8) { + qmin = std::numeric_limits::min(); + qmax = std::numeric_limits::max(); + } else if (output->type == kTfLiteInt16) { + qmin = std::numeric_limits::min(); + qmax = std::numeric_limits::max(); + } else { + TF_LITE_ENSURE(context, false); + } + + return CalculateActivationRangeQuantizedImpl(context, activation, qmin, qmax, + output, act_min, act_max); +} + +bool HaveSameShapes(const TfLiteTensor* input1, const TfLiteTensor* input2) { + return TfLiteIntArrayEqual(input1->dims, input2->dims); +} + +#ifndef TF_LITE_STATIC_MEMORY +TfLiteStatus GetOutputShapeFromInput(TfLiteContext* context, + const TfLiteTensor* input, + TfLiteIntArray** output_shape) { + if (NumDimensions(input) != 1) { + TF_LITE_KERNEL_LOG(const_cast(context), + "Invalid %dD input tensor (must be a 1D tensor).", + NumDimensions(input)); + return kTfLiteError; + } + const 
int output_dims = SizeOfDimension(input, 0); + std::unique_ptr shape( + TfLiteIntArrayCreate(output_dims), TfLiteIntArrayFree); + for (int i = 0; i < output_dims; i++) { + shape->data[i] = input->data.i32[i]; + } + *output_shape = shape.release(); + return kTfLiteOk; +} + +// TODO(b/172067338): Having this function be part of TF_LITE_STATIC_MEMORY +// build results in a 6KB size increase, even though the function is unsused for +// that build. What appears to be happening is that while the linker drops the +// unsused function, the string library that gets pulled in is not dropped, +// resulting in the increased binary size. +// Patched by Edge Impulse, issues with building for TinkerGen +// TODO inspect if we still need this +const std::string GetShapeDebugString(const TfLiteIntArray* shape) { + std::string str = "GetShapeDebugString";; + /* + for (int d = 0; d < shape->size; ++d) { + if (str.empty()) + str = "[" + std::to_string(shape->data[d]); + else + // Don't add space after "," to make the output consistent with + // tensorflow::shape_inference::InferenceContext::DebugString() + str += "," + std::to_string(shape->data[d]); + } + if (str.empty()) { + str = "[]"; + } else { + str += "]"; + } + */ + return str; +} + +TfLiteStatus CalculateShapeForBroadcast(TfLiteContext* context, + const TfLiteTensor* input1, + const TfLiteTensor* input2, + TfLiteIntArray** output_shape) { + const int dims1 = NumDimensions(input1); + const int dims2 = NumDimensions(input2); + const int out_dims = std::max(dims1, dims2); + + std::unique_ptr shape( + TfLiteIntArrayCreate(out_dims), TfLiteIntArrayFree); + for (int i = 0; i < out_dims; ++i) { + const int d1 = i >= dims1 ? 1 : SizeOfDimension(input1, dims1 - i - 1); + const int d2 = i >= dims2 ? 1 : SizeOfDimension(input2, dims2 - i - 1); + if (!(d1 == d2 || d1 == 1 || d2 == 1)) { + TF_LITE_KERNEL_LOG(context, + "Given shapes, %s and %s, are not broadcastable.", + GetShapeDebugString(input1->dims).c_str(), + GetShapeDebugString(input2->dims).c_str()); + return kTfLiteError; + } + + if (d1 == 0 || d2 == 0) { + shape->data[out_dims - i - 1] = 0; + } else { + shape->data[out_dims - i - 1] = std::max(d1, d2); + } + } + *output_shape = shape.release(); + return kTfLiteOk; +} + +TfLiteStatus CalculateShapeForBroadcast(TfLiteContext* context, + const TfLiteTensor* input1, + const TfLiteTensor* input2, + const TfLiteTensor* input3, + TfLiteIntArray** output_shape) { + const int dims1 = NumDimensions(input1); + const int dims2 = NumDimensions(input2); + const int dims3 = NumDimensions(input3); + const int out_dims = std::max(std::max(dims1, dims2), dims3); + std::unique_ptr shape( + TfLiteIntArrayCreate(out_dims), TfLiteIntArrayFree); + for (int i = 0; i < out_dims; ++i) { + const int d1 = i >= dims1 ? 1 : SizeOfDimension(input1, dims1 - i - 1); + const int d2 = i >= dims2 ? 1 : SizeOfDimension(input2, dims2 - i - 1); + const int d3 = i >= dims3 ? 1 : SizeOfDimension(input3, dims3 - i - 1); + const int min_value = std::min(std::min(d1, d2), d3); + int max_value = std::max(std::max(d1, d2), d3); + // If one dimention is 0, others must be 0 or 1. 
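+    // Illustrative example (added for clarity): shapes [2,1,4], [1,3,1] and
+    // [1,1,4] broadcast to [2,3,4]; with a zero-sized dimension, e.g. [2,0,4],
+    // [1,1,4] and [2,1,1], the result is [2,0,4].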
+ if (min_value == 0) max_value = 0; + if (!(d1 == 1 || d1 == max_value) || !(d2 == 1 || d2 == max_value) || + !(d3 == 1 || d3 == max_value)) { + TF_LITE_KERNEL_LOG(context, + "Given shapes, %s, %s and %s, are not broadcastable.", + GetShapeDebugString(input1->dims).c_str(), + GetShapeDebugString(input2->dims).c_str(), + GetShapeDebugString(input3->dims).c_str()); + return kTfLiteError; + } + shape->data[out_dims - i - 1] = max_value; + } + *output_shape = shape.release(); + return kTfLiteOk; +} +#endif // TF_LITE_STATIC_MEMORY + +// Size of string is not constant, return 0 in such case. +int TfLiteTypeGetSize(TfLiteType type) { + switch (type) { + case kTfLiteUInt8: + static_assert(sizeof(uint8_t) == 1, ""); + return 1; + case kTfLiteInt8: + static_assert(sizeof(int8_t) == 1, ""); + return 1; + case kTfLiteBool: + return sizeof(bool); + case kTfLiteUInt16: + static_assert(sizeof(uint16_t) == 2, ""); + return 2; + case kTfLiteInt16: + static_assert(sizeof(int16_t) == 2, ""); + return 2; + case kTfLiteFloat16: + static_assert(sizeof(int16_t) == 2, ""); + return 2; + case kTfLiteFloat32: + static_assert(sizeof(float) == 4, ""); + return 4; + case kTfLiteInt32: + static_assert(sizeof(int32_t) == 4, ""); + return 4; + case kTfLiteUInt32: + static_assert(sizeof(uint32_t) == 4, ""); + return 4; + case kTfLiteInt64: + static_assert(sizeof(int64_t) == 8, ""); + return 8; + case kTfLiteUInt64: + static_assert(sizeof(uint64_t) == 8, ""); + return 8; + case kTfLiteFloat64: + static_assert(sizeof(double) == 8, ""); + return 8; + case kTfLiteComplex64: + static_assert(sizeof(std::complex) == 8, ""); + return 8; + case kTfLiteComplex128: + static_assert(sizeof(std::complex) == 16, ""); + return 16; + default: + return 0; + } +} + +bool IsMobilePlatform() { +#if defined(ANDROID) || defined(__ANDROID__) + return true; +#elif defined(__APPLE__) +#if TARGET_IPHONE_SIMULATOR || TARGET_OS_IPHONE + return true; +#endif +#endif + return false; +} + +bool HasUnspecifiedDimension(const TfLiteTensor* tensor) { +#ifndef TF_LITE_STATIC_MEMORY + if (tensor->dims_signature) { + for (int i : TfLiteIntArrayView(tensor->dims_signature)) { + if (i == -1) return true; + } + } +#endif // TF_LITE_STATIC_MEMORY + return false; +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h new file mode 100644 index 0000000..d3c50bb --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h @@ -0,0 +1,38 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_OP_MACROS_H_ +#define TENSORFLOW_LITE_KERNELS_OP_MACROS_H_ + +#include "edge-impulse-sdk/tensorflow/lite/micro/debug_log.h" + +#if !defined(TF_LITE_MCU_DEBUG_LOG) +#include +#define TFLITE_ABORT abort() +#else +inline void AbortImpl() { + DebugLog("HALTED\n"); + while (1) { + } +} +#define TFLITE_ABORT AbortImpl(); +#endif + +#if defined(NDEBUG) +#define TFLITE_ASSERT_FALSE (static_cast(0)) +#else +#define TFLITE_ASSERT_FALSE TFLITE_ABORT +#endif + +#endif // TENSORFLOW_LITE_KERNELS_OP_MACROS_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/padding.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/padding.h new file mode 100644 index 0000000..836ca92 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/padding.h @@ -0,0 +1,115 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_PADDING_H_ +#define TENSORFLOW_LITE_KERNELS_PADDING_H_ + +#include "edge-impulse-sdk/tensorflow/lite/core/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" + +namespace tflite { + +inline int ComputePadding(int stride, int dilation_rate, int in_size, + int filter_size, int out_size) { + int effective_filter_size = (filter_size - 1) * dilation_rate + 1; + int padding = ((out_size - 1) * stride + effective_filter_size - in_size) / 2; + return padding > 0 ? padding : 0; +} + +// It's not guaranteed that padding is symmetric. It's important to keep +// offset for algorithms need all paddings. +inline int ComputePaddingWithOffset(int stride, int dilation_rate, int in_size, + int filter_size, int out_size, + int* offset) { + int effective_filter_size = (filter_size - 1) * dilation_rate + 1; + int total_padding = + ((out_size - 1) * stride + effective_filter_size - in_size); + total_padding = total_padding > 0 ? total_padding : 0; + *offset = total_padding % 2; + return total_padding / 2; +} + +// Matching GetWindowedOutputSize in TensorFlow. 
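+// Worked example (added for clarity): image_size = 10, filter_size = 3,
+// stride = 2 and dilation_rate = 1 give an effective filter size of 3;
+// kTfLitePaddingSame yields (10 + 2 - 1) / 2 = 5, while kTfLitePaddingValid
+// yields (10 + 2 - 3) / 2 = 4.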
+inline int ComputeOutSize(TfLitePadding padding, int image_size, + int filter_size, int stride, int dilation_rate = 1) { + int effective_filter_size = (filter_size - 1) * dilation_rate + 1; + + // TODO(b/186448822): This uses 0 since the function has no other way to + // report error case + if (stride == 0) return 0; + + switch (padding) { + case kTfLitePaddingSame: + return (image_size + stride - 1) / stride; + case kTfLitePaddingValid: + return (image_size + stride - effective_filter_size) / stride; + default: + return 0; + } +} + +inline TfLitePaddingValues ComputePaddingHeightWidth( + int stride_height, int stride_width, int dilation_rate_height, + int dilation_rate_width, int in_height, int in_width, int filter_height, + int filter_width, TfLitePadding padding, int* out_height, int* out_width) { + *out_width = ComputeOutSize(padding, in_width, filter_width, stride_width, + dilation_rate_width); + *out_height = ComputeOutSize(padding, in_height, filter_height, stride_height, + dilation_rate_height); + + TfLitePaddingValues padding_values; + int offset = 0; + padding_values.height = + ComputePaddingWithOffset(stride_height, dilation_rate_height, in_height, + filter_height, *out_height, &offset); + padding_values.height_offset = offset; + padding_values.width = + ComputePaddingWithOffset(stride_width, dilation_rate_width, in_width, + filter_width, *out_width, &offset); + padding_values.width_offset = offset; + return padding_values; +} + +inline Padding3DValues ComputePadding3DValues( + int stride_height, int stride_width, int stride_depth, + int dilation_rate_height, int dilation_rate_width, int dilation_rate_depth, + int in_height, int in_width, int in_depth, int filter_height, + int filter_width, int filter_depth, TfLitePadding padding, int* out_height, + int* out_width, int* out_depth) { + *out_width = ComputeOutSize(padding, in_width, filter_width, stride_width, + dilation_rate_width); + *out_height = ComputeOutSize(padding, in_height, filter_height, stride_height, + dilation_rate_height); + *out_depth = ComputeOutSize(padding, in_depth, filter_depth, stride_depth, + dilation_rate_depth); + + Padding3DValues padding_values; + int offset = 0; + padding_values.depth = + ComputePaddingWithOffset(stride_depth, dilation_rate_depth, in_depth, + filter_depth, *out_depth, &offset); + padding_values.depth_offset = offset; + padding_values.height = + ComputePaddingWithOffset(stride_height, dilation_rate_height, in_height, + filter_height, *out_height, &offset); + padding_values.height_offset = offset; + padding_values.width = + ComputePaddingWithOffset(stride_width, dilation_rate_width, in_width, + filter_width, *out_width, &offset); + padding_values.width_offset = offset; + return padding_values; +} +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_PADDING_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/tree_ensemble_classifier.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/tree_ensemble_classifier.cpp new file mode 100644 index 0000000..e330644 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/tree_ensemble_classifier.cpp @@ -0,0 +1,187 @@ +/* Copyright 2023 Edge Impulse Inc. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#define FLATBUFFERS_LOCALE_INDEPENDENT 0 +#include +#include +#include + +#include +#include +#include +#include + +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/flexbuffers.h" // from @flatbuffers +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" + +#define FEATURE_TYPE float + +namespace tflite { +namespace ops { +namespace custom { +namespace tree_ensemble_classifier { + +struct OpDataTree { + uint32_t num_leaf_nodes; + uint32_t num_internal_nodes; + uint32_t num_trees; + const uint16_t* nodes_modes; + const uint16_t* nodes_featureids; + const float* nodes_values; + const uint16_t* nodes_truenodeids; + const uint16_t* nodes_falsenodeids; + const float* nodes_weights; + const uint8_t* nodes_classids; + const uint16_t* tree_root_ids; + const uint8_t* buffer_t; + size_t buffer_length; +}; + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + + const uint8_t* buffer_t = reinterpret_cast(buffer); + const flexbuffers::Map& m = flexbuffers::GetRoot(buffer_t, length).AsMap(); + + auto* data = new OpDataTree; + + data->buffer_t = buffer_t; + data->buffer_length = length; + + data->num_leaf_nodes = m["num_leaf_nodes"].AsUInt32(); + data->num_internal_nodes = m["num_internal_nodes"].AsUInt32(); + data->num_trees = m["num_trees"].AsUInt32(); + + data->nodes_modes = (uint16_t*)(m["nodes_modes"].AsBlob().data()); + data->nodes_featureids = (uint16_t*)(m["nodes_featureids"].AsBlob().data()); + data->nodes_values = (float*)(m["nodes_values"].AsBlob().data()); + data->nodes_truenodeids = (uint16_t*)(m["nodes_truenodeids"].AsBlob().data()); + data->nodes_falsenodeids = (uint16_t*)(m["nodes_falsenodeids"].AsBlob().data()); + data->nodes_weights = (float*)(m["nodes_weights"].AsBlob().data()); + data->nodes_classids = (uint8_t*)(m["nodes_classids"].AsBlob().data()); + data->tree_root_ids = (uint16_t*)(m["tree_root_ids"].AsBlob().data()); + + return data; +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + + const OpDataTree* data = static_cast(node->user_data); + const flexbuffers::Map& m = flexbuffers::GetRoot(data->buffer_t, data->buffer_length).AsMap(); + + // The OOB checks below are very important to prevent vulnerabilities where an adversary sends + // us a malicious TFLite model, similar to: https://nvd.nist.gov/vuln/detail/CVE-2022-23560 + + int num_nodes = data->num_leaf_nodes + data->num_internal_nodes; + + // Check that the tree root ids are valid. 
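+  // Node ids place the internal nodes first: ids in [0, num_internal_nodes)
+  // are internal and ids in [num_internal_nodes, num_nodes) are leaves, which
+  // is why any id below num_nodes is in range here (see the traversal in Eval).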
+ for (uint32_t i = 0; i < data->num_trees; i++) { + TF_LITE_ENSURE_EQ(context, data->tree_root_ids[i] < num_nodes, true); + TF_LITE_ENSURE_EQ(context, data->tree_root_ids[i] >= 0, true); + } + + // Check that all node indices are valid + for (uint32_t i = 0; i < data->num_internal_nodes; i++) { + TF_LITE_ENSURE_EQ(context, data->nodes_truenodeids[i] < num_nodes, true); + TF_LITE_ENSURE_EQ(context, data->nodes_truenodeids[i] >= 0, true); + TF_LITE_ENSURE_EQ(context, data->nodes_falsenodeids[i] < num_nodes, true); + TF_LITE_ENSURE_EQ(context, data->nodes_falsenodeids[i] >= 0, true); + } + + // Check all node arrays have the same length + TF_LITE_ENSURE_EQ(context, data->num_internal_nodes, m["nodes_featureids"].AsBlob().size()); + TF_LITE_ENSURE_EQ(context, data->num_internal_nodes, m["nodes_values"].AsBlob().size()); + TF_LITE_ENSURE_EQ(context, data->num_internal_nodes, m["nodes_truenodeids"].AsBlob().size()); + TF_LITE_ENSURE_EQ(context, data->num_internal_nodes, m["nodes_falsenodeids"].AsBlob().size()); + TF_LITE_ENSURE_EQ(context, data->num_leaf_nodes, m["nodes_weights"].AsBlob().size()); + TF_LITE_ENSURE_EQ(context, data->num_leaf_nodes, m["nodes_classids"].AsBlob().size()); + + // Check data types are supported. Currently we only support one combination. + TF_LITE_ENSURE_EQ(context, strncmp(m["tree_index_type"].AsString().c_str(), "uint16", 6), 0); + TF_LITE_ENSURE_EQ(context, strncmp(m["node_value_type"].AsString().c_str(), "float32", 7), 0); + TF_LITE_ENSURE_EQ(context, strncmp(m["class_index_type"].AsString().c_str(), "uint8", 5), 0); + TF_LITE_ENSURE_EQ(context, strncmp(m["class_weight_type"].AsString().c_str(), "float32", 7), 0); + TF_LITE_ENSURE_EQ(context, strncmp(m["equality_operator"].AsString().c_str(), "leq", 3), 0); + + TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + const TfLiteTensor* input = GetInput(context, node, 0); + TF_LITE_ENSURE(context, input != nullptr); + TF_LITE_ENSURE(context, NumDimensions(input) == 2); + TfLiteTensor* output = GetOutput(context, node, 0); + TF_LITE_ENSURE(context, output != nullptr); + + int input_width = SizeOfDimension(input, 1); + int output_width = SizeOfDimension(output, 1); + + // Check that all indices into the input/output tensor are valid + for (uint32_t i = 0; i < data->num_internal_nodes; i++) { + TF_LITE_ENSURE(context, data->nodes_featureids[i] < input_width); + TF_LITE_ENSURE(context, data->nodes_featureids[i] >= 0); + if (data->nodes_modes[i] == 0) { + TF_LITE_ENSURE(context, data->nodes_classids[i] < output_width); + TF_LITE_ENSURE(context, data->nodes_classids[i] >= 0); + } + } + + return kTfLiteOk; +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + + const OpDataTree* data = static_cast(node->user_data); + const TfLiteTensor* input; + TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input)); + TfLiteTensor* output; + TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output)); + + float* output_data = GetTensorData(output); + memset(output_data, 0, GetTensorShape(output).FlatSize() * sizeof(float)); + + for (uint32_t i = 0; i < data->num_trees; i++) { + uint16_t ix = data->tree_root_ids[i]; + while (ix < data->num_internal_nodes) { + if (input->data.f[data->nodes_featureids[ix]] <= data->nodes_values[ix]) { + ix = data->nodes_truenodeids[ix]; + } else { + ix = data->nodes_falsenodeids[ix]; + } + } + ix -= data->num_internal_nodes; + output->data.f[data->nodes_classids[ix]] += data->nodes_weights[ix]; + } + + return kTfLiteOk; +} + 
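+// Usage sketch (illustrative, not part of the upstream kernel): the op can be
+// registered on a MicroMutableOpResolver before the interpreter is built. The
+// custom-op name string below is an assumption and must match the name stored
+// in the model flatbuffer.
+//
+//   static tflite::MicroMutableOpResolver<1> resolver;
+//   resolver.AddCustom("TreeEnsembleClassifier",
+//                      tflite::ops::custom::Register_TREE_ENSEMBLE_CLASSIFIER());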
+} // namespace + +TfLiteRegistration* Register_TREE_ENSEMBLE_CLASSIFIER() { + + static TfLiteRegistration r = { + tree_ensemble_classifier::Init, nullptr, + tree_ensemble_classifier::Prepare, tree_ensemble_classifier::Eval}; + return &r; +} + +TfLiteRegistration* Register_TFLITE_TREE_ENSEMBLE_CLASSIFIER() { + return Register_TREE_ENSEMBLE_CLASSIFIER(); +} + +} // namespace custom +} // namespace ops +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/tree_ensemble_classifier.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/tree_ensemble_classifier.h new file mode 100644 index 0000000..fcdc98a --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/kernels/tree_ensemble_classifier.h @@ -0,0 +1,31 @@ +/* Copyright 2023 Edge Impulse Inc. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_MICRO_KERNELS_TREE_ENSEMBLE_CLASSIFIER_H_ +#define TENSORFLOW_LITE_MICRO_KERNELS_TREE_ENSEMBLE_CLASSIFIER_H_ + +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" + +namespace tflite { +namespace ops { +namespace custom { + +TfLiteRegistration* Register_TREE_ENSEMBLE_CLASSIFIER(); + +} +} +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_KERNELS_TREE_ENSEMBLE_CLASSIFIER_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/all_ops_resolver.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/all_ops_resolver.cpp new file mode 100644 index 0000000..e9d2d6f --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/all_ops_resolver.cpp @@ -0,0 +1,135 @@ +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/micro/all_ops_resolver.h" + +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/micro_ops.h" + +namespace tflite { + +AllOpsResolver::AllOpsResolver() { + // Please keep this list of Builtin Operators in alphabetical order. 
+ AddAbs(); + AddAdd(); + AddAddN(); + AddArgMax(); + AddArgMin(); + AddAssignVariable(); + AddAveragePool2D(); + AddBatchMatMul(); + AddBatchToSpaceNd(); + AddBroadcastArgs(); + AddBroadcastTo(); + AddCallOnce(); + AddCast(); + AddCeil(); + AddComplexAbs(); + AddCircularBuffer(); + AddConcatenation(); + AddConv2D(); + AddCos(); + AddCumSum(); + AddDepthToSpace(); + AddDepthwiseConv2D(); + AddDequantize(); + AddDetectionPostprocess(); + AddDiv(); + AddElu(); + AddEqual(); + AddEthosU(); + AddExp(); + AddExpandDims(); + AddFill(); + AddFloor(); + AddFloorDiv(); + AddFloorMod(); + AddFullyConnected(); +#ifndef TF_LITE_STATIC_MEMORY + AddGather(); +#endif // TF_LITE_STATIC_MEMORY + AddGatherNd(); + AddGreater(); + AddGreaterEqual(); + AddHardSwish(); + AddImag(); + AddIf(); + AddL2Normalization(); + AddL2Pool2D(); + AddLeakyRelu(); + AddLess(); + AddLessEqual(); + AddLog(); + AddLogicalAnd(); + AddLogicalNot(); + AddLogicalOr(); + AddLogistic(); + AddLogSoftmax(); + AddMaxPool2D(); + AddMaximum(); + AddMean(); + AddMinimum(); + AddMirrorPad(); + AddMul(); + AddNeg(); + AddNotEqual(); + AddPack(); + AddPad(); + AddPadV2(); + AddPrelu(); + AddQuantize(); + AddReal(); + AddReadVariable(); + AddReduceMax(); + AddReduceMin(); + AddRelu(); + AddRelu6(); + AddReshape(); + AddResizeBilinear(); + AddResizeNearestNeighbor(); + AddRfft2D(); + AddRound(); + AddRsqrt(); +#ifndef TF_LITE_STATIC_MEMORY + AddSelect(); + AddSelectV2(); +#endif // TF_LITE_STATIC_MEMORY + AddShape(); + AddSin(); + AddSlice(); + AddSoftmax(); + AddSpaceToBatchNd(); + AddSpaceToDepth(); + AddSplit(); + AddSplitV(); + AddSqrt(); + AddSquare(); + AddSquaredDifference(); + AddSqueeze(); + AddStridedSlice(); + AddSub(); + AddSum(); + AddSvdf(); + AddTanh(); + AddTranspose(); + AddTransposeConv(); + AddTreeEnsembleClassifier(); + AddUnidirectionalSequenceLstm(); + AddUnpack(); + AddVarHandle(); + AddWhile(); + AddZerosLike(); +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/all_ops_resolver.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/all_ops_resolver.h new file mode 100644 index 0000000..8d468ef --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/all_ops_resolver.h @@ -0,0 +1,38 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_MICRO_ALL_OPS_RESOLVER_H_ +#define TENSORFLOW_LITE_MICRO_ALL_OPS_RESOLVER_H_ + +#include "edge-impulse-sdk/tensorflow/lite/micro/compatibility.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_mutable_op_resolver.h" + +namespace tflite { + +// The magic number in the template parameter is the maximum number of ops that +// can be added to AllOpsResolver. It can be increased if needed. And most +// applications that care about the memory footprint will want to directly use +// MicroMutableOpResolver and have an application specific template parameter. 
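+// For instance (illustrative sketch; the op selection is an assumption), an
+// application that only needs a handful of kernels might declare:
+//   static tflite::MicroMutableOpResolver<4> resolver;
+//   resolver.AddConv2D();
+//   resolver.AddFullyConnected();
+//   resolver.AddSoftmax();
+//   resolver.AddReshape();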
+// The examples directory has sample code for this. +class AllOpsResolver : public MicroMutableOpResolver<128> { + public: + AllOpsResolver(); + + private: + TF_LITE_REMOVE_VIRTUAL_DELETE +}; + +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_ALL_OPS_RESOLVER_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/compatibility.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/compatibility.h new file mode 100644 index 0000000..db117ca --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/compatibility.h @@ -0,0 +1,31 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_MICRO_COMPATIBILITY_H_ +#define TENSORFLOW_LITE_MICRO_COMPATIBILITY_H_ + +// C++ will automatically create class-specific delete operators for virtual +// objects, which by default call the global delete function. For embedded +// applications we want to avoid this, and won't be calling new/delete on these +// objects, so we need to override the default implementation with one that does +// nothing to avoid linking in ::delete(). +// This macro needs to be included in all subclasses of a virtual base class in +// the private section. + +// Patched by Edge Impulse, +// actually declaring `void operator delete(void* p) {}` +// yields compiler errors on some compilers +#define TF_LITE_REMOVE_VIRTUAL_DELETE + +#endif // TENSORFLOW_LITE_MICRO_COMPATIBILITY_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/debug_log.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/debug_log.h new file mode 100644 index 0000000..f3ba464 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/debug_log.h @@ -0,0 +1,28 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_MICRO_DEBUG_LOG_H_ +#define TENSORFLOW_LITE_MICRO_DEBUG_LOG_H_ + +// This function should be implemented by each target platform, and provide a +// way for strings to be output to some text stream. For more information, see +// tensorflow/lite/micro/debug_log.cc. 
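+// A minimal sketch of a platform implementation (illustrative; the actual
+// output sink is target specific and an assumption here):
+//
+//   #include <cstdio>
+//   extern "C" void DebugLog(const char* s) {
+//     fputs(s, stderr);  // swap in a UART/RTT/semihosting write on bare metal
+//   }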
+#if defined(__cplusplus) && EI_C_LINKAGE == 1 +extern "C" void DebugLog(const char* s); +#else +void DebugLog(const char* s); +#endif // defined(__cplusplus) && EI_C_LINKAGE == 1 + +#endif // TENSORFLOW_LITE_MICRO_DEBUG_LOG_H_ + diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/fake_micro_context.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/fake_micro_context.cpp new file mode 100644 index 0000000..5ca66ab --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/fake_micro_context.cpp @@ -0,0 +1,110 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/micro/fake_micro_context.h" + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/single_arena_buffer_allocator.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_allocator.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_arena_constants.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +namespace tflite { + +// Patched by Edge Impulse +constexpr int FakeMicroContext::kNumScratchBuffers_; + +namespace { +// Dummy static variables to allow creation of dummy MicroAllocator. +// All tests are guarateed to run serially. +static constexpr int KDummyTensorArenaSize = 256; +static uint8_t dummy_tensor_arena[KDummyTensorArenaSize]; +} // namespace + +FakeMicroContext::FakeMicroContext(TfLiteTensor* tensors, + SingleArenaBufferAllocator* allocator, + MicroGraph* micro_graph) + : MicroContext( + MicroAllocator::Create(dummy_tensor_arena, KDummyTensorArenaSize), + nullptr, micro_graph), + tensors_(tensors), + allocator_(allocator) {} + +TfLiteTensor* FakeMicroContext::AllocateTempTfLiteTensor(int tensor_index) { + allocated_tensor_count_++; + return &tensors_[tensor_index]; +} + +void FakeMicroContext::DeallocateTempTfLiteTensor(TfLiteTensor* tensor) { + allocated_tensor_count_--; +} + +bool FakeMicroContext::IsAllTempTfLiteTensorDeallocated() { + return !allocated_tensor_count_; +} + +TfLiteEvalTensor* FakeMicroContext::GetEvalTensor(int tensor_index) { + TfLiteEvalTensor* eval_tensor = + reinterpret_cast(allocator_->AllocateTemp( + sizeof(TfLiteEvalTensor), alignof(TfLiteEvalTensor))); + TFLITE_DCHECK(eval_tensor != nullptr); + + // In unit tests, the TfLiteTensor pointer contains the source of truth for + // buffers and values: + eval_tensor->data = tensors_[tensor_index].data; + eval_tensor->dims = tensors_[tensor_index].dims; + eval_tensor->type = tensors_[tensor_index].type; + return eval_tensor; +} + +void* FakeMicroContext::AllocatePersistentBuffer(size_t bytes) { + // FakeMicroContext use SingleArenaBufferAllocator, which does not + // automatically apply the buffer alignment like MicroAllocator. The buffer + // alignment is potentially wasteful but allows the fake_micro_context to work + // correctly with optimized kernels. 
+ return allocator_->AllocatePersistentBuffer(bytes, + MicroArenaBufferAlignment()); +} + +TfLiteStatus FakeMicroContext::RequestScratchBufferInArena(size_t bytes, + int* buffer_index) { + TFLITE_DCHECK(buffer_index != nullptr); + + if (scratch_buffer_count_ == kNumScratchBuffers_) { + MicroPrintf("Exceeded the maximum number of scratch tensors allowed (%d).", + kNumScratchBuffers_); + return kTfLiteError; + } + + // For tests, we allocate scratch buffers from the tail and keep them around + // for the lifetime of model. This means that the arena size in the tests will + // be more than what we would have if the scratch buffers could share memory. + scratch_buffers_[scratch_buffer_count_] = + allocator_->AllocatePersistentBuffer(bytes, MicroArenaBufferAlignment()); + TFLITE_DCHECK(scratch_buffers_[scratch_buffer_count_] != nullptr); + + *buffer_index = scratch_buffer_count_++; + return kTfLiteOk; +} + +void* FakeMicroContext::GetScratchBuffer(int buffer_index) { + TFLITE_DCHECK(scratch_buffer_count_ <= kNumScratchBuffers_); + if (buffer_index >= scratch_buffer_count_) { + return nullptr; + } + return scratch_buffers_[buffer_index]; +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/fake_micro_context.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/fake_micro_context.h new file mode 100644 index 0000000..a7af023 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/fake_micro_context.h @@ -0,0 +1,56 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_MICRO_FAKE_MICRO_CONTEXT_H_ +#define TENSORFLOW_LITE_MICRO_FAKE_MICRO_CONTEXT_H_ + +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_context.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_graph.h" + +namespace tflite { +// A fake of MicroContext for kernel util tests. 
+class FakeMicroContext : public MicroContext { + public: + FakeMicroContext(TfLiteTensor* tensors, SingleArenaBufferAllocator* allocator, + MicroGraph* micro_graph); + + void* AllocatePersistentBuffer(size_t bytes) override; + TfLiteStatus RequestScratchBufferInArena(size_t bytes, + int* buffer_index) override; + void* GetScratchBuffer(int buffer_index) override; + + TfLiteTensor* AllocateTempTfLiteTensor(int tensor_index) override; + void DeallocateTempTfLiteTensor(TfLiteTensor* tensor) override; + bool IsAllTempTfLiteTensorDeallocated(); + + TfLiteEvalTensor* GetEvalTensor(int tensor_index) override; + + private: + static constexpr int kNumScratchBuffers_ = 12; + + int scratch_buffer_count_ = 0; + uint8_t* scratch_buffers_[kNumScratchBuffers_]; + + TfLiteTensor* tensors_; + int allocated_tensor_count_ = 0; + + SingleArenaBufferAllocator* allocator_; + + TF_LITE_REMOVE_VIRTUAL_DELETE +}; + +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_FAKE_MICRO_CONTEXT_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/flatbuffer_conversions_bridge.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/flatbuffer_conversions_bridge.cpp new file mode 100644 index 0000000..2fe1663 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/flatbuffer_conversions_bridge.cpp @@ -0,0 +1,34 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "edge-impulse-sdk/tensorflow/lite/micro/flatbuffer_conversions_bridge.h" + +#include "edge-impulse-sdk/tensorflow/lite/c/c_api_types.h" +#include "edge-impulse-sdk/tensorflow/lite/core/api/error_reporter.h" +#include "edge-impulse-sdk/tensorflow/lite/core/api/flatbuffer_conversions.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_error_reporter.h" +#include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h" + +namespace tflite { +TfLiteStatus ConvertTensorType(TensorType tensor_type, TfLiteType* type) { + return ConvertTensorType(tensor_type, type, tflite::GetMicroErrorReporter()); +} + +TfLiteStatus CallBuiltinParseFunction(TfLiteBridgeBuiltinParseFunction parser, + const Operator* op, + BuiltinDataAllocator* allocator, + void** builtin_data) { + return parser(op, tflite::GetMicroErrorReporter(), allocator, builtin_data); +} +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/flatbuffer_conversions_bridge.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/flatbuffer_conversions_bridge.h new file mode 100644 index 0000000..a2a1ad4 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/flatbuffer_conversions_bridge.h @@ -0,0 +1,45 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_MICRO_TFLITE_BRIDGE_FLATBUFFER_CONVERSIONS_BRIDGE_H_ +#define TENSORFLOW_LITE_MICRO_TFLITE_BRIDGE_FLATBUFFER_CONVERSIONS_BRIDGE_H_ + +#include "edge-impulse-sdk/tensorflow/lite/c/c_api_types.h" +#include "edge-impulse-sdk/tensorflow/lite/core/api/flatbuffer_conversions.h" +#include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h" + +namespace tflite { + +// Forward declaration of the ErrorReporter class to hide it from the TFLM code. +class ErrorReporter; + +using TfLiteBridgeBuiltinDataAllocator = BuiltinDataAllocator; + +using TfLiteBridgeBuiltinParseFunction = + TfLiteStatus (*)(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data); + +// Converts the tensor data type used in the flatbuffer to the representation +// used by the runtime. +TfLiteStatus ConvertTensorType(TensorType tensor_type, TfLiteType* type); + +// CallBuiltinParseFunction is a wrapper function to wrap the parser function +// calls to Call parser(op, allocator, builtin_data) +TfLiteStatus CallBuiltinParseFunction(TfLiteBridgeBuiltinParseFunction parser, + const Operator* op, + BuiltinDataAllocator* allocator, + void** builtin_data); +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_TFLITE_BRIDGE_FLATBUFFER_CONVERSIONS_BRIDGE_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/flatbuffer_utils.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/flatbuffer_utils.cpp new file mode 100644 index 0000000..e5d779b --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/flatbuffer_utils.cpp @@ -0,0 +1,85 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/micro/flatbuffer_utils.h" +#include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated_full.h" + +namespace tflite { + +FlexbufferWrapper::FlexbufferWrapper(const uint8_t* buffer, size_t size) + : flexbuffers::Vector(flexbuffers::GetRoot(buffer, size).AsVector()) {} + +int64_t FlexbufferWrapper::ElementAsInt64(size_t i) const { + const uint8_t* elem = data_ + i * byte_width_; + return ::flexbuffers::ReadInt64(elem, byte_width_); +} + +uint64_t FlexbufferWrapper::ElementAsUInt64(size_t i) const { + const uint8_t* elem = data_ + i * byte_width_; + return ::flexbuffers::ReadUInt64(elem, byte_width_); +} + +int32_t FlexbufferWrapper::ElementAsInt32(size_t i) const { + return static_cast(ElementAsInt64(i)); +} + +bool FlexbufferWrapper::ElementAsBool(size_t i) const { + return static_cast(ElementAsUInt64(i)); +} + +double FlexbufferWrapper::ElementAsDouble(size_t i) const { + const uint8_t* elem = data_ + i * byte_width_; + return ::flexbuffers::ReadDouble(elem, byte_width_); +} + +float FlexbufferWrapper::ElementAsFloat(size_t i) const { + return static_cast(FlexbufferWrapper::ElementAsDouble(i)); +} + +// TODO(b/192589496): Ops must always be there. Remove this function when fixed +uint32_t NumSubgraphOperators(const SubGraph* subgraph) { + if (subgraph->operators() != nullptr) { + return subgraph->operators()->size(); + } else { + return 0; + } +} +// TODO(b/192589496): Ops must always be there. Remove this function when fixed +uint32_t NumSubgraphOperators(const Model* model, int subgraph_idx) { + const SubGraph* subgraph = model->subgraphs()->Get(subgraph_idx); + return NumSubgraphOperators(subgraph); +} + +TfLiteIntArray* FlatBufferVectorToTfLiteTypeArray( + const flatbuffers::Vector* flatbuffer_array) { + // On little-endian machines, TfLiteIntArray happens to have the same memory + // layout as flatbuffers:Vector, so we can reinterpret_cast the + // flatbuffer vector and avoid a copy and malloc. + // TODO(b/188459715): audit this usage of const_cast. + return const_cast( + reinterpret_cast(flatbuffer_array)); +} + +TfLiteFloatArray* FlatBufferVectorToTfLiteTypeArray( + const flatbuffers::Vector* flatbuffer_array) { + // On little-endian machines, TfLiteFloatArray happens to have the same memory + // layout as flatbuffers:Vector, so we can reinterpret_cast the + // flatbuffer vector and avoid a copy and malloc. + // TODO(b/188459715): audit this usage of const_cast. + return const_cast( + reinterpret_cast(flatbuffer_array)); +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/flatbuffer_utils.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/flatbuffer_utils.h new file mode 100644 index 0000000..a5a7f9e --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/flatbuffer_utils.h @@ -0,0 +1,65 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/
+
+#ifndef THIRD_PARTY_TFLITE_MICRO_TENSORFLOW_LITE_MICRO_FLATBUFFER_UTILS_H_
+#define THIRD_PARTY_TFLITE_MICRO_TENSORFLOW_LITE_MICRO_FLATBUFFER_UTILS_H_
+
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/flatbuffers.h"
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/flexbuffers.h"
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h"
+
+namespace tflite {
+// Kernels use flexbuffers::Map to pack their init parameters in a tflite file,
+// with the parameter names as map keys and the parameter values as the
+// corresponding map values.
+// Accessing the map values using the flexbuffers::Map class is inline heavy,
+// which can cause the code size to bloat beyond what's reasonable for a micro
+// application. Use this class instead, when possible.
+// FlexbufferWrapper takes advantage of the following properties of
+// flexbuffers::Map:
+// 1. It can be viewed as a flexbuffers::Vector of the values.
+// 2. The values in the vector are ordered alphabetically by their keys.
+// 3. All integer and Boolean values are stored as 64-bit numbers.
+// 4. All floating point values are stored as double precision numbers.
+// The properties are mentioned in the flexbuffers docs, but we rely on
+// a unit test to catch design changes.
+class FlexbufferWrapper : public flexbuffers::Vector {
+ public:
+  // Construct with a serialized flexbuffer 'buffer' of 'size' bytes
+  explicit FlexbufferWrapper(const uint8_t* buffer, size_t size);
+  int64_t ElementAsInt64(size_t i) const;
+  uint64_t ElementAsUInt64(size_t i) const;
+  int32_t ElementAsInt32(size_t i) const;
+  bool ElementAsBool(size_t i) const;
+  double ElementAsDouble(size_t i) const;
+  float ElementAsFloat(size_t i) const;
+};
+
+// Returns the number of operators in a tflite subgraph.
+uint32_t NumSubgraphOperators(const SubGraph* subgraph);
+uint32_t NumSubgraphOperators(const Model* model, int subgraph_idx);
+
+// Converts a flatbuffer array to a TfLiteArray.
+// TODO(b/188459715): These functions convert a const input to a non-const via
+// a const_cast. It is unclear exactly why this is required.
+TfLiteIntArray* FlatBufferVectorToTfLiteTypeArray(
+    const flatbuffers::Vector<int32_t>* flatbuffer_array);
+TfLiteFloatArray* FlatBufferVectorToTfLiteTypeArray(
+    const flatbuffers::Vector<float>* flatbuffer_array);
+
+}  // namespace tflite
+
+#endif  // THIRD_PARTY_TFLITE_MICRO_TENSORFLOW_LITE_MICRO_FLATBUFFER_UTILS_H_
diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/ibuffer_allocator.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/ibuffer_allocator.h
new file mode 100644
index 0000000..287eea3
--- /dev/null
+++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/ibuffer_allocator.h
@@ -0,0 +1,100 @@
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_IBUFFER_ALLOCATOR_H_
+#define TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_IBUFFER_ALLOCATOR_H_
+
+#include <cstddef>
+#include <cstdint>
+
+#include "edge-impulse-sdk/tensorflow/lite/c/c_api_types.h"
+
+namespace tflite {
+// Interface classes that the TFLM framework relies on to get buffers it needs.
+// There are two types of buffers that the TFLM framework requires: persistent
+// and non-persistent. Persistent buffers, once allocated, are never freed by
+// the TFLM framework. Non-persistent buffers can be allocated and deallocated
+// by the TFLM framework. This file defines the two interface classes that the
+// TFLM framework relies on to manage these buffers.
+
+// Interface class for managing persistent buffers.
+class IPersistentBufferAllocator {
+ public:
+  IPersistentBufferAllocator() {}
+  virtual ~IPersistentBufferAllocator() {}
+
+  // Allocates persistent memory. The persistent buffer is never freed.
+  virtual uint8_t* AllocatePersistentBuffer(size_t size, size_t alignment) = 0;
+
+  // Returns the size of all persistent allocations in bytes.
+  virtual size_t GetPersistentUsedBytes() const = 0;
+};
+
+// Interface class for managing non-persistent buffers.
+// The default non-persistent buffers are temp buffers that are not resizable.
+// Support of at least one resizable buffer is required.
+class INonPersistentBufferAllocator {
+ public:
+  INonPersistentBufferAllocator() {}
+  virtual ~INonPersistentBufferAllocator() {}
+
+  // Allocates a temporary buffer. This buffer is not resizable.
+  virtual uint8_t* AllocateTemp(size_t size, size_t alignment) = 0;
+
+  // Signals that a temporary buffer is no longer needed.
+  virtual void DeallocateTemp(uint8_t* buf) = 0;
+
+  // Returns true if all temporary buffers are already deallocated.
+  virtual bool IsAllTempDeallocated() = 0;
+
+  // Signals that all temporary allocations can be reclaimed. TFLM calls this
+  // API when it knows that all the temporary buffers it requested have been
+  // deallocated. The goal of the API is to let implementations of
+  // INonPersistentBufferAllocator reuse that buffer space with reasonable
+  // complexity.
+  virtual TfLiteStatus ResetTempAllocations() = 0;
+
+  // Returns a buffer that is resizable via ResizeBuffer().
+  virtual uint8_t* AllocateResizableBuffer(size_t size, size_t alignment) = 0;
+
+  // Resizes a buffer that was previously returned by
+  // AllocateResizableBuffer().
+  virtual TfLiteStatus ResizeBuffer(uint8_t* resizable_buf, size_t size,
+                                    size_t alignment) = 0;
+
+  // Frees up the memory occupied by the resizable buffer.
+  virtual TfLiteStatus DeallocateResizableBuffer(uint8_t* resizable_buf) = 0;
+
+  // Returns a pointer to the start of the overlay memory, which is used for
+  // activation tensors and scratch buffers by kernels at Invoke stage.
+  virtual uint8_t* GetOverlayMemoryAddress() const = 0;
+
+  // Reserves the size of the overlay memory. This overlay is reserved for the
+  // kernels at Invoke stage. It is referred to as the overlay because, before
+  // the Invoke stage, the same memory can be used for temp buffers. The layout
+  // of the memory is planned by the memory planner separately at Invoke stage.
+  virtual TfLiteStatus ReserveNonPersistentOverlayMemory(size_t size,
+                                                         size_t alignment) = 0;
+
+  // Returns the size of the non-persistent buffer in use.
+ virtual size_t GetNonPersistentUsedBytes() const = 0; + + // Returns the number of bytes available with a given alignment. This number + // takes in account any temporary allocations. + virtual size_t GetAvailableMemory(size_t alignment) const = 0; +}; + +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_IBUFFER_ALLOCATOR_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/activation_utils.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/activation_utils.h new file mode 100644 index 0000000..895b36c --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/activation_utils.h @@ -0,0 +1,57 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_MICRO_KERNELS_ACTIVATION_UTILS_H_ +#define TENSORFLOW_LITE_MICRO_KERNELS_ACTIVATION_UTILS_H_ + +#include +#include + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/cppmath.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/max.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/min.h" + +namespace tflite { +namespace ops { +namespace micro { + +// Returns the floating point value for a fused activation: +inline float ActivationValFloat(TfLiteFusedActivation act, float a) { + switch (act) { + case kTfLiteActNone: + return a; + case kTfLiteActRelu: + return TfLiteMax(0.0f, a); + case kTfLiteActReluN1To1: + return TfLiteMax(-1.0f, TfLiteMin(a, 1.0f)); + case kTfLiteActRelu6: + return TfLiteMax(0.0f, TfLiteMin(a, 6.0f)); + case kTfLiteActTanh: + return std::tanh(a); + case kTfLiteActSignBit: + return std::signbit(a); + case kTfLiteActSigmoid: + return 1.0f / (1.0f + std::exp(-a)); + } + return 0.0f; // To indicate an unsupported activation (i.e. when a new fused + // activation is added to the enum and not handled here). +} + +} // namespace micro +} // namespace ops +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_KERNELS_ACTIVATION_UTILS_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/activations.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/activations.cpp new file mode 100644 index 0000000..4f4cf81 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/activations.cpp @@ -0,0 +1,120 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/activations.h" + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h" + +namespace tflite { +namespace { + +void* ReluInit(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(ReluOpData)); +} + +TfLiteStatus ReluEval(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + const ReluOpData& data = *(static_cast(node->user_data)); + + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kActivationsInputTensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kActivationsOutputTensor); + + switch (input->type) { + case kTfLiteFloat32: { + ReluFloat(tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + + return kTfLiteOk; + } + case kTfLiteInt8: { + tflite::ReluQuantized(data, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorData(output)); + return kTfLiteOk; + } + default: { + MicroPrintf("Only float32 is supported currently, got %s", + TfLiteTypeGetName(input->type)); + return kTfLiteError; + } + } +} + +void* Relu6Init(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(Relu6OpData)); +} + +TfLiteStatus Relu6Eval(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + const Relu6OpData& data = *(static_cast(node->user_data)); + + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kActivationsInputTensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kActivationsOutputTensor); + + switch (input->type) { + case kTfLiteFloat32: { + Relu6Float(tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + + return kTfLiteOk; + } + case kTfLiteInt8: { + Relu6Quantized(data.zero_int8, data.six_int8, + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + return kTfLiteOk; + } + default: { + MicroPrintf("Only float32 is supported currently, got %s", + TfLiteTypeGetName(input->type)); + return kTfLiteError; + } + } +} + +} // namespace + 
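+// Usage sketch (illustrative only, assuming the standard TFLM
+// MicroMutableOpResolver API, which is not part of this file): application
+// code normally pulls these kernels in through an op resolver rather than
+// calling the registration functions below directly:
+//
+//   tflite::MicroMutableOpResolver<2> resolver;
+//   resolver.AddRelu();   // resolves to Register_RELU()
+//   resolver.AddRelu6();  // resolves to Register_RELU6()
+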
+TfLiteRegistration Register_RELU() { + return tflite::micro::RegisterOp(ReluInit, ReluPrepare, ReluEval); +} + +TfLiteRegistration Register_RELU6() { + return tflite::micro::RegisterOp(Relu6Init, Relu6Prepare, Relu6Eval); +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/activations.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/activations.h new file mode 100644 index 0000000..c6dddcd --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/activations.h @@ -0,0 +1,63 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_MICRO_KERNELS_ACTIVATIONS_H_ +#define TENSORFLOW_LITE_MICRO_KERNELS_ACTIVATIONS_H_ + +#include + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" + +namespace tflite { + +extern const int kActivationsInputTensor; +extern const int kActivationsOutputTensor; + +struct ReluOpData { + ReluParams params; +}; + +struct Relu6OpData { + int8_t six_int8; + int8_t zero_int8; +}; + +void ReluQuantized(const ReluOpData& data, const RuntimeShape& input_shape, + const RuntimeShape& output_shape, const int8_t* input_data, + int8_t* output_data); + +template +void CalculateReluOpData(const TfLiteTensor* input, TfLiteTensor* output, + ReluOpData* data); + +void ReluFloat(const RuntimeShape& input_shape, const float* input_data, + const RuntimeShape& output_shape, float* output_data); + +void Relu6Float(const RuntimeShape& input_shape, const float* input_data, + const RuntimeShape& output_shape, float* output_data); + +void Relu6Quantized(int8_t lower, int8_t upper, const RuntimeShape& input_shape, + const int8_t* input_data, const RuntimeShape& output_shape, + int8_t* output_data); + +TfLiteStatus ReluPrepare(TfLiteContext* context, TfLiteNode* node); + +TfLiteStatus Relu6Prepare(TfLiteContext* context, TfLiteNode* node); + +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_KERNELS_ACTIVATIONS_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/activations_common.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/activations_common.cpp new file mode 100644 index 0000000..d270813 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/activations_common.cpp @@ -0,0 +1,158 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include +#include + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/activations.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h" + +namespace tflite { + +const int kActivationsInputTensor = 0; +const int kActivationsOutputTensor = 0; + +void ReluQuantized(const ReluOpData& data, const RuntimeShape& input_shape, + const RuntimeShape& output_shape, const int8_t* input_data, + int8_t* output_data) { + const int flat_size = MatchingFlatSize(input_shape, output_shape); + for (int i = 0; i < flat_size; ++i) { + const int32_t val = static_cast(input_data[i]); + int32_t clamped = + data.params.output_offset + + MultiplyByQuantizedMultiplier(val - data.params.input_offset, + data.params.output_multiplier, + data.params.output_shift); + clamped = std::max(data.params.quantized_activation_min, clamped); + clamped = std::min(data.params.quantized_activation_max, clamped); + output_data[i] = static_cast(clamped); + } +} + +template +void CalculateReluOpData(const TfLiteTensor* input, TfLiteTensor* output, + ReluOpData* data) { + float act_min = 0.0; + float act_max = std::numeric_limits::infinity(); + double real_multiplier = static_cast(input->params.scale) / + static_cast(output->params.scale); + + const RuntimeShape input_shape = GetTensorShape(input); + const RuntimeShape output_shape = GetTensorShape(output); + + QuantizeMultiplier(real_multiplier, &data->params.output_multiplier, + &data->params.output_shift); + + data->params.quantized_activation_min = std::max( + static_cast(std::numeric_limits::min()), + output->params.zero_point + + static_cast(roundf(act_min / output->params.scale))); + data->params.quantized_activation_max = + act_max == std::numeric_limits::infinity() + ? static_cast(std::numeric_limits::max()) + : std::min(static_cast(std::numeric_limits::max()), + output->params.zero_point + + static_cast( + roundf(act_max / output->params.scale))); + data->params.input_offset = input->params.zero_point; + data->params.output_offset = output->params.zero_point; +} + +void ReluFloat(const RuntimeShape& input_shape, const float* input_data, + const RuntimeShape& output_shape, float* output_data) { + const int flat_size = MatchingFlatSize(input_shape, output_shape); + for (int i = 0; i < flat_size; ++i) { + const float val = input_data[i]; + const float lower = 0.0f; + const float clamped = val < lower ? 
lower : val; + output_data[i] = clamped; + } +} + +void Relu6Float(const RuntimeShape& input_shape, const float* input_data, + const RuntimeShape& output_shape, float* output_data) { + const int flat_size = MatchingFlatSize(input_shape, output_shape); + for (int i = 0; i < flat_size; ++i) { + const float val = input_data[i]; + const float upper = 6.0f; + const float lower = 0.0f; + const float clamped = val > upper ? upper : val < lower ? lower : val; + output_data[i] = clamped; + } +} + +void Relu6Quantized(int8_t lower, int8_t upper, const RuntimeShape& input_shape, + const int8_t* input_data, const RuntimeShape& output_shape, + int8_t* output_data) { + const int flat_size = MatchingFlatSize(input_shape, output_shape); + for (int i = 0; i < flat_size; ++i) { + const int8_t val = input_data[i]; + const int8_t clamped = val > upper ? upper : val < lower ? lower : val; + output_data[i] = clamped; + } +} + +TfLiteStatus ReluPrepare(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + ReluOpData* data = static_cast(node->user_data); + + MicroContext* micro_context = GetMicroContext(context); + TfLiteTensor* input = + micro_context->AllocateTempInputTensor(node, kActivationsInputTensor); + TF_LITE_ENSURE(context, input != nullptr); + TfLiteTensor* output = + micro_context->AllocateTempOutputTensor(node, kActivationsOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); + + if (input->type == kTfLiteInt8) { + CalculateReluOpData(input, output, data); + } + + micro_context->DeallocateTempTfLiteTensor(input); + micro_context->DeallocateTempTfLiteTensor(output); + + return kTfLiteOk; +} + +TfLiteStatus Relu6Prepare(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + Relu6OpData* data = static_cast(node->user_data); + + MicroContext* micro_context = GetMicroContext(context); + TfLiteTensor* input = + micro_context->AllocateTempInputTensor(node, kActivationsInputTensor); + TF_LITE_ENSURE(context, input != nullptr); + + if (input->type == kTfLiteInt8) { + data->six_int8 = FloatToQuantizedType(6.0f, input->params.scale, + input->params.zero_point); + data->zero_int8 = input->params.zero_point; + } + + micro_context->DeallocateTempTfLiteTensor(input); + + return kTfLiteOk; +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/add.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/add.cpp new file mode 100644 index 0000000..2140d1f --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/add.cpp @@ -0,0 +1,1383 @@ +// Patched by Edge Impulse to include reference and hardware-accelerated kernels +#include "../../../../classifier/ei_classifier_config.h" +#if 0 == 1 +/* noop */ +#elif EI_CLASSIFIER_TFLITE_ENABLE_CMSIS_NN == 1 +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/add.h" + +#include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnfunctions.h" +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/add.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +namespace tflite { + +namespace { +constexpr int kInputTensor1 = 0; +constexpr int kInputTensor2 = 1; +constexpr int kOutputTensor = 0; + +struct OpData { + bool requires_broadcast; + + // These fields are used in both the general 8-bit -> 8bit quantized path, + // and the special 16-bit -> 16bit quantized path + int input1_shift; + int input2_shift; + int32_t output_activation_min; + int32_t output_activation_max; + + // These fields are used only in the general 8-bit -> 8bit quantized path + int32_t input1_multiplier; + int32_t input2_multiplier; + int32_t output_multiplier; + + int output_shift; + int left_shift; + + int32_t input1_offset; + int32_t input2_offset; + int32_t output_offset; + + // Used only for float evals: + float output_activation_min_f32; + float output_activation_max_f32; +}; + +TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteAddParams* params, + const TfLiteTensor* input1, + const TfLiteTensor* input2, TfLiteTensor* output, + OpData* data) { + data->requires_broadcast = !HaveSameShapes(input1, input2); + + if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) { + // 8bit -> 8bit general quantized path, with general rescalings + data->input1_offset = -input1->params.zero_point; + data->input2_offset = -input2->params.zero_point; + data->output_offset = output->params.zero_point; + data->left_shift = (output->type == kTfLiteInt16) ? 
15 : 20; + const double twice_max_input_scale = + 2 * static_cast( + std::max(input1->params.scale, input2->params.scale)); + const double real_input1_multiplier = + static_cast(input1->params.scale) / twice_max_input_scale; + const double real_input2_multiplier = + static_cast(input2->params.scale) / twice_max_input_scale; + const double real_output_multiplier = + twice_max_input_scale / + ((1 << data->left_shift) * static_cast(output->params.scale)); + + QuantizeMultiplierSmallerThanOneExp( + real_input1_multiplier, &data->input1_multiplier, &data->input1_shift); + + QuantizeMultiplierSmallerThanOneExp( + real_input2_multiplier, &data->input2_multiplier, &data->input2_shift); + + QuantizeMultiplierSmallerThanOneExp( + real_output_multiplier, &data->output_multiplier, &data->output_shift); + + TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized( + context, params->activation, output, &data->output_activation_min, + &data->output_activation_max)); + } else if (output->type == kTfLiteFloat32) { + CalculateActivationRange(params->activation, + &data->output_activation_min_f32, + &data->output_activation_max_f32); + } + + return kTfLiteOk; +} + +void UpdateOpParams(tflite::ArithmeticParams* const op_params, + const OpData* data) { + op_params->left_shift = data->left_shift; + op_params->input1_offset = data->input1_offset; + op_params->input1_multiplier = data->input1_multiplier; + op_params->input1_shift = data->input1_shift; + op_params->input2_offset = data->input2_offset; + op_params->input2_multiplier = data->input2_multiplier; + op_params->input2_shift = data->input2_shift; + op_params->output_offset = data->output_offset; + op_params->output_multiplier = data->output_multiplier; + op_params->output_shift = data->output_shift; + SetActivationParams(data->output_activation_min, data->output_activation_max, + op_params); +} + +TfLiteStatus EvalAddQuantizedInt8(TfLiteContext* context, TfLiteNode* node, + TfLiteAddParams* params, const OpData* data, + const TfLiteEvalTensor* input1, + const TfLiteEvalTensor* input2, + TfLiteEvalTensor* output) { + tflite::ArithmeticParams op_params; + UpdateOpParams(&op_params, data); + + bool need_broadcast = reference_ops::ProcessBroadcastShapes( + tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorShape(input2), &op_params); + + if (need_broadcast) { + reference_integer_ops::BroadcastAdd4DSlow( + op_params, tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } else { + arm_elementwise_add_s8( + tflite::micro::GetTensorData(input1), + + tflite::micro::GetTensorData(input2), op_params.input1_offset, + op_params.input1_multiplier, op_params.input1_shift, + op_params.input2_offset, op_params.input2_multiplier, + op_params.input2_shift, op_params.left_shift, + tflite::micro::GetTensorData(output), op_params.output_offset, + op_params.output_multiplier, op_params.output_shift, + op_params.quantized_activation_min, op_params.quantized_activation_max, + MatchingElementsSize(tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorShape(output))); + } + + return kTfLiteOk; +} + +TfLiteStatus EvalAddQuantizedInt16(TfLiteContext* context, TfLiteNode* node, + TfLiteAddParams* params, const OpData* data, + const TfLiteEvalTensor* input1, + const TfLiteEvalTensor* input2, + TfLiteEvalTensor* output) { + 
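+  // Mirrors the int8 path above: broadcast shapes fall back to the reference
+  // BroadcastAdd4DSlow() kernel, while same-shape tensors are handed to the
+  // CMSIS-NN arm_elementwise_add_s16() kernel using the offsets and
+  // multipliers pre-computed in CalculateOpData().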
tflite::ArithmeticParams op_params; + UpdateOpParams(&op_params, data); + + bool need_broadcast = reference_ops::ProcessBroadcastShapes( + tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorShape(input2), &op_params); + + if (need_broadcast) { + reference_ops::BroadcastAdd4DSlow( + op_params, tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } else { + arm_elementwise_add_s16( + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorData(input2), op_params.input1_offset, + op_params.input1_multiplier, op_params.input1_shift, + op_params.input2_offset, op_params.input2_multiplier, + op_params.input2_shift, op_params.left_shift, + tflite::micro::GetTensorData(output), op_params.output_offset, + op_params.output_multiplier, op_params.output_shift, + op_params.quantized_activation_min, op_params.quantized_activation_max, + MatchingElementsSize(tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorShape(output))); + } + + return kTfLiteOk; +} + +void EvalAddFloat(TfLiteContext* context, TfLiteNode* node, + TfLiteAddParams* params, const OpData* data, + const TfLiteEvalTensor* input1, + const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) { + tflite::ArithmeticParams op_params; + SetActivationParams(data->output_activation_min_f32, + data->output_activation_max_f32, &op_params); + if (data->requires_broadcast) { + reference_ops::BroadcastAdd4DSlow( + op_params, tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } else { + reference_ops::Add(op_params, tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } +} + +TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node, + TfLiteAddParams* params, const OpData* data, + const TfLiteEvalTensor* input1, + const TfLiteEvalTensor* input2, + TfLiteEvalTensor* output) { + switch (output->type) { + case kTfLiteInt8: { + EvalAddQuantizedInt8(context, node, params, data, input1, input2, output); + break; + } + case kTfLiteInt16: { + EvalAddQuantizedInt16(context, node, params, data, input1, input2, + output); + break; + } + default: + MicroPrintf("Type %s (%d) not supported.", + TfLiteTypeGetName(output->type), output->type); + return kTfLiteError; + } + + return kTfLiteOk; +} + +} // namespace + +void* InitAdd(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(OpData)); +} + +TfLiteStatus PrepareAdd(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + TFLITE_DCHECK(node->builtin_data != nullptr); + + MicroContext* micro_context = GetMicroContext(context); + + TfLiteTensor* input1 = + micro_context->AllocateTempInputTensor(node, kInputTensor1); + TF_LITE_ENSURE(context, input1 != nullptr); + TfLiteTensor* input2 = + micro_context->AllocateTempInputTensor(node, kInputTensor2); + TF_LITE_ENSURE(context, input2 != nullptr); + 
TfLiteTensor* output = + micro_context->AllocateTempOutputTensor(node, kOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); + + if (input1->type == kTfLiteInt16) { + TF_LITE_ENSURE_EQ(context, input1->params.zero_point, 0); + TF_LITE_ENSURE_EQ(context, input2->params.zero_point, 0); + TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0); + } + + OpData* data = static_cast(node->user_data); + auto* params = reinterpret_cast(node->builtin_data); + + TF_LITE_ENSURE_STATUS( + CalculateOpData(context, params, input1, input2, output, data)); + + micro_context->DeallocateTempTfLiteTensor(input1); + micro_context->DeallocateTempTfLiteTensor(input2); + micro_context->DeallocateTempTfLiteTensor(output); + + return kTfLiteOk; +} + +TfLiteStatus EvalAdd(TfLiteContext* context, TfLiteNode* node) { + auto* params = reinterpret_cast(node->builtin_data); + + const TfLiteEvalTensor* input1 = + tflite::micro::GetEvalInput(context, node, kInputTensor1); + const TfLiteEvalTensor* input2 = + tflite::micro::GetEvalInput(context, node, kInputTensor2); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + + TFLITE_DCHECK(node->user_data != nullptr); + const OpData* data = static_cast(node->user_data); + + if (output->type == kTfLiteFloat32) { + EvalAddFloat(context, node, params, data, input1, input2, output); + } else if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) { + TF_LITE_ENSURE_OK(context, EvalAddQuantized(context, node, params, data, + input1, input2, output)); + } else { + MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(output->type), + output->type); + return kTfLiteError; + } + + return kTfLiteOk; +} + +TfLiteStatus EvalAddInt8(TfLiteContext* context, TfLiteNode* node) { + auto* params = reinterpret_cast(node->builtin_data); + + const TfLiteEvalTensor* input1 = + tflite::micro::GetEvalInput(context, node, kInputTensor1); + const TfLiteEvalTensor* input2 = + tflite::micro::GetEvalInput(context, node, kInputTensor2); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + + TFLITE_DCHECK(node->user_data != nullptr); + TFLITE_DCHECK(output->type == kTfLiteInt8); + const OpData* data = static_cast(node->user_data); + + TF_LITE_ENSURE_OK(context, EvalAddQuantizedInt8(context, node, params, data, + input1, input2, output)); + + return kTfLiteOk; +} + +TfLiteStatus EvalAddInt16(TfLiteContext* context, TfLiteNode* node) { + auto* params = reinterpret_cast(node->builtin_data); + + const TfLiteEvalTensor* input1 = + tflite::micro::GetEvalInput(context, node, kInputTensor1); + const TfLiteEvalTensor* input2 = + tflite::micro::GetEvalInput(context, node, kInputTensor2); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + + TFLITE_DCHECK(node->user_data != nullptr); + TFLITE_DCHECK(output->type == kTfLiteInt16); + const OpData* data = static_cast(node->user_data); + + TF_LITE_ENSURE_OK(context, EvalAddQuantizedInt16(context, node, params, data, + input1, input2, output)); + + return kTfLiteOk; +} + +TfLiteRegistration Register_ADD() { + return tflite::micro::RegisterOp(InitAdd, PrepareAdd, EvalAdd); +} + +TfLiteRegistration Register_ADD_INT8() { + return tflite::micro::RegisterOp(InitAdd, PrepareAdd, EvalAddInt8); +} + +TfLiteRegistration Register_ADD_INT16() { + return tflite::micro::RegisterOp(InitAdd, PrepareAdd, EvalAddInt16); +} + +} // namespace tflite + +#elif EI_CLASSIFIER_TFLITE_ENABLE_ARC == 1 +/* Copyright 2021 The TensorFlow Authors. 
All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/add.h" + +#include +#include + +#include "mli_api.h" // NOLINT +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/add.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/add.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/mli_slicers.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/mli_tf_utils.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/scratch_buf_mgr.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/scratch_buffers.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +namespace tflite { + +constexpr int kInputTensor1 = 0; +constexpr int kInputTensor2 = 1; +constexpr int kOutputTensor = 0; + +struct OpData { + bool requires_broadcast; + + // These fields are used in both the general 8-bit -> 8bit quantized path, + // and the special 16-bit -> 16bit quantized path + int input1_shift; + int input2_shift; + int32_t output_activation_min; + int32_t output_activation_max; + + // These fields are used only in the general 8-bit -> 8bit quantized path + int32_t input1_multiplier; + int32_t input2_multiplier; + int32_t output_multiplier; + int output_shift; + int left_shift; + int32_t input1_offset; + int32_t input2_offset; + int32_t output_offset; + + // Used only for float evals: + float output_activation_min_f32; + float output_activation_max_f32; + + // The result of checking if MLI optimized version of tensors can be used. + bool is_mli_applicable; + + // Tensors in MLI format. 
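+  // These are populated in CalculateOpData() via ConvertToMliTensor() when
+  // is_mli_applicable is true, and flattened to rank-1 tensors because the
+  // MLI eltwise primitive does not support broadcasting.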
+ mutable ops::micro::MliTensorInterface mli_input1; + mutable ops::micro::MliTensorInterface mli_input2; + mutable ops::micro::MliTensorInterface mli_out; +}; + +TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteAddParams* params, + const TfLiteTensor* input1, + const TfLiteTensor* input2, TfLiteTensor* output, + OpData* data) { + data->requires_broadcast = !HaveSameShapes(input1, input2); + + if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) { + TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized( + context, params->activation, output, &data->output_activation_min, + &data->output_activation_max)); + + // MLI 2.0 optimized version only supports int8_t datatype and min/max + // within container range. Broadcasting isn't supported on the primitive + // level (but might be implemented as part of slicing in future) +#ifdef MLI_2_0 // + data->is_mli_applicable = + (input1->type == kTfLiteInt8) && (input2->type == kTfLiteInt8) && + (output->type == kTfLiteInt8) && !data->requires_broadcast && + data->output_activation_min == std::numeric_limits::min() && + data->output_activation_max == std::numeric_limits::max(); +#else + data->is_mli_applicable = false; +#endif + + if (data->is_mli_applicable) { + data->mli_input1 = + ops::micro::MliTensorInterface(static_cast( + context->AllocatePersistentBuffer(context, sizeof(mli_tensor)))); + data->mli_input2 = + ops::micro::MliTensorInterface(static_cast( + context->AllocatePersistentBuffer(context, sizeof(mli_tensor)))); + data->mli_out = ops::micro::MliTensorInterface(static_cast( + context->AllocatePersistentBuffer(context, sizeof(mli_tensor)))); + + ops::micro::ConvertToMliTensor(input1, &data->mli_input1); + ops::micro::ConvertToMliTensor(input2, &data->mli_input2); + ops::micro::ConvertToMliTensor(output, &data->mli_out); + /* Flatten tensors to simplify the process (as we don't support + * broadcasting). */ + data->mli_input1.Shape()[0] = + mli_hlp_count_elem_num(data->mli_input1.MliTensor(), 0); + data->mli_input2.Shape()[0] = + mli_hlp_count_elem_num(data->mli_input2.MliTensor(), 0); + data->mli_out.Shape()[0] = + mli_hlp_count_elem_num(data->mli_out.MliTensor(), 0); + data->mli_input1.MemStride()[0] = data->mli_input2.MemStride()[0] = 1; + data->mli_out.MemStride()[0] = 1; + *data->mli_input1.Rank() = *data->mli_input2.Rank() = 1; + *data->mli_out.Rank() = 1; + } + } else { + data->is_mli_applicable = false; + } + +#if !defined(TF_LITE_STRIP_REFERENCE_IMPL) + if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) { + // 8bit -> 8bit general quantized path, with general rescalings + data->input1_offset = -input1->params.zero_point; + data->input2_offset = -input2->params.zero_point; + data->output_offset = output->params.zero_point; + data->left_shift = (output->type == kTfLiteInt16) ? 
15 : 20; + const double twice_max_input_scale = + 2 * static_cast( + std::max(input1->params.scale, input2->params.scale)); + const double real_input1_multiplier = + static_cast(input1->params.scale) / twice_max_input_scale; + const double real_input2_multiplier = + static_cast(input2->params.scale) / twice_max_input_scale; + const double real_output_multiplier = + twice_max_input_scale / + ((1 << data->left_shift) * static_cast(output->params.scale)); + + QuantizeMultiplierSmallerThanOneExp( + real_input1_multiplier, &data->input1_multiplier, &data->input1_shift); + + QuantizeMultiplierSmallerThanOneExp( + real_input2_multiplier, &data->input2_multiplier, &data->input2_shift); + + QuantizeMultiplierSmallerThanOneExp( + real_output_multiplier, &data->output_multiplier, &data->output_shift); + + TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized( + context, params->activation, output, &data->output_activation_min, + &data->output_activation_max)); + } else if (output->type == kTfLiteFloat32) { + CalculateActivationRange(params->activation, + &data->output_activation_min_f32, + &data->output_activation_max_f32); +#endif // !defined(TF_LITE_STRIP_REFERENCE_IMPL) + } + + return kTfLiteOk; +} + +TfLiteStatus EvalAdd(TfLiteContext* context, TfLiteNode* node, + TfLiteAddParams* params, const OpData* data, + const TfLiteEvalTensor* input1, + const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) { +#if !defined(TF_LITE_STRIP_REFERENCE_IMPL) + tflite::ArithmeticParams op_params; + SetActivationParams(data->output_activation_min_f32, + data->output_activation_max_f32, &op_params); + if (data->requires_broadcast) { + reference_ops::BroadcastAdd4DSlow( + op_params, tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } else { + reference_ops::Add(op_params, tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } + return kTfLiteOk; +#else + MicroPrintf("Node configuration is not supported by ARC MLI Library."); + return kTfLiteError; +#endif +} + +TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node, + TfLiteAddParams* params, const OpData* data, + const TfLiteEvalTensor* input1, + const TfLiteEvalTensor* input2, + TfLiteEvalTensor* output) { +#if !defined(TF_LITE_STRIP_REFERENCE_IMPL) + tflite::ArithmeticParams op_params; + op_params.left_shift = data->left_shift; + op_params.input1_offset = data->input1_offset; + op_params.input1_multiplier = data->input1_multiplier; + op_params.input1_shift = data->input1_shift; + op_params.input2_offset = data->input2_offset; + op_params.input2_multiplier = data->input2_multiplier; + op_params.input2_shift = data->input2_shift; + op_params.output_offset = data->output_offset; + op_params.output_multiplier = data->output_multiplier; + op_params.output_shift = data->output_shift; + SetActivationParams(data->output_activation_min, data->output_activation_max, + &op_params); + bool need_broadcast = reference_ops::ProcessBroadcastShapes( + tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorShape(input2), &op_params); + + switch (output->type) { + case kTfLiteInt8: { + if (need_broadcast) { + reference_integer_ops::BroadcastAdd4DSlow( + op_params, 
tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } else { + reference_integer_ops::Add( + op_params, tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } + break; + } + case kTfLiteInt16: { + if (need_broadcast) { + reference_ops::BroadcastAdd4DSlow( + op_params, tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } else { + reference_ops::Add(op_params, tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output), + false); + } + break; + } + default: + MicroPrintf("Type %s (%d) not supported.", + TfLiteTypeGetName(output->type), output->type); + return kTfLiteError; + } + + return kTfLiteOk; +#else + MicroPrintf("Node configuration is not supported by ARC MLI Library."); + return kTfLiteError; +#endif +} + +TfLiteStatus EvalMLIAddInt8(TfLiteContext* context, TfLiteNode* node, + TfLiteAddParams* params, const OpData* data, + const TfLiteEvalTensor* input1, + const TfLiteEvalTensor* input2, + TfLiteEvalTensor* output) { +#ifdef MLI_2_0 + TF_LITE_ENSURE(context, data->is_mli_applicable == true); + TF_LITE_ENSURE(context, input1->type == kTfLiteInt8); + TF_LITE_ENSURE(context, input2->type == kTfLiteInt8); + TF_LITE_ENSURE(context, output->type == kTfLiteInt8); + + ops::micro::MliTensorAttachBuffer(input1, &data->mli_input1); + ops::micro::MliTensorAttachBuffer(input2, &data->mli_input2); + ops::micro::MliTensorAttachBuffer(output, &data->mli_out); + + // mli_mov config and tensors for data in fast (local) memory with interface + mli_mov_cfg_t copy_config; + mli_mov_cfg_for_copy(©_config); + mli_tensor input1_local_tsr = *data->mli_input1.MliTensor(); + mli_tensor input2_local_tsr = *data->mli_input2.MliTensor(); + mli_tensor out_local_tsr = *data->mli_out.MliTensor(); + ops::micro::MliTensorInterface input1_local(&input1_local_tsr); + ops::micro::MliTensorInterface input2_local(&input2_local_tsr); + ops::micro::MliTensorInterface out_local(&out_local_tsr); + + /* allocate the local buffers, and compute the slice size */ + TF_LITE_ENSURE_STATUS(ops::micro::get_arc_scratch_buffer_for_eltwise_tensors( + context, &input1_local, &input2_local, &out_local)); + TF_LITE_ENSURE(context, *input1_local.Rank() == 1 && + *input2_local.Rank() == 1 && + *out_local.Rank() == 1); + uint32_t min_capacity = *input1_local.DataCapacity(); + min_capacity = std::min(min_capacity, *input2_local.DataCapacity()); + min_capacity = std::min(min_capacity, *out_local.DataCapacity()); + const int slice_dim = 0; + const int slice_size = + min_capacity / mli_hlp_tensor_element_size(out_local.MliTensor()); + + /* is_local indicates that the tensor is already in local memory, + so in that case the original tensor can be used, + and there is no need to copy it to the local tensor*/ + const bool input1_is_local = + input1_local.Data() == data->mli_input1.Data(); + const bool 
input2_is_local = + input2_local.Data() == data->mli_input2.Data(); + const bool out_is_local = + out_local.Data() == data->mli_out.Data(); + + ops::micro::TensorSlicer input1_slice(data->mli_input1.MliTensor(), slice_dim, + slice_size); + ops::micro::TensorSlicer input2_slice(data->mli_input2.MliTensor(), slice_dim, + slice_size); + ops::micro::TensorSlicer out_slice(data->mli_out.MliTensor(), slice_dim, + slice_size); + + mli_tensor* input1_tsr = + input1_is_local ? input1_slice.Sub() : input1_local.MliTensor(); + mli_tensor* input2_tsr = + input2_is_local ? input2_slice.Sub() : input2_local.MliTensor(); + mli_tensor* out_tsr = out_is_local ? out_slice.Sub() : out_local.MliTensor(); + + while (!out_slice.Done()) { + mli_mov_tensor_sync(input1_slice.Sub(), ©_config, input1_tsr); + mli_mov_tensor_sync(input2_slice.Sub(), ©_config, input2_tsr); + + mli_krn_eltwise_add_sa8(input1_tsr, input2_tsr, out_tsr); + + mli_mov_tensor_sync(out_tsr, ©_config, out_slice.Sub()); + input1_slice.Next(); + input2_slice.Next(); + out_slice.Next(); + } + return kTfLiteOk; +#else + return kTfLiteError; +#endif +} + +void* AddInit(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(OpData)); +} + +TfLiteStatus AddEval(TfLiteContext* context, TfLiteNode* node) { + TfLiteStatus ret_val = kTfLiteOk; + auto* params = reinterpret_cast(node->builtin_data); + + TFLITE_DCHECK(node->user_data != nullptr); + const OpData* data = static_cast(node->user_data); + + const TfLiteEvalTensor* input1 = + tflite::micro::GetEvalInput(context, node, kInputTensor1); + const TfLiteEvalTensor* input2 = + tflite::micro::GetEvalInput(context, node, kInputTensor2); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + if (data->is_mli_applicable) { + ret_val = + EvalMLIAddInt8(context, node, params, data, input1, input2, output); + } else if (output->type == kTfLiteFloat32) { + ret_val = EvalAdd(context, node, params, data, input1, input2, output); + } else if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) { + ret_val = + EvalAddQuantized(context, node, params, data, input1, input2, output); + } else { + MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(output->type), + output->type); + ret_val = kTfLiteError; + } + + return ret_val; +} + +TfLiteRegistration Register_ADD() { + return tflite::micro::RegisterOp(AddInit, AddPrepare, AddEval); +} + +} // namespace tflite + +#elif EI_CLASSIFIER_TFLITE_ENABLE_SILABS_MVP == 1 +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/add.h" + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/add.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "sl_mvp_ml_add.h" + +namespace tflite { +namespace sl { +namespace add { + +constexpr int kInputTensor1 = 0; +constexpr int kInputTensor2 = 1; +constexpr int kOutputTensor = 0; + +struct OpData { + bool requires_broadcast; + + int input1_shift; + int input2_shift; + int32_t input1_multiplier; + int32_t input2_multiplier; + int32_t output_multiplier; + int output_shift; + int left_shift; + + sli_mvp_ml_add_s8_params_t params; + + // Used only for float evals: + float output_activation_min_f32; + float output_activation_max_f32; +}; + +TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteAddParams* params, + const TfLiteTensor* input1, + const TfLiteTensor* input2, TfLiteTensor* output, + OpData* data) { + data->requires_broadcast = !HaveSameShapes(input1, input2); + + if (output->type == kTfLiteInt8) { + data->params.input1_offset = -input1->params.zero_point; + data->params.input2_offset = -input2->params.zero_point; + data->params.output_offset = output->params.zero_point; + data->params.input1_multiplier = input1->params.scale; + data->params.input2_multiplier = input2->params.scale; + data->params.output_multiplier = 1.0 / output->params.scale; + data->params.length = GetTensorShape(input1).FlatSize(); + + int32_t activation_min; + int32_t activation_max; + TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized( + context, params->activation, output, &activation_min, + &activation_max)); + data->params.activation_min = static_cast(activation_min); + data->params.activation_max = static_cast(activation_max); + + // These multipliers and parameters are not used by the MVP codepath, + // however are needed in cases where broadcast is used. 
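+  // In that fallback the reference quantized-add scheme applies: both inputs
+  // are first expressed on a common scale of 2 * max(scale1, scale2), shifted
+  // left by `left_shift` (20) bits to preserve precision, and the sum is then
+  // rescaled to the output scale. Concretely, the multipliers computed below
+  // are input_multiplier_i = scale_i / (2 * max(scale1, scale2)) and
+  // output_multiplier = 2 * max(scale1, scale2) /
+  // ((1 << left_shift) * output_scale).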
+ data->left_shift = 20; + const double twice_max_input_scale = + 2 * static_cast( + std::max(input1->params.scale, input2->params.scale)); + const double real_input1_multiplier = + static_cast(input1->params.scale) / twice_max_input_scale; + const double real_input2_multiplier = + static_cast(input2->params.scale) / twice_max_input_scale; + const double real_output_multiplier = + twice_max_input_scale / + ((1 << data->left_shift) * static_cast(output->params.scale)); + + QuantizeMultiplierSmallerThanOneExp( + real_input1_multiplier, &data->input1_multiplier, &data->input1_shift); + + QuantizeMultiplierSmallerThanOneExp( + real_input2_multiplier, &data->input2_multiplier, &data->input2_shift); + + QuantizeMultiplierSmallerThanOneExp( + real_output_multiplier, &data->output_multiplier, &data->output_shift); + + } else if (output->type == kTfLiteFloat32) { + CalculateActivationRange(params->activation, + &data->output_activation_min_f32, + &data->output_activation_max_f32); + } + + return kTfLiteOk; +} + +void EvalAdd(TfLiteContext* context, TfLiteNode* node, TfLiteAddParams* params, + const OpData* data, const TfLiteEvalTensor* input1, + const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) { + tflite::ArithmeticParams op_params; + SetActivationParams(data->output_activation_min_f32, + data->output_activation_max_f32, &op_params); + if (data->requires_broadcast) { + reference_ops::BroadcastAdd4DSlow(op_params, tflite::micro::GetTensorShape(input1), tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), tflite::micro::GetTensorData(output)); + } else { + reference_ops::Add(op_params, + tflite::micro::GetTensorShape(input1), tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), tflite::micro::GetTensorData(output)); + } +} + +TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node, + TfLiteAddParams* params, const OpData* data, + const TfLiteEvalTensor* input1, + const TfLiteEvalTensor* input2, + TfLiteEvalTensor* output) { + TfLiteStatus status = kTfLiteOk; + tflite::ArithmeticParams op_params; + op_params.left_shift = data->left_shift; + op_params.input1_offset = data->params.input1_offset; + op_params.input1_multiplier = data->input1_multiplier; + op_params.input1_shift = data->input1_shift; + op_params.input2_offset = data->params.input2_offset; + op_params.input2_multiplier = data->input2_multiplier; + op_params.input2_shift = data->input2_shift; + op_params.output_offset = data->params.output_offset; + op_params.output_multiplier = data->output_multiplier; + op_params.output_shift = data->output_shift; + op_params.quantized_activation_min = data->params.activation_min; + op_params.quantized_activation_max = data->params.activation_max; + + // TODO: Do we need to support the broadcast scenario? 
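+  // As written, broadcasting falls back to the reference
+  // BroadcastAdd4DSlow() kernel below; only the non-broadcast, same-shape
+  // case is dispatched to the MVP-accelerated sli_mvp_ml_add_s8().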
+  bool need_broadcast = reference_ops::ProcessBroadcastShapes(
+      tflite::micro::GetTensorShape(input1),
+      tflite::micro::GetTensorShape(input2), &op_params);
+
+  if (need_broadcast) {
+    reference_integer_ops::BroadcastAdd4DSlow(
+        op_params,
+        tflite::micro::GetTensorShape(input1), tflite::micro::GetTensorData<int8_t>(input1),
+        tflite::micro::GetTensorShape(input2), tflite::micro::GetTensorData<int8_t>(input2),
+        tflite::micro::GetTensorShape(output), tflite::micro::GetTensorData<int8_t>(output));
+  } else {
+    sli_mvp_ml_add_s8_params_t params = data->params;
+    params.input1 = tflite::micro::GetTensorData<int8_t>(input1);
+    params.input2 = tflite::micro::GetTensorData<int8_t>(input2);
+    params.output = tflite::micro::GetTensorData<int8_t>(output);
+    sl_status_t ret = sli_mvp_ml_add_s8(&params);
+    if (ret != SL_STATUS_OK) {
+      status = kTfLiteError;
+    }
+  }
+
+  return status;
+}
+
+void* Init(TfLiteContext* context, const char* buffer, size_t length) {
+  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
+  return context->AllocatePersistentBuffer(context, sizeof(OpData));
+}
+
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  TFLITE_DCHECK(node->user_data != nullptr);
+  TFLITE_DCHECK(node->builtin_data != nullptr);
+
+  const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
+  TF_LITE_ENSURE(context, input1 != nullptr);
+  const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
+  TF_LITE_ENSURE(context, input2 != nullptr);
+  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  TF_LITE_ENSURE(context, output != nullptr);
+
+  OpData* data = static_cast<OpData*>(node->user_data);
+  auto* params = reinterpret_cast<TfLiteAddParams*>(node->builtin_data);
+
+  TF_LITE_ENSURE_STATUS(
+      CalculateOpData(context, params, input1, input2, output, data));
+
+  return kTfLiteOk;
+}
+
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+  auto* params = reinterpret_cast<TfLiteAddParams*>(node->builtin_data);
+
+  TFLITE_DCHECK(node->user_data != nullptr);
+  const OpData* data = static_cast<const OpData*>(node->user_data);
+
+  const TfLiteEvalTensor* input1 = tflite::micro::GetEvalInput(context, node, kInputTensor1);
+  const TfLiteEvalTensor* input2 = tflite::micro::GetEvalInput(context, node, kInputTensor2);
+  TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, kOutputTensor);
+
+  if (output->type == kTfLiteFloat32) {
+    EvalAdd(context, node, params, data, input1, input2, output);
+  } else if (output->type == kTfLiteInt8) {
+    TF_LITE_ENSURE_OK(context, EvalAddQuantized(context, node, params, data,
+                                                input1, input2, output));
+  } else {
+    TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
+                       TfLiteTypeGetName(output->type), output->type);
+    return kTfLiteError;
+  }
+
+  return kTfLiteOk;
+}
+
+}  // namespace add
+}  // namespace sl
+
+TfLiteRegistration Register_ADD() {
+  return {/*init=*/sl::add::Init,
+          /*free=*/nullptr,
+          /*prepare=*/sl::add::Prepare,
+          /*invoke=*/sl::add::Eval,
+          /*profiling_string=*/nullptr,
+          /*builtin_code=*/0,
+          /*custom_name=*/nullptr,
+          /*version=*/0};
+}
+
+}  // namespace tflite
+
+#elif EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN == 1
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/add.h" + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/add.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/add.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +#include + +#if ESP_NN +#include "edge-impulse-sdk/porting/espressif/ESP-NN/include/esp_nn.h" +#endif + +long long add_total_time = 0; + +namespace tflite { + +void EvalAdd(TfLiteContext* context, TfLiteNode* node, TfLiteAddParams* params, + const OpDataAdd* data, const TfLiteEvalTensor* input1, + const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) { + tflite::ArithmeticParams op_params; + SetActivationParams(data->output_activation_min_f32, + data->output_activation_max_f32, &op_params); + if (data->requires_broadcast) { + reference_ops::BroadcastAdd4DSlow( + op_params, tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } else { + reference_ops::Add(op_params, tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } +} + +TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node, + TfLiteAddParams* params, const OpDataAdd* data, + const TfLiteEvalTensor* input1, + const TfLiteEvalTensor* input2, + TfLiteEvalTensor* output) { + tflite::ArithmeticParams op_params; + op_params.left_shift = data->left_shift; + op_params.input1_offset = data->input1_offset; + op_params.input1_multiplier = data->input1_multiplier; + op_params.input1_shift = data->input1_shift; + op_params.input2_offset = data->input2_offset; + op_params.input2_multiplier = data->input2_multiplier; + op_params.input2_shift = data->input2_shift; + op_params.output_offset = data->output_offset; + op_params.output_multiplier = data->output_multiplier; + op_params.output_shift = data->output_shift; + SetActivationParams(data->output_activation_min, data->output_activation_max, + &op_params); + bool need_broadcast = reference_ops::ProcessBroadcastShapes( + tflite::micro::GetTensorShape(input1), + 
tflite::micro::GetTensorShape(input2), &op_params); + + switch (output->type) { + case kTfLiteInt8: { + if (need_broadcast) { + reference_integer_ops::BroadcastAdd4DSlow( + op_params, tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } else { +#if ESP_NN + const int8_t *input1_data = tflite::micro::GetTensorData(input1); + const int8_t *input2_data = tflite::micro::GetTensorData(input2); + int8_t *out_data = tflite::micro::GetTensorData(output); + + esp_nn_add_elementwise_s8(input1_data, + input2_data, + data->input1_offset, + data->input2_offset, + data->input1_multiplier, + data->input2_multiplier, + data->input1_shift, + data->input2_shift, + data->left_shift, + out_data, + data->output_offset, + data->output_multiplier, + data->output_shift, + data->output_activation_min, + data->output_activation_max, + MatchingElementsSize(tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorShape(output)) + ); +#else + reference_integer_ops::Add( + op_params, tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); +#endif + } + break; + } + case kTfLiteInt16: { + if (need_broadcast) { + reference_ops::BroadcastAdd4DSlow( + op_params, tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } else { + reference_ops::Add(op_params, tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output), + false); + } + break; + } + default: + MicroPrintf("Type %s (%d) not supported.", + TfLiteTypeGetName(output->type), output->type); + return kTfLiteError; + } + + return kTfLiteOk; +} + +void* AddInit(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(OpDataAdd)); +} + +TfLiteStatus AddEval(TfLiteContext* context, TfLiteNode* node) { + auto* params = reinterpret_cast(node->builtin_data); + + TFLITE_DCHECK(node->user_data != nullptr); + const OpDataAdd* data = static_cast(node->user_data); + + const TfLiteEvalTensor* input1 = + tflite::micro::GetEvalInput(context, node, kAddInputTensor1); + const TfLiteEvalTensor* input2 = + tflite::micro::GetEvalInput(context, node, kAddInputTensor2); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kAddOutputTensor); + + long long start_time = esp_timer_get_time(); + + if (output->type == kTfLiteFloat32) { + EvalAdd(context, node, params, data, input1, input2, output); + } else if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) { + TF_LITE_ENSURE_OK(context, EvalAddQuantized(context, node, params, data, + input1, input2, output)); + } else { + MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(output->type), + output->type); + return kTfLiteError; + } + add_total_time += esp_timer_get_time() - start_time; + + return 
kTfLiteOk; +} + +TfLiteRegistration Register_ADD() { + return tflite::micro::RegisterOp(AddInit, AddPrepare, AddEval); +} + +} // namespace tflite + +#else +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/add.h" + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/add.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/add.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +namespace tflite { + +void EvalAdd(TfLiteContext* context, TfLiteNode* node, TfLiteAddParams* params, + const OpDataAdd* data, const TfLiteEvalTensor* input1, + const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) { + tflite::ArithmeticParams op_params; + SetActivationParams(data->output_activation_min_f32, + data->output_activation_max_f32, &op_params); + if (data->requires_broadcast) { + reference_ops::BroadcastAdd4DSlow( + op_params, tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } else { + reference_ops::Add(op_params, tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } +} + +TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node, + TfLiteAddParams* params, const OpDataAdd* data, + const TfLiteEvalTensor* input1, + const TfLiteEvalTensor* input2, + TfLiteEvalTensor* output) { + tflite::ArithmeticParams op_params; + op_params.left_shift = data->left_shift; + op_params.input1_offset = data->input1_offset; + op_params.input1_multiplier = data->input1_multiplier; + op_params.input1_shift = data->input1_shift; + op_params.input2_offset = data->input2_offset; + op_params.input2_multiplier = data->input2_multiplier; + op_params.input2_shift = data->input2_shift; + op_params.output_offset = data->output_offset; + op_params.output_multiplier = data->output_multiplier; + op_params.output_shift = data->output_shift; + 
SetActivationParams(data->output_activation_min, data->output_activation_max, + &op_params); + bool need_broadcast = reference_ops::ProcessBroadcastShapes( + tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorShape(input2), &op_params); + + switch (output->type) { + case kTfLiteInt8: { + if (need_broadcast) { + reference_integer_ops::BroadcastAdd4DSlow( + op_params, tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } else { + reference_integer_ops::Add( + op_params, tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } + break; + } + case kTfLiteInt16: { + if (need_broadcast) { + reference_ops::BroadcastAdd4DSlow( + op_params, tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } else { + reference_ops::Add(op_params, tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output), + false); + } + break; + } + default: + MicroPrintf("Type %s (%d) not supported.", + TfLiteTypeGetName(output->type), output->type); + return kTfLiteError; + } + + return kTfLiteOk; +} + +void* AddInit(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(OpDataAdd)); +} + +TfLiteStatus AddEval(TfLiteContext* context, TfLiteNode* node) { + auto* params = reinterpret_cast(node->builtin_data); + + TFLITE_DCHECK(node->user_data != nullptr); + const OpDataAdd* data = static_cast(node->user_data); + + const TfLiteEvalTensor* input1 = + tflite::micro::GetEvalInput(context, node, kAddInputTensor1); + const TfLiteEvalTensor* input2 = + tflite::micro::GetEvalInput(context, node, kAddInputTensor2); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kAddOutputTensor); + + if (output->type == kTfLiteFloat32) { + EvalAdd(context, node, params, data, input1, input2, output); + } else if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) { + TF_LITE_ENSURE_OK(context, EvalAddQuantized(context, node, params, data, + input1, input2, output)); + } else { + MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(output->type), + output->type); + return kTfLiteError; + } + + return kTfLiteOk; +} + +TfLiteRegistration Register_ADD() { + return tflite::micro::RegisterOp(AddInit, AddPrepare, AddEval); +} + +} // namespace tflite + +#endif diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/add.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/add.h new file mode 100644 index 0000000..e91ffb3 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/add.h @@ -0,0 +1,77 @@ +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_MICRO_KERNELS_ADD_H_ +#define TENSORFLOW_LITE_MICRO_KERNELS_ADD_H_ + +#include + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" + +namespace tflite { + +extern const int kAddInputTensor1; +extern const int kAddInputTensor2; +extern const int kAddOutputTensor; + +struct OpDataAdd { + bool requires_broadcast; + + // These fields are used in both the general 8-bit -> 8bit quantized path, + // and the special 16-bit -> 16bit quantized path + int input1_shift; + int input2_shift; + int32_t output_activation_min; + int32_t output_activation_max; + + // These fields are used only in the general 8-bit -> 8bit quantized path + int32_t input1_multiplier; + int32_t input2_multiplier; + int32_t output_multiplier; + int output_shift; + int left_shift; + int32_t input1_offset; + int32_t input2_offset; + int32_t output_offset; + + // Used only for float evals: + float output_activation_min_f32; + float output_activation_max_f32; +}; + +TfLiteStatus CalculateOpDataAdd(TfLiteContext* context, TfLiteAddParams* params, + const TfLiteTensor* input1, + const TfLiteTensor* input2, + TfLiteTensor* output, OpDataAdd* data); + +TfLiteStatus AddPrepare(TfLiteContext* context, TfLiteNode* node); + +// Generic must define registration function. +TfLiteRegistration Register_ADD(); + +#if defined(CMSIS_NN) +TfLiteRegistration Register_ADD_INT8(); + +TfLiteRegistration Register_ADD_INT16(); +#else +// Fallback registration +inline TfLiteRegistration Register_ADD_INT8() { return Register_ADD(); } + +inline TfLiteRegistration Register_ADD_INT16() { return Register_ADD(); } +#endif +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_KERNELS_ADD_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/add_common.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/add_common.cpp new file mode 100644 index 0000000..d9622a2 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/add_common.cpp @@ -0,0 +1,106 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/add.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/add.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/add.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h" + +namespace tflite { + +const int kAddInputTensor1 = 0; +const int kAddInputTensor2 = 1; +const int kAddOutputTensor = 0; + +TfLiteStatus CalculateOpDataAdd(TfLiteContext* context, TfLiteAddParams* params, + const TfLiteTensor* input1, + const TfLiteTensor* input2, + TfLiteTensor* output, OpDataAdd* data) { + data->requires_broadcast = !HaveSameShapes(input1, input2); + + if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) { + // 8bit -> 8bit general quantized path, with general rescalings + data->input1_offset = -input1->params.zero_point; + data->input2_offset = -input2->params.zero_point; + data->output_offset = output->params.zero_point; + data->left_shift = (output->type == kTfLiteInt16) ? 15 : 20; + const double twice_max_input_scale = + 2 * static_cast( + std::max(input1->params.scale, input2->params.scale)); + const double real_input1_multiplier = + static_cast(input1->params.scale) / twice_max_input_scale; + const double real_input2_multiplier = + static_cast(input2->params.scale) / twice_max_input_scale; + const double real_output_multiplier = + twice_max_input_scale / + ((1 << data->left_shift) * static_cast(output->params.scale)); + + QuantizeMultiplierSmallerThanOneExp( + real_input1_multiplier, &data->input1_multiplier, &data->input1_shift); + + QuantizeMultiplierSmallerThanOneExp( + real_input2_multiplier, &data->input2_multiplier, &data->input2_shift); + + QuantizeMultiplierSmallerThanOneExp( + real_output_multiplier, &data->output_multiplier, &data->output_shift); + + TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized( + context, params->activation, output, &data->output_activation_min, + &data->output_activation_max)); + } else if (output->type == kTfLiteFloat32) { + CalculateActivationRange(params->activation, + &data->output_activation_min_f32, + &data->output_activation_max_f32); + } + + return kTfLiteOk; +} + +TfLiteStatus AddPrepare(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + TFLITE_DCHECK(node->builtin_data != nullptr); + + MicroContext* micro_context = GetMicroContext(context); + TfLiteTensor* input1 = + micro_context->AllocateTempInputTensor(node, kAddInputTensor1); + TF_LITE_ENSURE(context, input1 != nullptr); + TfLiteTensor* input2 = + micro_context->AllocateTempInputTensor(node, kAddInputTensor2); + TF_LITE_ENSURE(context, input2 != nullptr); + TfLiteTensor* output = + micro_context->AllocateTempOutputTensor(node, kAddOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); + + OpDataAdd* data = static_cast(node->user_data); + auto* params = 
reinterpret_cast(node->builtin_data); + + TF_LITE_ENSURE_STATUS( + CalculateOpDataAdd(context, params, input1, input2, output, data)); + + micro_context->DeallocateTempTfLiteTensor(input1); + micro_context->DeallocateTempTfLiteTensor(input2); + micro_context->DeallocateTempTfLiteTensor(output); + return kTfLiteOk; +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/add_n.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/add_n.cpp new file mode 100644 index 0000000..0ec3276 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/add_n.cpp @@ -0,0 +1,215 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/add_n.h" + +#include + +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +namespace tflite { +namespace { + +constexpr int kInputTensor0 = 0; +constexpr int kOutputTensor = 0; + +constexpr int kAddNIntegerShift = 20; + +// only used with INT8 tensors +struct OpData { + int32_t output_activation_min; + int32_t output_activation_max; + int32_t input_offset; + int32_t output_offset; + int32_t input_multiplier; + int32_t output_multiplier; + int input_shift; + int output_shift; + int left_shift; + int scratch_index; +}; + +TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) { + int num_inputs = NumInputs(node); + TF_LITE_ENSURE(context, num_inputs >= 2); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + + MicroContext* micro_context = GetMicroContext(context); + TfLiteTensor* input_tensor_first = + micro_context->AllocateTempInputTensor(node, kInputTensor0); + TF_LITE_ENSURE(context, input_tensor_first != nullptr); + TfLiteTensor* output = + micro_context->AllocateTempOutputTensor(node, kOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); + + // Check that all tensors have the same shape and type. + TF_LITE_ENSURE_TYPES_EQ(context, output->type, input_tensor_first->type); + for (int i = kInputTensor0 + 1; i < num_inputs; ++i) { + TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, i); + TF_LITE_ENSURE(context, input != nullptr); + TF_LITE_ENSURE(context, HaveSameShapes(input_tensor_first, input)); + TF_LITE_ENSURE_TYPES_EQ(context, input_tensor_first->type, input->type); + + // Check that all INT8 input tensors have the same zero-point and scale. 
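+    // OpData stores a single input_offset/input_multiplier that is applied
+    // to every input of ADD_N, so all int8 inputs must share the same
+    // zero-point and scale for the rescaling below to be valid.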
+    if (input_tensor_first->type == kTfLiteInt8) {
+      TF_LITE_ENSURE(context, input_tensor_first->params.zero_point ==
+                                  input->params.zero_point);
+      TF_LITE_ENSURE(context,
+                     input_tensor_first->params.scale == input->params.scale);
+    }
+
+    micro_context->DeallocateTempTfLiteTensor(input);
+  }
+
+  if (output->type == kTfLiteFloat32) {
+    // Allocate scratch buffer space for pointer to each tensor's data
+    // and store the scratch buffer index in the node's user_data
+    int scratch_index;
+    size_t scratch_size = sizeof(float*) * num_inputs;
+    TF_LITE_ENSURE_OK(context, context->RequestScratchBufferInArena(
+                                   context, scratch_size, &scratch_index));
+    node->user_data =
+        reinterpret_cast<decltype(node->user_data)>(scratch_index);
+  } else if (output->type == kTfLiteInt8) {
+    node->user_data =
+        context->AllocatePersistentBuffer(context, sizeof(OpData));
+    OpData* data = static_cast<OpData*>(node->user_data);
+
+    // Allocate scratch buffer space for pointer to each tensor's data
+    // and store the scratch buffer index in OpData
+    size_t scratch_size = sizeof(int8_t*) * num_inputs;
+    TF_LITE_ENSURE_OK(
+        context, context->RequestScratchBufferInArena(context, scratch_size,
+                                                      &data->scratch_index));
+
+    // 8bit -> 8bit general quantized path, with general rescalings
+    data->input_offset = -input_tensor_first->params.zero_point;
+    data->output_offset = output->params.zero_point;
+    data->left_shift = kAddNIntegerShift;
+    const double twice_max_input_scale =
+        2 * static_cast<double>(input_tensor_first->params.scale);
+    const double real_input_multiplier =
+        static_cast<double>(input_tensor_first->params.scale) /
+        twice_max_input_scale;
+    const double real_output_multiplier =
+        twice_max_input_scale /
+        ((1 << data->left_shift) * static_cast<double>(output->params.scale));
+
+    QuantizeMultiplierSmallerThanOneExp(
+        real_input_multiplier, &data->input_multiplier, &data->input_shift);
+
+    QuantizeMultiplierSmallerThanOneExp(
+        real_output_multiplier, &data->output_multiplier, &data->output_shift);
+
+    TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
+        context, kTfLiteActNone, output, &data->output_activation_min,
+        &data->output_activation_max));
+  } else {
+    MicroPrintf("ADD_N only supports FLOAT32 and INT8, got %s.",
+                TfLiteTypeGetName(output->type));
+    return kTfLiteError;
+  }
+
+  micro_context->DeallocateTempTfLiteTensor(input_tensor_first);
+  micro_context->DeallocateTempTfLiteTensor(output);
+
+  return kTfLiteOk;
+}
+
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  return CalculateOpData(context, node);
+}
+
+template <typename T>
+inline const T** CopyInputsToScratchBuffer(TfLiteContext* context,
+                                           TfLiteNode* node,
+                                           const int scratch_index) {
+  int num_inputs = NumInputs(node);
+  void* scratch_buffer = context->GetScratchBuffer(context, scratch_index);
+  const T** all_inputs = static_cast<const T**>(scratch_buffer);
+  for (int i = 0; i < num_inputs; i++) {
+    const TfLiteEvalTensor* next_input =
+        tflite::micro::GetEvalInput(context, node, kInputTensor0 + i);
+    all_inputs[i] = tflite::micro::GetTensorData<T>(next_input);
+  }
+
+  return all_inputs;
+}
+
+template <typename T>
+void EvalAddN(TfLiteContext* context, TfLiteNode* node,
+              TfLiteEvalTensor* output) {
+  int num_inputs = NumInputs(node);
+
+  int scratch_index =
+      static_cast<int>(reinterpret_cast<intptr_t>(node->user_data));
+  const T** all_inputs =
+      CopyInputsToScratchBuffer<T>(context, node, scratch_index);
+
+  reference_ops::AddN(tflite::micro::GetTensorShape(output), num_inputs,
+                      all_inputs, tflite::micro::GetTensorData<T>(output));
+}
+
+template <typename T>
+void EvalAddNQuantized(TfLiteContext* context, TfLiteNode* node,
+                       TfLiteEvalTensor* output) {
+  int num_inputs = NumInputs(node);
+
+  OpData* data = static_cast<OpData*>(node->user_data);
+  const T** all_inputs =
+      CopyInputsToScratchBuffer<T>(context, node, data->scratch_index);
+
+  ArithmeticParams params;
+  params.left_shift = data->left_shift;
+  params.input1_offset = data->input_offset;
+  params.input1_multiplier = data->input_multiplier;
+  params.input1_shift = data->input_shift;
+  params.output_offset = data->output_offset;
+  params.output_multiplier = data->output_multiplier;
+  params.output_shift = data->output_shift;
+  SetActivationParams(data->output_activation_min, data->output_activation_max,
+                      &params);
+
+  reference_ops::AddN(params, tflite::micro::GetTensorShape(output), num_inputs,
+                      all_inputs, tflite::micro::GetTensorData<T>(output));
+}
+
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+  TfLiteEvalTensor* output =
+      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
+  if (output->type == kTfLiteFloat32) {
+    EvalAddN<float>(context, node, output);
+  } else if (output->type == kTfLiteInt8) {
+    EvalAddNQuantized<int8_t>(context, node, output);
+  } else {
+    MicroPrintf("ADD_N only supports FLOAT32 and INT8, got %s.",
+                TfLiteTypeGetName(output->type));
+    return kTfLiteError;
+  }
+  return kTfLiteOk;
+}
+
+}  // namespace
+
+TfLiteRegistration Register_ADD_N() {
+  return tflite::micro::RegisterOp(nullptr, Prepare, Eval);
+}
+
+}  // namespace tflite
diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/arg_min_max.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/arg_min_max.cpp
new file mode 100644
index 0000000..f781ab5
--- /dev/null
+++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/arg_min_max.cpp
@@ -0,0 +1,118 @@
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/arg_min_max.h"
+
+#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/comparisons.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
+
+namespace tflite {
+
+namespace {
+
+constexpr int kInputTensor = 0;
+constexpr int kAxis = 1;
+constexpr int kOutputTensor = 0;
+
+template <typename T1, typename T2, typename T3>
+inline void ArgMinMaxHelper(const RuntimeShape& input1_shape,
+                            const T1* input1_data, const T3* input2_data,
+                            const RuntimeShape& output_shape, T2* output_data,
+                            bool is_arg_max) {
+  // Use Greater/Less from comparisons.h (formerly from kernels/micro_utils.h
+  // which was deprecated). Same as gtl::Greater but used here to reduce
+  // dependencies and binary size for micro environment.
+ if (is_arg_max) { + reference_ops::ArgMinMax(input1_shape, input1_data, input2_data, + output_shape, output_data, + reference_ops::GreaterFn); + } else { + reference_ops::ArgMinMax(input1_shape, input1_data, input2_data, + output_shape, output_data, + reference_ops::LessFn); + } +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node, bool is_arg_max) { + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + const TfLiteEvalTensor* axis = + tflite::micro::GetEvalInput(context, node, kAxis); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + +#define TF_LITE_ARG_MIN_MAX(data_type, axis_type, output_type) \ + ArgMinMaxHelper(tflite::micro::GetTensorShape(input), \ + tflite::micro::GetTensorData(input), \ + tflite::micro::GetTensorData(axis), \ + tflite::micro::GetTensorShape(output), \ + tflite::micro::GetTensorData(output), \ + is_arg_max) + if (axis->type == kTfLiteInt32) { + if (output->type == kTfLiteInt32) { + switch (input->type) { + case kTfLiteFloat32: + TF_LITE_ARG_MIN_MAX(float, int32_t, int32_t); + break; + case kTfLiteInt8: + TF_LITE_ARG_MIN_MAX(int8_t, int32_t, int32_t); + break; + default: + MicroPrintf( + "Only float32, uint8_t and int8_t are " + "supported currently, got %s.", + TfLiteTypeGetName(input->type)); + return kTfLiteError; + } + } else { + MicroPrintf("Only int32_t are supported currently, got %s.", + TfLiteTypeGetName(output->type)); + return kTfLiteError; + } + } else { + MicroPrintf("Only int32_t are supported currently, got %s.", + TfLiteTypeGetName(axis->type)); + return kTfLiteError; + } + +#undef TF_LITE_ARG_MIN_MAX + + return kTfLiteOk; +} + +TfLiteStatus ArgMinEval(TfLiteContext* context, TfLiteNode* node) { + return Eval(context, node, false); +} + +TfLiteStatus ArgMaxEval(TfLiteContext* context, TfLiteNode* node) { + return Eval(context, node, true); +} + +} // namespace + +TfLiteRegistration Register_ARG_MAX() { + return tflite::micro::RegisterOp(nullptr, nullptr, ArgMaxEval); +} + +TfLiteRegistration Register_ARG_MIN() { + return tflite::micro::RegisterOp(nullptr, nullptr, ArgMinEval); +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/assign_variable.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/assign_variable.cpp new file mode 100644 index 0000000..e650294 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/assign_variable.cpp @@ -0,0 +1,101 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include + +#include + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_graph.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_resource_variable.h" +#include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h" + +namespace tflite { + +namespace { + +constexpr int kInputVariableId = 0; +constexpr int kInputValue = 1; + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TF_LITE_ENSURE_EQ(context, NumInputs(node), 2); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 0); + + // This must be a TfLiteEvalTensor despite this being in Prepare, because + // CreateTensor allocates a temp tensor from the flatbuffer, which does not + // contain the correct ID generated within the VAR_HANDLE op. EvalTensors are + // all allocated during StartModelAllocation which happens before + // init/prepare, and VAR_HANDLE Prepare() references its own op_data in the + // TfLiteEvalTensor, so reading the ID here is valid. + const TfLiteEvalTensor* input_resource_id_tensor = + tflite::micro::GetEvalInput(context, node, kInputVariableId); + TFLITE_DCHECK(input_resource_id_tensor != nullptr); + TF_LITE_ENSURE(context, (input_resource_id_tensor->type == kTfLiteResource || + input_resource_id_tensor->type == kTfLiteInt32)); + TF_LITE_ENSURE_EQ(context, NumElements(input_resource_id_tensor->dims), 1); + + tflite::MicroContext* micro_context = tflite::GetMicroContext(context); + TfLiteTensor* input_value = + micro_context->AllocateTempInputTensor(node, kInputValue); + TFLITE_DCHECK(input_value != nullptr); + + MicroGraph& graph_info = micro_context->graph(); + + MicroResourceVariables* resources = graph_info.GetResourceVariables(); + TF_LITE_ENSURE_OK(context, + resources->Allocate(input_resource_id_tensor->data.i32[0], + context, input_value)); + + micro_context->DeallocateTempTfLiteTensor(input_value); + return kTfLiteOk; +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + const TfLiteEvalTensor* input_id = + tflite::micro::GetEvalInput(context, node, kInputVariableId); + TFLITE_DCHECK(input_id != nullptr); + + const TfLiteEvalTensor* input_value = + tflite::micro::GetEvalInput(context, node, kInputValue); + TFLITE_DCHECK(input_value != nullptr); + + tflite::MicroContext* micro_context = tflite::GetMicroContext(context); + MicroGraph& graph_info = micro_context->graph(); + + MicroResourceVariables* resources = graph_info.GetResourceVariables(); + if (resources == nullptr) { + MicroPrintf( + "ASSIGN_VARIABLE requires resource variables. Please create " + "ResourceVariables and pass it to the interpreter."); + return kTfLiteError; + } + TF_LITE_ENSURE_OK(context, + resources->Assign(input_id->data.i32[0], input_value)); + return kTfLiteOk; +} + +} // namespace. 
+ +TfLiteRegistration Register_ASSIGN_VARIABLE() { + return tflite::micro::RegisterOp(nullptr, Prepare, Eval); +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/batch_matmul.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/batch_matmul.cpp new file mode 100644 index 0000000..3858f73 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/batch_matmul.cpp @@ -0,0 +1,644 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/batch_matmul.h" + +#include +#include +#include + +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/transpose.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" + +namespace tflite { +namespace { + +constexpr int kInputLHSTensor = 0; +constexpr int kInputRHSTensor = 1; +constexpr int kOutputTensor = 0; + +constexpr int kInvalidScratchBufferIndex = -1; + +struct QuantizationOpData { + // The scaling factor from input to output (aka the 'real multiplier') can + // be represented as a fixed point multiplier plus a left shift. + int32_t output_multiplier; + int output_shift; // exponent + + // The range of the fused activation layer. For example for kNone and + // int8_t these would be -128 and 127. 
+ int32_t output_activation_min; + int32_t output_activation_max; + + int32_t lhs_zero_point; + int32_t rhs_zero_point; + int32_t output_zero_point; +}; + +struct HybridOpData { + float filter_scale; // RHS tensor scale + + // scratch buffer indices + int input_quantized_index; + int scaling_factors_index; + int input_offsets_index; + + // row_sums_buffer may be re-used across eval calls + int32_t* row_sums_buffer; + + bool compute_row_sums; +}; + +struct OpData { + union { + QuantizationOpData* quantization; + HybridOpData* hybrid; + }; + + // Transpose tensors and state + TfLiteEvalTensor* lhs_transposed_tensor; + TfLiteEvalTensor* rhs_transposed_tensor; + bool rhs_is_transposed; + bool lhs_is_constant_tensor; + bool rhs_is_constant_tensor; +}; + +struct OpContext { + OpContext(TfLiteContext* context, TfLiteNode* node) { + params = reinterpret_cast(node->builtin_data); + opdata = static_cast(node->user_data); + } + + TfLiteBatchMatMulParams* params; + OpData* opdata; +}; + +struct PrepareOpContext : OpContext { + PrepareOpContext(TfLiteContext* context, TfLiteNode* node) + : OpContext(context, node) { + MicroContext* micro_context = GetMicroContext(context); + lhs = micro_context->AllocateTempInputTensor(node, kInputLHSTensor); + rhs = micro_context->AllocateTempInputTensor(node, kInputRHSTensor); + output = micro_context->AllocateTempOutputTensor(node, kOutputTensor); + } + TfLiteTensor* lhs; + TfLiteTensor* rhs; + TfLiteTensor* output; +}; + +struct EvalOpContext : OpContext { + EvalOpContext(TfLiteContext* context, TfLiteNode* node) + : OpContext(context, node) { + lhs = tflite::micro::GetEvalInput(context, node, kInputLHSTensor); + rhs = tflite::micro::GetEvalInput(context, node, kInputRHSTensor); + output = tflite::micro::GetEvalOutput(context, node, kOutputTensor); + } + + const TfLiteEvalTensor* lhs; + const TfLiteEvalTensor* rhs; + TfLiteEvalTensor* output; +}; + +TfLiteStatus ResizeOutputTensor(TfLiteContext* context, TfLiteNode* node, + const RuntimeShape& extended_lhs_shape, + const RuntimeShape& extended_rhs_shape, + bool adj_x, bool adj_y, int output_rank, + TfLiteTensor* output) { + auto orig_size = NumElements(output); + + // make sure output tensor dims are not in the FlatBuffer + TfLiteEvalTensor* output_eval = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + TF_LITE_ENSURE_OK(context, tflite::micro::CreateWritableTensorDimsWithCopy( + context, output, output_eval)); + + // Fill in any broadcast dimensions. + for (int i = 0; i < output_rank - 2; ++i) { + const int lhs_dim = extended_lhs_shape.Dims(i); + const int rhs_dim = extended_rhs_shape.Dims(i); + int broadcast_dim = lhs_dim; + if ((lhs_dim != rhs_dim) && (lhs_dim == 1)) { + broadcast_dim = rhs_dim; + } + output->dims->data[i] = broadcast_dim; + } + // Fill in the matmul dimensions. + int lhs_rows_index = adj_x ? output_rank - 1 : output_rank - 2; + int rhs_cols_index = adj_y ? output_rank - 2 : output_rank - 1; + + output->dims->data[output_rank - 2] = extended_lhs_shape.Dims(lhs_rows_index); + output->dims->data[output_rank - 1] = extended_rhs_shape.Dims(rhs_cols_index); + output->dims->size = output_rank; + + // Check that output tensor has not been resized + // since TFLM doesn't support tensor resizing. 
+  TF_LITE_ENSURE_EQ(context, orig_size, NumElements(output));
+
+  return kTfLiteOk;
+}
+
+TfLiteEvalTensor* AllocInitTransposeTensorFromTfLiteTensor(
+    TfLiteContext* context, const TfLiteTensor& tensor) {
+  TfLiteEvalTensor* eval_tensor = static_cast<TfLiteEvalTensor*>(
+      context->AllocatePersistentBuffer(context, sizeof(TfLiteEvalTensor)));
+
+  eval_tensor->type = tensor.type;
+
+  const int tensor_rank = NumDimensions(&tensor);
+  auto eval_dims_size = TfLiteIntArrayGetSizeInBytes(tensor_rank);
+  eval_tensor->dims = static_cast<TfLiteIntArray*>(
+      context->AllocatePersistentBuffer(context, eval_dims_size));
+  eval_tensor->dims->size = tensor_rank;
+  for (int i = 0; i < tensor_rank - 2; ++i) {
+    eval_tensor->dims->data[i] = tensor.dims->data[i];
+  }
+  // Swap last two dimensions.
+  eval_tensor->dims->data[tensor_rank - 2] = tensor.dims->data[tensor_rank - 1];
+  eval_tensor->dims->data[tensor_rank - 1] = tensor.dims->data[tensor_rank - 2];
+
+  size_t eval_data_size = static_cast<size_t>(NumElements(&tensor));
+  if (tensor.type == kTfLiteFloat32) {
+    eval_data_size *= sizeof(float);
+  }
+  eval_tensor->data.data =
+      context->AllocatePersistentBuffer(context, eval_data_size);
+
+  return eval_tensor;
+}
+
+// Initializes tensors to store transposed operands.
+// Allocate storage for hybrid quantization if needed.
+// Allocate normal quantization data if needed.
+TfLiteStatus InitializeTemporaries(TfLiteContext* context, TfLiteNode* node,
+                                   const PrepareOpContext& op_context) {
+  OpData* op_data = op_context.opdata;
+  const TfLiteTensor* lhs = op_context.lhs;
+  const TfLiteTensor* rhs = op_context.rhs;
+
+  // For "hybrid" quantization, we impose the constraint that the LHS
+  // is float (typically an activation from a prior layer) and the RHS
+  // is quantized int8.
+  bool is_hybrid = (lhs->type == kTfLiteFloat32 && rhs->type == kTfLiteInt8);
+  if (is_hybrid) {
+    op_data->hybrid = static_cast<decltype(op_data->hybrid)>(
+        context->AllocatePersistentBuffer(context, sizeof(*op_data->hybrid)));
+    TF_LITE_ENSURE(context, op_data->hybrid != nullptr);
+    op_data->hybrid->input_quantized_index = kInvalidScratchBufferIndex;
+    op_data->hybrid->scaling_factors_index = kInvalidScratchBufferIndex;
+    op_data->hybrid->row_sums_buffer = nullptr;
+    op_data->hybrid->input_offsets_index = kInvalidScratchBufferIndex;
+  } else if (lhs->type == kTfLiteInt8) {
+    op_data->quantization = static_cast<decltype(op_data->quantization)>(
+        context->AllocatePersistentBuffer(context,
+                                          sizeof(*op_data->quantization)));
+    TF_LITE_ENSURE(context, op_data->quantization != nullptr);
+  } else {
+    op_data->quantization = nullptr;  // also op_data->hybrid
+  }
+
+  // tensor for Transposed LHS;
+  if (op_context.params->adj_x) {
+    op_data->lhs_transposed_tensor =
+        AllocInitTransposeTensorFromTfLiteTensor(context, *lhs);
+  } else {
+    op_data->lhs_transposed_tensor = nullptr;
+  }
+
+  // We need a buffer for the RHS if we need to transpose the RHS. We
+  // transpose by default, so that the two inputs (LHS and RHS) are in a proper
+  // layout for our fast matrix multiplication routines. If the transpose flag
+  // is set by the caller, the data is already in the desired layout.
+  if (!op_context.params->adj_y) {
+    op_data->rhs_transposed_tensor =
+        AllocInitTransposeTensorFromTfLiteTensor(context, *rhs);
+  } else {
+    op_data->rhs_transposed_tensor = nullptr;
+  }
+
+  // If we have to perform on-the-fly quantization (with quantized weights and
+  // float inputs) first we need to quantize the inputs.
Allocate temporary + // buffer to store the intermediate quantized values, the batch scaling + // factors, the input offsets, and persistent storage for the sums of the + // rows for each weights matrix. + // RHS = weights, LHS = inputs + if (is_hybrid) { + const int lhs_rank = NumDimensions(lhs); + const int rhs_rank = NumDimensions(rhs); + const int batch_size = op_context.params->adj_x + ? lhs->dims->data[lhs_rank - 1] + : lhs->dims->data[lhs_rank - 2]; + const int num_units = rhs->dims->data[rhs_rank - 1]; + + // Calculate the total number of LHS batches. + int num_batches = 1; + for (int i = 0; i < lhs_rank - 2; ++i) { + num_batches *= lhs->dims->data[i]; + } + int num_weights_matrices = 1; + for (int i = 0; i < rhs_rank - 2; ++i) { + num_weights_matrices *= rhs->dims->data[i]; + } + + const size_t input_quantized_size = static_cast( + NumElements(lhs->dims) * TfLiteTypeGetSize(rhs->type)); + TF_LITE_ENSURE_OK(context, context->RequestScratchBufferInArena( + context, input_quantized_size, + &op_data->hybrid->input_quantized_index)); + + const size_t scaling_factors_size = + static_cast(batch_size * num_batches * sizeof(float)); + TF_LITE_ENSURE_OK(context, context->RequestScratchBufferInArena( + context, scaling_factors_size, + &op_data->hybrid->scaling_factors_index)); + + const size_t input_offsets_size = + static_cast(batch_size * num_batches * sizeof(int32_t)); + TF_LITE_ENSURE_OK(context, context->RequestScratchBufferInArena( + context, input_offsets_size, + &op_data->hybrid->input_offsets_index)); + + const size_t row_sums_size = + static_cast(num_weights_matrices * num_units * sizeof(int32_t)); + op_data->hybrid->row_sums_buffer = static_cast( + context->AllocatePersistentBuffer(context, row_sums_size)); + TF_LITE_ENSURE(context, op_data->hybrid->row_sums_buffer != nullptr); + + op_data->hybrid->compute_row_sums = true; + op_data->hybrid->filter_scale = rhs->params.scale; + } + + return kTfLiteOk; +} + +template +void TransposeRowsColumnsImpl(const TfLiteEvalTensor& tensor_in, + const scalar* input, TfLiteEvalTensor* tensor_out, + scalar* output) { + RuntimeShape transposed_shape(tflite::micro::GetTensorShape(&tensor_in)); + RuntimeShape shape(transposed_shape); + TransposeParams params; + int rank = shape.DimensionsCount(); + params.perm_count = rank; + for (int i = 0; i < rank - 2; ++i) { + params.perm[i] = i; + } + // Transpose the last two dimensions. 
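+  // For example, a rank-3 input of shape [B, M, N] keeps perm[0] = 0 and
+  // swaps the last two indices, giving perm = {0, 2, 1} and a transposed
+  // shape of [B, N, M].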
+ params.perm[rank - 2] = rank - 1; + params.perm[rank - 1] = rank - 2; + transposed_shape.SetDim(rank - 1, shape.Dims(rank - 2)); + transposed_shape.SetDim(rank - 2, shape.Dims(rank - 1)); + reference_ops::Transpose(params, shape, input, transposed_shape, output); +} + +TfLiteStatus TransposeRowsColumns(TfLiteContext* context, + const TfLiteEvalTensor& tensor_in, + TfLiteEvalTensor* tensor_out) { + if (tensor_in.type == kTfLiteFloat32) { + TransposeRowsColumnsImpl( + tensor_in, tflite::micro::GetTensorData(&tensor_in), tensor_out, + tflite::micro::GetTensorData(tensor_out)); + return kTfLiteOk; + } else if (tensor_in.type == kTfLiteInt8) { + TransposeRowsColumnsImpl( + tensor_in, tflite::micro::GetTensorData(&tensor_in), tensor_out, + tflite::micro::GetTensorData(tensor_out)); + return kTfLiteOk; + } else { + TF_LITE_KERNEL_LOG(context, + "BATCH_MATMUL can only transpose tensors with float, " + "int8 type."); + return kTfLiteError; + } +} + +RuntimeShape SwapRowColumnDims(const RuntimeShape& shape) { + RuntimeShape swapped_shape(shape); + const int32_t dims = shape.DimensionsCount(); + swapped_shape.SetDim(dims - 2, shape.Dims(dims - 1)); + swapped_shape.SetDim(dims - 1, shape.Dims(dims - 2)); + return swapped_shape; +} + +TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) { + TF_LITE_ENSURE_EQ(context, NumInputs(node), 2); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + + MicroContext* micro_context = GetMicroContext(context); + + PrepareOpContext op_context(context, node); + const TfLiteTensor* lhs_data = op_context.lhs; + TF_LITE_ENSURE(context, lhs_data != nullptr); + const TfLiteTensor* rhs_data = op_context.rhs; + TF_LITE_ENSURE(context, rhs_data != nullptr); + TfLiteTensor* output = op_context.output; + TF_LITE_ENSURE(context, output != nullptr); + + TF_LITE_ENSURE(context, lhs_data->type == kTfLiteFloat32 || + lhs_data->type == kTfLiteInt8); + TF_LITE_ENSURE(context, rhs_data->type == kTfLiteFloat32 || + rhs_data->type == kTfLiteInt8); + // Either we have a hybrid quantization with a float32 and an int8 input, + // otherwise both inputs should be of the same type. + TF_LITE_ENSURE(context, (lhs_data->type == kTfLiteFloat32 && + rhs_data->type == kTfLiteInt8) || + lhs_data->type == rhs_data->type); + + const int lhs_rank = NumDimensions(lhs_data); + const int rhs_rank = NumDimensions(rhs_data); + // Support dimensions between 2 and 4, inclusive. + TF_LITE_ENSURE(context, lhs_rank >= 2); + TF_LITE_ENSURE(context, lhs_rank <= 4); + TF_LITE_ENSURE(context, rhs_rank >= 2); + TF_LITE_ENSURE(context, rhs_rank <= 4); + + TF_LITE_ENSURE_OK(context, InitializeTemporaries(context, node, op_context)); + + OpData* op_data = op_context.opdata; + // If the RHS is constant, we only transpose once. + op_data->rhs_is_transposed = false; + op_data->lhs_is_constant_tensor = IsConstantTensor(lhs_data); + op_data->rhs_is_constant_tensor = IsConstantTensor(rhs_data); + + bool adj_x = op_context.params->adj_x; + bool adj_y = op_context.params->adj_y; + + // Note that quantized inference requires that all tensors have their + // parameters set. This is usually done during quantized training. 
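+  // For the int8 path, the combined rescaling factor
+  //   real_multiplier = lhs_scale * rhs_scale / output_scale
+  // is converted into a 32-bit fixed-point multiplier plus a power-of-two
+  // shift, which is all the integer kernel needs at run time.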
+ if (lhs_data->type == kTfLiteInt8) { + TF_LITE_ENSURE(context, op_data->quantization != nullptr); + double real_multiplier = 0.0; + TF_LITE_ENSURE_STATUS(GetQuantizedConvolutionMultipler( + context, lhs_data, rhs_data, output, &real_multiplier)); + QuantizeMultiplier(real_multiplier, + &op_data->quantization->output_multiplier, + &op_data->quantization->output_shift); + // BatchMatMul has no fused activation functions. Therefore, set + // output activation min and max to min and max of int8_t type. + op_data->quantization->output_activation_min = + std::numeric_limits::min(); + op_data->quantization->output_activation_max = + std::numeric_limits::max(); + + // set zero_point for Int8 only + op_data->quantization->lhs_zero_point = lhs_data->params.zero_point; + op_data->quantization->rhs_zero_point = rhs_data->params.zero_point; + op_data->quantization->output_zero_point = output->params.zero_point; + } + + const int output_rank = std::max(lhs_rank, rhs_rank); + const RuntimeShape extended_lhs_shape = + RuntimeShape::ExtendedShape(output_rank, GetTensorShape(lhs_data)); + const RuntimeShape extended_rhs_shape = + RuntimeShape::ExtendedShape(output_rank, GetTensorShape(rhs_data)); + + // Ensure any batch dimensions obey broacasting rules. + for (int i = 0; i < output_rank - 2; ++i) { + const int lhs_dim = extended_lhs_shape.Dims(i); + const int rhs_dim = extended_rhs_shape.Dims(i); + if (lhs_dim != rhs_dim) { + if (lhs_dim != 1) { + TF_LITE_ENSURE_EQ(context, rhs_dim, 1); + } + } + } + // Ensure other dimensions work for matrix multiplication. + int accum_dim_lhs = adj_x ? extended_lhs_shape.Dims(output_rank - 2) + : extended_lhs_shape.Dims(output_rank - 1); + int accum_dim_rhs = adj_y ? extended_rhs_shape.Dims(output_rank - 1) + : extended_rhs_shape.Dims(output_rank - 2); + + TF_LITE_ENSURE_EQ(context, accum_dim_lhs, accum_dim_rhs); + TfLiteStatus status = + ResizeOutputTensor(context, node, extended_lhs_shape, extended_rhs_shape, + adj_x, adj_y, output_rank, output); + + micro_context->DeallocateTempTfLiteTensor(op_context.lhs); + micro_context->DeallocateTempTfLiteTensor(op_context.rhs); + micro_context->DeallocateTempTfLiteTensor(op_context.output); + + return status; +} + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + // This is a builtin op, so we don't use the contents in 'buffer', if any. + // Instead, we allocate a new object to carry information from Prepare() to + // Eval(). + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(OpData)); +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + return CalculateOpData(context, node); +} + +TfLiteStatus EvalHybrid(TfLiteContext* context, TfLiteNode* node, + const OpData& data, const RuntimeShape& input_shape, + const TfLiteEvalTensor& input, + const RuntimeShape& filter_shape, + const TfLiteEvalTensor& filter, + TfLiteEvalTensor* output) { + const auto* params = + static_cast(node->builtin_data); + const int32_t num_input_dims = input_shape.DimensionsCount(); + + // Input row/cols have been swapped at this point, so dims are + // {input_size, num_batches} + const int input_size = input_shape.Dims(num_input_dims - 2); + const int batch_size = input_shape.Dims(num_input_dims - 1); + + int num_batches_to_quantize = batch_size; + for (int i = 0; i < input_shape.DimensionsCount() - 2; ++i) { + num_batches_to_quantize *= input_shape.Dims(i); + } + // Quantize input from float to uint8 + quantization params (scaling factor). 
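+  // Hybrid path: each batch row of the float LHS is quantized to int8 with
+  // its own per-batch scaling factor (and offset when asymmetric
+  // quantization is enabled); the integer accumulation result is then
+  // rescaled by scaling_factor * filter_scale back to float.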
+ float* scaling_factors_ptr = static_cast( + context->GetScratchBuffer(context, data.hybrid->scaling_factors_index)); + int32_t* input_offset_ptr = static_cast( + context->GetScratchBuffer(context, data.hybrid->input_offsets_index)); + int32_t* row_sums_ptr = data.hybrid->row_sums_buffer; + if (!params->asymmetric_quantize_inputs) { + std::fill_n(input_offset_ptr, num_batches_to_quantize, 0); + } + + int8_t* quant_data = static_cast( + context->GetScratchBuffer(context, data.hybrid->input_quantized_index)); + const int8_t* filter_data = tflite::micro::GetTensorData(&filter); + const float* input_ptr = tflite::micro::GetTensorData(&input); + // Quantize each batch independently. + tensor_utils::BatchQuantizeFloats(input_ptr, num_batches_to_quantize, + input_size, quant_data, scaling_factors_ptr, + input_offset_ptr, + params->asymmetric_quantize_inputs); + for (int b = 0; b < num_batches_to_quantize; ++b) { + // Incorporate scaling of the filter. + scaling_factors_ptr[b] *= data.hybrid->filter_scale; + } + + RuntimeShape output_shape = tflite::micro::GetTensorShape(output); + int output_size = NumElements(output->dims); + std::fill_n(tflite::micro::GetTensorData(output), output_size, 0.0f); + reference_ops::BatchMatMul( + filter_shape, filter_data, input_shape, quant_data, scaling_factors_ptr, + input_offset_ptr, row_sums_ptr, tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output), + &(data.hybrid->compute_row_sums)); + + return kTfLiteOk; +} + +TfLiteStatus EvalInt8(TfLiteContext* context, const OpData& data, + const RuntimeShape& lhs_shape, + const TfLiteEvalTensor& lhs, + const RuntimeShape& rhs_shape, + const TfLiteEvalTensor& rhs, + const RuntimeShape& output_shape, + TfLiteEvalTensor* output) { + TF_LITE_ENSURE(context, data.quantization != nullptr); + + // Reuse params struct from FullyConnected Op. + FullyConnectedParams op_params; + op_params.input_offset = -data.quantization->lhs_zero_point; + op_params.weights_offset = + -data.quantization->rhs_zero_point; // filter offset + op_params.output_offset = data.quantization->output_zero_point; + op_params.output_multiplier = data.quantization->output_multiplier; + op_params.output_shift = data.quantization->output_shift; + op_params.quantized_activation_min = data.quantization->output_activation_min; + op_params.quantized_activation_max = data.quantization->output_activation_max; + op_params.lhs_cacheable = data.lhs_is_constant_tensor; + op_params.rhs_cacheable = data.rhs_is_constant_tensor; + + // Note we pass RHS args first, LHS args second. See note for Eval. 
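+  // Illustrative note, not part of the kernel: the offsets above are negated
+  // zero points because the integer kernel accumulates, per output element,
+  //   acc = sum_k (lhs_q[k] + input_offset) * (rhs_q[k] + weights_offset)
+  //       = sum_k (lhs_q[k] - lhs_zero_point) * (rhs_q[k] - rhs_zero_point),
+  // and the result is then rescaled with output_multiplier/output_shift and
+  // shifted by output_offset into the int8 output range.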
+ reference_ops::BatchMatMul( + op_params, rhs_shape, tflite::micro::GetTensorData(&rhs), + lhs_shape, tflite::micro::GetTensorData(&lhs), output_shape, + tflite::micro::GetTensorData(output)); + + return kTfLiteOk; +} + +TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node, + const OpData& data, const RuntimeShape& lhs_shape, + const TfLiteEvalTensor& lhs, + const RuntimeShape& rhs_shape, + const TfLiteEvalTensor& rhs, + TfLiteEvalTensor* output) { + if (lhs.type == kTfLiteFloat32 && rhs.type == kTfLiteInt8) { + TF_LITE_ENSURE(context, data.hybrid != nullptr); + TF_LITE_ENSURE(context, data.hybrid->row_sums_buffer != nullptr); + TF_LITE_ENSURE(context, data.hybrid->input_quantized_index != + kInvalidScratchBufferIndex); + TF_LITE_ENSURE(context, data.hybrid->scaling_factors_index != + kInvalidScratchBufferIndex); + TF_LITE_ENSURE(context, data.hybrid->input_offsets_index != + kInvalidScratchBufferIndex); + return EvalHybrid(context, node, data, lhs_shape, lhs, rhs_shape, rhs, + output); + } else if (lhs.type == kTfLiteInt8 && rhs.type == kTfLiteInt8) { + return EvalInt8(context, data, lhs_shape, lhs, rhs_shape, rhs, + tflite::micro::GetTensorShape(output), output); + } else { + TF_LITE_KERNEL_LOG( + context, "BATCH_MATMUL only supports hybrid, int8 quantization.\n"); + } + return kTfLiteError; +} + +// Perform a batch matrix multiply on +// LHS <..., A, B> X RHS<..., B, C> +// where the leading dimensions of LHS and RHS obey broadcasting rules +// (this Op will apply broadcasting rules). +// We assume that LHS and RHS are both row oriented (adjacent values in memory +// are in the same row) and will output in the same memory layout. However, +// our fast GEMM libraries assume RCC layout (LHS row oriented, +// RHS column oriented, output column oriented). Therefore, we perform +// RHS <..., C, B> X LHS <..., B, A> +// where output is a C X A column-oriented, which is equivalent to +// A X C row-oriented. +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + EvalOpContext op_context(context, node); + OpData* op_data = op_context.opdata; + const TfLiteEvalTensor* lhs = op_context.lhs; + const TfLiteEvalTensor* rhs = op_context.rhs; + TfLiteEvalTensor* output = op_context.output; + RuntimeShape orig_lhs_shape = tflite::micro::GetTensorShape(lhs); + RuntimeShape orig_rhs_shape = tflite::micro::GetTensorShape(rhs); + + bool adj_y = op_context.params->adj_y; + bool adj_x = op_context.params->adj_x; + + TfLiteEvalTensor* rhs_tensor = adj_y ? const_cast(rhs) + : op_data->rhs_transposed_tensor; + TfLiteEvalTensor* lhs_tensor = adj_x ? op_data->lhs_transposed_tensor + : const_cast(lhs); + TF_LITE_ENSURE(context, rhs_tensor != nullptr); + TF_LITE_ENSURE(context, lhs_tensor != nullptr); + if (!adj_y) { + // OLD-TODO(b/154760341) Constant tensors should already be transposed, but + // we transpose once if necessary for now. + if (!(op_data->rhs_is_constant_tensor && op_data->rhs_is_transposed)) { + TransposeRowsColumns(context, *rhs, rhs_tensor); + op_data->rhs_is_transposed = true; + } + } + if (adj_x) { + TransposeRowsColumns(context, *lhs, lhs_tensor); + } + RuntimeShape rhs_shape = + adj_y ? orig_rhs_shape : SwapRowColumnDims(orig_rhs_shape); + RuntimeShape lhs_shape = + adj_x ? orig_lhs_shape : SwapRowColumnDims(orig_lhs_shape); + + switch (rhs->type) { + case kTfLiteFloat32: + // Note we pass RHS args first, LHS args second. See note above. 
+ reference_ops::BatchMatMul( + rhs_shape, tflite::micro::GetTensorData(rhs_tensor), lhs_shape, + tflite::micro::GetTensorData(lhs_tensor), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + break; + case kTfLiteInt8: + return EvalQuantized(context, node, *op_data, lhs_shape, *lhs_tensor, + rhs_shape, *rhs_tensor, output); + default: + TF_LITE_KERNEL_LOG(context, + "Currently BATCH_MATMUL doesn't support type: %s", + TfLiteTypeGetName(lhs->type)); + return kTfLiteError; + } + return kTfLiteOk; +} + +} // namespace + +TfLiteRegistration Register_BATCH_MATMUL() { + return {/*init=*/Init, + /*free=*/nullptr, + /*prepare=*/Prepare, + /*invoke=*/Eval, + /*profiling_string=*/nullptr, + /*builtin_code=*/0, + /*custom_name=*/nullptr, + /*version=*/0}; +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/batch_to_space_nd.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/batch_to_space_nd.cpp new file mode 100644 index 0000000..9959e47 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/batch_to_space_nd.cpp @@ -0,0 +1,112 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h" + +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h" + +namespace tflite { + +namespace { + +constexpr int kInputTensor = 0; +constexpr int kBlockShapeTensor = 1; +constexpr int kCropsTensor = 2; +constexpr int kOutputTensor = 0; + +// Currently, only 3D NHC and 4D NHWC input/output op_context are supported. +// In case of 3D input, it will be extended to 3D NHWC by adding W=1. +// The 4D array need to have exactly 2 spatial dimensions. +// TODO(b/149952582): Support arbitrary dimension in SpaceToBatchND. 
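+// Worked example (illustrative, not part of the kernel): with an input of
+// shape [4, 1, 1, 1] holding {1, 2, 3, 4}, block_shape = [2, 2] and
+// crops = [[0, 0], [0, 0]], BATCH_TO_SPACE_ND interleaves the batch dimension
+// back into the spatial dimensions and produces an output of shape
+// [1, 2, 2, 1] holding {1, 2, 3, 4} laid out as a 2x2 image.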
+const int kInputOutputMinDimensionNum = 3;
+const int kInputOutputMaxDimensionNum = 4;
+
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  TF_LITE_ENSURE_EQ(context, NumInputs(node), 3);
+  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
+
+  MicroContext* micro_context = GetMicroContext(context);
+
+  TfLiteTensor* input =
+      micro_context->AllocateTempInputTensor(node, kInputTensor);
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, kOutputTensor);
+  TF_LITE_ENSURE(context, input != nullptr && output != nullptr);
+
+  TF_LITE_ENSURE(context, NumDimensions(input) >= kInputOutputMinDimensionNum);
+  TF_LITE_ENSURE(context, NumDimensions(output) >= kInputOutputMinDimensionNum);
+  TF_LITE_ENSURE(context, NumDimensions(input) <= kInputOutputMaxDimensionNum);
+  TF_LITE_ENSURE(context, NumDimensions(output) <= kInputOutputMaxDimensionNum);
+  TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
+
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(output);
+
+  return kTfLiteOk;
+}
+
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+  const TfLiteEvalTensor* input =
+      tflite::micro::GetEvalInput(context, node, kInputTensor);
+  const TfLiteEvalTensor* block_shape =
+      tflite::micro::GetEvalInput(context, node, kBlockShapeTensor);
+  const TfLiteEvalTensor* crops =
+      tflite::micro::GetEvalInput(context, node, kCropsTensor);
+  TfLiteEvalTensor* output =
+      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
+
+  switch (input->type) {  // Already know in/out types are same.
+    case kTfLiteFloat32:
+      reference_ops::BatchToSpaceND(
+          tflite::micro::GetTensorShape(input),
+          tflite::micro::GetTensorData<float>(input),
+          tflite::micro::GetTensorShape(block_shape),
+          tflite::micro::GetTensorData<int32_t>(block_shape),
+          tflite::micro::GetTensorShape(crops),
+          tflite::micro::GetTensorData<int32_t>(crops),
+          tflite::micro::GetTensorShape(output),
+          tflite::micro::GetTensorData<float>(output));
+      break;
+    case kTfLiteInt8:
+      reference_ops::BatchToSpaceND(
+          tflite::micro::GetTensorShape(input),
+          tflite::micro::GetTensorData<int8_t>(input),
+          tflite::micro::GetTensorShape(block_shape),
+          tflite::micro::GetTensorData<int32_t>(block_shape),
+          tflite::micro::GetTensorShape(crops),
+          tflite::micro::GetTensorData<int32_t>(crops),
+          tflite::micro::GetTensorShape(output),
+          tflite::micro::GetTensorData<int8_t>(output));
+      break;
+    default:
+      MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type),
+                  input->type);
+      return kTfLiteError;
+  }
+  return kTfLiteOk;
+}
+
+}  // namespace.
+
+TfLiteRegistration Register_BATCH_TO_SPACE_ND() {
+  return tflite::micro::RegisterOp(nullptr, Prepare, Eval);
+}
+
+}  // namespace tflite
diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/broadcast_args.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/broadcast_args.cpp
new file mode 100644
index 0000000..002a192
--- /dev/null
+++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/broadcast_args.cpp
@@ -0,0 +1,91 @@
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/broadcast_args.h" + +#include + +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_context.h" + +namespace tflite { +namespace { +constexpr int kShape1Tensor = 0; +constexpr int kShape2Tensor = 1; +constexpr int kOutputTensor = 0; + +TfLiteStatus BroadcastArgsPrepare(TfLiteContext* context, TfLiteNode* node) { + TF_LITE_ENSURE(context, NumInputs(node) == 2); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + + MicroContext* micro_context = GetMicroContext(context); + TfLiteTensor* shape1 = + micro_context->AllocateTempInputTensor(node, kShape1Tensor); + TfLiteTensor* shape2 = + micro_context->AllocateTempInputTensor(node, kShape2Tensor); + TfLiteTensor* output = + micro_context->AllocateTempOutputTensor(node, kOutputTensor); + + TF_LITE_ENSURE(context, + shape1->type == kTfLiteInt32 || shape1->type == kTfLiteInt64); + TF_LITE_ENSURE_EQ(context, shape1->type, shape2->type); + TF_LITE_ENSURE_EQ(context, shape1->type, output->type); + + // Ensures the shapes are 1D tensor. + TF_LITE_ENSURE_EQ(context, NumDimensions(shape1), 1); + TF_LITE_ENSURE_EQ(context, NumDimensions(shape2), 1); + + // Ensure the shape of the output tensor is compatible + TF_LITE_ENSURE_EQ(context, NumDimensions(output), 1); + + micro_context->DeallocateTempTfLiteTensor(shape1); + micro_context->DeallocateTempTfLiteTensor(shape2); + micro_context->DeallocateTempTfLiteTensor(output); + + return kTfLiteOk; +} + +TfLiteStatus BroadcastArgsEval(TfLiteContext* context, TfLiteNode* node) { + const TfLiteEvalTensor* shape1 = + micro::GetEvalInput(context, node, kShape1Tensor); + const TfLiteEvalTensor* shape2 = + micro::GetEvalInput(context, node, kShape2Tensor); + TfLiteEvalTensor* output = micro::GetEvalOutput(context, node, kOutputTensor); + + if (output->type == kTfLiteInt32) { + reference_ops::BroadcastArgs( + micro::GetTensorShape(shape1), micro::GetTensorData(shape1), + micro::GetTensorShape(shape2), micro::GetTensorData(shape2), + micro::GetTensorShape(output), micro::GetTensorData(output)); + } else { + reference_ops::BroadcastArgs( + micro::GetTensorShape(shape1), micro::GetTensorData(shape1), + micro::GetTensorShape(shape2), micro::GetTensorData(shape2), + micro::GetTensorShape(output), micro::GetTensorData(output)); + } + + return kTfLiteOk; +} + +} // namespace + +TfLiteRegistration Register_BROADCAST_ARGS() { + return tflite::micro::RegisterOp(nullptr, BroadcastArgsPrepare, + BroadcastArgsEval); +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/broadcast_to.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/broadcast_to.cpp new file mode 100644 index 0000000..51b19e0 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/broadcast_to.cpp @@ -0,0 +1,123 @@ +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/broadcast_to.h" + +#include + +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_context.h" + +namespace tflite { + +namespace { +constexpr int kInputTensor = 0; +constexpr int kShapeTensor = 1; +constexpr int kOutputTensor = 0; +// Support a maximum of 5 dimensions in TFLM. +constexpr int kMaxDims = 5; + +TfLiteStatus ValidateOutputTensor(TfLiteContext* context, TfLiteTensor* input, + TfLiteTensor* shape, TfLiteTensor* output) { + // Ensures the shape is 1D tensor. + TF_LITE_ENSURE_EQ(context, NumDimensions(shape), 1); + + // Ensure output dims is not less than input dims. + int input_num_dims = NumDimensions(input); + int output_num_dims = NumDimensions(output); + int shape_num_dims = SizeOfDimension(shape, 0); + TF_LITE_ENSURE_MSG(context, output_num_dims == shape_num_dims, + "Output must match with the expected shape dimension."); + TF_LITE_ENSURE_MSG(context, input_num_dims <= output_num_dims, + "Output shape must be broadcastable from input shape."); + TF_LITE_ENSURE_MSG(context, output_num_dims <= kMaxDims, + "BroadcastTo only supports 1-5D tensor."); + + // Check if output shape is broadcastable from input shape. + auto get_shape_data = [shape](int i) -> int32_t { + if (shape->type == kTfLiteInt32) { + return GetTensorData(shape)[i]; + } else { + return GetTensorData(shape)[i]; + } + }; + + int extending_dims = output_num_dims - input_num_dims; + for (int idx = 0; idx < input_num_dims; ++idx) { + TF_LITE_ENSURE_MSG( + context, + (SizeOfDimension(input, idx) == 1 || + SizeOfDimension(input, idx) == get_shape_data(extending_dims + idx)), + "Output shape must be broadcastable from input shape."); + } + + // Validating the shape of the output tensor. 
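+  // Worked example (illustrative, not part of the kernel): an input of shape
+  // [3] with a shape tensor holding {2, 3} passes the checks above, because
+  // the missing leading dimension is an extending dimension and the trailing
+  // sizes match (3 == 3). An input of shape [2] with shape {2, 3} fails,
+  // since 2 is neither 1 nor equal to 3. The loop below then confirms the
+  // output tensor was already resized to exactly the requested shape.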
+ tflite::RuntimeShape output_shape = tflite::GetTensorShape(output); + for (int idx = 0; idx < output_num_dims; ++idx) { + TF_LITE_ENSURE(context, output_shape.Dims(idx) == get_shape_data(idx)); + } + return kTfLiteOk; +} + +TfLiteStatus BroadcastToPrepare(TfLiteContext* context, TfLiteNode* node) { + TF_LITE_ENSURE(context, NumInputs(node) == 2); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + MicroContext* micro_context = GetMicroContext(context); + TfLiteTensor* input = + micro_context->AllocateTempInputTensor(node, kInputTensor); + TfLiteTensor* shape = + micro_context->AllocateTempInputTensor(node, kShapeTensor); + TfLiteTensor* output = + micro_context->AllocateTempOutputTensor(node, kOutputTensor); + + TF_LITE_ENSURE_MSG(context, (NumDimensions(input) <= kMaxDims), + "BroadcastTo only supports 1-5D tensor."); + + TF_LITE_ENSURE(context, + shape->type == kTfLiteInt32 || shape->type == kTfLiteInt64); + TF_LITE_ENSURE_EQ(context, input->type, output->type); + + // Does not support String type due to its variable size. This limitation is + // the same as TFLite. + TF_LITE_ENSURE(context, input->type != kTfLiteString); + + TF_LITE_ENSURE_STATUS(ValidateOutputTensor(context, input, shape, output)); + micro_context->DeallocateTempTfLiteTensor(input); + micro_context->DeallocateTempTfLiteTensor(shape); + micro_context->DeallocateTempTfLiteTensor(output); + return kTfLiteOk; +} + +TfLiteStatus BroadcastToEval(TfLiteContext* context, TfLiteNode* node) { + const TfLiteEvalTensor* input = + micro::GetEvalInput(context, node, kInputTensor); + TfLiteEvalTensor* output = micro::GetEvalOutput(context, node, kOutputTensor); + + // BroadcastTo op support upto 5 dims, different from 8 dims in TFLite. + reference_ops::BroadcastTo( + micro::GetTensorShape(input), input->data.raw, + micro::GetTensorShape(output), output->data.raw, input->type); + return kTfLiteOk; +} +} // namespace + +TfLiteRegistration Register_BROADCAST_TO() { + return tflite::micro::RegisterOp(nullptr, BroadcastToPrepare, + BroadcastToEval); +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/call_once.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/call_once.cpp new file mode 100644 index 0000000..21643c8 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/call_once.cpp @@ -0,0 +1,88 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include + +#include + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_context.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_graph.h" +#include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h" + +namespace tflite { + +namespace { + +struct OpData { + int init_subgraph_index; + bool has_run; +}; + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(OpData)); +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + OpData* op_data = reinterpret_cast(node->user_data); + const auto* params = + reinterpret_cast(node->builtin_data); + op_data->init_subgraph_index = params->init_subgraph_index; + op_data->has_run = false; + + TF_LITE_ENSURE(context, NumInputs(node) == 0); + TF_LITE_ENSURE(context, NumOutputs(node) == 0); + + tflite::MicroContext* micro_context = tflite::GetMicroContext(context); + MicroGraph& graph_info = micro_context->graph(); + + TF_LITE_ENSURE(context, + op_data->init_subgraph_index < graph_info.NumSubgraphs()); + + return kTfLiteOk; +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + OpData* op_data = reinterpret_cast(node->user_data); + + // Call once only runs one time then is a no-op for every subsequent call. + if (op_data->has_run) { + return kTfLiteOk; + } + + tflite::MicroContext* micro_context = tflite::GetMicroContext(context); + MicroGraph& graph_info = micro_context->graph(); + + TF_LITE_ENSURE_OK(context, + graph_info.InvokeSubgraph(op_data->init_subgraph_index)); + + op_data->has_run = true; + + return kTfLiteOk; +} + +} // namespace. + +TfLiteRegistration Register_CALL_ONCE() { + return tflite::micro::RegisterOp(Init, Prepare, Eval); +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/cast.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/cast.cpp new file mode 100644 index 0000000..19e545f --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/cast.cpp @@ -0,0 +1,114 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +namespace tflite { +namespace { + +constexpr int kInputTensor = 0; +constexpr int kOutputTensor = 0; + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + + MicroContext* micro_context = GetMicroContext(context); + + TfLiteTensor* input = + micro_context->AllocateTempInputTensor(node, kInputTensor); + TF_LITE_ENSURE(context, input != nullptr); + TfLiteTensor* output = + micro_context->AllocateTempOutputTensor(node, kOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); + + micro_context->DeallocateTempTfLiteTensor(input); + micro_context->DeallocateTempTfLiteTensor(output); + + return kTfLiteOk; +} + +template +void copyCast(const FromT* in, ToT* out, int num_elements) { + std::transform(in, in + num_elements, out, + [](FromT a) { return static_cast(a); }); +} + +template +TfLiteStatus copyToTensor(TfLiteContext* context, const FromT* in, + TfLiteEvalTensor* out, int num_elements) { + switch (out->type) { + case kTfLiteInt8: + copyCast(in, out->data.int8, num_elements); + break; + case kTfLiteInt16: + copyCast(in, out->data.i16, num_elements); + break; + case kTfLiteInt32: + copyCast(in, out->data.i32, num_elements); + break; + case kTfLiteFloat32: + copyCast(in, tflite::micro::GetTensorData(out), num_elements); + break; + default: + // Unsupported type. + MicroPrintf("Output type %s (%d) not supported.", + TfLiteTypeGetName(out->type), out->type); + } + return kTfLiteOk; +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + int num_elements = MatchingFlatSize(tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorShape(output)); + + switch (input->type) { + case kTfLiteInt8: + return copyToTensor(context, input->data.int8, output, num_elements); + case kTfLiteInt16: + return copyToTensor(context, tflite::micro::GetTensorData(input), + output, num_elements); + case kTfLiteInt32: + return copyToTensor(context, tflite::micro::GetTensorData(input), + output, num_elements); + case kTfLiteUInt32: + return copyToTensor(context, + tflite::micro::GetTensorData(input), output, + num_elements); + case kTfLiteFloat32: + return copyToTensor(context, tflite::micro::GetTensorData(input), + output, num_elements); + default: + // Unsupported type. + MicroPrintf("Input type %s (%d) not supported.", + TfLiteTypeGetName(input->type), input->type); + } + return kTfLiteOk; +} +} // namespace + +TfLiteRegistration Register_CAST() { + return tflite::micro::RegisterOp(nullptr, Prepare, Eval); +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/ceil.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/ceil.cpp new file mode 100644 index 0000000..0f09137 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/ceil.cpp @@ -0,0 +1,73 @@ +/* Copyright 2022 The TensorFlow Authors. 
All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/ceil.h" + +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" + +namespace tflite { + +namespace { + +constexpr int kInputTensor = 0; +constexpr int kOutputTensor = 0; + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + MicroContext* micro_context = GetMicroContext(context); + + TfLiteTensor* input = + micro_context->AllocateTempInputTensor(node, kInputTensor); + TF_LITE_ENSURE(context, input != nullptr); + TfLiteTensor* output = + micro_context->AllocateTempOutputTensor(node, kOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); + TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32); + TF_LITE_ENSURE_TYPES_EQ(context, output->type, input->type); + TF_LITE_ENSURE_EQ(context, output->bytes, input->bytes); + TF_LITE_ENSURE_EQ(context, output->dims->size, input->dims->size); + for (int i = 0; i < output->dims->size; ++i) { + TF_LITE_ENSURE_EQ(context, output->dims->data[i], input->dims->data[i]); + } + micro_context->DeallocateTempTfLiteTensor(input); + micro_context->DeallocateTempTfLiteTensor(output); + return kTfLiteOk; +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + + reference_ops::Ceil(tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + + return kTfLiteOk; +} + +} // namespace + +TfLiteRegistration Register_CEIL() { + return tflite::micro::RegisterOp(nullptr, Prepare, Eval); +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/circular_buffer.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/circular_buffer.cpp new file mode 100644 index 0000000..bf69599 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/circular_buffer.cpp @@ -0,0 +1,117 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/circular_buffer.h"
+
+#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/flatbuffer_utils.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
+
+/*
+ * The circular buffer custom operator is used to implement strided streaming
+ * convolutions on TFLite Micro. Each time this operator is invoked, it checks
+ * whether or not to run, based on a predetermined stride in time. If the op
+ * runs, it inserts the input into the end of the output buffer and shifts the
+ * output values towards the start of the buffer. It discards the oldest value
+ * in the output buffer.
+ *
+ * Input: [<input 1>, <input 2>, <input 3>, <input 4>]
+ *
+ * After shifting:
+ * Output: [<input 2>, <input 3>, <input 4>, <input 5>]
+ *
+ * We make some assumptions in this custom operator:
+ * - Input shape must be [1, 1, 1, depth]
+ * - Output shape must be [1, num_slots, 1, depth]
+ * - Input and output types must match.
+ * - Input and output quantization params must be identical.
+ */
+namespace tflite {
+
+void* CircularBufferInit(TfLiteContext* context, const char* buffer,
+                         size_t length) {
+  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
+  OpDataCircularBuffer* op_data = static_cast<OpDataCircularBuffer*>(
+      context->AllocatePersistentBuffer(context, sizeof(OpDataCircularBuffer)));
+
+  if (buffer != nullptr && length > 0) {
+    const uint8_t* buffer_t = reinterpret_cast<const uint8_t*>(buffer);
+    tflite::FlexbufferWrapper wrapper(buffer_t, length);
+    op_data->cycles_max = wrapper.ElementAsInt32(kCircularBufferCyclesMaxIndex);
+  } else {
+    op_data->cycles_max = 0;
+  }
+
+  return op_data;
+}
+
+// Shifts the buffer over by the output depth and writes the new input to the
+// end of the buffer.
+// num_slots is the number of samples stored in the output buffer.
+// depth is the size of each sample.
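+// Worked example (illustrative, not part of the kernel): with num_slots = 3
+// and depth = 2, an output buffer holding {a0, a1, b0, b1, c0, c1} and a new
+// input {d0, d1} becomes {b0, b1, c0, c1, d0, d1}. In EvalInt8() below, the
+// memmove drops the oldest sample by shifting the remaining
+// (num_slots - 1) * depth values to the front, and the memcpy writes the new
+// sample into the last slot.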
+void EvalInt8(const int8_t* input, int num_slots, int depth, int8_t* output) {
+  memmove(output, &output[depth], (num_slots - 1) * depth);
+  memcpy(&output[(num_slots - 1) * depth], input, depth);
+}
+
+TfLiteStatus CircularBufferEval(TfLiteContext* context, TfLiteNode* node) {
+  const TfLiteEvalTensor* input =
+      tflite::micro::GetEvalInput(context, node, kCircularBufferInputTensor);
+  TfLiteEvalTensor* output =
+      tflite::micro::GetEvalOutput(context, node, kCircularBufferOutputTensor);
+
+  TFLITE_DCHECK(node->user_data != nullptr);
+  OpDataCircularBuffer* data =
+      reinterpret_cast<OpDataCircularBuffer*>(node->user_data);
+
+  int num_slots = output->dims->data[1];
+  int depth = output->dims->data[2] * output->dims->data[3];
+
+  if (input->type == kTfLiteInt8) {
+    EvalInt8(tflite::micro::GetTensorData<int8_t>(input), num_slots, depth,
+             tflite::micro::GetTensorData<int8_t>(output));
+  } else {
+    MicroPrintf("Type %s (%d) not supported.",
+                TfLiteTypeGetName(input->type), input->type);
+    return kTfLiteError;
+  }
+
+  if (--data->cycles_until_run != 0) {
+    // Signal the interpreter to end current run if the delay before op invoke
+    // has not been reached.
+    // TODO(b/149795762): Add kTfLiteAbort to TfLiteStatus enum.
+    return static_cast<TfLiteStatus>(kTfLiteAbort);
+  }
+
+  data->cycles_until_run = data->cycles_max;
+
+  return kTfLiteOk;
+}
+
+TfLiteRegistration* Register_CIRCULAR_BUFFER() {
+  static TfLiteRegistration r = tflite::micro::RegisterOp(
+      CircularBufferInit, CircularBufferPrepare, CircularBufferEval);
+  return &r;
+}
+
+}  // namespace tflite
diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/circular_buffer.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/circular_buffer.h
new file mode 100644
index 0000000..c52a1ec
--- /dev/null
+++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/circular_buffer.h
@@ -0,0 +1,48 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+struct OpDataCircularBuffer { + int cycles_until_run; + int cycles_max; +}; + +TfLiteStatus CircularBufferPrepare(TfLiteContext* context, TfLiteNode* node); + +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_KERNELS_CIRCULAR_BUFFER_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/circular_buffer_common.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/circular_buffer_common.cpp new file mode 100644 index 0000000..b6d1f0d --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/circular_buffer_common.cpp @@ -0,0 +1,97 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/flatbuffer_utils.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/circular_buffer.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" + +namespace tflite { + +// The CircularBuffer op has one input and one output tensor. +const int kCircularBufferInputTensor = 0; +const int kCircularBufferOutputTensor = 0; + +// Indices into the init flexbuffer's vector. +// The parameter's name is in the comment that follows. +// Elements in the vectors are ordered alphabetically by parameter name. +const int kCircularBufferCyclesMaxIndex = 0; // 'cycles_max' + +// TODO(b/149795762): Add this to TfLiteStatus enum. +const TfLiteStatus kTfLiteAbort = static_cast(-9); + +TfLiteStatus CircularBufferPrepare(TfLiteContext* context, TfLiteNode* node) { + MicroContext* micro_context = GetMicroContext(context); + + TfLiteTensor* input = + micro_context->AllocateTempInputTensor(node, kCircularBufferInputTensor); + TfLiteTensor* output = micro_context->AllocateTempOutputTensor( + node, kCircularBufferOutputTensor); + + TFLITE_DCHECK(node->user_data != nullptr); + OpDataCircularBuffer* op_data = + static_cast(node->user_data); + + TF_LITE_ENSURE(context, input != nullptr); + TF_LITE_ENSURE(context, output != nullptr); + TF_LITE_ENSURE_EQ(context, input->dims->data[0], output->dims->data[0]); + TF_LITE_ENSURE_EQ(context, 1, input->dims->data[1]); + TF_LITE_ENSURE_EQ(context, input->dims->data[2], output->dims->data[2]); + TF_LITE_ENSURE_EQ(context, output->dims->data[3], input->dims->data[3]); + + TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type); + + // The circular buffer custom operator currently only supports int8. 
+ TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteInt8); + + if (op_data->cycles_max <= 0) { + // The last circular buffer layer simply accumulates outputs, and does not + // run periodically. + // TODO(b/150001379): Move this special case logic to the tflite flatbuffer. + static int cb_prepare_count = 0; + cb_prepare_count++; + // These checks specifically work for the only two streaming models + // supported on TFLM. They use the shape of the output tensor along with the + // layer number to determine if the circular buffer period should be 1 or 2. + + // These models are outlined int the following documents: + // https://docs.google.com/document/d/1lc_G2ZFhjiKFo02UHjBaljye1xsL0EkfybkaVELEE3Q/edit?usp=sharing + // https://docs.google.com/document/d/1pGc42PuWyrk-Jy1-9qeqtggvsmHr1ifz8Lmqfpr2rKA/edit?usp=sharing + if (output->dims->data[1] == 5 || output->dims->data[1] == 13 || + output->dims->data[1] == 25 || + (cb_prepare_count == 5 && output->dims->data[2] == 2 && + output->dims->data[3] == 96)) { + op_data->cycles_max = 1; + cb_prepare_count = 0; + } else { + op_data->cycles_max = 2; + } + } + op_data->cycles_until_run = op_data->cycles_max; + node->user_data = op_data; + + micro_context->DeallocateTempTfLiteTensor(input); + micro_context->DeallocateTempTfLiteTensor(output); + + return kTfLiteOk; +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/circular_buffer_flexbuffers_generated_data.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/circular_buffer_flexbuffers_generated_data.h new file mode 100644 index 0000000..2fbf4fe --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/circular_buffer_flexbuffers_generated_data.h @@ -0,0 +1,22 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_MICRO_KERNELS_FLEXBUFFERS_GENERATED_DATA_H +#define TENSORFLOW_LITE_MICRO_KERNELS_FLEXBUFFERS_GENERATED_DATA_H + +extern const int g_gen_data_size_circular_buffer_config; +extern const unsigned char g_gen_data_circular_buffer_config[]; + +#endif diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/comparisons.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/comparisons.cpp new file mode 100644 index 0000000..1a8fbb0 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/comparisons.cpp @@ -0,0 +1,606 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/comparisons.h" + +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +namespace tflite { + +namespace { + +struct OpData { + ComparisonParams params; +}; + +constexpr int kInputTensor1 = 0; +constexpr int kInputTensor2 = 1; +constexpr int kOutputTensor = 0; + +TfLiteStatus EqualEval(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + const OpData* data = static_cast(node->user_data); + + const TfLiteEvalTensor* input1 = + tflite::micro::GetEvalInput(context, node, kInputTensor1); + const TfLiteEvalTensor* input2 = + tflite::micro::GetEvalInput(context, node, kInputTensor2); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + + RuntimeShape input1_shape = tflite::micro::GetTensorShape(input1); + RuntimeShape input2_shape = tflite::micro::GetTensorShape(input2); + RuntimeShape output_shape = tflite::micro::GetTensorShape(output); + bool* output_data = tflite::micro::GetTensorData(output); + + bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2); + switch (input1->type) { + case kTfLiteBool: + requires_broadcast + ? reference_ops::Broadcast4DSlowEqualNoScaling( + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, + output_data) + : reference_ops::EqualNoScaling( + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, + output_data); + break; + case kTfLiteFloat32: + requires_broadcast + ? reference_ops::Broadcast4DSlowEqualNoScaling( + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, + output_data) + : reference_ops::EqualNoScaling( + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, + output_data); + break; + case kTfLiteInt32: + requires_broadcast + ? reference_ops::Broadcast4DSlowEqualNoScaling( + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, + output_data) + : reference_ops::EqualNoScaling( + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, + output_data); + break; + case kTfLiteInt64: + requires_broadcast + ? reference_ops::Broadcast4DSlowEqualNoScaling( + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, + output_data) + : reference_ops::EqualNoScaling( + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, + output_data); + break; + case kTfLiteInt8: + requires_broadcast + ? 
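+            // Illustrative note, not part of the kernel: int8 inputs may
+            // carry different quantization params, so the *WithScaling
+            // comparison variants rescale each side with its own
+            // multiplier/shift (configured in Prepare) before comparing,
+            // whereas the float/int paths above compare values directly.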
reference_ops::Broadcast4DSlowEqualWithScaling( + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, + output_data) + : reference_ops::EqualWithScaling( + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, + output_data); + break; + default: + MicroPrintf("Type %s (%d) not supported.", + TfLiteTypeGetName(input1->type), input1->type); + return kTfLiteError; + } + return kTfLiteOk; +} + +// TODO(renjieliu): Refactor the logic to avoid duplications. +TfLiteStatus NotEqualEval(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + const OpData* data = static_cast(node->user_data); + + const TfLiteEvalTensor* input1 = + tflite::micro::GetEvalInput(context, node, kInputTensor1); + const TfLiteEvalTensor* input2 = + tflite::micro::GetEvalInput(context, node, kInputTensor2); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + + RuntimeShape input1_shape = tflite::micro::GetTensorShape(input1); + RuntimeShape input2_shape = tflite::micro::GetTensorShape(input2); + RuntimeShape output_shape = tflite::micro::GetTensorShape(output); + bool* output_data = tflite::micro::GetTensorData(output); + + bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2); + switch (input1->type) { + case kTfLiteBool: + requires_broadcast + ? reference_ops::Broadcast4DSlowNotEqualNoScaling( + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, + output_data) + : reference_ops::NotEqualNoScaling( + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, + output_data); + break; + case kTfLiteFloat32: + requires_broadcast + ? reference_ops::Broadcast4DSlowNotEqualNoScaling( + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, + output_data) + : reference_ops::NotEqualNoScaling( + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, + output_data); + break; + case kTfLiteInt32: + requires_broadcast + ? reference_ops::Broadcast4DSlowNotEqualNoScaling( + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, + output_data) + : reference_ops::NotEqualNoScaling( + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, + output_data); + break; + case kTfLiteInt64: + requires_broadcast + ? reference_ops::Broadcast4DSlowNotEqualNoScaling( + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, + output_data) + : reference_ops::NotEqualNoScaling( + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, + output_data); + break; + case kTfLiteInt8: + requires_broadcast + ? 
reference_ops::Broadcast4DSlowNotEqualWithScaling( + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, + output_data) + : reference_ops::NotEqualWithScaling( + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, + output_data); + break; + default: + MicroPrintf("Type %s (%d) not supported.", + TfLiteTypeGetName(input1->type), input1->type); + return kTfLiteError; + } + return kTfLiteOk; +} + +TfLiteStatus GreaterEval(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + const OpData* data = static_cast(node->user_data); + + const TfLiteEvalTensor* input1 = + tflite::micro::GetEvalInput(context, node, kInputTensor1); + const TfLiteEvalTensor* input2 = + tflite::micro::GetEvalInput(context, node, kInputTensor2); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + + RuntimeShape input1_shape = tflite::micro::GetTensorShape(input1); + RuntimeShape input2_shape = tflite::micro::GetTensorShape(input2); + RuntimeShape output_shape = tflite::micro::GetTensorShape(output); + bool* output_data = tflite::micro::GetTensorData(output); + + bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2); + switch (input1->type) { + case kTfLiteFloat32: + requires_broadcast + ? reference_ops::Broadcast4DSlowGreaterNoScaling( + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, + output_data) + : reference_ops::GreaterNoScaling( + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, + output_data); + break; + case kTfLiteInt32: + requires_broadcast + ? reference_ops::Broadcast4DSlowGreaterNoScaling( + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, + output_data) + : reference_ops::GreaterNoScaling( + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, + output_data); + break; + case kTfLiteInt64: + requires_broadcast + ? reference_ops::Broadcast4DSlowGreaterNoScaling( + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, + output_data) + : reference_ops::GreaterNoScaling( + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, + output_data); + break; + case kTfLiteInt8: + requires_broadcast + ? 
reference_ops::Broadcast4DSlowGreaterWithScaling( + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, + output_data) + : reference_ops::GreaterWithScaling( + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, + output_data); + break; + default: + MicroPrintf("Type %s (%d) not supported.", + TfLiteTypeGetName(input1->type), input1->type); + return kTfLiteError; + } + return kTfLiteOk; +} + +TfLiteStatus GreaterEqualEval(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + const OpData* data = static_cast(node->user_data); + + const TfLiteEvalTensor* input1 = + tflite::micro::GetEvalInput(context, node, kInputTensor1); + const TfLiteEvalTensor* input2 = + tflite::micro::GetEvalInput(context, node, kInputTensor2); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + + RuntimeShape input1_shape = tflite::micro::GetTensorShape(input1); + RuntimeShape input2_shape = tflite::micro::GetTensorShape(input2); + RuntimeShape output_shape = tflite::micro::GetTensorShape(output); + bool* output_data = tflite::micro::GetTensorData(output); + + bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2); + switch (input1->type) { + case kTfLiteFloat32: + requires_broadcast + ? reference_ops::Broadcast4DSlowGreaterEqualNoScaling( + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, + output_data) + : reference_ops::GreaterEqualNoScaling( + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, + output_data); + break; + case kTfLiteInt32: + requires_broadcast + ? reference_ops::Broadcast4DSlowGreaterEqualNoScaling( + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, + output_data) + : reference_ops::GreaterEqualNoScaling( + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, + output_data); + break; + case kTfLiteInt64: + requires_broadcast + ? reference_ops::Broadcast4DSlowGreaterEqualNoScaling( + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, + output_data) + : reference_ops::GreaterEqualNoScaling( + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, + output_data); + break; + case kTfLiteInt8: + requires_broadcast + ? 
reference_ops::Broadcast4DSlowGreaterEqualWithScaling( + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, + output_data) + : reference_ops::GreaterEqualWithScaling( + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, + output_data); + break; + default: + MicroPrintf("Type %s (%d) not supported.", + TfLiteTypeGetName(input1->type), input1->type); + return kTfLiteError; + } + return kTfLiteOk; +} + +TfLiteStatus LessEval(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + const OpData* data = static_cast(node->user_data); + + const TfLiteEvalTensor* input1 = + tflite::micro::GetEvalInput(context, node, kInputTensor1); + const TfLiteEvalTensor* input2 = + tflite::micro::GetEvalInput(context, node, kInputTensor2); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + + RuntimeShape input1_shape = tflite::micro::GetTensorShape(input1); + RuntimeShape input2_shape = tflite::micro::GetTensorShape(input2); + RuntimeShape output_shape = tflite::micro::GetTensorShape(output); + bool* output_data = tflite::micro::GetTensorData(output); + + bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2); + switch (input1->type) { + case kTfLiteFloat32: + requires_broadcast + ? reference_ops::Broadcast4DSlowLessNoScaling( + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, + output_data) + : reference_ops::LessNoScaling( + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, + output_data); + break; + case kTfLiteInt32: + requires_broadcast + ? reference_ops::Broadcast4DSlowLessNoScaling( + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, + output_data) + : reference_ops::LessNoScaling( + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, + output_data); + break; + case kTfLiteInt64: + requires_broadcast + ? reference_ops::Broadcast4DSlowLessNoScaling( + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, + output_data) + : reference_ops::LessNoScaling( + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, + output_data); + break; + case kTfLiteInt8: + requires_broadcast + ? 
reference_ops::Broadcast4DSlowLessWithScaling( + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, + output_data) + : reference_ops::LessWithScaling( + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, + output_data); + break; + default: + MicroPrintf("Type %s (%d) not supported.", + TfLiteTypeGetName(input1->type), input1->type); + return kTfLiteError; + } + return kTfLiteOk; +} + +TfLiteStatus LessEqualEval(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + const OpData* data = static_cast(node->user_data); + + const TfLiteEvalTensor* input1 = + tflite::micro::GetEvalInput(context, node, kInputTensor1); + const TfLiteEvalTensor* input2 = + tflite::micro::GetEvalInput(context, node, kInputTensor2); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + + RuntimeShape input1_shape = tflite::micro::GetTensorShape(input1); + RuntimeShape input2_shape = tflite::micro::GetTensorShape(input2); + RuntimeShape output_shape = tflite::micro::GetTensorShape(output); + bool* output_data = tflite::micro::GetTensorData(output); + + bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2); + switch (input1->type) { + case kTfLiteFloat32: + requires_broadcast + ? reference_ops::Broadcast4DSlowLessEqualNoScaling( + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, + output_data) + : reference_ops::LessEqualNoScaling( + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, + output_data); + break; + case kTfLiteInt32: + requires_broadcast + ? reference_ops::Broadcast4DSlowLessEqualNoScaling( + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, + output_data) + : reference_ops::LessEqualNoScaling( + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, + output_data); + break; + case kTfLiteInt64: + requires_broadcast + ? reference_ops::Broadcast4DSlowLessEqualNoScaling( + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, + output_data) + : reference_ops::LessEqualNoScaling( + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, + output_data); + break; + case kTfLiteInt8: + requires_broadcast + ? 
reference_ops::Broadcast4DSlowLessEqualWithScaling( + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, + output_data) + : reference_ops::LessEqualWithScaling( + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, + output_data); + break; + default: + MicroPrintf("Type %s (%d) not supported.", + TfLiteTypeGetName(input1->type), input1->type); + return kTfLiteError; + } + return kTfLiteOk; +} + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(OpData)); +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + OpData* data = static_cast(node->user_data); + + MicroContext* micro_context = GetMicroContext(context); + + TfLiteTensor* input1 = + micro_context->AllocateTempInputTensor(node, kInputTensor1); + TF_LITE_ENSURE(context, input1 != nullptr); + TfLiteTensor* input2 = + micro_context->AllocateTempInputTensor(node, kInputTensor2); + TF_LITE_ENSURE(context, input2 != nullptr); + + if (input1->type == kTfLiteInt8) { + auto input1_offset = -input1->params.zero_point; + auto input2_offset = -input2->params.zero_point; + const int kLeftShift = 8; + + int32_t input1_multiplier; + int input1_shift; + QuantizeMultiplierSmallerThanOneExp( + static_cast(input1->params.scale), &input1_multiplier, + &input1_shift); + int32_t input2_multiplier; + int input2_shift; + QuantizeMultiplierSmallerThanOneExp( + static_cast(input2->params.scale), &input2_multiplier, + &input2_shift); + + data->params.left_shift = kLeftShift; + data->params.input1_offset = input1_offset; + data->params.input1_multiplier = input1_multiplier; + data->params.input1_shift = input1_shift; + data->params.input2_offset = input2_offset; + data->params.input2_multiplier = input2_multiplier; + data->params.input2_shift = input2_shift; + } + + micro_context->DeallocateTempTfLiteTensor(input1); + micro_context->DeallocateTempTfLiteTensor(input2); + + return kTfLiteOk; +} + +} // namespace + +TfLiteRegistration Register_EQUAL() { + return tflite::micro::RegisterOp(Init, Prepare, EqualEval); +} + +TfLiteRegistration Register_NOT_EQUAL() { + return tflite::micro::RegisterOp(Init, Prepare, NotEqualEval); +} + +TfLiteRegistration Register_GREATER() { + return tflite::micro::RegisterOp(Init, Prepare, GreaterEval); +} + +TfLiteRegistration Register_GREATER_EQUAL() { + return tflite::micro::RegisterOp(Init, Prepare, GreaterEqualEval); +} + +TfLiteRegistration Register_LESS() { + return tflite::micro::RegisterOp(Init, Prepare, LessEval); +} + +TfLiteRegistration Register_LESS_EQUAL() { + return tflite::micro::RegisterOp(Init, Prepare, LessEqualEval); +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/complex_abs.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/complex_abs.cpp new file mode 100644 index 0000000..94a6107 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/complex_abs.cpp @@ -0,0 +1,103 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
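For the int8 variants of the comparison kernels registered above, Prepare() caches the negated zero points, a fixed left shift of 8, and a multiplier/shift pair per input derived with QuantizeMultiplierSmallerThanOneExp. A minimal sketch of the per-element math the *WithScaling reference ops apply with those parameters follows; the helper names and the simplified (non-rounding, non-saturating) fixed-point multiply are illustrative only, not the SDK implementation.

// Illustrative sketch only (hypothetical names): how an int8 "WithScaling"
// comparison rescales both operands into a common fixed-point domain before
// comparing. Rounding and saturation of the real kernels are omitted.
#include <cstdint>

// Simplified stand-in for MultiplyByQuantizedMultiplierSmallerThanOneExp;
// `shift` is <= 0 when produced by QuantizeMultiplierSmallerThanOneExp.
static int32_t ScaleQ31Sketch(int32_t x, int32_t multiplier, int shift) {
  const int64_t prod = static_cast<int64_t>(x) * multiplier;
  return static_cast<int32_t>((prod >> 31) >> (-shift));
}

static bool GreaterWithScalingSketch(int8_t a, int8_t b, int32_t input1_offset,
                                     int32_t input2_offset,
                                     int32_t input1_multiplier, int input1_shift,
                                     int32_t input2_multiplier, int input2_shift,
                                     int left_shift /* 8 in Prepare() above */) {
  // The offsets are the negated zero points; the left shift keeps precision
  // before the per-tensor multipliers (each < 1.0) are applied.
  const int32_t a_scaled = ScaleQ31Sketch(
      (a + input1_offset) * (1 << left_shift), input1_multiplier, input1_shift);
  const int32_t b_scaled = ScaleQ31Sketch(
      (b + input2_offset) * (1 << left_shift), input2_multiplier, input2_shift);
  return a_scaled > b_scaled;
}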
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include +#include +#include +#include + +#include +#include + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h" + +namespace tflite { +namespace ops { +namespace micro { +namespace complex_abs { + +using std::complex; + +constexpr int kInputTensor = 0; +constexpr int kOutputTensor = 0; + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + + // Check type and shape of the input tensor + MicroContext* micro_context = GetMicroContext(context); + + TfLiteTensor* input = + micro_context->AllocateTempInputTensor(node, kInputTensor); + + TfLiteTensor* output = + micro_context->AllocateTempOutputTensor(node, kOutputTensor); + + if (input->type != kTfLiteComplex64 || output->type != kTfLiteFloat32) { + TF_LITE_KERNEL_LOG(context, "Types input %s (%d), output %s (%d) not supported.", + TfLiteTypeGetName(input->type), input->type, + TfLiteTypeGetName(output->type), output->type); + return kTfLiteError; + } + + micro_context->DeallocateTempTfLiteTensor(input); + micro_context->DeallocateTempTfLiteTensor(output); + + return kTfLiteOk; +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + TF_LITE_ENSURE(context, input != nullptr); + TF_LITE_ENSURE(context, output != nullptr); + + size_t total_input_els = 1; + for (size_t dim_ix = 0; dim_ix < input->dims->size; dim_ix++) { + total_input_els *= input->dims->data[dim_ix]; + } + + for (size_t ix = 0; ix < total_input_els; ix++) { + output->data.f[ix] = sqrt(pow(input->data.c64[ix].re, 2) + pow(input->data.c64[ix].im, 2)); + } + + return kTfLiteOk; +} + +} // namespace complex_abs +} // namespace micro +} // namespace ops + +TfLiteRegistration Register_COMPLEX_ABS() { + return {/*init=*/nullptr, + /*free=*/nullptr, + /*prepare=*/ops::micro::complex_abs::Prepare, + /*invoke=*/ops::micro::complex_abs::Eval, + /*profiling_string=*/nullptr, + /*builtin_code=*/0, + /*custom_name=*/nullptr, + /*version=*/0}; +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/concatenation.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/concatenation.cpp new file mode 100644 index 0000000..13a5d63 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/concatenation.cpp @@ -0,0 +1,263 @@ +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. 
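The COMPLEX_ABS kernel above walks the flattened input and writes sqrt(re^2 + im^2) per element. Below is a self-contained sketch of the same element-wise magnitude, using std::hypot, which is mathematically equivalent but robust against intermediate overflow; the struct name is hypothetical and merely mirrors the layout of TfLiteComplex64.

// Standalone sketch of the element-wise magnitude computed by COMPLEX_ABS.
#include <cmath>
#include <cstddef>

struct Complex64Sketch {  // mirrors TfLiteComplex64 { float re, im; }
  float re;
  float im;
};

void ComplexAbsSketch(const Complex64Sketch* input, float* output,
                      size_t count) {
  for (size_t ix = 0; ix < count; ++ix) {
    // std::hypot(re, im) == sqrt(re*re + im*im), without overflowing the
    // intermediate squares.
    output[ix] = std::hypot(input[ix].re, input[ix].im);
  }
}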
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/concatenation.h" + +#include + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/portable_tensor.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +namespace tflite { + +// Patched by Edge Impulse +constexpr int RuntimeShape::kMaxSmallSize; + +namespace { + +constexpr int kMaxInputNum = 10; // Maximum number of input tensors +constexpr int kOutputTensor = 0; + +struct OpData { + ConcatenationParams params; +}; + +// Handles negative axis index, coerces to positive index value. +inline int CalculatePositiveAxis(int axis, const TfLiteTensor* output_tensor) { + if (axis >= 0) { + return axis; + } else { + return NumDimensions(output_tensor) + axis; + } +} + +// The following functions are helpers to get tensor data in the format that the +// reference op implementation expects. They provide the same functionality as +// class VectorOfTensors and class VectorOfQuantizedTensors in TFLite. + +// Gets shapes from a list of tensors. +inline void GetAllInputTensorShapes(const TfLiteContext* context, + const TfLiteNode* node, + RuntimeShape all_shapes[kMaxInputNum]) { + TFLITE_DCHECK(context != nullptr); + TFLITE_DCHECK(node != nullptr); + for (int i = 0; i < node->inputs->size; ++i) { + const TfLiteEvalTensor* t = tflite::micro::GetEvalInput(context, node, i); + RuntimeShape shape = tflite::micro::GetTensorShape(t); + all_shapes[i].ReplaceWith(shape.DimensionsCount(), shape.DimsData()); + } +} + +// Get shape pointers from a list of shapes. +inline void GetShapesPointers(const RuntimeShape* shapes, size_t num, + const RuntimeShape* pointers[]) { + for (size_t i = 0; i < num; ++i) { + pointers[i] = &shapes[i]; + } +} + +// Gets data pointers from a list of tensors. 
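CalculatePositiveAxis above is the usual negative-axis normalization: an axis of -k counts back from the last dimension. A minimal standalone sketch, with the function and example purely illustrative and working on a raw rank rather than a TfLiteTensor:

#include <cassert>

// Same rule as CalculatePositiveAxis, expressed against a plain rank.
inline int NormalizeAxisSketch(int axis, int num_dimensions) {
  return axis >= 0 ? axis : num_dimensions + axis;
}

void AxisExampleSketch() {
  // For a 4-D (e.g. NHWC) output, concatenating on the channel dimension can
  // be requested as axis 3 or axis -1; both normalize to 3.
  assert(NormalizeAxisSketch(3, 4) == 3);
  assert(NormalizeAxisSketch(-1, 4) == 3);
}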
+template +inline void GetAllInputTensorData(const TfLiteContext* context, + const TfLiteNode* node, + T* all_data[kMaxInputNum]) { + TFLITE_DCHECK(context != nullptr); + TFLITE_DCHECK(node != nullptr); + for (int i = 0; i < node->inputs->size; ++i) { + const TfLiteEvalTensor* t = tflite::micro::GetEvalInput(context, node, i); + all_data[i] = tflite::micro::GetTensorData(t); + } +} + +template +void EvalUnquantized(TfLiteContext* context, TfLiteNode* node) { + // Collect the shapes and data pointer of input tensors + RuntimeShape inputs_shape[kMaxInputNum]; + const RuntimeShape* inputs_shape_ptr[kMaxInputNum]; + const data_type* inputs_data[kMaxInputNum]; + GetAllInputTensorShapes(context, node, inputs_shape); + GetShapesPointers(inputs_shape, node->inputs->size, inputs_shape_ptr); + GetAllInputTensorData(context, node, inputs_data); + + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + + TFLITE_DCHECK(node->user_data != nullptr); + const OpData* data = static_cast(node->user_data); + + reference_ops::Concatenation(data->params, inputs_shape_ptr, inputs_data, + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); +} + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(OpData)); +} + + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + // This function only checks the types. Additional shape validations are + // performed in the reference implementation called during Eval(). + const TfLiteConcatenationParams* params = + reinterpret_cast(node->builtin_data); + + MicroContext* micro_context = GetMicroContext(context); + + TfLiteTensor* input_tensor = micro_context->AllocateTempInputTensor(node, 0); + TF_LITE_ENSURE(context, input_tensor != nullptr); + TfLiteType input_type = input_tensor->type; + TfLiteTensor* output_tensor = + micro_context->AllocateTempOutputTensor(node, kOutputTensor); + TF_LITE_ENSURE(context, output_tensor != nullptr); + TfLiteType output_type = output_tensor->type; + + micro_context->DeallocateTempTfLiteTensor(input_tensor); + micro_context->DeallocateTempTfLiteTensor(output_tensor); + + // Check activation and input type + TF_LITE_ENSURE_EQ(context, params->activation, kTfLiteActNone); + TF_LITE_ENSURE(context, + input_type == kTfLiteFloat32 || input_type == kTfLiteInt8 || + input_type == kTfLiteInt16 || input_type == kTfLiteInt32 || + input_type == kTfLiteInt64 || input_type == kTfLiteBool); + + // Output type must match input type + TF_LITE_ENSURE_EQ(context, output_type, input_type); + + // This implementation does not support large number of input tensors + const int num_inputs = NumInputs(node); + TF_LITE_ENSURE(context, num_inputs <= kMaxInputNum); + + // Shapes with dimensions >4 are not yet supported with static allocation. + for (int i = 0; i < num_inputs; ++i) { + TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, i); + TF_LITE_ENSURE(context, input != nullptr); + int num_dimensions = NumDimensions(input); + + if (num_dimensions > RuntimeShape::kMaxSmallSize) { + MicroPrintf( + "Op Concatenation does not currently support num dimensions > %d " + "Tensor has %d dimensions.", + RuntimeShape::kMaxSmallSize, num_dimensions); + return kTfLiteError; + } + micro_context->DeallocateTempTfLiteTensor(input); + } + + // Calculate OpData. 
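EvalUnquantized above hands the collected shapes and data pointers to reference_ops::Concatenation. The copy pattern behind that call is, in essence, the sketch below; it uses plain vectors instead of RuntimeShape, assumes the axis is already normalized, and assumes all inputs share the output's quantization (the unquantized path).

#include <cstdint>
#include <vector>

// Hedged sketch of axis-wise concatenation: for every "outer" index, each
// input contributes one contiguous block of (its axis extent * inner size)
// elements to the output.
template <typename T>
void ConcatenationSketch(const std::vector<std::vector<T>>& inputs,
                         const std::vector<std::vector<int>>& shapes,
                         int axis, T* output) {
  // outer_size: product of dims before `axis` (identical for all inputs);
  // base_inner: product of dims after `axis`.
  int64_t outer_size = 1;
  for (int d = 0; d < axis; ++d) outer_size *= shapes[0][d];
  int64_t base_inner = 1;
  for (size_t d = axis + 1; d < shapes[0].size(); ++d) base_inner *= shapes[0][d];

  T* out_ptr = output;
  for (int64_t outer = 0; outer < outer_size; ++outer) {
    for (size_t i = 0; i < inputs.size(); ++i) {
      const int64_t copy_size = shapes[i][axis] * base_inner;
      const T* in_ptr = inputs[i].data() + outer * copy_size;
      for (int64_t k = 0; k < copy_size; ++k) *out_ptr++ = in_ptr[k];
    }
  }
}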
+ TFLITE_DCHECK(node->user_data != nullptr); + OpData* data = static_cast(node->user_data); + + TfLiteTensor* output = + micro_context->AllocateTempOutputTensor(node, kOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); + + switch (output_type) { // Already know in/outtypes are same. + case kTfLiteBool: + case kTfLiteFloat32: + case kTfLiteInt16: + case kTfLiteInt32: + case kTfLiteInt64: { + data->params.axis = CalculatePositiveAxis(params->axis, output); + data->params.inputs_count = node->inputs->size; + break; + } + case kTfLiteInt8: { + data->params.axis = CalculatePositiveAxis(params->axis, output); + data->params.inputs_count = node->inputs->size; + + float* input_scales = + reinterpret_cast(context->AllocatePersistentBuffer( + context, node->inputs->size * sizeof(float))); + + int32_t* input_zero_points = + reinterpret_cast(context->AllocatePersistentBuffer( + context, node->inputs->size * sizeof(int32_t))); + + // Allocate persistent scale and zeropoint buffers. + // Store input scale and zero point values in OpParams: + for (int i = 0; i < node->inputs->size; ++i) { + TfLiteTensor* t = micro_context->AllocateTempInputTensor(node, i); + TF_LITE_ENSURE(context, t != nullptr); + input_scales[i] = t->params.scale; + input_zero_points[i] = t->params.zero_point; + micro_context->DeallocateTempTfLiteTensor(t); + } + + data->params.input_scale = input_scales; + data->params.input_zeropoint = input_zero_points; + data->params.output_zeropoint = output->params.zero_point; + data->params.output_scale = output->params.scale; + break; + } + default: + MicroPrintf("Op Concatenation does not currently support Type '%s'.", + TfLiteTypeGetName(output_type)); + return kTfLiteError; + } + + micro_context->DeallocateTempTfLiteTensor(output); + + return kTfLiteOk; +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + const TfLiteEvalTensor* output_tensor = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + TF_LITE_ENSURE(context, output_tensor != nullptr); + TfLiteType output_type = output_tensor->type; + + switch (output_type) { // Already know in/outtypes are same. + case kTfLiteFloat32: + EvalUnquantized(context, node); + break; + case kTfLiteInt32: + EvalUnquantized(context, node); + break; + case kTfLiteInt8: + EvalUnquantized(context, node); + break; + case kTfLiteInt64: + EvalUnquantized(context, node); + break; + case kTfLiteInt16: + EvalUnquantized(context, node); + break; + case kTfLiteBool: + EvalUnquantized(context, node); + break; + + default: + MicroPrintf("Op Concatenation does not currently support Type '%s'.", + TfLiteTypeGetName(output_type)); + return kTfLiteError; + } + + return kTfLiteOk; +} + +} // namespace + +TfLiteRegistration Register_CONCATENATION() { + return tflite::micro::RegisterOp(Init, Prepare, Eval); +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/conv.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/conv.cpp new file mode 100644 index 0000000..32177b3 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/conv.cpp @@ -0,0 +1,2213 @@ +// Patched by Edge Impulse to include reference and hardware-accelerated kernels +#include "../../../../classifier/ei_classifier_config.h" +#if 0 == 1 +/* noop */ +#elif EI_CLASSIFIER_TFLITE_ENABLE_CMSIS_NN == 1 +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
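For the kTfLiteInt8 branch of the concatenation Prepare() above, the per-input scale and zero-point arrays let the quantized kernel requantize inputs whose parameters differ from the output's. A hedged sketch of that per-element requantization; the helper name is hypothetical, and in the SDK this work happens inside reference_ops::Concatenation using the params built in Prepare().

#include <algorithm>
#include <cmath>
#include <cstdint>

inline int8_t RequantizeForConcatSketch(int8_t in, float in_scale,
                                        int32_t in_zero_point, float out_scale,
                                        int32_t out_zero_point) {
  // Dequantize to a real value, then quantize with the output's parameters.
  const float real_value = in_scale * (in - in_zero_point);
  const int32_t q = out_zero_point +
                    static_cast<int32_t>(std::round(real_value / out_scale));
  // Clamp to the int8 range.
  return static_cast<int8_t>(std::min<int32_t>(127, std::max<int32_t>(-128, q)));
}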
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/conv.h" + +#include "edge-impulse-sdk/CMSIS/NN/Include/arm_nn_types.h" +#include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnfunctions.h" +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/conv.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/conv.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/padding.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +namespace tflite { +namespace { + +struct OpData { + OpDataConv reference_op_data; + + // Index to buffer for optimizations if applicable. + int buffer_idx; +}; + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(OpData)); +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + TFLITE_DCHECK(node->builtin_data != nullptr); + + int32_t buf_size = 0; + const auto& params = + *(static_cast(node->builtin_data)); + OpData* data = static_cast(node->user_data); + + MicroContext* micro_context = GetMicroContext(context); + + TfLiteTensor* input = + micro_context->AllocateTempInputTensor(node, kConvInputTensor); + TF_LITE_ENSURE(context, input != nullptr); + TfLiteTensor* filter = + micro_context->AllocateTempInputTensor(node, kConvWeightsTensor); + TF_LITE_ENSURE(context, filter != nullptr); + TfLiteTensor* output = + micro_context->AllocateTempOutputTensor(node, kConvOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); + + RuntimeShape input_shape = GetTensorShape(input); + RuntimeShape output_shape = GetTensorShape(output); + + // Initialize cmsis_nn input dimensions + cmsis_nn_dims input_dims; + input_dims.n = MatchingDim(input_shape, 0, output_shape, 0); + input_dims.h = input->dims->data[1]; + input_dims.w = input->dims->data[2]; + input_dims.c = input_shape.Dims(3); + + // Initialize cmsis_nn filter dimensions + cmsis_nn_dims filter_dims; + filter_dims.n = output_shape.Dims(3); + filter_dims.h = filter->dims->data[1]; + filter_dims.w = filter->dims->data[2]; + filter_dims.c = input_dims.c; + + // Initialize cmsis_nn output dimensions + cmsis_nn_dims output_dims; + output_dims.n = input_dims.n; + output_dims.h = output->dims->data[1]; + output_dims.w = output->dims->data[2]; + output_dims.c = output_shape.Dims(3); + + if (filter->type == kTfLiteInt4) { + int filter_size = + RuntimeShape(filter->dims->size, + 
reinterpret_cast(filter->dims->data)) + .FlatSize(); + context->RequestScratchBufferInArena( + context, filter_size, &data->reference_op_data.filter_buffer_index); + } + + if (input->type == kTfLiteInt8 || input->type == kTfLiteInt16) { + const int num_channels = filter->dims->data[kConvQuantizedDimension]; + data->reference_op_data.per_channel_output_multiplier = + static_cast(context->AllocatePersistentBuffer( + context, num_channels * sizeof(int32_t))); + data->reference_op_data.per_channel_output_shift = + static_cast(context->AllocatePersistentBuffer( + context, num_channels * sizeof(int32_t))); + } + + TF_LITE_ENSURE_STATUS(CalculateOpDataConv( + context, node, params, input_dims.w, input_dims.h, filter_dims.w, + filter_dims.h, output_dims.w, output_dims.h, input->type, + &data->reference_op_data)); + + if (input->type == kTfLiteInt8 || input->type == kTfLiteInt16) { + // Initialize cmsis_nn convolution parameters + cmsis_nn_conv_params conv_params; + conv_params.input_offset = -input->params.zero_point; + conv_params.output_offset = output->params.zero_point; + conv_params.stride.h = params.stride_height; + conv_params.stride.w = params.stride_width; + conv_params.dilation.h = params.dilation_height_factor; + conv_params.dilation.w = params.dilation_width_factor; + conv_params.padding.h = data->reference_op_data.padding.height; + conv_params.padding.w = data->reference_op_data.padding.width; + conv_params.activation.min = data->reference_op_data.output_activation_min; + conv_params.activation.max = data->reference_op_data.output_activation_max; + + if (input->type == kTfLiteInt8) { + buf_size = arm_convolve_wrapper_s8_get_buffer_size( + &conv_params, &input_dims, &filter_dims, &output_dims); + } else if (input->type == kTfLiteInt16) { + TF_LITE_ENSURE_EQ(context, input->params.zero_point, 0); + TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0); + buf_size = arm_convolve_wrapper_s16_get_buffer_size( + &conv_params, &input_dims, &filter_dims, &output_dims); + } + + if (buf_size > 0) { + TF_LITE_ENSURE_STATUS(context->RequestScratchBufferInArena( + context, buf_size, &data->buffer_idx)); + } else { + data->buffer_idx = -1; + } + } + + micro_context->DeallocateTempTfLiteTensor(output); + micro_context->DeallocateTempTfLiteTensor(input); + micro_context->DeallocateTempTfLiteTensor(filter); + + return kTfLiteOk; +} + +TfLiteStatus EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, + const TfLiteConvParams& params, + const OpData& data, + const TfLiteEvalTensor* input, + const TfLiteEvalTensor* filter, + const TfLiteEvalTensor* bias, + TfLiteEvalTensor* output) { + cmsis_nn_conv_params conv_params; + conv_params.dilation.h = params.dilation_height_factor; + conv_params.dilation.w = params.dilation_width_factor; + + // Initialize cmsis_nn convolution parameters + conv_params.input_offset = -data.reference_op_data.input_zero_point; + conv_params.output_offset = data.reference_op_data.output_zero_point; + conv_params.stride.h = params.stride_height; + conv_params.stride.w = params.stride_width; + conv_params.padding.h = data.reference_op_data.padding.height; + conv_params.padding.w = data.reference_op_data.padding.width; + conv_params.activation.min = data.reference_op_data.output_activation_min; + conv_params.activation.max = data.reference_op_data.output_activation_max; + + // Initialize cmsis_nn per channel quantization parameters + cmsis_nn_per_channel_quant_params quant_params; + quant_params.multiplier = const_cast( + 
data.reference_op_data.per_channel_output_multiplier); + quant_params.shift = + const_cast(data.reference_op_data.per_channel_output_shift); + + RuntimeShape filter_shape = tflite::micro::GetTensorShape(filter); + RuntimeShape input_shape = tflite::micro::GetTensorShape(input); + RuntimeShape output_shape = tflite::micro::GetTensorShape(output); + RuntimeShape bias_shape = tflite::micro::GetTensorShape(bias); + + // Consistency check. + TFLITE_DCHECK_LE(conv_params.activation.min, conv_params.activation.max); + TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); + const int batch_size = MatchingDim(input_shape, 0, output_shape, 0); + const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3); + const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3); + if (tflite::micro::GetOptionalTensorData(bias)) { + TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth); + } + + // Initialize cmsis_nn dimensions + // Input + cmsis_nn_dims input_dims; + input_dims.n = batch_size; + input_dims.h = input_shape.Dims(1); + input_dims.w = input_shape.Dims(2); + input_dims.c = input_depth; + + // Filter + cmsis_nn_dims filter_dims; + filter_dims.n = output_depth; + filter_dims.h = filter_shape.Dims(1); + filter_dims.w = filter_shape.Dims(2); + filter_dims.c = input_depth; + + // Bias + cmsis_nn_dims bias_dims; + bias_dims.n = 1; + bias_dims.h = 1; + bias_dims.w = 1; + bias_dims.c = output_depth; + + // Output + cmsis_nn_dims output_dims; + output_dims.n = batch_size; + output_dims.h = output_shape.Dims(1); + output_dims.w = output_shape.Dims(2); + output_dims.c = output_depth; + + // Initialize cmsis_nn context + cmsis_nn_context ctx; + ctx.buf = nullptr; + ctx.size = 0; + + if (data.buffer_idx > -1) { + ctx.buf = context->GetScratchBuffer(context, data.buffer_idx); + // Note: ctx.size is currently not used in cmsis_nn. 
+ // The buffer should be allocated in the Prepare function through + // arm_convolve_wrapper_s8_get_buffer_size + } + + // arm_convolve_wrapper_s8 dispatches the optimized kernel accordingly with + // the parameters passed + TFLITE_DCHECK_EQ( + arm_convolve_wrapper_s8( + &ctx, &conv_params, &quant_params, &input_dims, + tflite::micro::GetTensorData(input), &filter_dims, + tflite::micro::GetTensorData(filter), &bias_dims, + tflite::micro::GetOptionalTensorData(bias), &output_dims, + tflite::micro::GetTensorData(output)), + ARM_CMSIS_NN_SUCCESS); + + return kTfLiteOk; +} + +TfLiteStatus EvalQuantizedPerChannel16x8( + TfLiteContext* context, TfLiteNode* node, const TfLiteConvParams& params, + const OpData& data, const TfLiteEvalTensor* input, + const TfLiteEvalTensor* filter, const TfLiteEvalTensor* bias, + TfLiteEvalTensor* output) { + cmsis_nn_conv_params conv_params; + conv_params.dilation.h = params.dilation_height_factor; + conv_params.dilation.w = params.dilation_width_factor; + + // Initialize cmsis_nn convolution parameters + conv_params.input_offset = -data.reference_op_data.input_zero_point; + conv_params.output_offset = data.reference_op_data.output_zero_point; + conv_params.stride.h = params.stride_height; + conv_params.stride.w = params.stride_width; + conv_params.padding.h = data.reference_op_data.padding.height; + conv_params.padding.w = data.reference_op_data.padding.width; + conv_params.activation.min = data.reference_op_data.output_activation_min; + conv_params.activation.max = data.reference_op_data.output_activation_max; + + // Initialize cmsis_nn per channel quantization parameters + cmsis_nn_per_channel_quant_params quant_params; + quant_params.multiplier = const_cast( + data.reference_op_data.per_channel_output_multiplier); + quant_params.shift = + const_cast(data.reference_op_data.per_channel_output_shift); + + RuntimeShape filter_shape = tflite::micro::GetTensorShape(filter); + RuntimeShape input_shape = tflite::micro::GetTensorShape(input); + RuntimeShape output_shape = tflite::micro::GetTensorShape(output); + RuntimeShape bias_shape = tflite::micro::GetTensorShape(bias); + + // Consistency check. 
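The buffer handling above follows the standard TFLite Micro scratch-buffer handshake: Prepare() asks the arena for space sized by arm_convolve_wrapper_s8/s16_get_buffer_size and stores only an index in OpData, and Eval() turns that index back into a pointer. A reduced sketch with a hypothetical kernel, assuming only the TfLiteContext API already included in this file:

#include <cstddef>

struct HypotheticalOpData {
  int buffer_idx;  // -1 means "no scratch buffer needed".
};

TfLiteStatus PrepareScratchSketch(TfLiteContext* context,
                                  HypotheticalOpData* data,
                                  size_t required_bytes) {
  if (required_bytes > 0) {
    // Reserves space in the arena; only an index survives until Eval.
    return context->RequestScratchBufferInArena(context, required_bytes,
                                                &data->buffer_idx);
  }
  data->buffer_idx = -1;
  return kTfLiteOk;
}

void* ResolveScratchSketch(TfLiteContext* context,
                           const HypotheticalOpData* data) {
  // Valid only inside Eval; the arena layout is finalized after Prepare.
  return data->buffer_idx > -1
             ? context->GetScratchBuffer(context, data->buffer_idx)
             : nullptr;
}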
+ TFLITE_DCHECK_LE(conv_params.activation.min, conv_params.activation.max); + TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); + const int batch_size = MatchingDim(input_shape, 0, output_shape, 0); + const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3); + const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3); + if (tflite::micro::GetOptionalTensorData(bias)) { + TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth); + } + + // Initialize cmsis_nn dimensions + // Input + cmsis_nn_dims input_dims; + input_dims.n = batch_size; + input_dims.h = input_shape.Dims(1); + input_dims.w = input_shape.Dims(2); + input_dims.c = input_depth; + + // Filter + cmsis_nn_dims filter_dims; + filter_dims.n = output_depth; + filter_dims.h = filter_shape.Dims(1); + filter_dims.w = filter_shape.Dims(2); + filter_dims.c = input_depth; + + // Bias + cmsis_nn_dims bias_dims; + bias_dims.n = 1; + bias_dims.h = 1; + bias_dims.w = 1; + bias_dims.c = output_depth; + + // Output + cmsis_nn_dims output_dims; + output_dims.n = batch_size; + output_dims.h = output_shape.Dims(1); + output_dims.w = output_shape.Dims(2); + output_dims.c = output_depth; + + // Initialize cmsis_nn context + cmsis_nn_context ctx; + ctx.buf = nullptr; + ctx.size = 0; + + if (data.buffer_idx > -1) { + ctx.buf = context->GetScratchBuffer(context, data.buffer_idx); + // Note: ctx.size is currently not used in cmsis_nn. + // The buffer should be allocated in the Prepare function through + // arm_convolve_wrapper_s8_get_buffer_size + } + + TFLITE_DCHECK_EQ( + arm_convolve_wrapper_s16( + &ctx, &conv_params, &quant_params, &input_dims, + tflite::micro::GetTensorData(input), &filter_dims, + tflite::micro::GetTensorData(filter), &bias_dims, + tflite::micro::GetOptionalTensorData(bias), &output_dims, + tflite::micro::GetTensorData(output)), + ARM_CMSIS_NN_SUCCESS); + + return kTfLiteOk; +} + +TfLiteStatus EvalInt8(TfLiteContext* context, TfLiteNode* node) { + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kConvInputTensor); + const TfLiteEvalTensor* filter = + tflite::micro::GetEvalInput(context, node, kConvWeightsTensor); + const TfLiteEvalTensor* bias = + (NumInputs(node) == 3) + ? tflite::micro::GetEvalInput(context, node, kConvBiasTensor) + : nullptr; + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kConvOutputTensor); + + TFLITE_DCHECK(node->builtin_data != nullptr); + const auto& params = + *(reinterpret_cast(node->builtin_data)); + TFLITE_DCHECK(node->user_data != nullptr); + const OpData& data = *(static_cast(node->user_data)); + TfLiteEvalTensor filter_int8 = tflite::micro::MakeUnpackedInt4Tensor( + context, data.reference_op_data.filter_buffer_index, filter); + + return EvalQuantizedPerChannel(context, node, params, data, input, + &filter_int8, bias, output); +} + +TfLiteStatus EvalInt16x8(TfLiteContext* context, TfLiteNode* node) { + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kConvInputTensor); + const TfLiteEvalTensor* filter = + tflite::micro::GetEvalInput(context, node, kConvWeightsTensor); + const TfLiteEvalTensor* bias = + (NumInputs(node) == 3) + ? 
tflite::micro::GetEvalInput(context, node, kConvBiasTensor) + : nullptr; + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kConvOutputTensor); + + TFLITE_DCHECK(node->builtin_data != nullptr); + const auto& params = + *(reinterpret_cast(node->builtin_data)); + TFLITE_DCHECK(node->user_data != nullptr); + const OpData& data = *(static_cast(node->user_data)); + + return EvalQuantizedPerChannel16x8(context, node, params, data, input, filter, + bias, output); +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kConvInputTensor); + const TfLiteEvalTensor* filter = + tflite::micro::GetEvalInput(context, node, kConvWeightsTensor); + const TfLiteEvalTensor* bias = + (NumInputs(node) == 3) + ? tflite::micro::GetEvalInput(context, node, kConvBiasTensor) + : nullptr; + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kConvOutputTensor); + + TFLITE_DCHECK(node->builtin_data != nullptr); + const auto& params = + *(reinterpret_cast(node->builtin_data)); + TFLITE_DCHECK(node->user_data != nullptr); + const OpData& data = *(static_cast(node->user_data)); + + TF_LITE_ENSURE_EQ(context, input->type, output->type); + TF_LITE_ENSURE_MSG( + context, + input->type == filter->type || + (input->type == kTfLiteInt16 && filter->type == kTfLiteInt8) || + (input->type == kTfLiteInt8 && filter->type == kTfLiteInt4), + "Hybrid models are not supported on TFLite Micro."); + + TfLiteEvalTensor filter_int8 = tflite::micro::MakeUnpackedInt4Tensor( + context, data.reference_op_data.filter_buffer_index, filter); + + switch (input->type) { // Already know in/out types are same. + case kTfLiteFloat32: { +#if EI_TFLITE_DISABLE_CONV_2D_IN_F32 + MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type), + input->type); + return kTfLiteError; +#endif + tflite::reference_ops::Conv( + ConvParamsFloat(params, data.reference_op_data), + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetOptionalTensorData(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output), + tflite::micro::GetTensorShape(nullptr), nullptr); + break; + } + case kTfLiteInt8: +#if EI_TFLITE_DISABLE_CONV_2D_IN_I8 + MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type), + input->type); + return kTfLiteError; +#endif + switch (filter_int8.type) { + case kTfLiteInt8: { + return EvalQuantizedPerChannel(context, node, params, data, input, + &filter_int8, bias, output); + } + + default: { + MicroPrintf("Filter type %s (%d) not supported.", + TfLiteTypeGetName(filter->type), filter->type); + return kTfLiteError; + } + } + + break; + case kTfLiteInt16: + return EvalQuantizedPerChannel16x8(context, node, params, data, input, + filter, bias, output); + break; + default: + MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type), + input->type); + return kTfLiteError; + } + return kTfLiteOk; +} + +} // namespace + +TfLiteRegistration Register_CONV_2D() { + return tflite::micro::RegisterOp(Init, Prepare, Eval); +} + +TfLiteRegistration Register_CONV_2D_INT8() { + return tflite::micro::RegisterOp(Init, Prepare, EvalInt8); +} + +TfLiteRegistration Register_CONV_2D_INT16() { + return tflite::micro::RegisterOp(Init, Prepare, EvalInt16x8); +} + +} // namespace tflite + +#elif 
EI_CLASSIFIER_TFLITE_ENABLE_ARC == 1 +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/conv.h" + +#include "mli_api.h" // NOLINT +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/conv.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/padding.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/mli_function_specializations.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/mli_slicers.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/mli_tf_utils.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/scratch_buf_mgr.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/scratch_buffers.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +namespace tflite { +namespace { + +constexpr int kInputTensor = 0; +constexpr int kFilterTensor = 1; +constexpr int kBiasTensor = 2; +constexpr int kOutputTensor = 0; + +// Conv is quantized along dimension 0: +// https://www.tensorflow.org/lite/performance/quantization_spec +#if defined(MLI_2_0) && !defined(MLI_2_0_KRNL_TEST) +constexpr int kConvQuantizedDimension = 3; +#else +constexpr int kConvQuantizedDimension = 0; +#endif + +// This file has 2 implementation of Conv. + +struct OpData { + TfLitePaddingValues padding; + + // Cached tensor zero point values for quantized operations. + int32_t input_zero_point; + int32_t filter_zero_point; + int32_t output_zero_point; + + // The scaling factor from input to output (aka the 'real multiplier') can + // be represented as a fixed point multiplier plus a left shift. + int32_t output_multiplier; + int output_shift; + + // Per channel output multiplier and shift. + int32_t* per_channel_output_multiplier; + int32_t* per_channel_output_shift; +#ifdef MLI_2_0 + int8_t* per_channel_scale_frac_bits; +#endif + + // The range of the fused activation layer. For example for kNone and + // uint8_t these would be 0 and 255. + int32_t output_activation_min; + int32_t output_activation_max; + + // The result of checking if MLI optimized version of tensors can be used. + bool is_mli_applicable; + + // Tensors in MLI format. 
+ mutable ops::micro::MliTensorInterface mli_in; + mutable ops::micro::MliTensorInterface mli_weights; + mutable ops::micro::MliTensorInterface mli_bias; + mutable ops::micro::MliTensorInterface mli_out; + mli_conv2d_cfg* cfg; + + // Pointer to the mli convolution function. + conv_func_ptr p_mli_krn_conv2d_sa8_sa8_sa32; +}; + +#if !defined(TF_LITE_STRIP_REFERENCE_IMPL) +inline PaddingType RuntimePaddingType(TfLitePadding padding) { + switch (padding) { + case TfLitePadding::kTfLitePaddingSame: + return PaddingType::kSame; + case TfLitePadding::kTfLitePaddingValid: + return PaddingType::kValid; + case TfLitePadding::kTfLitePaddingUnknown: + default: + return PaddingType::kNone; + } +} +#endif + +bool IsMliApplicable(TfLiteContext* context, const TfLiteTensor* input, + const TfLiteTensor* filter, const TfLiteTensor* bias, + const TfLiteConvParams* params) { + const auto* affine_quantization = + reinterpret_cast(filter->quantization.params); + // MLI optimized version only supports int8_t datatype, dilation factor of 1 + // and per-axis quantization of weights (no broadcasting/per-tensor) + bool ret_val = (filter->type == kTfLiteInt8) && + (input->type == kTfLiteInt8) && (bias->type == kTfLiteInt32) && + (params->dilation_width_factor == 1) && + (params->dilation_height_factor == 1) && + (affine_quantization->scale->size == + filter->dims->data[kConvQuantizedDimension]); + return ret_val; +} + +TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node, + const TfLiteConvParams* params, int width, + int height, int filter_width, int filter_height, + int out_width, int out_height, + const TfLiteType data_type, OpData* data) { + bool has_bias = node->inputs->size == 3; + // Check number of inputs/outputs + TF_LITE_ENSURE(context, has_bias || node->inputs->size == 2); + TF_LITE_ENSURE_EQ(context, node->outputs->size, 1); + + // Matching GetWindowedOutputSize in TensorFlow. + auto padding = params->padding; + data->padding = ComputePaddingHeightWidth( + params->stride_height, params->stride_width, + params->dilation_height_factor, params->dilation_width_factor, height, + width, filter_height, filter_width, padding, &out_height, &out_width); + // Note that quantized inference requires that all tensors have their + // parameters set. This is usually done during quantized training. 
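The per-channel multiplier and shift arrays allocated by this kernel hold, for each output channel, the fixed-point form of input_scale * filter_scale[ch] / output_scale. Below is a simplified sketch of that decomposition, mirroring what QuantizeMultiplier does inside PopulateConvolutionQuantizationParams, with underflow and saturation edge cases left out.

#include <cmath>
#include <cstdint>

// Decompose real_multiplier into a Q31 multiplier and a power-of-two shift
// so that real_multiplier ~= multiplier * 2^(shift - 31).
void QuantizeMultiplierSketch(double real_multiplier, int32_t* multiplier,
                              int* shift) {
  if (real_multiplier == 0.0) {
    *multiplier = 0;
    *shift = 0;
    return;
  }
  // real_multiplier = q * 2^shift with q in [0.5, 1).
  const double q = std::frexp(real_multiplier, shift);
  int64_t q_fixed = static_cast<int64_t>(std::llround(q * (1LL << 31)));
  if (q_fixed == (1LL << 31)) {  // Rounded all the way up: renormalize.
    q_fixed /= 2;
    ++*shift;
  }
  *multiplier = static_cast<int32_t>(q_fixed);
}

// Per output channel ch, the value fed to the function above would be
//   input_scale * filter_scale[ch] / output_scale.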
+#if !defined(TF_LITE_STRIP_REFERENCE_IMPL) + MicroContext* micro_context = GetMicroContext(context); + TfLiteTensor* input = + micro_context->AllocateTempInputTensor(node, kInputTensor); + TfLiteTensor* filter = + micro_context->AllocateTempInputTensor(node, kFilterTensor); + TfLiteTensor* bias = + micro_context->AllocateTempInputTensor(node, kBiasTensor); + TfLiteTensor* output = + micro_context->AllocateTempOutputTensor(node, kOutputTensor); + + if (data_type != kTfLiteFloat32 && !data->is_mli_applicable) { + int output_channels = filter->dims->data[kConvQuantizedDimension]; + + TF_LITE_ENSURE_STATUS(tflite::PopulateConvolutionQuantizationParams( + context, input, filter, bias, output, params->activation, + &data->output_multiplier, &data->output_shift, + &data->output_activation_min, &data->output_activation_max, + data->per_channel_output_multiplier, + reinterpret_cast(data->per_channel_output_shift), + output_channels)); + } + + micro_context->DeallocateTempTfLiteTensor(input); + micro_context->DeallocateTempTfLiteTensor(filter); + micro_context->DeallocateTempTfLiteTensor(bias); + micro_context->DeallocateTempTfLiteTensor(output); +#endif + return kTfLiteOk; +} +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(OpData)); +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + TFLITE_DCHECK(node->builtin_data != nullptr); + + OpData* data = static_cast(node->user_data); + const auto params = static_cast(node->builtin_data); + + MicroContext* micro_context = GetMicroContext(context); + + TfLiteTensor* output = + micro_context->AllocateTempOutputTensor(node, kOutputTensor); + TfLiteTensor* input = + micro_context->AllocateTempInputTensor(node, kInputTensor); + TfLiteTensor* filter = + micro_context->AllocateTempInputTensor(node, kFilterTensor); + TfLiteTensor* bias = + micro_context->AllocateTempInputTensor(node, kBiasTensor); + + int input_width = input->dims->data[2]; + int input_height = input->dims->data[1]; +#if defined(MLI_2_0) && !defined(MLI_2_0_KRNL_TEST) + int filter_width = filter->dims->data[1]; + int filter_height = filter->dims->data[0]; +#else + int filter_width = filter->dims->data[2]; + int filter_height = filter->dims->data[1]; +#endif + int output_width = output->dims->data[2]; + int output_height = output->dims->data[1]; + + // Dynamically allocate per-channel quantization parameters. + const int num_channels = filter->dims->data[kConvQuantizedDimension]; + data->per_channel_output_multiplier = + reinterpret_cast(context->AllocatePersistentBuffer( + context, num_channels * sizeof(int32_t))); + data->per_channel_output_shift = + reinterpret_cast(context->AllocatePersistentBuffer( + context, num_channels * sizeof(int32_t))); + + data->is_mli_applicable = + IsMliApplicable(context, input, filter, bias, params); + + // All per-channel quantized tensors need valid zero point and scale arrays. 
+ if (input->type == kTfLiteInt8) { + TF_LITE_ENSURE_EQ(context, filter->quantization.type, + kTfLiteAffineQuantization); + + const auto* affine_quantization = + static_cast(filter->quantization.params); + TF_LITE_ENSURE(context, affine_quantization); + TF_LITE_ENSURE(context, affine_quantization->scale); + TF_LITE_ENSURE(context, affine_quantization->zero_point); + + TF_LITE_ENSURE(context, + affine_quantization->scale->size == 1 || + affine_quantization->scale->size == + filter->dims->data[kConvQuantizedDimension]); + TF_LITE_ENSURE_EQ(context, affine_quantization->scale->size, + affine_quantization->zero_point->size); + } + + TF_LITE_ENSURE_STATUS(CalculateOpData( + context, node, params, input_width, input_height, filter_width, + filter_height, output_width, output_height, input->type, data)); + + data->input_zero_point = input->params.zero_point; + data->filter_zero_point = filter->params.zero_point; + data->output_zero_point = output->params.zero_point; + + if (data->is_mli_applicable) { + data->mli_in = ops::micro::MliTensorInterface(static_cast( + context->AllocatePersistentBuffer(context, sizeof(mli_tensor)))); + data->mli_weights = ops::micro::MliTensorInterface(static_cast( + context->AllocatePersistentBuffer(context, sizeof(mli_tensor)))); + data->mli_bias = ops::micro::MliTensorInterface(static_cast( + context->AllocatePersistentBuffer(context, sizeof(mli_tensor)))); + data->mli_out = ops::micro::MliTensorInterface(static_cast( + context->AllocatePersistentBuffer(context, sizeof(mli_tensor)))); + data->cfg = static_cast( + context->AllocatePersistentBuffer(context, sizeof(mli_conv2d_cfg))); + +#ifdef MLI_2_0 + data->per_channel_scale_frac_bits = + static_cast(context->AllocatePersistentBuffer( + context, 2 * num_channels * sizeof(int16_t))); +#endif + + // Reuse space allocated for OpData parameters. +#ifdef MLI_2_0 + *data->mli_weights.Scale() = + reinterpret_cast(data->per_channel_output_multiplier); + *data->mli_bias.Scale() = + reinterpret_cast(data->per_channel_output_multiplier) + + num_channels; +#else + *data->mli_weights.Scale() = + static_cast(data->per_channel_output_multiplier); + *data->mli_bias.Scale() = + static_cast(data->per_channel_output_shift); +#endif + +#ifdef MLI_2_0 + *data->mli_weights.ZeroPoint() = + reinterpret_cast(data->per_channel_output_shift); + *data->mli_bias.ZeroPoint() = + reinterpret_cast(data->per_channel_output_shift) + + num_channels; +#else + *data->mli_weights.ZeroPoint() = + reinterpret_cast(&data->filter_zero_point); + *data->mli_bias.ZeroPoint() = + reinterpret_cast(&data->filter_zero_point) + sizeof(int16_t); +#endif + +#ifdef MLI_2_0 + *data->mli_weights.ScaleFracBits() = + reinterpret_cast(data->per_channel_scale_frac_bits); + *data->mli_bias.ScaleFracBits() = + reinterpret_cast(data->per_channel_scale_frac_bits) + + num_channels; +#endif + + ops::micro::ConvertToMliTensor(input, &data->mli_in); + ops::micro::ConvertToMliTensorPerChannel(filter, &data->mli_weights, + /* is_bias_tensor = */ false); + ops::micro::ConvertToMliTensorPerChannel(bias, &data->mli_bias, + /* is_bias_tensor = */ true); +#ifdef MLI_2_0 + ops::micro::AdjustBiasTensor(&data->mli_bias, &data->mli_in, + &data->mli_weights); +#endif + ops::micro::ConvertToMliTensor(output, &data->mli_out); + +#ifdef MLI_2_0 + // Choose convolution mli specialized function. 
+ data->p_mli_krn_conv2d_sa8_sa8_sa32 = + mli_krn_conv2d_hwcn(data->mli_weights.MliTensor()); +#else + data->p_mli_krn_conv2d_sa8_sa8_sa32 = + mli_krn_conv2d_hwcn(data->mli_weights.MliTensor(), data->cfg); +#endif + +#ifdef MLI_2_0 + data->cfg->dilation_width = 1; + data->cfg->dilation_height = 1; +#endif + + if (data->output_activation_min == -128 && + data->output_activation_max == 127) { + data->cfg->relu.type = MLI_RELU_NONE; + } else if (params->activation == kTfLiteActRelu) { + data->cfg->relu.type = MLI_RELU_GEN; + } else if (params->activation == kTfLiteActRelu6) { + data->cfg->relu.type = MLI_RELU_6; + } else if (params->activation == kTfLiteActReluN1To1) { + data->cfg->relu.type = MLI_RELU_1; + } else { + data->cfg->relu.type = MLI_RELU_NONE; + } + data->cfg->stride_width = params->stride_width; + data->cfg->stride_height = params->stride_height; + if (params->padding == kTfLitePaddingValid) { + data->cfg->padding_left = 0; + data->cfg->padding_right = 0; + data->cfg->padding_top = 0; + data->cfg->padding_bottom = 0; + } else { + data->cfg->padding_left = data->padding.width; + data->cfg->padding_right = + data->padding.width + data->padding.width_offset; + data->cfg->padding_top = data->padding.height; + data->cfg->padding_bottom = + data->padding.height + data->padding.height_offset; + } + } + + micro_context->DeallocateTempTfLiteTensor(output); + micro_context->DeallocateTempTfLiteTensor(input); + micro_context->DeallocateTempTfLiteTensor(filter); + micro_context->DeallocateTempTfLiteTensor(bias); + return kTfLiteOk; +} + +TfLiteStatus EvalMliQuantizedPerChannel( + TfLiteContext* context, TfLiteNode* node, TfLiteConvParams* params, + const OpData& data, const TfLiteEvalTensor* input, + const TfLiteEvalTensor* filter, const TfLiteEvalTensor* bias, + TfLiteEvalTensor* output) { + // Run Conv MLI kernel + // MLI optimized version only supports int8_t dataype and dilation factor of 1 + if (data.is_mli_applicable) { + // Copy configuration data from external to local memory + mli_conv2d_cfg cfg_local = *data.cfg; + + ops::micro::MliTensorAttachBuffer(input, &data.mli_in); + ops::micro::MliTensorAttachBuffer(filter, &data.mli_weights); + ops::micro::MliTensorAttachBuffer(bias, &data.mli_bias); + ops::micro::MliTensorAttachBuffer(output, &data.mli_out); + + // for height slicing + const int height_dimension = 1; + int in_slice_height = 0; + int out_slice_height = 0; + const int kernel_height = + static_cast(data.mli_weights.Shape()[KRNL_H_DIM_HWC]); + const int overlap = kernel_height - cfg_local.stride_height; + +// for weight slicing (on output channels) +#if defined(MLI_2_0) && !defined(MLI_2_0_KRNL_TEST) + // HWCN layout for weights, output channel dimension is the first dimension. + const int weight_out_ch_dimension = 3; +#else + // NHWC layout for weights, output channel dimension is the first dimension. + const int weight_out_ch_dimension = 0; +#endif + // bias has only 1 dimension + const int bias_out_ch_dimension = 0; + int slice_channels = + static_cast(data.mli_weights.Shape()[weight_out_ch_dimension]); + // Batch-Height-Width-Channel layout means last dimension is output + // channels. 
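The cfg->padding_* values set above come from the padding computed in Prepare(): for SAME padding, the total amount is whatever keeps the output at ceil(input / stride), and any odd leftover pixel goes to the right/bottom edge via the *_offset fields. A small sketch of that arithmetic, with illustrative names that follow the semantics of ComputePaddingHeightWidth:

#include <algorithm>

struct PaddingSketch {
  int before;  // padding_top / padding_left
  int offset;  // extra padding added to bottom / right (before + offset)
};

inline PaddingSketch ComputeSamePaddingSketch(int stride, int dilation,
                                              int in_size, int filter_size) {
  const int effective_filter = (filter_size - 1) * dilation + 1;
  const int out_size = (in_size + stride - 1) / stride;  // ceil division
  const int total_padding =
      std::max(0, (out_size - 1) * stride + effective_filter - in_size);
  PaddingSketch p;
  p.before = total_padding / 2;
  p.offset = total_padding % 2;
  return p;
}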
+ const int out_tensor_ch_dimension = 3; + + // Tensors for data in fast (local) memory and config to copy data from + // external to local memory + mli_tensor weights_local = *data.mli_weights.MliTensor(); + mli_tensor bias_local = *data.mli_bias.MliTensor(); + mli_tensor in_local = *data.mli_in.MliTensor(); + mli_tensor out_local = *data.mli_out.MliTensor(); + + ops::micro::MliTensorInterface weights_local_interface(&weights_local); + ops::micro::MliTensorInterface bias_local_interface(&bias_local); + ops::micro::MliTensorInterface in_local_interface(&in_local); + ops::micro::MliTensorInterface out_local_interface(&out_local); + + mli_mov_cfg_t copy_config; + mli_mov_cfg_for_copy(©_config); + + TF_LITE_ENSURE_STATUS(ops::micro::get_arc_scratch_buffer_for_conv_tensors( + context, &in_local_interface, &weights_local_interface, + &bias_local_interface, &out_local_interface)); + TF_LITE_ENSURE_STATUS(ops::micro::arc_scratch_buffer_calc_slice_size_io( + &in_local_interface, &out_local_interface, kernel_height, + cfg_local.stride_height, cfg_local.padding_top, + cfg_local.padding_bottom, &in_slice_height, &out_slice_height)); + TF_LITE_ENSURE_STATUS( + ops::micro::arc_scratch_buffer_calc_slice_size_weights( + &weights_local_interface, &bias_local_interface, + weight_out_ch_dimension, &slice_channels)); + + /* is_local indicates that the tensor is already in local memory, + so in that case the original tensor can be used, + and there is no need to copy it to the local tensor*/ + const bool in_is_local = + in_local_interface.Data() == data.mli_in.Data(); + const bool out_is_local = + out_local_interface.Data() == data.mli_out.Data(); + const bool b_is_local = + bias_local_interface.Data() == data.mli_bias.Data(); +#ifndef MLI_2_0_KRNL_TEST + const bool w_is_local = weights_local_interface.Data() == + data.mli_weights.Data(); +#endif + +#if defined(MLI_2_0) && !defined(MLI_2_0_KRNL_TEST) + ops::micro::TensorSlicer w_slice(data.mli_weights.MliTensor(), + weight_out_ch_dimension, slice_channels, 0, + 0, 0, true); +#else + ops::micro::TensorSlicer w_slice(data.mli_weights.MliTensor(), + weight_out_ch_dimension, slice_channels); +#endif + ops::micro::TensorSlicer b_slice(data.mli_bias.MliTensor(), + bias_out_ch_dimension, slice_channels); + ops::micro::TensorSlicer out_ch_slice(data.mli_out.MliTensor(), + out_tensor_ch_dimension, + slice_channels, 0, 0, 0, true); + +#ifdef MLI_2_0_KRNL_TEST + mli_tensor* w_ptr = &weights_local; +#else + mli_tensor* w_ptr = w_is_local ? w_slice.Sub() : &weights_local; +#endif + mli_tensor* b_ptr = b_is_local ? b_slice.Sub() : &bias_local; + + void* input_buffer_ptr = NULL; + uint32_t input_buffer_size = 0; + + while (!w_slice.Done()) { +#ifndef MLI_2_0_KRNL_TEST + mli_mov_tensor_sync(w_slice.Sub(), ©_config, w_ptr); +#endif + mli_mov_tensor_sync(b_slice.Sub(), ©_config, b_ptr); + + /* mli_in tensor contains batches of HWC tensors. so it is a 4 dimensional + tensor. because the mli kernel will process one HWC tensor at a time, the + 4 dimensional tensor needs to be sliced into nBatch 3 dimensional tensors. + on top of that there could be a need to also slice in the Height + dimension. for that the sliceHeight has been calculated. 
The tensor slicer + is configured that it will completely slice the nBatch dimension (0) and + slice the height dimension (1) in chunks of 'sliceHeight' */ + ops::micro::TensorSlicer in_slice( + data.mli_in.MliTensor(), height_dimension, in_slice_height, + cfg_local.padding_top, cfg_local.padding_bottom, overlap); + + /* output tensor is already sliced in the output channel dimension. + out_ch_slice.Sub() is the tensor for the amount of output channels of this + iteration of the weight slice loop. This tensor needs to be further + sliced over the batch and height dimension. */ + ops::micro::TensorSlicer out_slice(out_ch_slice.Sub(), height_dimension, + out_slice_height); + + /* setup the pointers to the local or remote tensor to make the code + * inside the loop easier. */ + mli_tensor* in_ptr = in_is_local ? in_slice.Sub() : &in_local; + mli_tensor* out_ptr = out_is_local ? out_slice.Sub() : &out_local; + +#ifdef MLI_2_0_KRNL_TEST + /* Permute weights tensor to the HWCN layout */ + // Checking conditions here to prevent usage non-contiguous buffer memory. + if (data.mli_out.Shape()[out_tensor_ch_dimension] != + out_slice.Sub()->shape[FMAP_C_DIM_HWC] || + data.mli_out.Shape()[height_dimension] != + out_slice.Sub()->shape[FMAP_H_DIM_HWC]) { + MicroPrintf("Slicing is not supported with real-time permutation."); + return kTfLiteError; + } + mli_permute_cfg permute_cfg = {{1, 2, 3, 0}}; + ops::micro::permute_weights(data.mli_weights.MliTensor(), &permute_cfg, + w_ptr, &out_ptr->data); +#endif + + while (!out_slice.Done()) { + if (!out_is_local) { + ops::micro::PrepareLocalTensor(out_slice.Sub(), &out_local); + ops::micro::PrepareLocalTensor(in_slice.Sub(), &in_local); + } + + TF_LITE_ENSURE(context, !in_slice.Done()); + cfg_local.padding_top = in_slice.GetPaddingPre(); + cfg_local.padding_bottom = in_slice.GetPaddingPost(); + + // if same input copy as previous iteration, skip the copy of input +#ifdef MLI_2_0 + if ((in_slice.Sub()->data.mem.pi8 != input_buffer_ptr) || + (mli_hlp_count_elem_num(in_slice.Sub(), 0) != input_buffer_size)) { + mli_mov_tensor_sync(in_slice.Sub(), ©_config, in_ptr); + input_buffer_ptr = in_slice.Sub()->data.mem.pi8; + input_buffer_size = mli_hlp_count_elem_num(in_slice.Sub(), 0); + } + + data.p_mli_krn_conv2d_sa8_sa8_sa32(in_ptr, w_ptr, b_ptr, &cfg_local, + out_ptr); +#else + if ((in_slice.Sub()->data != input_buffer_ptr) || + (mli_hlp_count_elem_num(in_slice.Sub(), 0) != input_buffer_size)) { + mli_mov_tensor_sync(in_slice.Sub(), ©_config, in_ptr); + input_buffer_ptr = in_slice.Sub()->data; + input_buffer_size = mli_hlp_count_elem_num(in_slice.Sub(), 0); + } + data.p_mli_krn_conv2d_sa8_sa8_sa32(in_ptr, w_ptr, b_ptr, &cfg_local, + out_ptr); +#endif + mli_mov_tensor_sync(out_ptr, ©_config, out_slice.Sub()); + + in_slice.Next(); + out_slice.Next(); + } + w_slice.Next(); + b_slice.Next(); + out_ch_slice.Next(); + TF_LITE_ENSURE(context, in_slice.Done()); + } + } + return kTfLiteOk; +} + +void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, + TfLiteConvParams* params, const OpData& data, + const TfLiteEvalTensor* input, + const TfLiteEvalTensor* filter, + const TfLiteEvalTensor* bias, + TfLiteEvalTensor* output, + TfLiteEvalTensor* im2col) { +#if !defined(TF_LITE_STRIP_REFERENCE_IMPL) + ConvParams op_params; + op_params.input_offset = -data.input_zero_point; + op_params.output_offset = data.output_zero_point; + op_params.stride_height = params->stride_height; + op_params.stride_width = params->stride_width; + op_params.dilation_height_factor = 
params->dilation_height_factor; + op_params.dilation_width_factor = params->dilation_width_factor; + op_params.padding_values.height = data.padding.height; + op_params.padding_values.width = data.padding.width; + op_params.quantized_activation_min = data.output_activation_min; + op_params.quantized_activation_max = data.output_activation_max; + + reference_integer_ops::ConvPerChannel( + op_params, data.per_channel_output_multiplier, + data.per_channel_output_shift, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorData(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); +#else + MicroPrintf("Node configuration is not supported by ARC MLI Library."); +#endif +} + +void EvalQuantizedPerChannelInt16(TfLiteContext* context, TfLiteNode* node, + TfLiteConvParams* params, const OpData& data, + const TfLiteEvalTensor* input, + const TfLiteEvalTensor* filter, + const TfLiteEvalTensor* bias, + TfLiteEvalTensor* output) { +#if !defined(TF_LITE_STRIP_REFERENCE_IMPL) + ConvParams op_params; + op_params.input_offset = -data.input_zero_point; + op_params.output_offset = data.output_zero_point; + op_params.stride_height = params->stride_height; + op_params.stride_width = params->stride_width; + op_params.dilation_height_factor = params->dilation_height_factor; + op_params.dilation_width_factor = params->dilation_width_factor; + op_params.padding_values.height = data.padding.height; + op_params.padding_values.width = data.padding.width; + op_params.quantized_activation_min = data.output_activation_min; + op_params.quantized_activation_max = data.output_activation_max; + + reference_integer_ops::ConvPerChannel( + op_params, data.per_channel_output_multiplier, + data.per_channel_output_shift, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorData(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); +#else + MicroPrintf("Node configuration is not supported by ARC MLI Library."); +#endif +} + +void EvalFloat(TfLiteContext* context, TfLiteNode* node, + TfLiteConvParams* params, const OpData& data, + const TfLiteEvalTensor* input, const TfLiteEvalTensor* filter, + const TfLiteEvalTensor* bias, TfLiteEvalTensor* im2col, + TfLiteEvalTensor* hwcn_weights, TfLiteEvalTensor* output) { +#if !defined(TF_LITE_STRIP_REFERENCE_IMPL) + float output_activation_min, output_activation_max; + CalculateActivationRange(params->activation, &output_activation_min, + &output_activation_max); + ConvParams op_params; + op_params.padding_type = RuntimePaddingType(params->padding); + op_params.padding_values.width = data.padding.width; + op_params.padding_values.height = data.padding.height; + op_params.stride_width = params->stride_width; + op_params.stride_height = params->stride_height; + op_params.dilation_width_factor = params->dilation_width_factor; + op_params.dilation_height_factor = params->dilation_height_factor; + op_params.float_activation_min = output_activation_min; + op_params.float_activation_max = output_activation_max; + + reference_ops::Conv(op_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(filter), + 
tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorData(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output), + tflite::micro::GetTensorShape(im2col), + tflite::micro::GetTensorData(im2col)); +#else + MicroPrintf("Type %s (%d) is not supported by ARC MLI Library.", + TfLiteTypeGetName(input->type), input->type); +#endif +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + auto* params = reinterpret_cast(node->builtin_data); + + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + const TfLiteEvalTensor* filter = + tflite::micro::GetEvalInput(context, node, kFilterTensor); + const TfLiteEvalTensor* bias = + tflite::micro::GetEvalInput(context, node, kBiasTensor); + + TFLITE_DCHECK(node->user_data != nullptr); + const OpData& data = *(static_cast(node->user_data)); + + TF_LITE_ENSURE_EQ(context, input->type, output->type); + TF_LITE_ENSURE_MSG( + context, + input->type == filter->type || + (input->type == kTfLiteInt16 && filter->type == kTfLiteInt8), + "Hybrid models are not supported on TFLite Micro."); + + switch (input->type) { // Already know in/out types are same. + case kTfLiteFloat32: +#if EI_TFLITE_DISABLE_CONV_2D_IN_F32 + MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type), + input->type); + return kTfLiteError; +#endif + EvalFloat(context, node, params, data, input, filter, bias, nullptr, + nullptr, output); + break; + case kTfLiteInt8: +#if EI_TFLITE_DISABLE_CONV_2D_IN_I8 + MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type), + input->type); + return kTfLiteError; +#endif + if (data.is_mli_applicable) { + EvalMliQuantizedPerChannel(context, node, params, data, input, filter, + bias, output); + } else { + EvalQuantizedPerChannel(context, node, params, data, input, filter, + bias, output, nullptr); + } + break; + case kTfLiteInt16: + EvalQuantizedPerChannelInt16(context, node, params, data, input, filter, + bias, output); + break; + default: + MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type), + input->type); + return kTfLiteError; + } + return kTfLiteOk; +} + +} // namespace + +TfLiteRegistration Register_CONV_2D() { + return tflite::micro::RegisterOp(Init, Prepare, Eval); +} + +} // namespace tflite + +#elif EI_CLASSIFIER_TFLITE_ENABLE_SILABS_MVP == 1 + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/conv.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/conv.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/padding.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnfunctions.h" + +#include "sl_mvp_ml_conv2d.h" + +namespace tflite { +namespace sl { +namespace conv2d { + +constexpr int kInputTensor = 0; +constexpr int kFilterTensor = 1; +constexpr int kBiasTensor = 2; +constexpr int kOutputTensor = 0; + +// Conv is quantized along dimension 0 of filter tensor. 
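+// (i.e. one scale per output channel: a filter of shape
+// {out_channels, filter_height, filter_width, in_channels} carries
+// out_channels scales.)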
+// https://www.tensorflow.org/lite/performance/quantization_spec +constexpr int kConvQuantizedDimension = 0; + +enum op_support { kMvp, kCmsisNN, kTFLMrefF32, kTFLMrefI8 }; + +struct OpData { + op_support supported; + float activation_min_f32; + float activation_max_f32; + int scratch_buffer_index; + sli_mvp_ml_conv2d_s8_params_t op_params; + + // CMSIS-NN per channel output multiplier and shift. + int32_t *per_channel_output_multiplier; + int32_t *per_channel_output_shift; +}; + +inline float16_t normalize_fp16(float f) +{ + return (float16_t)std::min(std::max(f, SLI_MVP_FP16_MIN), SLI_MVP_FP16_MAX); +} + +inline PaddingType RuntimePaddingType(TfLitePadding padding) +{ + switch (padding) { + case TfLitePadding::kTfLitePaddingSame: + return PaddingType::kSame; + case TfLitePadding::kTfLitePaddingValid: + return PaddingType::kValid; + case TfLitePadding::kTfLitePaddingUnknown: + default: + return PaddingType::kNone; + } +} + +TfLiteStatus PopulateConvolutionQuantizationParams( + TfLiteContext* context, + const TfLiteTensor* input, + const TfLiteTensor* filter, + TfLiteTensor* output, + const TfLiteFusedActivation& activation, + int32_t* output_activation_min, int32_t* output_activation_max, + float16_t* per_channel_scalers, int num_channels, float accumulator_multipler) +{ + auto affine_quantization = + reinterpret_cast(filter->quantization.params); + + // Populate multiplier and shift using affine quantization. + const float input_scale = input->params.scale; + const float output_scale = output->params.scale; + const float* filter_scales = affine_quantization->scale->data; + + for (int i = 0; i < num_channels; ++i) { + // If per-tensor quantization parameter is specified, broadcast it along the + // quantization dimension (channels_out). + const float filter_scale = filter_scales[i]; + const float effective_output_scale = (input_scale * filter_scale) / output_scale; + const float acc_output_scale = effective_output_scale * accumulator_multipler; + per_channel_scalers[i] = normalize_fp16(acc_output_scale); + } + + TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized( + context, activation, output, output_activation_min, + output_activation_max)); + + return kTfLiteOk; +} + +void *Init(TfLiteContext* context, const char* buffer, size_t length) +{ + (void)buffer; + (void)length; + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(OpData)); +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) +{ + int scratch_buffer_size = 0; + + TFLITE_DCHECK(node->user_data != nullptr); + TFLITE_DCHECK(node->builtin_data != nullptr); + + OpData* data = static_cast(node->user_data); + const auto params = static_cast(node->builtin_data); + + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor); + const TfLiteTensor* input = GetInput(context, node, kInputTensor); + const TfLiteTensor* filter = GetInput(context, node, kFilterTensor); + TF_LITE_ENSURE(context, input != nullptr); + TF_LITE_ENSURE(context, output != nullptr); + TF_LITE_ENSURE(context, filter != nullptr); + + data->op_params.batches = input->dims->data[0]; + data->op_params.in_channels = input->dims->data[3]; + data->op_params.input_height = input->dims->data[1]; + data->op_params.input_width = input->dims->data[2]; + data->op_params.out_channels = filter->dims->data[kConvQuantizedDimension]; + data->op_params.output_height = output->dims->data[1]; + 
data->op_params.output_width = output->dims->data[2]; + data->op_params.filter_height = filter->dims->data[1]; + data->op_params.filter_width = filter->dims->data[2]; + data->op_params.input_offset = -input->params.zero_point; + data->op_params.output_offset = output->params.zero_point; + data->op_params.stride_height = params->stride_height; + data->op_params.stride_width = params->stride_width; + data->op_params.dilation_height = params->dilation_height_factor; + data->op_params.dilation_width = params->dilation_width_factor; + data->op_params.padding = params->padding == kTfLitePaddingSame; + + int dummy_height, dummy_width; + const auto padding = ComputePaddingHeightWidth( + params->stride_height, params->stride_width, + params->dilation_height_factor, params->dilation_width_factor, + data->op_params.input_height, data->op_params.input_width, + data->op_params.filter_height, data->op_params.filter_width, + params->padding, + &dummy_height, &dummy_width); + + data->op_params.pad_height = padding.height; + data->op_params.pad_width = padding.width; + + const int num_channels = data->op_params.out_channels; + + if (input->type == kTfLiteInt8) { + if (sli_mvp_ml_conv2d_s8_is_supported(&data->op_params)) { + data->supported = kMvp; + + float16_t *bias_data = static_cast(context->AllocatePersistentBuffer( + context, num_channels * sizeof(float16_t))); + if(bias != nullptr) { + data->op_params.bias = bias_data; + int32_t i32_bias; + for(int i = 0; i < num_channels; i++) { + i32_bias = bias->data.i32[i]; + bias_data[i] = float16_t(i32_bias * SLI_MVP_ACCUMULATOR_SCALER); + } + } else { + data->op_params.bias = nullptr; + } + + float16_t *scaler_data = static_cast(context->AllocatePersistentBuffer( + context, num_channels * sizeof(float16_t))); + data->op_params.output_scaler = scaler_data; + TF_LITE_ENSURE_STATUS(PopulateConvolutionQuantizationParams( + context, input, filter, output, params->activation, + reinterpret_cast(&data->op_params.output_activation_min), + reinterpret_cast(&data->op_params.output_activation_max), + scaler_data, num_channels, SLI_MVP_ACCUMULATOR_MULTIPLIER)); + + } else { + data->per_channel_output_multiplier = static_cast(context->AllocatePersistentBuffer( + context, num_channels * sizeof(int32_t))); + data->per_channel_output_shift = static_cast(context->AllocatePersistentBuffer( + context, num_channels * sizeof(int32_t))); + + int32_t dummy_output_multiplier; + int dummy_output_shift; + TF_LITE_ENSURE_STATUS(tflite::PopulateConvolutionQuantizationParams( + context, input, filter, bias, output, params->activation, + &dummy_output_multiplier, &dummy_output_shift, + reinterpret_cast(&data->op_params.output_activation_min), + reinterpret_cast(&data->op_params.output_activation_max), + data->per_channel_output_multiplier, + reinterpret_cast(data->per_channel_output_shift), + num_channels)); + + if (data->op_params.dilation_height == 1 && data->op_params.dilation_width == 1) { + data->supported = kCmsisNN; + cmsis_nn_conv_params conv_params; + conv_params.input_offset = data->op_params.input_offset; + conv_params.output_offset = data->op_params.output_offset; + conv_params.stride.h = data->op_params.stride_height; + conv_params.stride.w = data->op_params.stride_width; + conv_params.dilation.h = 1; + conv_params.dilation.w = 1; + conv_params.padding.h = data->op_params.pad_height; + conv_params.padding.w = data->op_params.pad_width; + conv_params.activation.min = data->op_params.output_activation_min; + conv_params.activation.max = data->op_params.output_activation_max; + + 
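+        // The dims below are only used here to size the CMSIS-NN scratch
+        // buffer via arm_convolve_wrapper_s8_get_buffer_size(); equivalent
+        // structures are rebuilt from op_params in eval_cmsis_int8() at
+        // invoke time.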
cmsis_nn_dims input_dims; + input_dims.n = data->op_params.batches; + input_dims.h = data->op_params.input_height; + input_dims.w = data->op_params.input_width; + input_dims.c = data->op_params.in_channels; + + cmsis_nn_dims filter_dims; + filter_dims.h = data->op_params.filter_height; + filter_dims.w = data->op_params.filter_width; + + cmsis_nn_dims output_dims; + output_dims.h = data->op_params.output_height; + output_dims.w = data->op_params.output_width; + output_dims.c = data->op_params.out_channels; + + scratch_buffer_size = arm_convolve_wrapper_s8_get_buffer_size( + &conv_params, &input_dims, &filter_dims, &output_dims); + } else { + data->supported = kTFLMrefI8; + } + } + + } else if (input->type == kTfLiteFloat32) { + data->supported = kTFLMrefF32; + CalculateActivationRange(params->activation, + &data->activation_min_f32, + &data->activation_max_f32); + + } else { + TF_LITE_KERNEL_LOG(context, "Type %s not currently supported.", + TfLiteTypeGetName(input->type)); + return kTfLiteError; + } + + if(scratch_buffer_size > 0) { + TF_LITE_ENSURE_STATUS( + context->RequestScratchBufferInArena( + context, scratch_buffer_size, &data->scratch_buffer_index)); + } else { + data->scratch_buffer_index = -1; + } + + return kTfLiteOk; +} + +TfLiteStatus eval_mvp_int8(TfLiteContext* context, + OpData* data, + const TfLiteEvalTensor* input, + const TfLiteEvalTensor* filter, + TfLiteEvalTensor* output) +{ + data->op_params.input = tflite::micro::GetTensorData(input); + data->op_params.output = tflite::micro::GetTensorData(output); + data->op_params.filter = tflite::micro::GetTensorData(filter); + + TF_LITE_ENSURE_EQ(context, SL_STATUS_OK, sli_mvp_ml_conv2d_s8(&data->op_params)); + + return kTfLiteOk; +} + +TfLiteStatus eval_cmsis_int8(TfLiteContext* context, + OpData* data, + const TfLiteEvalTensor* input, + const TfLiteEvalTensor* filter, + const TfLiteEvalTensor* bias, + TfLiteEvalTensor* output) +{ + cmsis_nn_dims input_dims; + input_dims.n = data->op_params.batches; + input_dims.h = data->op_params.input_height; + input_dims.w = data->op_params.input_width; + input_dims.c = data->op_params.in_channels; + + cmsis_nn_dims filter_dims; + filter_dims.n = data->op_params.out_channels; + filter_dims.h = data->op_params.filter_height; + filter_dims.w = data->op_params.filter_width; + filter_dims.c = data->op_params.in_channels; + + cmsis_nn_dims bias_dims; + bias_dims.n = 1; + bias_dims.h = 1; + bias_dims.w = 1; + bias_dims.c = data->op_params.out_channels; + + cmsis_nn_dims output_dims; + output_dims.n = data->op_params.batches; + output_dims.h = data->op_params.output_height; + output_dims.w = data->op_params.output_width; + output_dims.c = data->op_params.out_channels; + + cmsis_nn_per_channel_quant_params quant_params; + quant_params.multiplier = data->per_channel_output_multiplier; + quant_params.shift = data->per_channel_output_shift; + + cmsis_nn_conv_params conv_params; + conv_params.input_offset = data->op_params.input_offset; + conv_params.output_offset = data->op_params.output_offset; + conv_params.stride.h = data->op_params.stride_height; + conv_params.stride.w = data->op_params.stride_width; + conv_params.dilation.h = 1; + conv_params.dilation.w = 1; + conv_params.padding.h = data->op_params.pad_height; + conv_params.padding.w = data->op_params.pad_width; + conv_params.activation.min = data->op_params.output_activation_min; + conv_params.activation.max = data->op_params.output_activation_max; + + cmsis_nn_context ctx; + ctx.buf = nullptr; + ctx.size = 0; + + if (data->scratch_buffer_index 
> -1) { + ctx.buf = context->GetScratchBuffer(context, data->scratch_buffer_index); + } + TFLITE_DCHECK_EQ(ARM_MATH_SUCCESS, + arm_convolve_wrapper_s8( + &ctx, &conv_params, &quant_params, + &input_dims, tflite::micro::GetTensorData(input), + &filter_dims, tflite::micro::GetTensorData(filter), + &bias_dims, bias == nullptr ? NULL : tflite::micro::GetTensorData(bias), + &output_dims, tflite::micro::GetTensorData(output))); + + return kTfLiteOk; +} + +TfLiteStatus eval_tflm_int8(OpData* data, + const TfLiteEvalTensor* input, + const TfLiteEvalTensor* filter, + const TfLiteEvalTensor* bias, + TfLiteEvalTensor* output) +{ + ConvParams op_params; + + op_params.input_offset = data->op_params.input_offset; + op_params.output_offset = data->op_params.output_offset; + op_params.stride_height = data->op_params.stride_height; + op_params.stride_width = data->op_params.stride_width; + op_params.dilation_height_factor = data->op_params.dilation_height; + op_params.dilation_width_factor = data->op_params.dilation_width; + op_params.padding_values.height = data->op_params.pad_height; + op_params.padding_values.width = data->op_params.pad_width; + op_params.quantized_activation_min = data->op_params.output_activation_min; + op_params.quantized_activation_max = data->op_params.output_activation_max; + + reference_integer_ops::ConvPerChannel( + op_params, + data->per_channel_output_multiplier, + data->per_channel_output_shift, + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(bias), + bias == nullptr ? nullptr : tflite::micro::GetTensorData(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + + return kTfLiteOk; +} + +TfLiteStatus eval_float(TfLiteConvParams* params, + const OpData* data, + const TfLiteEvalTensor* input, + const TfLiteEvalTensor* filter, + const TfLiteEvalTensor* bias, + TfLiteEvalTensor* output) +{ + ConvParams op_params; + op_params.padding_type = RuntimePaddingType(params->padding); + op_params.padding_values.width = data->op_params.pad_width; + op_params.padding_values.height = data->op_params.pad_height; + op_params.stride_width = data->op_params.stride_width; + op_params.stride_height = data->op_params.stride_height; + op_params.dilation_width_factor = data->op_params.dilation_width; + op_params.dilation_height_factor = data->op_params.dilation_height; + op_params.float_activation_min = data->activation_min_f32; + op_params.float_activation_max = data->activation_max_f32; + + reference_ops::Conv(op_params, + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(bias), + bias == nullptr ? nullptr : tflite::micro::GetTensorData(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output), + RuntimeShape(), + nullptr); + return kTfLiteOk; +} + +TfLiteStatus Invoke(TfLiteContext* context, TfLiteNode* node) +{ + TfLiteStatus status = kTfLiteError; + + TFLITE_DCHECK(node->user_data != nullptr); + TFLITE_DCHECK(node->builtin_data != nullptr); + + auto* params = reinterpret_cast(node->builtin_data); + OpData* data = static_cast(node->user_data); + + const auto input = tflite::micro::GetEvalInput(context, node, kInputTensor); + const auto filter = tflite::micro::GetEvalInput(context, node, kFilterTensor); + const auto bias = NumInputs(node) == 3 + ? 
tflite::micro::GetEvalInput(context, node, kBiasTensor) + : nullptr; + auto output = tflite::micro::GetEvalOutput(context, node, kOutputTensor); + + if (data->supported == kMvp) { + status = eval_mvp_int8(context, data, input, filter, output); + + } else if (data->supported == kCmsisNN) { + status = eval_cmsis_int8(context, data, input, filter, bias, output); + + } else if (data->supported == kTFLMrefI8) { + status = eval_tflm_int8(data, input, filter, bias, output); + + } else if (data->supported == kTFLMrefF32) { + #if EI_TFLITE_DISABLE_CONV_2D_IN_F32 + TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", + TfLiteTypeGetName(input->type), input->type); + return kTfLiteError; + #endif + + status = eval_float(params, data, input, filter, bias, output); + } + + return status; +} + +} // namespace conv2d +} // namespace sl + +TfLiteRegistration Register_CONV_2D() { + return {/*init=*/sl::conv2d::Init, + /*free=*/nullptr, + /*prepare=*/sl::conv2d::Prepare, + /*invoke=*/sl::conv2d::Invoke, + /*profiling_string=*/nullptr, + /*builtin_code=*/0, + /*custom_name=*/nullptr, + /*version=*/0}; +} + +} // namespace tflite + +#elif EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN == 1 +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/conv.h" + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/conv.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/conv.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/padding.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +#include + +#if ESP_NN +#include "edge-impulse-sdk/porting/espressif/ESP-NN/include/esp_nn.h" +#endif + + +long long conv_total_time = 0; + +namespace tflite { +namespace { + +struct NodeData { + OpDataConv op_data; +#if ESP_NN + int buffer_idx; +#endif +}; + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(NodeData)); +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + TFLITE_DCHECK(node->builtin_data != nullptr); + + NodeData* data = static_cast(node->user_data); + const auto& params = + *(static_cast(node->builtin_data)); + + MicroContext* micro_context = GetMicroContext(context); + + TfLiteTensor* input = + micro_context->AllocateTempInputTensor(node, kConvInputTensor); + TF_LITE_ENSURE(context, input != nullptr); + TfLiteTensor* filter = + micro_context->AllocateTempInputTensor(node, kConvWeightsTensor); + TF_LITE_ENSURE(context, filter != nullptr); + TfLiteTensor* output = + micro_context->AllocateTempOutputTensor(node, kConvOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); + + const int input_width = input->dims->data[2]; + const int input_height = input->dims->data[1]; + const int filter_width = filter->dims->data[2]; + const int filter_height = filter->dims->data[1]; + const int output_width = output->dims->data[2]; + const int output_height = output->dims->data[1]; + + // Dynamically allocate per-channel quantization parameters. + const int num_channels = filter->dims->data[kConvQuantizedDimension]; + data->op_data.per_channel_output_multiplier = + static_cast(context->AllocatePersistentBuffer( + context, num_channels * sizeof(int32_t))); + data->op_data.per_channel_output_shift = + static_cast(context->AllocatePersistentBuffer( + context, num_channels * sizeof(int32_t))); + + // All per-channel quantized tensors need valid zero point and scale arrays. 
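+  // (The filter is expected to use affine per-channel quantization: the checks
+  // below require the scale array to hold either a single value or one value
+  // per output channel, with a zero-point array of matching size.)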
+ if (input->type == kTfLiteInt8) { + TF_LITE_ENSURE_EQ(context, filter->quantization.type, + kTfLiteAffineQuantization); + + const auto* affine_quantization = + static_cast(filter->quantization.params); + TFLITE_DCHECK(affine_quantization != nullptr); + TFLITE_DCHECK(affine_quantization->scale != nullptr); + TFLITE_DCHECK(affine_quantization->zero_point != nullptr); + + TF_LITE_ENSURE(context, + affine_quantization->scale->size == 1 || + affine_quantization->scale->size == + filter->dims->data[kConvQuantizedDimension]); + TF_LITE_ENSURE_EQ(context, affine_quantization->scale->size, + affine_quantization->zero_point->size); + } + + TF_LITE_ENSURE_STATUS(CalculateOpDataConv( + context, node, params, input_width, input_height, filter_width, + filter_height, output_width, output_height, input->type, &data->op_data)); + +#if ESP_NN + if (input->type == kTfLiteInt8) { + data_dims_t input_dims = { + .width = input_width, .height = input_height, + .channels = input->dims->data[3], 1 + }; + data_dims_t output_dims = { + .width = output_width, .height = output_height, + .channels = output->dims->data[3], 1 + }; + data_dims_t filter_dims = {.width = filter_width, .height = filter_height, 0, 0}; + conv_params_t conv_params = { + .in_offset = 0, .out_offset = 0, + .stride = {params.stride_width, params.stride_height}, + .padding = {data->op_data.padding.width, data->op_data.padding.height}, + .dilation = {0, 0}, .activation = {-128, 127} + }; + + int scratch_buf_size = esp_nn_get_conv_scratch_size( + &input_dims, &filter_dims, &output_dims, &conv_params); + if (scratch_buf_size > 0) { + TF_LITE_ENSURE_STATUS(context->RequestScratchBufferInArena( + context, scratch_buf_size, &data->buffer_idx)); + } else { + data->buffer_idx = -1; + } + } +#endif + + micro_context->DeallocateTempTfLiteTensor(output); + micro_context->DeallocateTempTfLiteTensor(input); + micro_context->DeallocateTempTfLiteTensor(filter); + + return kTfLiteOk; +} + +#if ESP_NN +// Fixed-point per-channel-quantization convolution Int8 function wrapper. +inline void EvalQuantizedPerChannel( + TfLiteContext* context, TfLiteNode* node, const TfLiteConvParams& params, + const NodeData& data, const TfLiteEvalTensor* input, + const TfLiteEvalTensor* filter, const TfLiteEvalTensor* bias, + TfLiteEvalTensor* output) { + const int dilation_width_factor = params.dilation_width_factor; + const int dilation_height_factor = params.dilation_height_factor; + + if (dilation_width_factor == 1 && dilation_height_factor == 1) { + // Get parameters. 
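+    // (This fast path is only taken for 1x1 dilation; each batch is passed to
+    // esp_nn_conv_s8() separately below, using flat per-batch offsets into the
+    // input and output buffers.)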
+ RuntimeShape filter_shape = tflite::micro::GetTensorShape(filter); + RuntimeShape input_shape = tflite::micro::GetTensorShape(input); + RuntimeShape output_shape = tflite::micro::GetTensorShape(output); + RuntimeShape bias_shape = tflite::micro::GetTensorShape(bias); + + const int8_t *input_data = tflite::micro::GetTensorData(input); + int8_t *output_data = tflite::micro::GetTensorData(output); + + const int32_t input_offset = -data.op_data.input_zero_point; + const int32_t output_offset = data.op_data.output_zero_point; + const int stride_width = params.stride_width; + const int stride_height = params.stride_height; + const int pad_width = data.op_data.padding.width; + const int pad_height = data.op_data.padding.height; + + const int input_height = input_shape.Dims(1); + const int input_width = input_shape.Dims(2); + const int filter_height = filter_shape.Dims(1); + const int filter_width = filter_shape.Dims(2); + const int output_height = output_shape.Dims(1); + const int output_width = output_shape.Dims(2); + + // Set min and max value of the output. + const int32_t activation_min = data.op_data.output_activation_min; + const int32_t activation_max = data.op_data.output_activation_max; + + // Consistency check. + TFLITE_DCHECK_LE(activation_min, activation_max); + TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); + const int batch_size = MatchingDim(input_shape, 0, output_shape, 0); + const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3); + const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3); + + if (tflite::micro::GetTensorData(bias)) { + TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth); + } + + void *scratch_buf = NULL; + if (data.buffer_idx > -1) { + scratch_buf = context->GetScratchBuffer(context, data.buffer_idx); + } + esp_nn_set_conv_scratch_buf(scratch_buf); + + const int input_size = input_width * input_height * input_depth; + const int output_size = output_width * output_height * output_depth; + + data_dims_t input_dims = { + .width = input_width, .height = input_height, + .channels = input_depth, 1 + }; + data_dims_t output_dims = { + .width = output_width, .height = output_height, + .channels = output_depth, 1 + }; + data_dims_t filter_dims = {.width = filter_width, .height = filter_height, 0, 0}; + conv_params_t conv_params = { + .in_offset = input_offset, .out_offset = output_offset, + .stride = {stride_width, stride_height}, + .padding = {pad_width, pad_height}, + .dilation = {0, 0}, + .activation = {activation_min, activation_max} + }; + quant_data_t quant_data = { + .shift = data.op_data.per_channel_output_shift, + .mult = data.op_data.per_channel_output_multiplier + }; + + for (int i_batch = 0; i_batch < batch_size; i_batch++) { + esp_nn_conv_s8(&input_dims, input_data + i_batch * input_size, + &filter_dims, tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorData(bias), + &output_dims, output_data + i_batch * output_size, + &conv_params, &quant_data); + } + } else { + reference_integer_ops::ConvPerChannel( + ConvParamsQuantized(params, data.op_data), + data.op_data.per_channel_output_multiplier, + data.op_data.per_channel_output_shift, + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorData(bias), + 
tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } +} +#endif + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kConvInputTensor); + const TfLiteEvalTensor* filter = + tflite::micro::GetEvalInput(context, node, kConvWeightsTensor); + const TfLiteEvalTensor* bias = + (NumInputs(node) == 3) + ? tflite::micro::GetEvalInput(context, node, kConvBiasTensor) + : nullptr; + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kConvOutputTensor); + + TFLITE_DCHECK(node->builtin_data != nullptr); + const auto& params = + *(reinterpret_cast(node->builtin_data)); + TFLITE_DCHECK(node->user_data != nullptr); + const auto& data = *(static_cast(node->user_data)); + + TF_LITE_ENSURE_EQ(context, input->type, output->type); + TF_LITE_ENSURE_MSG(context, input->type == filter->type, + "Hybrid models are not supported on TFLite Micro."); + + long long start_time = esp_timer_get_time(); + switch (input->type) { // Already know in/out types are same. + case kTfLiteFloat32: { +#if EI_TFLITE_DISABLE_CONV_2D_IN_F32 + TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", + TfLiteTypeGetName(input->type), input->type); + return kTfLiteError; +#endif + tflite::reference_ops::Conv( + ConvParamsFloat(params, data.op_data), + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorData(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output), + tflite::micro::GetTensorShape(nullptr), nullptr); + break; + } + case kTfLiteInt8: { +#if EI_TFLITE_DISABLE_CONV_2D_IN_I8 + TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", + TfLiteTypeGetName(input->type), input->type); + return kTfLiteError; +#endif +#if ESP_NN + EvalQuantizedPerChannel(context, node, params, data, input, filter, + bias, output); +#else + reference_integer_ops::ConvPerChannel( + ConvParamsQuantized(params, data.op_data), + data.op_data.per_channel_output_multiplier, + data.op_data.per_channel_output_shift, + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorData(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); +#endif + break; + } + case kTfLiteUInt8: { +#if EI_TFLITE_DISABLE_CONV_2D_IN_U8 + TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", + TfLiteTypeGetName(input->type), input->type); + return kTfLiteError; +#endif + //EvalQuantized + reference_ops::Conv(ConvParamsQuantized(params, data.op_data), + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorData(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output), + tflite::micro::GetTensorShape(nullptr), nullptr, + nullptr); + break; + } + default: + TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", + TfLiteTypeGetName(input->type), input->type); + return kTfLiteError; + } + long long time_this_instance = esp_timer_get_time() - start_time; + conv_total_time += time_this_instance; + //printf("time this instance: %llu\n", 
time_this_instance / 1000); + return kTfLiteOk; +} + +} // namespace + +TfLiteRegistration Register_CONV_2D() { + return tflite::micro::RegisterOp(Init, Prepare, Eval); +} + +} // namespace tflite + +#else +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/conv.h" + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/portable_tensor_utils.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/conv.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/conv.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +namespace tflite { +namespace { + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(OpDataConv)); +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kConvInputTensor); + const TfLiteEvalTensor* filter = + tflite::micro::GetEvalInput(context, node, kConvWeightsTensor); + const TfLiteEvalTensor* bias = + (NumInputs(node) == 3) + ? tflite::micro::GetEvalInput(context, node, kConvBiasTensor) + : nullptr; + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kConvOutputTensor); + + TFLITE_DCHECK(node->builtin_data != nullptr); + const auto& params = + *(reinterpret_cast(node->builtin_data)); + TFLITE_DCHECK(node->user_data != nullptr); + const auto& data = *(static_cast(node->user_data)); + + TF_LITE_ENSURE_EQ(context, input->type, output->type); + TF_LITE_ENSURE_MSG( + context, + input->type == filter->type || + (input->type == kTfLiteInt16 && filter->type == kTfLiteInt8) || + (input->type == kTfLiteInt8 && filter->type == kTfLiteInt4), + "Hybrid models are not supported on TFLite Micro."); + + switch (input->type) { // Already know in/out types are same. 
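+    // Dispatch summary for the reference kernels below: float32 uses
+    // reference_ops::Conv, int16 uses the per-channel integer kernel with an
+    // int32 or int64 bias, and int8 uses the per-channel integer kernel,
+    // unpacking int4 weights from the scratch buffer first when needed.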
+ case kTfLiteFloat32: { +#if EI_TFLITE_DISABLE_CONV_2D_IN_F32 + MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type), + input->type); + return kTfLiteError; +#endif + tflite::reference_ops::Conv( + ConvParamsFloat(params, data), tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetOptionalTensorData(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output), + tflite::micro::GetTensorShape(nullptr), nullptr); + break; + } + case kTfLiteInt16: { + switch (bias->type) { + case kTfLiteInt32: { + reference_integer_ops::ConvPerChannel( + ConvParamsQuantized(params, data), + data.per_channel_output_multiplier, data.per_channel_output_shift, + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetOptionalTensorData(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + break; + } + case kTfLiteInt64: { + reference_integer_ops::ConvPerChannel( + ConvParamsQuantized(params, data), + data.per_channel_output_multiplier, data.per_channel_output_shift, + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetOptionalTensorData(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + break; + } + default: + MicroPrintf("Bias type %s (%d) not supported.", + TfLiteTypeGetName(bias->type), bias->type); + return kTfLiteError; + } + break; + } + case kTfLiteInt8: { +#if EI_TFLITE_DISABLE_CONV_2D_IN_I8 + MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type), + input->type); + return kTfLiteError; +#endif + switch (filter->type) { + case kTfLiteInt4: { + int8_t* unpacked_filter_data = static_cast( + context->GetScratchBuffer(context, data.filter_buffer_index)); + tflite::tensor_utils::UnpackDenseInt4IntoInt8( + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(filter).FlatSize(), + unpacked_filter_data); + reference_integer_ops::ConvPerChannel( + ConvParamsQuantized(params, data), + data.per_channel_output_multiplier, data.per_channel_output_shift, + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(filter), unpacked_filter_data, + tflite::micro::GetTensorShape(bias), + tflite::micro::GetOptionalTensorData(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + break; + } + case kTfLiteInt8: { + reference_integer_ops::ConvPerChannel( + ConvParamsQuantized(params, data), + data.per_channel_output_multiplier, data.per_channel_output_shift, + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetOptionalTensorData(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + break; + } + default: + MicroPrintf("Weight type %s (%d) not supported.", + TfLiteTypeGetName(filter->type), filter->type); + return kTfLiteError; + } + break; + } + default: + MicroPrintf("Type %s (%d) not supported.", 
TfLiteTypeGetName(input->type), + input->type); + return kTfLiteError; + } + return kTfLiteOk; +} + +} // namespace + +TfLiteRegistration Register_CONV_2D() { + return tflite::micro::RegisterOp(Init, ConvPrepare, Eval); +} + +} // namespace tflite + +#endif diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/conv.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/conv.h new file mode 100644 index 0000000..2a4b63d --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/conv.h @@ -0,0 +1,116 @@ +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_MICRO_KERNELS_CONV_H_ +#define TENSORFLOW_LITE_MICRO_KERNELS_CONV_H_ + +#include + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" + +namespace tflite { + +struct OpDataConv { + TfLitePaddingValues padding; + + // Cached tensor zero point values for quantized operations. + int32_t input_zero_point; + int32_t filter_zero_point; + int32_t output_zero_point; + + // The scaling factor from input to output (aka the 'real multiplier') can + // be represented as a fixed point multiplier plus a left shift. + int32_t output_multiplier; + int output_shift; + + // Per channel output multiplier and shift. + int32_t* per_channel_output_multiplier; + int32_t* per_channel_output_shift; + + // The range of the fused activation layer. For example for kNone and + // uint8_t these would be 0 and 255. + int32_t output_activation_min; + int32_t output_activation_max; + + // A buffer used to store unpacked filter values. This is used if the source + // tensor is of n-bit precision that cannot be easily processed by kernels. + int filter_buffer_index; +}; + +extern const int kConvInputTensor; +extern const int kConvWeightsTensor; +extern const int kConvBiasTensor; +extern const int kConvOutputTensor; +extern const int kConvQuantizedDimension; + +// Returns a ConvParams struct with all the parameters needed for a +// float computation. +ConvParams ConvParamsFloat(const TfLiteConvParams& params, + const OpDataConv& data); + +// Returns a ConvParams struct with all the parameters needed for a +// quantized computation. +ConvParams ConvParamsQuantized(const TfLiteConvParams& params, + const OpDataConv& data); + +TfLiteStatus CalculateOpDataConv(TfLiteContext* context, TfLiteNode* node, + const TfLiteConvParams& params, int width, + int height, int filter_width, + int filter_height, int out_width, + int out_height, const TfLiteType data_type, + OpDataConv* data); + +TfLiteStatus ConvPrepare(TfLiteContext* context, TfLiteNode* node); + +// This is the most generic TfLiteRegistration. The actual supported types may +// still be target dependent. 
The only requirement is that every implementation +// (reference or optimized) must define this function. +TfLiteRegistration Register_CONV_2D(); + +#if defined(XTENSA) +// Returns a TfLiteRegistration struct for kernel variant that only supports +// int8 activations and int8 weights and always calls the reference +// implementation. +TfLiteRegistration Register_CONV_2D_INT8REF(); +#else +inline TfLiteRegistration Register_CONV_2D_INT8REF() { + return Register_CONV_2D(); +} +#endif + +#if defined(CMSIS_NN) +// Returns a TfLiteRegistration struct for kernel variant that only supports +// int8 activations and int8 weights and uses the latency optimized +// implementations. +TfLiteRegistration Register_CONV_2D_INT8(); + +// Returns a TfLiteRegistration struct for kernel variant that only supports +// int16 activations and int8 weights and uses the latency optimized +// implementations. +TfLiteRegistration Register_CONV_2D_INT16(); + +#else +inline TfLiteRegistration Register_CONV_2D_INT8() { return Register_CONV_2D(); } + +inline TfLiteRegistration Register_CONV_2D_INT16() { + return Register_CONV_2D(); +} +#endif + +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_KERNELS_CONV_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/conv_common.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/conv_common.cpp new file mode 100644 index 0000000..fe23085 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/conv_common.cpp @@ -0,0 +1,204 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/c_api_types.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/padding.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/conv.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" + +namespace tflite { + +const int kConvInputTensor = 0; +const int kConvWeightsTensor = 1; +const int kConvBiasTensor = 2; +const int kConvOutputTensor = 0; + +// Conv is quantized along dimension 0: +// https://www.tensorflow.org/lite/performance/quantization_spec +const int kConvQuantizedDimension = 0; + +// Returns a ConvParams struct with all the parameters needed for a +// float computation. 
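+// (Used by the float paths in conv.cpp roughly as
+//   reference_ops::Conv(ConvParamsFloat(params, data), ...);
+// the fused activation range is folded into float_activation_min/max here.)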
+ConvParams ConvParamsFloat(const TfLiteConvParams& params, + const OpDataConv& data) { + ConvParams op_params; + CalculateActivationRange(params.activation, &op_params.float_activation_min, + &op_params.float_activation_max); + op_params.padding_type = tflite::micro::RuntimePaddingType(params.padding); + op_params.padding_values.width = data.padding.width; + op_params.padding_values.height = data.padding.height; + op_params.stride_width = params.stride_width; + op_params.stride_height = params.stride_height; + op_params.dilation_width_factor = params.dilation_width_factor; + op_params.dilation_height_factor = params.dilation_height_factor; + return op_params; +} + +// Returns a ConvParams struct with all the parameters needed for a +// quantized computation. +ConvParams ConvParamsQuantized(const TfLiteConvParams& params, + const OpDataConv& data) { + ConvParams op_params; + op_params.input_offset = -data.input_zero_point; + op_params.weights_offset = -data.filter_zero_point; + op_params.output_offset = data.output_zero_point; + op_params.output_multiplier = data.output_multiplier; + op_params.output_shift = -data.output_shift; + op_params.padding_type = tflite::micro::RuntimePaddingType(params.padding); + op_params.padding_values.height = data.padding.height; + op_params.padding_values.width = data.padding.width; + op_params.stride_height = params.stride_height; + op_params.stride_width = params.stride_width; + op_params.dilation_height_factor = params.dilation_height_factor; + op_params.dilation_width_factor = params.dilation_width_factor; + op_params.quantized_activation_min = data.output_activation_min; + op_params.quantized_activation_max = data.output_activation_max; + return op_params; +} + +TfLiteStatus CalculateOpDataConv(TfLiteContext* context, TfLiteNode* node, + const TfLiteConvParams& params, int width, + int height, int filter_width, + int filter_height, int out_width, + int out_height, const TfLiteType data_type, + OpDataConv* data) { + bool has_bias = node->inputs->size == 3; + // Check number of inputs/outputs + TF_LITE_ENSURE(context, has_bias || node->inputs->size == 2); + TF_LITE_ENSURE_EQ(context, node->outputs->size, 1); + + // Matching GetWindowedOutputSize in TensorFlow. + auto padding = params.padding; + data->padding = ComputePaddingHeightWidth( + params.stride_height, params.stride_width, params.dilation_height_factor, + params.dilation_width_factor, height, width, filter_height, filter_width, + padding, &out_height, &out_width); + + MicroContext* micro_context = GetMicroContext(context); + + TfLiteTensor* input = + micro_context->AllocateTempInputTensor(node, kConvInputTensor); + TF_LITE_ENSURE(context, input != nullptr); + TfLiteTensor* filter = + micro_context->AllocateTempInputTensor(node, kConvWeightsTensor); + TF_LITE_ENSURE(context, filter != nullptr); + TfLiteTensor* bias = + micro_context->AllocateTempInputTensor(node, kConvBiasTensor); + TfLiteTensor* output = + micro_context->AllocateTempOutputTensor(node, kConvOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); + + // Note that quantized inference requires that all tensors have their + // parameters set. This is usually done during quantized training. 
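+  // ("Parameters" here means the per-tensor scale/zero-point values and the
+  // filter's per-channel scale array, which feed
+  // PopulateConvolutionQuantizationParams() below.)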
+ if (data_type != kTfLiteFloat32) { + int output_channels = filter->dims->data[kConvQuantizedDimension]; + + TF_LITE_ENSURE_STATUS(tflite::PopulateConvolutionQuantizationParams( + context, input, filter, bias, output, params.activation, + &data->output_multiplier, &data->output_shift, + &data->output_activation_min, &data->output_activation_max, + data->per_channel_output_multiplier, data->per_channel_output_shift, + output_channels)); + } + + data->input_zero_point = input->params.zero_point; + data->filter_zero_point = filter->params.zero_point; + data->output_zero_point = output->params.zero_point; + + micro_context->DeallocateTempTfLiteTensor(input); + micro_context->DeallocateTempTfLiteTensor(filter); + micro_context->DeallocateTempTfLiteTensor(output); + micro_context->DeallocateTempTfLiteTensor(bias); + + return kTfLiteOk; +} + +TfLiteStatus ConvPrepare(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + TFLITE_DCHECK(node->builtin_data != nullptr); + + OpDataConv* data = static_cast(node->user_data); + const auto& params = + *(static_cast(node->builtin_data)); + MicroContext* micro_context = GetMicroContext(context); + + TfLiteTensor* output = + micro_context->AllocateTempOutputTensor(node, kConvOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); + TfLiteTensor* input = + micro_context->AllocateTempInputTensor(node, kConvInputTensor); + TF_LITE_ENSURE(context, input != nullptr); + TfLiteTensor* filter = + micro_context->AllocateTempInputTensor(node, kConvWeightsTensor); + TF_LITE_ENSURE(context, filter != nullptr); + + const int input_width = input->dims->data[2]; + const int input_height = input->dims->data[1]; + const int filter_width = filter->dims->data[2]; + const int filter_height = filter->dims->data[1]; + const int output_width = output->dims->data[2]; + const int output_height = output->dims->data[1]; + + // Dynamically allocate per-channel quantization parameters. + if (input->type != kTfLiteFloat32) { + const int num_channels = filter->dims->data[kConvQuantizedDimension]; + data->per_channel_output_multiplier = + static_cast(context->AllocatePersistentBuffer( + context, num_channels * sizeof(int32_t))); + data->per_channel_output_shift = + static_cast(context->AllocatePersistentBuffer( + context, num_channels * sizeof(int32_t))); + } + + // All per-channel quantized tensors need valid zero point and scale arrays. 
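+  // (For example, an int8 filter of shape {16, 3, 3, 8} is expected to carry
+  // 16 per-channel scales, or a single scale when quantized per-tensor; that
+  // is what the size check below enforces.)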
+ if (input->type == kTfLiteInt8 || input->type == kTfLiteInt16) { + TF_LITE_ENSURE_EQ(context, filter->quantization.type, + kTfLiteAffineQuantization); + + const auto* affine_quantization = + static_cast(filter->quantization.params); + TFLITE_DCHECK(affine_quantization != nullptr); + TFLITE_DCHECK(affine_quantization->scale != nullptr); + TFLITE_DCHECK(affine_quantization->zero_point != nullptr); + + TF_LITE_ENSURE(context, + affine_quantization->scale->size == 1 || + affine_quantization->scale->size == + filter->dims->data[kConvQuantizedDimension]); + } + + TF_LITE_ENSURE_STATUS(CalculateOpDataConv( + context, node, params, input_width, input_height, filter_width, + filter_height, output_width, output_height, input->type, data)); + + if (filter->type == kTfLiteInt4) { + int filter_size = + RuntimeShape(filter->dims->size, + reinterpret_cast(filter->dims->data)) + .FlatSize(); + context->RequestScratchBufferInArena(context, filter_size, + &data->filter_buffer_index); + } + + micro_context->DeallocateTempTfLiteTensor(filter); + micro_context->DeallocateTempTfLiteTensor(input); + micro_context->DeallocateTempTfLiteTensor(output); + + return kTfLiteOk; +} +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/conv_test.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/conv_test.h new file mode 100644 index 0000000..cdaaefa --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/conv_test.h @@ -0,0 +1,114 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_LITE_MICRO_KERNELS_CONV_TEST_H_ +#define TENSORFLOW_LITE_MICRO_KERNELS_CONV_TEST_H_ + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_runner.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/micro_ops.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/test_helpers.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/testing/micro_test.h" + +namespace tflite { +namespace testing { + +TfLiteStatus InvokeConv(TfLiteTensor* tensors, int tensors_size, + int output_length, TfLiteConvParams* conv_params, + TfLiteRegistration registration, float* output_data); + +TfLiteStatus InvokeConv(TfLiteTensor* tensors, int tensors_size, + int output_length, TfLiteConvParams* conv_params, + TfLiteRegistration registration, int8_t* output_data); + +TfLiteStatus InvokeConv(TfLiteTensor* tensors, int tensors_size, + int output_length, TfLiteConvParams* conv_params, + TfLiteRegistration registration, uint8_t* output_data); + +TfLiteStatus ValidateConvGoldens(TfLiteTensor* tensors, int tensors_size, + const float* expected_output_data, + int output_length, + TfLiteConvParams* conv_params, + TfLiteRegistration registration, + float* output_data, float tolerance = 1e-5); + +TfLiteStatus ValidateConvGoldens(TfLiteTensor* tensors, int tensors_size, + const int8_t* expected_output_data, + int output_length, + TfLiteConvParams* conv_params, + TfLiteRegistration registration, + int8_t* output_data, float tolerance = 1e-5); + +TfLiteStatus ValidateConvGoldens(TfLiteTensor* tensors, int tensors_size, + const uint8_t* expected_output_data, + int output_length, + TfLiteConvParams* conv_params, + TfLiteRegistration registration, + uint8_t* output_data, float tolerance = 1e-5); + +TfLiteStatus TestConvFloat(int* input_dims_data, const float* input_data, + int* filter_dims_data, const float* filter_data, + int* bias_dims_data, const float* bias_data, + int* output_dims_data, + const float* expected_output_data, + TfLiteConvParams* conv_params, + TfLiteRegistration registration, float* output_data); + +TfLiteStatus TestConvQuantizedPerLayer( + int* input_dims_data, const float* input_data, uint8_t* input_quantized, + float input_scale, int* filter_dims_data, const float* filter_data, + uint8_t* filter_quantized, float filter_scale, int* bias_dims_data, + const float* bias_data, int32_t* bias_quantized, int* output_dims_data, + const float* expected_output_data, uint8_t* expected_output_quantized, + float output_scale, TfLiteConvParams* conv_params, + TfLiteRegistration registration, uint8_t* output_data); + +TfLiteStatus TestConvQuantizedPerChannel( + int* input_dims_data, const float* input_data, int8_t* input_quantized, + float input_scale, int input_zero_point, int* filter_dims_data, + const float* filter_data, int8_t* filter_data_quantized, + int* bias_dims_data, const float* bias_data, int32_t* bias_data_quantized, + float* bias_scales, int* bias_zero_points, int* output_dims_data, + const float* expected_output_data, int8_t* expected_output_data_quantized, + float output_scale, int output_zero_point, TfLiteConvParams* conv_params, + TfLiteRegistration registration, int8_t* output_data, + TfLiteType tensor_weight_type = kTfLiteNoType); + +TfLiteStatus TestConvQuantizedPerChannel( + int* input_dims_data, const float* input_data, int16_t* input_quantized, + float 
input_scale, int input_zero_point, int* filter_dims_data, + const float* filter_data, int8_t* filter_data_quantized, + int* bias_dims_data, const float* bias_data, + std::int64_t* bias_data_quantized, float* bias_scales, + int* bias_zero_points, int* output_dims_data, + const float* expected_output_data, int16_t* expected_output_data_quantized, + float output_scale, int output_zero_point, TfLiteConvParams* conv_params, + TfLiteRegistration registration, int16_t* output_data); + +TfLiteStatus TestConvQuantizedPerChannel( + int* input_dims_data, const float* input_data, int16_t* input_quantized, + float input_scale, int input_zero_point, int* filter_dims_data, + const float* filter_data, int8_t* filter_data_quantized, + int* bias_dims_data, const float* bias_data, int32_t* bias_data_quantized, + float* bias_scales, int* bias_zero_points, int* output_dims_data, + const float* expected_output_data, int16_t* expected_output_data_quantized, + float output_scale, int output_zero_point, TfLiteConvParams* conv_params, + TfLiteRegistration registration, int16_t* output_data); + +} // namespace testing +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_KERNELS_CONV_TEST_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/cumsum.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/cumsum.cpp new file mode 100644 index 0000000..bdc888b --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/cumsum.cpp @@ -0,0 +1,175 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/cumsum.h" + +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +namespace tflite { +namespace { + +constexpr int kInputTensor = 0; +constexpr int kAxisTensor = 1; +constexpr int kOutputTensor = 0; + +constexpr int kCumSumIntegerShift = 20; + +// only used with INT8 tensors +struct OpData { + int32_t output_activation_min; + int32_t output_activation_max; + int32_t input_offset; + int32_t output_offset; + int32_t input_multiplier; + int32_t output_multiplier; + int input_shift; + int output_shift; + int left_shift; +}; + +TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) { + TF_LITE_ENSURE_EQ(context, NumInputs(node), 2); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + + MicroContext* micro_context = GetMicroContext(context); + + TfLiteTensor* input = + micro_context->AllocateTempInputTensor(node, kInputTensor); + TfLiteTensor* axis = + micro_context->AllocateTempInputTensor(node, kAxisTensor); + + TF_LITE_ENSURE(context, + input->type == kTfLiteFloat32 || input->type == kTfLiteInt8); + TF_LITE_ENSURE_EQ(context, axis->type, kTfLiteInt32); + + TF_LITE_ENSURE_EQ(context, NumElements(axis), 1); + + TF_LITE_ENSURE(context, NumDimensions(input) >= 1); + + TfLiteTensor* output = + micro_context->AllocateTempOutputTensor(node, kOutputTensor); + + TF_LITE_ENSURE_EQ(context, input->type, output->type); + TF_LITE_ENSURE(context, HaveSameShapes(input, output)); + + if (output->type == kTfLiteInt8) { + node->user_data = + context->AllocatePersistentBuffer(context, sizeof(OpData)); + OpData* data = static_cast(node->user_data); + + // 8bit -> 8bit general quantized path, with general rescalings + data->input_offset = -input->params.zero_point; + data->output_offset = output->params.zero_point; + data->left_shift = kCumSumIntegerShift; + const double twice_max_input_scale = + 2 * static_cast(input->params.scale); + const double real_input_multiplier = + static_cast(input->params.scale) / twice_max_input_scale; + const double real_output_multiplier = + twice_max_input_scale / + ((1 << data->left_shift) * static_cast(output->params.scale)); + + QuantizeMultiplierSmallerThanOneExp( + real_input_multiplier, &data->input_multiplier, &data->input_shift); + + QuantizeMultiplierSmallerThanOneExp( + real_output_multiplier, &data->output_multiplier, &data->output_shift); + + TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized( + context, kTfLiteActNone, output, &data->output_activation_min, + &data->output_activation_max)); + } + + micro_context->DeallocateTempTfLiteTensor(input); + micro_context->DeallocateTempTfLiteTensor(axis); + micro_context->DeallocateTempTfLiteTensor(output); + + return kTfLiteOk; +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + return CalculateOpData(context, node); +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + const TfLiteEvalTensor* axis_tensor = + tflite::micro::GetEvalInput(context, node, kAxisTensor); + + 
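+  // The exclusive/reverse flags follow the TensorFlow CumSum semantics; for a
+  // quick illustration with the row [1, 2, 3, 4]:
+  //   default             -> [1, 3, 6, 10]
+  //   exclusive           -> [0, 1, 3, 6]
+  //   reverse             -> [10, 9, 7, 4]
+  //   exclusive + reverse -> [9, 7, 4, 0]
+  // The int8 branch below mirrors the quantized ADD rescaling configured in
+  // CalculateOpData (left_shift = kCumSumIntegerShift), so sums are formed at
+  // higher intermediate precision before being rescaled back to int8.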
TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + + auto* cs_params = static_cast(node->builtin_data); + auto input_shape = tflite::micro::GetTensorShape(input); + + int32_t axis = *tflite::micro::GetTensorData(axis_tensor); + if (axis < 0) axis += input_shape.DimensionsCount(); + + if (axis < 0 || axis >= input_shape.DimensionsCount()) { + MicroPrintf("CUMSUM Invalid axis: %d", axis); + return kTfLiteError; + } + + switch (input->type) { + case kTfLiteFloat32: { + reference_ops::CumSum(tflite::micro::GetTensorData(input), + input_shape, axis, cs_params->exclusive, + cs_params->reverse, + tflite::micro::GetTensorData(output)); + return kTfLiteOk; + } break; + + case kTfLiteInt8: { + auto* data = static_cast(node->user_data); + ArithmeticParams params; + params.left_shift = data->left_shift; + params.input1_offset = data->input_offset; + params.input1_multiplier = data->input_multiplier; + params.input1_shift = data->input_shift; + params.output_offset = data->output_offset; + params.output_multiplier = data->output_multiplier; + params.output_shift = data->output_shift; + SetActivationParams(data->output_activation_min, + data->output_activation_max, ¶ms); + reference_ops::CumSum(params, tflite::micro::GetTensorData(input), + input_shape, axis, cs_params->exclusive, + cs_params->reverse, + tflite::micro::GetTensorData(output)); + return kTfLiteOk; + } break; + + default: { + MicroPrintf("CUMSUM only supports FLOAT32 and INT8, got %s.", + TfLiteTypeGetName(output->type)); + return kTfLiteError; + } + } + + return kTfLiteError; +} + +} // namespace + +TfLiteRegistration Register_CUMSUM() { + return tflite::micro::RegisterOp(nullptr, Prepare, Eval); +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/depth_to_space.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/depth_to_space.cpp new file mode 100644 index 0000000..72e1545 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/depth_to_space.cpp @@ -0,0 +1,142 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/depth_to_space.h"
+
+#include <stdint.h>
+
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
+
+namespace tflite {
+namespace {
+
+constexpr int kInputTensor = 0;
+constexpr int kOutputTensor = 0;
+
+// input/output tensor shape rank associations
+constexpr int kBatchRank = 0;
+constexpr int kHeightRank = 1;
+constexpr int kWidthRank = 2;
+constexpr int kDepthRank = 3;
+
+TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
+  auto* params =
+      reinterpret_cast<TfLiteDepthToSpaceParams*>(node->builtin_data);
+
+  TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
+  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
+
+  MicroContext* micro_context = GetMicroContext(context);
+
+  TfLiteTensor* input =
+      micro_context->AllocateTempInputTensor(node, kInputTensor);
+  TF_LITE_ENSURE(context, input != nullptr);
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, kOutputTensor);
+  TF_LITE_ENSURE(context, output != nullptr);
+
+  TF_LITE_ENSURE_EQ(context, NumDimensions(input), 4);
+
+  auto data_type = output->type;
+  TF_LITE_ENSURE(context,
+                 data_type == kTfLiteFloat32 || data_type == kTfLiteInt8);
+  TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
+
+  const int block_size = params->block_size;
+  TF_LITE_ENSURE(context, block_size > 0);
+  const int input_height = input->dims->data[kHeightRank];
+  const int input_width = input->dims->data[kWidthRank];
+  const int input_channels = input->dims->data[kDepthRank];
+  int output_height = input_height * block_size;
+  int output_width = input_width * block_size;
+  int output_channels = input_channels / block_size / block_size;
+
+  TF_LITE_ENSURE_EQ(context, input_height, output_height / block_size);
+  TF_LITE_ENSURE_EQ(context, input_width, output_width / block_size);
+  TF_LITE_ENSURE_EQ(context, input_channels,
+                    output_channels * block_size * block_size);
+
+  // We must update the output tensor dimensions.
+  // The dims storage is expected to be the same area in memory
+  // for both TfLiteTensor and TfLiteEvalTensor. This is important
+  // because TfLiteTensor in the MicroInterpreter is a temporary
+  // allocation. For the KernelRunner interpreter, TfLiteEvalTensor
+  // is a temporary allocation. We must therefore relocate the dims
+  // from the FlatBuffer to the persistent storage arena.
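+  // With block_size = 2, for example, an input of shape [1, 4, 4, 8] becomes
+  // an output of shape [1, 8, 8, 2]; these are the values written into the
+  // relocated dims below, and the reason input_channels must be divisible by
+  // block_size * block_size (checked above).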
+ TfLiteEvalTensor* output_eval = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + TF_LITE_ENSURE_OK(context, tflite::micro::CreateWritableTensorDimsWithCopy( + context, output, output_eval)); + output->dims->data[kBatchRank] = input->dims->data[kBatchRank]; + output->dims->data[kHeightRank] = output_height; + output->dims->data[kWidthRank] = output_width; + output->dims->data[kDepthRank] = output_channels; + + micro_context->DeallocateTempTfLiteTensor(input); + micro_context->DeallocateTempTfLiteTensor(output); + + return kTfLiteOk; +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + return CalculateOpData(context, node); +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + auto* params = + reinterpret_cast(node->builtin_data); + + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + + tflite::DepthToSpaceParams op_params; + op_params.block_size = static_cast(params->block_size); + + switch (input->type) { // Already know in/out types are same. + case kTfLiteFloat32: + reference_ops::DepthToSpace(op_params, + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + break; + case kTfLiteInt8: + reference_ops::DepthToSpace(op_params, + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + break; + default: + MicroPrintf("DEPTH_TO_SPACE only supports FLOAT32 and INT8, got %s.", + TfLiteTypeGetName(output->type)); + return kTfLiteError; + } + + return kTfLiteOk; +} + +} // namespace + +TfLiteRegistration Register_DEPTH_TO_SPACE() { + return tflite::micro::RegisterOp(nullptr, Prepare, Eval); +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/depthwise_conv.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/depthwise_conv.cpp new file mode 100644 index 0000000..000bb0b --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/depthwise_conv.cpp @@ -0,0 +1,2106 @@ +// Patched by Edge Impulse to include reference and hardware-accelerated kernels +#include "../../../../classifier/ei_classifier_config.h" +#if 0 == 1 +/* noop */ +#elif EI_CLASSIFIER_TFLITE_ENABLE_CMSIS_NN == 1 +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/depthwise_conv.h" + +#include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnfunctions.h" +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/padding.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/conv.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +namespace tflite { +namespace { + +struct OpData { + OpDataConv reference_op_data; + + // Index to buffer for optimizations if applicable. + int buffer_idx; +}; + +// Always inline for optimal code size. +void PopulateDwConvParams( + cmsis_nn_dw_conv_params* const dw_conv_params, + cmsis_nn_per_channel_quant_params* const quant_params, + cmsis_nn_dims* const input_dims, cmsis_nn_dims* const filter_dims, + cmsis_nn_dims* const bias_dims, cmsis_nn_dims* const output_dims, + const TfLiteDepthwiseConvParams& params, const OpData& data, + const TfLiteEvalTensor* input, const TfLiteEvalTensor* filter, + const TfLiteEvalTensor* bias, TfLiteEvalTensor* output) + __attribute__((always_inline)); + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(OpData)); +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + TFLITE_DCHECK(node->builtin_data != nullptr); + + OpData* data = static_cast(node->user_data); + const auto& params = + *(reinterpret_cast(node->builtin_data)); + + MicroContext* micro_context = GetMicroContext(context); + + TfLiteTensor* input = + micro_context->AllocateTempInputTensor(node, kDepthwiseConvInputTensor); + TF_LITE_ENSURE(context, input != nullptr); + TfLiteTensor* filter = + micro_context->AllocateTempInputTensor(node, kDepthwiseConvWeightsTensor); + TF_LITE_ENSURE(context, filter != nullptr); + TfLiteTensor* output = + micro_context->AllocateTempOutputTensor(node, kDepthwiseConvOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); + + const TfLiteType data_type = input->type; + int input_width = SizeOfDimension(input, 2); + int input_height = SizeOfDimension(input, 1); + int filter_width = SizeOfDimension(filter, 2); + int filter_height = SizeOfDimension(filter, 1); + int output_width = SizeOfDimension(output, 2); + int output_height = SizeOfDimension(output, 1); + + if (input->type == kTfLiteInt8 || input->type == kTfLiteInt16) { + TF_LITE_ENSURE_EQ(context, filter->quantization.type, + kTfLiteAffineQuantization); + + if (input->type == kTfLiteInt16) { + TF_LITE_ENSURE_EQ(context, input->params.zero_point, 0); + TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0); + } + + // All per-channel quantized tensors need valid zero point and scale arrays. 
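+    // DEPTHWISE_CONV_2D filters use a [1, H, W, output_channels] layout and
+    // are quantized per channel along dimension 3
+    // (kDepthwiseConvQuantizedDimension), so the scale and zero-point arrays
+    // below are expected to hold one entry per output channel; a single-entry
+    // scale array again means per-tensor quantization.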
+ const auto* affine_quantization = + reinterpret_cast( + filter->quantization.params); + TF_LITE_ENSURE(context, affine_quantization); + TF_LITE_ENSURE(context, affine_quantization->scale); + TF_LITE_ENSURE(context, affine_quantization->zero_point); + TF_LITE_ENSURE( + context, affine_quantization->scale->size == 1 || + affine_quantization->scale->size == + filter->dims->data[kDepthwiseConvQuantizedDimension]); + TF_LITE_ENSURE_EQ(context, affine_quantization->scale->size, + affine_quantization->zero_point->size); + + // Allocate memory for per-channel quantization parameters + const int num_channels = + filter->dims->data[kDepthwiseConvQuantizedDimension]; + + data->reference_op_data.per_channel_output_multiplier = + reinterpret_cast(context->AllocatePersistentBuffer( + context, num_channels * sizeof(int32_t))); + data->reference_op_data.per_channel_output_shift = + reinterpret_cast(context->AllocatePersistentBuffer( + context, num_channels * sizeof(int32_t))); + } + + if (filter->type == kTfLiteInt4) { + int filter_size = + RuntimeShape(filter->dims->size, + reinterpret_cast(filter->dims->data)) + .FlatSize(); + context->RequestScratchBufferInArena( + context, filter_size, &data->reference_op_data.filter_buffer_index); + } + + TF_LITE_ENSURE_STATUS(CalculateOpDataDepthwiseConv( + context, node, params, input_width, input_height, filter_width, + filter_height, output_width, output_height, data_type, + &data->reference_op_data)); + + if (input->type == kTfLiteInt8) { + RuntimeShape input_shape = GetTensorShape(input); + RuntimeShape output_shape = GetTensorShape(output); + RuntimeShape filter_shape = GetTensorShape(filter); + TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); + + const int batch_size = MatchingDim(input_shape, 0, output_shape, 0); + const int output_depth = MatchingDim(output_shape, 3, filter_shape, 3); + TFLITE_DCHECK_EQ(batch_size, 1); /* Only batch = 1 is supported */ + + cmsis_nn_dims input_dims; + input_dims.n = batch_size; + input_dims.h = input_height; + input_dims.w = input_width; + input_dims.c = input_shape.Dims(3); + + cmsis_nn_dims filter_dims; + filter_dims.n = 1; + filter_dims.h = filter_height; + filter_dims.w = filter_width; + filter_dims.c = output_depth; + + cmsis_nn_dims output_dims; + output_dims.n = batch_size; + output_dims.h = output_height; + output_dims.w = output_width; + output_dims.c = output_depth; + + cmsis_nn_dw_conv_params dw_conv_params; + dw_conv_params.padding.h = data->reference_op_data.padding.height; + dw_conv_params.padding.w = data->reference_op_data.padding.width; + dw_conv_params.dilation.h = params.dilation_height_factor; + dw_conv_params.dilation.w = params.dilation_width_factor; + + const int32_t buf_size = arm_depthwise_conv_wrapper_s8_get_buffer_size( + &dw_conv_params, &input_dims, &filter_dims, &output_dims); + + if (buf_size > 0) { + TF_LITE_ENSURE_STATUS(context->RequestScratchBufferInArena( + context, buf_size, &data->buffer_idx)); + } else { + data->buffer_idx = -1; + } + } + + micro_context->DeallocateTempTfLiteTensor(output); + micro_context->DeallocateTempTfLiteTensor(input); + micro_context->DeallocateTempTfLiteTensor(filter); + + return kTfLiteOk; +} + +inline void PopulateDwConvParams( + cmsis_nn_dw_conv_params* const dw_conv_params, + cmsis_nn_per_channel_quant_params* const quant_params, + cmsis_nn_dims* const input_dims, cmsis_nn_dims* const filter_dims, + cmsis_nn_dims* const bias_dims, cmsis_nn_dims* 
const output_dims, + const TfLiteDepthwiseConvParams& params, const OpData& data, + const TfLiteEvalTensor* input, const TfLiteEvalTensor* filter, + const TfLiteEvalTensor* bias, TfLiteEvalTensor* output) { + dw_conv_params->dilation.h = params.dilation_height_factor; + dw_conv_params->dilation.w = params.dilation_width_factor; + + dw_conv_params->input_offset = -data.reference_op_data.input_zero_point; + dw_conv_params->output_offset = data.reference_op_data.output_zero_point; + dw_conv_params->stride.h = params.stride_height; + dw_conv_params->stride.w = params.stride_width; + dw_conv_params->padding.h = data.reference_op_data.padding.height; + dw_conv_params->padding.w = data.reference_op_data.padding.width; + + dw_conv_params->activation.min = data.reference_op_data.output_activation_min; + dw_conv_params->activation.max = data.reference_op_data.output_activation_max; + + dw_conv_params->ch_mult = params.depth_multiplier; + + quant_params->multiplier = + data.reference_op_data.per_channel_output_multiplier; + quant_params->shift = data.reference_op_data.per_channel_output_shift; + + RuntimeShape filter_shape = tflite::micro::GetTensorShape(filter); + RuntimeShape input_shape = tflite::micro::GetTensorShape(input); + RuntimeShape output_shape = tflite::micro::GetTensorShape(output); + RuntimeShape bias_shape = tflite::micro::GetTensorShape(bias); + + TFLITE_DCHECK_LE(dw_conv_params->activation.min, + dw_conv_params->activation.max); + + const int batch_size = MatchingDim(input_shape, 0, output_shape, 0); + const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3); + + if (tflite::micro::GetOptionalTensorData(bias)) { + TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth); + } + + input_dims->n = batch_size; + input_dims->h = input_shape.Dims(1); + input_dims->w = input_shape.Dims(2); + input_dims->c = input_shape.Dims(3); + + filter_dims->n = filter_shape.Dims(0); + filter_dims->h = filter_shape.Dims(1); + filter_dims->w = filter_shape.Dims(2); + filter_dims->c = output_depth; + + bias_dims->n = 1; + bias_dims->h = 1; + bias_dims->w = 1; + bias_dims->c = output_depth; + + output_dims->n = batch_size; + output_dims->h = output_shape.Dims(1); + output_dims->w = output_shape.Dims(2); + output_dims->c = output_depth; +} + +void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, + const TfLiteDepthwiseConvParams& params, + const OpData& data, const TfLiteEvalTensor* input, + const TfLiteEvalTensor* filter, + const TfLiteEvalTensor* bias, + TfLiteEvalTensor* output) { + cmsis_nn_dw_conv_params dw_conv_params; + cmsis_nn_per_channel_quant_params quant_params; + cmsis_nn_dims input_dims; + cmsis_nn_dims filter_dims; + cmsis_nn_dims bias_dims; + cmsis_nn_dims output_dims; + + PopulateDwConvParams(&dw_conv_params, &quant_params, &input_dims, + &filter_dims, &bias_dims, &output_dims, params, data, + input, filter, bias, output); + + cmsis_nn_context ctx; + ctx.buf = nullptr; + /* 'size' is unused */ + ctx.size = 0; + + if (data.buffer_idx > -1) { + ctx.buf = context->GetScratchBuffer(context, data.buffer_idx); + } + + TFLITE_DCHECK_EQ( + arm_depthwise_conv_wrapper_s8( + &ctx, &dw_conv_params, &quant_params, &input_dims, + tflite::micro::GetTensorData(input), &filter_dims, + tflite::micro::GetTensorData(filter), &bias_dims, + tflite::micro::GetOptionalTensorData(bias), &output_dims, + tflite::micro::GetTensorData(output)), + ARM_CMSIS_NN_SUCCESS); +} + +void EvalQuantizedPerChannel16x8(TfLiteContext* context, TfLiteNode* node, + const TfLiteDepthwiseConvParams& params, 
+ const OpData& data, + const TfLiteEvalTensor* input, + const TfLiteEvalTensor* filter, + const TfLiteEvalTensor* bias, + TfLiteEvalTensor* output) { + cmsis_nn_dw_conv_params dw_conv_params; + cmsis_nn_per_channel_quant_params quant_params; + cmsis_nn_dims input_dims; + cmsis_nn_dims filter_dims; + cmsis_nn_dims bias_dims; + cmsis_nn_dims output_dims; + + PopulateDwConvParams(&dw_conv_params, &quant_params, &input_dims, + &filter_dims, &bias_dims, &output_dims, params, data, + input, filter, bias, output); + + cmsis_nn_context ctx; + ctx.buf = nullptr; + /* 'size' is unused */ + ctx.size = 0; + + TFLITE_DCHECK_EQ( + arm_depthwise_conv_s16( + &ctx, &dw_conv_params, &quant_params, &input_dims, + tflite::micro::GetTensorData(input), &filter_dims, + tflite::micro::GetTensorData(filter), &bias_dims, + tflite::micro::GetOptionalTensorData(bias), &output_dims, + tflite::micro::GetTensorData(output)), + ARM_CMSIS_NN_SUCCESS); +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + TFLITE_DCHECK(node->builtin_data != nullptr); + + const auto& params = + *(reinterpret_cast(node->builtin_data)); + const OpData& data = *(static_cast(node->user_data)); + + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kDepthwiseConvOutputTensor); + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kDepthwiseConvInputTensor); + const TfLiteEvalTensor* filter = + tflite::micro::GetEvalInput(context, node, kDepthwiseConvWeightsTensor); + const TfLiteEvalTensor* bias = + (NumInputs(node) == 3) + ? tflite::micro::GetEvalInput(context, node, kDepthwiseConvBiasTensor) + : nullptr; + + TfLiteEvalTensor filter_int8 = tflite::micro::MakeUnpackedInt4Tensor( + context, data.reference_op_data.filter_buffer_index, filter); + + switch (input->type) { // Already know in/out types are same. 
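+    // Dispatch overview: float falls back to the reference kernel, int8 runs
+    // through the CMSIS-NN s8 wrapper (using the scratch buffer sized in
+    // Prepare, if any), and int16 activations with int8 weights use the
+    // CMSIS-NN s16 kernel.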
+ case kTfLiteFloat32: { +#if EI_TFLITE_DISABLE_DEPTHWISE_CONV_2D_IN_F32 + MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type), + input->type); + return kTfLiteError; +#endif + tflite::reference_ops::DepthwiseConv( + DepthwiseConvParamsFloat(params, data.reference_op_data), + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetOptionalTensorData(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + break; + } + case kTfLiteInt8: +#if EI_TFLITE_DISABLE_DEPTHWISE_CONV_2D_IN_I8 + MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type), + input->type); + return kTfLiteError; +#endif + switch (filter_int8.type) { + case kTfLiteInt8: { + EvalQuantizedPerChannel(context, node, params, data, input, + &filter_int8, bias, output); + break; + } + default: { + MicroPrintf("Filter type %s (%d) not supported.", + TfLiteTypeGetName(filter->type), filter->type); + return kTfLiteError; + } + } + break; + case kTfLiteInt16: + EvalQuantizedPerChannel16x8(context, node, params, data, input, filter, + bias, output); + break; + default: + MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type), + input->type); + return kTfLiteError; + } + return kTfLiteOk; +} + +TfLiteStatus EvalInt8(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + TFLITE_DCHECK(node->builtin_data != nullptr); + + const auto& params = + *(reinterpret_cast(node->builtin_data)); + const OpData& data = *(static_cast(node->user_data)); + + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kDepthwiseConvOutputTensor); + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kDepthwiseConvInputTensor); + const TfLiteEvalTensor* filter = + tflite::micro::GetEvalInput(context, node, kDepthwiseConvWeightsTensor); + const TfLiteEvalTensor* bias = + (NumInputs(node) == 3) + ? tflite::micro::GetEvalInput(context, node, kDepthwiseConvBiasTensor) + : nullptr; + + TfLiteEvalTensor filter_int8 = tflite::micro::MakeUnpackedInt4Tensor( + context, data.reference_op_data.filter_buffer_index, filter); + + EvalQuantizedPerChannel(context, node, params, data, input, &filter_int8, + bias, output); + return kTfLiteOk; +} + +TfLiteStatus EvalInt16x8(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + TFLITE_DCHECK(node->builtin_data != nullptr); + + const auto& params = + *(reinterpret_cast(node->builtin_data)); + const OpData& data = *(static_cast(node->user_data)); + + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kDepthwiseConvOutputTensor); + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kDepthwiseConvInputTensor); + const TfLiteEvalTensor* filter = + tflite::micro::GetEvalInput(context, node, kDepthwiseConvWeightsTensor); + const TfLiteEvalTensor* bias = + (NumInputs(node) == 3) + ? 
tflite::micro::GetEvalInput(context, node, kDepthwiseConvBiasTensor) + : nullptr; + + EvalQuantizedPerChannel16x8(context, node, params, data, input, filter, bias, + output); + return kTfLiteOk; +} + +} // namespace + +TfLiteRegistration Register_DEPTHWISE_CONV_2D() { + return tflite::micro::RegisterOp(Init, Prepare, Eval); +} + +TfLiteRegistration Register_DEPTHWISE_CONV_2D_INT8() { + return tflite::micro::RegisterOp(Init, Prepare, EvalInt8); +} + +TfLiteRegistration Register_DEPTHWISE_CONV_2D_INT16() { + return tflite::micro::RegisterOp(Init, Prepare, EvalInt16x8); +} + +} // namespace tflite + +#elif EI_CLASSIFIER_TFLITE_ENABLE_ARC == 1 +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h" + +#include "mli_api.h" // NOLINT +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/padding.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/mli_function_specializations.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/mli_slicers.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/mli_tf_utils.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/scratch_buf_mgr.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/scratch_buffers.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +namespace tflite { +namespace { + +constexpr int kInputTensor = 0; +constexpr int kFilterTensor = 1; +constexpr int kBiasTensor = 2; +constexpr int kOutputTensor = 0; + +// Depthwise conv is quantized along dimension 3: +// https://www.tensorflow.org/lite/performance/quantization_spec +constexpr int kDepthwiseConvQuantizedDimension = 3; + +struct OpData { + TfLitePaddingValues padding; + + // Cached tensor zero point values for quantized operations. + int32_t input_zero_point; + int32_t filter_zero_point; + int32_t output_zero_point; + + // The scaling factor from input to output (aka the 'real multiplier') can + // be represented as a fixed point multiplier plus a left shift. + int32_t output_multiplier; + int output_shift; + + // Per channel output multiplier and shift. 
+ int32_t* per_channel_output_multiplier; + int32_t* per_channel_output_shift; +#ifdef MLI_2_0 + int8_t* per_channel_scale_frac_bits; +#endif + + // The range of the fused activation layer. For example for kNone and + // uint8_t these would be 0 and 255. + int32_t output_activation_min; + int32_t output_activation_max; + + // The result of checking if MLI optimized version of tensors can be used. + bool is_mli_applicable; + + // Tensors in MLI format. + mutable ops::micro::MliTensorInterface mli_in; + mutable ops::micro::MliTensorInterface mli_weights; + mutable ops::micro::MliTensorInterface mli_bias; + mutable ops::micro::MliTensorInterface mli_out; + mli_conv2d_cfg* cfg; + + // Pointer to the required depthwise function. For “channel multiplier†+ // functionality group convolution is used. + depthwise_func_ptr p_mli_krn_depthwise_conv2d_sa8_sa8_sa32; +}; + +bool IsMliApplicable(TfLiteContext* context, const TfLiteTensor* input, + const TfLiteTensor* filter, const TfLiteTensor* bias, + const TfLiteDepthwiseConvParams* params) { + const auto* affine_quantization = + reinterpret_cast(filter->quantization.params); + +#ifndef MLI_2_0 + const int in_ch = SizeOfDimension(input, 3); + const int filters_num = SizeOfDimension(filter, 3); +#endif + + // MLI optimized version only supports int8_t datatype, dilation factor of 1 + // and per-axis quantization of weights (no broadcasting/per-tensor). For + // MLI 1.1 (in_ch == filters_num) || (in_ch == 1)) is used to prevent usage of + // channel multiplier logic for multichannel input. + + bool ret_val = (filter->type == kTfLiteInt8) && + (input->type == kTfLiteInt8) && (bias->type == kTfLiteInt32) && + (params->dilation_width_factor == 1) && + (params->dilation_height_factor == 1) && + (affine_quantization->scale->size == +#ifdef MLI_2_0 + filter->dims->data[kDepthwiseConvQuantizedDimension]); +#else + filter->dims->data[kDepthwiseConvQuantizedDimension]) && + ((in_ch == filters_num) || (in_ch == 1)); +#endif + return ret_val; +} + +TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node, + TfLiteDepthwiseConvParams* params, int width, + int height, int filter_width, int filter_height, + const TfLiteType data_type, OpData* data) { + bool has_bias = node->inputs->size == 3; + // Check number of inputs/outputs + TF_LITE_ENSURE(context, has_bias || node->inputs->size == 2); + TF_LITE_ENSURE_EQ(context, node->outputs->size, 1); + + int unused_output_height, unused_output_width; + data->padding = ComputePaddingHeightWidth( + params->stride_height, params->stride_width, 1, 1, height, width, + filter_height, filter_width, params->padding, &unused_output_height, + &unused_output_width); + + // Note that quantized inference requires that all tensors have their + // parameters set. This is usually done during quantized training. 
+#if !defined(TF_LITE_STRIP_REFERENCE_IMPL) + MicroContext* micro_context = GetMicroContext(context); + + TfLiteTensor* input = + micro_context->AllocateTempInputTensor(node, kInputTensor); + TfLiteTensor* filter = + micro_context->AllocateTempInputTensor(node, kFilterTensor); + TfLiteTensor* bias = + micro_context->AllocateTempInputTensor(node, kBiasTensor); + TfLiteTensor* output = + micro_context->AllocateTempOutputTensor(node, kOutputTensor); + + if (data_type != kTfLiteFloat32 && !data->is_mli_applicable) { + int num_channels = filter->dims->data[kDepthwiseConvQuantizedDimension]; + + return tflite::PopulateConvolutionQuantizationParams( + context, input, filter, bias, output, params->activation, + &data->output_multiplier, &data->output_shift, + &data->output_activation_min, &data->output_activation_max, + data->per_channel_output_multiplier, + reinterpret_cast(data->per_channel_output_shift), num_channels); + } + micro_context->DeallocateTempTfLiteTensor(input); + micro_context->DeallocateTempTfLiteTensor(filter); + micro_context->DeallocateTempTfLiteTensor(bias); + micro_context->DeallocateTempTfLiteTensor(output); + +#endif + return kTfLiteOk; +} + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(OpData)); +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + TFLITE_DCHECK(node->builtin_data != nullptr); + + auto* params = + reinterpret_cast(node->builtin_data); + OpData* data = static_cast(node->user_data); + + MicroContext* micro_context = GetMicroContext(context); + + TfLiteTensor* output = micro_context->AllocateTempOutputTensor(node, kOutputTensor); + const TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, kInputTensor); + const TfLiteTensor* filter = micro_context->AllocateTempInputTensor(node, kFilterTensor); + const TfLiteTensor* bias = micro_context->AllocateTempInputTensor(node, kBiasTensor); + const TfLiteType data_type = input->type; + int width = SizeOfDimension(input, 2); + int height = SizeOfDimension(input, 1); + +#if defined(MLI_2_0) && !defined(MLI_2_0_KRNL_TEST) + int filter_width = SizeOfDimension(filter, 1); + int filter_height = SizeOfDimension(filter, 0); +#else + int filter_width = SizeOfDimension(filter, 2); + int filter_height = SizeOfDimension(filter, 1); +#endif + + // Per channel quantization is only needed for int8 inference. For other + // quantized types, only a single scale and zero point is needed. + const int num_channels = filter->dims->data[kDepthwiseConvQuantizedDimension]; + // Dynamically allocate per-channel quantization parameters. + data->per_channel_output_multiplier = + reinterpret_cast(context->AllocatePersistentBuffer( + context, num_channels * sizeof(int32_t))); + data->per_channel_output_shift = + reinterpret_cast(context->AllocatePersistentBuffer( + context, num_channels * sizeof(int32_t))); + + data->is_mli_applicable = + IsMliApplicable(context, input, filter, bias, params); + + // All per-channel quantized tensors need valid zero point and scale arrays. 
+ if (input->type == kTfLiteInt8) { + TF_LITE_ENSURE_EQ(context, filter->quantization.type, + kTfLiteAffineQuantization); + + const auto* affine_quantization = + reinterpret_cast( + filter->quantization.params); + TF_LITE_ENSURE(context, affine_quantization); + TF_LITE_ENSURE(context, affine_quantization->scale); + TF_LITE_ENSURE(context, affine_quantization->zero_point); + TF_LITE_ENSURE( + context, affine_quantization->scale->size == 1 || + affine_quantization->scale->size == + filter->dims->data[kDepthwiseConvQuantizedDimension]); + TF_LITE_ENSURE_EQ(context, affine_quantization->scale->size, + affine_quantization->zero_point->size); + } + + TF_LITE_ENSURE_STATUS(CalculateOpData(context, node, params, width, height, + filter_width, filter_height, data_type, + data)); + + data->input_zero_point = input->params.zero_point; + data->filter_zero_point = filter->params.zero_point; + data->output_zero_point = output->params.zero_point; + + if (data->is_mli_applicable) { + data->mli_in = ops::micro::MliTensorInterface(static_cast( + context->AllocatePersistentBuffer(context, sizeof(mli_tensor)))); + data->mli_weights = ops::micro::MliTensorInterface(static_cast( + context->AllocatePersistentBuffer(context, sizeof(mli_tensor)))); + data->mli_bias = ops::micro::MliTensorInterface(static_cast( + context->AllocatePersistentBuffer(context, sizeof(mli_tensor)))); + data->mli_out = ops::micro::MliTensorInterface(static_cast( + context->AllocatePersistentBuffer(context, sizeof(mli_tensor)))); + data->cfg = static_cast( + context->AllocatePersistentBuffer(context, sizeof(mli_conv2d_cfg))); + +#ifdef MLI_2_0 + const int num_buffers = 2; + data->per_channel_scale_frac_bits = + static_cast(context->AllocatePersistentBuffer( + context, num_buffers * num_channels * sizeof(int16_t))); +#endif + + // Reuse space allocated for OpData parameters. +#ifdef MLI_2_0 + *data->mli_weights.Scale() = + reinterpret_cast(data->per_channel_output_multiplier); + *data->mli_bias.Scale() = + reinterpret_cast(data->per_channel_output_multiplier) + + num_channels; +#else + *data->mli_weights.Scale() = + static_cast(data->per_channel_output_multiplier); + *data->mli_bias.Scale() = + static_cast(data->per_channel_output_shift); +#endif + +#ifdef MLI_2_0 + *data->mli_weights.ZeroPoint() = + reinterpret_cast(data->per_channel_output_shift); + *data->mli_bias.ZeroPoint() = + reinterpret_cast(data->per_channel_output_shift) + + num_channels; +#else + *data->mli_weights.ZeroPoint() = + reinterpret_cast(&data->filter_zero_point); + *data->mli_bias.ZeroPoint() = + reinterpret_cast(&data->filter_zero_point) + sizeof(int16_t); +#endif + +#ifdef MLI_2_0 + *data->mli_weights.ScaleFracBits() = + reinterpret_cast(data->per_channel_scale_frac_bits); + *data->mli_bias.ScaleFracBits() = + reinterpret_cast(data->per_channel_scale_frac_bits) + + num_channels; +#endif + + ops::micro::ConvertToMliTensor(input, &data->mli_in); + ops::micro::ConvertToMliTensorPerChannel(filter, &data->mli_weights, + /* is_bias_tensor = */ false); + ops::micro::ConvertToMliTensorPerChannel(bias, &data->mli_bias, + /* is_bias_tensor = */ true); +#ifdef MLI_2_0 + ops::micro::AdjustBiasTensor(&data->mli_bias, &data->mli_in, + &data->mli_weights); +#endif + ops::micro::ConvertToMliTensor(output, &data->mli_out); + +#ifdef MLI_2_0 + // Choose group convolution function for "channel multiplier" functionality. 
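+    // "Channel multiplier" refers to depth_multiplier > 1, e.g. 3 input
+    // channels with a multiplier of 2 produce 6 output channels. If every
+    // input channel maps to exactly one filter (in_ch == filters_num and
+    // channels_num == 1) the plain depthwise kernel is used; otherwise a
+    // group convolution kernel is selected below.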
+ const int in_ch = SizeOfDimension(input, 3); + const int filters_num = SizeOfDimension(filter, 3); + const int channels_num = SizeOfDimension(filter, 2); + if (in_ch == filters_num && channels_num == 1) { + data->p_mli_krn_depthwise_conv2d_sa8_sa8_sa32 = + mli_krn_depthwise_conv2d(data->mli_weights.MliTensor()); + } else { + data->p_mli_krn_depthwise_conv2d_sa8_sa8_sa32 = + mli_krn_group_conv2d(data->mli_weights.MliTensor()); + } +#else + data->p_mli_krn_depthwise_conv2d_sa8_sa8_sa32 = + mli_krn_depthwise_conv2d(data->mli_weights.MliTensor(), data->cfg); +#endif + +#ifdef MLI_2_0 + data->cfg->dilation_width = 1; + data->cfg->dilation_height = 1; +#endif + + if (data->output_activation_min == -128 && + data->output_activation_max == 127) { + data->cfg->relu.type = MLI_RELU_NONE; + } else if (params->activation == kTfLiteActRelu) { + data->cfg->relu.type = MLI_RELU_GEN; + } else if (params->activation == kTfLiteActRelu6) { + data->cfg->relu.type = MLI_RELU_6; + } else if (params->activation == kTfLiteActReluN1To1) { + data->cfg->relu.type = MLI_RELU_1; + } else { + data->cfg->relu.type = MLI_RELU_NONE; + } + + data->cfg->stride_width = params->stride_width; + data->cfg->stride_height = params->stride_height; + if (params->padding == kTfLitePaddingValid) { + data->cfg->padding_left = 0; + data->cfg->padding_right = 0; + data->cfg->padding_top = 0; + data->cfg->padding_bottom = 0; + } else { + data->cfg->padding_left = data->padding.width; + data->cfg->padding_right = + data->padding.width + data->padding.width_offset; + data->cfg->padding_top = data->padding.height; + data->cfg->padding_bottom = + data->padding.height + data->padding.height_offset; + } + } + return kTfLiteOk; +} + +void EvalFloat(TfLiteContext* context, TfLiteNode* node, + TfLiteDepthwiseConvParams* params, const OpData& data, + const TfLiteEvalTensor* input, const TfLiteEvalTensor* filter, + const TfLiteEvalTensor* bias, TfLiteEvalTensor* output) { +#if !defined(TF_LITE_STRIP_REFERENCE_IMPL) + float output_activation_min, output_activation_max; + CalculateActivationRange(params->activation, &output_activation_min, + &output_activation_max); + + tflite::DepthwiseParams op_params; + // Padding type is ignored, but still set. 
+ op_params.padding_type = PaddingType::kSame; + op_params.padding_values.width = data.padding.width; + op_params.padding_values.height = data.padding.height; + op_params.stride_width = params->stride_width; + op_params.stride_height = params->stride_height; + op_params.dilation_width_factor = params->dilation_width_factor; + op_params.dilation_height_factor = params->dilation_height_factor; + op_params.depth_multiplier = params->depth_multiplier; + op_params.float_activation_min = output_activation_min; + op_params.float_activation_max = output_activation_max; + + tflite::reference_ops::DepthwiseConv( + op_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorData(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); +#else + MicroPrintf("Type %s (%d) is not supported by ARC MLI Library.", + TfLiteTypeGetName(input->type), input->type); +#endif +} +TfLiteStatus EvalMliQuantizedPerChannel( + TfLiteContext* context, TfLiteNode* node, TfLiteDepthwiseConvParams* params, + const OpData& data, const TfLiteEvalTensor* input, + const TfLiteEvalTensor* filter, const TfLiteEvalTensor* bias, + TfLiteEvalTensor* output) { + // Run Depthwise Conv MLI kernel + // MLI optimized version only supports int8_t dataype and dilation factor of 1 + if (data.is_mli_applicable) { + // Copy configuration data from external to local memory + mli_conv2d_cfg cfg_local = *data.cfg; + + ops::micro::MliTensorAttachBuffer(input, &data.mli_in); + ops::micro::MliTensorAttachBuffer(filter, &data.mli_weights); + ops::micro::MliTensorAttachBuffer(bias, &data.mli_bias); + ops::micro::MliTensorAttachBuffer(output, &data.mli_out); + + // for height slicing + const int height_dimension = 1; + int in_slice_height = 0; + int out_slice_height = 0; + uint32_t* mli_weights_shape = data.mli_weights.Shape(); +#ifdef MLI_2_0 + const int kernel_height = + static_cast(mli_weights_shape[KRNL_DW_H_DIM_HW1N]); +#else + const int kernel_height = + static_cast(mli_weights_shape[KRNL_DW_H_DIM_HWC]); +#endif + const int overlap = kernel_height - cfg_local.stride_height; + + // for weight slicing (on output channels) + // HWCN layout for weights, output channel dimension is the first dimension. + const int weight_out_ch_dimension = 3; + // bias has only 1 dimension + const int bias_out_ch_dimension = 0; + // Batch-Height-Width-Channel layout means last dimension is output + // channels. + const int out_tensor_ch_dimension = 3; + const int32_t in_channels = data.mli_in.Shape()[out_tensor_ch_dimension]; + const int32_t out_channels = data.mli_out.Shape()[out_tensor_ch_dimension]; + int slice_channels = + static_cast(mli_weights_shape[weight_out_ch_dimension]); + + // Tensors for data in fast (local) memory + // and config to copy data from external to local memory + mli_tensor weights_local = *data.mli_weights.MliTensor(); + mli_tensor bias_local = *data.mli_bias.MliTensor(); + mli_tensor in_local = *data.mli_in.MliTensor(); + mli_tensor out_local = + *data.mli_out.MliTensor(); // this assumes that output shape + // is already filled in the tensor struct. 
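+    // Slicing strategy, roughly: weights and bias are sliced over the output
+    // channel dimension so each chunk fits in fast local memory, while input
+    // and output are additionally sliced over height. Height slices overlap
+    // by kernel_height - stride_height rows so windows at slice borders still
+    // see all the input rows they need.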
+ + ops::micro::MliTensorInterface weights_local_interface(&weights_local); + ops::micro::MliTensorInterface bias_local_interface(&bias_local); + ops::micro::MliTensorInterface in_local_interface(&in_local); + ops::micro::MliTensorInterface out_local_interface(&out_local); + + mli_mov_cfg_t copy_config; + mli_mov_cfg_for_copy(©_config); + + TF_LITE_ENSURE_STATUS(ops::micro::get_arc_scratch_buffer_for_conv_tensors( + context, &in_local_interface, &weights_local_interface, + &bias_local_interface, &out_local_interface)); + + /* is_local indicates that the tensor is already in local memory, + so in that case the original tensor can be used, + and there is no need to copy it to the local tensor*/ + const bool in_is_local = + in_local_interface.Data() == data.mli_in.Data(); + const bool out_is_local = + out_local_interface.Data() == data.mli_out.Data(); + const bool w_is_local = weights_local_interface.Data() == + data.mli_weights.Data(); + const bool b_is_local = + bias_local_interface.Data() == data.mli_bias.Data(); + + TF_LITE_ENSURE_STATUS(ops::micro::arc_scratch_buffer_calc_slice_size_io( + &in_local_interface, &out_local_interface, kernel_height, + cfg_local.stride_height, cfg_local.padding_top, + cfg_local.padding_bottom, &in_slice_height, &out_slice_height)); + TF_LITE_ENSURE_STATUS( + ops::micro::arc_scratch_buffer_calc_slice_size_weights( + &weights_local_interface, &bias_local_interface, + weight_out_ch_dimension, &slice_channels)); + + /* if input channels is not equal to output channels, a channel multiplier + is used. in this case the slice channels needs to be rounded down to a + multiple of the input channels */ + if (in_channels != out_channels) { + slice_channels = (slice_channels / in_channels) * in_channels; + } + + ops::micro::TensorSlicer b_slice(data.mli_bias.MliTensor(), + bias_out_ch_dimension, slice_channels); + ops::micro::TensorSlicer w_slice(data.mli_weights.MliTensor(), + weight_out_ch_dimension, slice_channels, 0, + 0, 0, true); + ops::micro::TensorSlicer out_ch_slice(data.mli_out.MliTensor(), + out_tensor_ch_dimension, + slice_channels, 0, 0, 0, true); + ops::micro::TensorSlicer in_ch_slice(data.mli_in.MliTensor(), + out_tensor_ch_dimension, + slice_channels, 0, 0, 0, true); + + mli_tensor* w_ptr = w_is_local ? w_slice.Sub() : &weights_local; + mli_tensor* b_ptr = b_is_local ? b_slice.Sub() : &bias_local; + + void* input_buffer_ptr = NULL; + uint32_t input_buffer_size = 0; + int padding_top = cfg_local.padding_top; + int padding_bottom = cfg_local.padding_bottom; + + while (!w_slice.Done()) { + mli_mov_tensor_sync(w_slice.Sub(), ©_config, w_ptr); + mli_mov_tensor_sync(b_slice.Sub(), ©_config, b_ptr); + + /* input tensor is already sliced in the channel dimension. + out_ch_slice.Sub() is the tensor for the amount of channels of this + iteration of the weight slice loop. This tensor needs to be further + sliced over the batch and height dimension. in_ch_slice.Sub() tensor + contains batches of HWC tensors. so it is a 4 dimensional tensor. because + the mli kernel will process one HWC tensor at a time, the 4 dimensional + tensor needs to be sliced into nBatch 3 dimensional tensors. on top of + that there could be a need to also slice in the Height dimension. for that + the sliceHeight has been calculated. 
The tensor slicer is configured that + it will completely slice the nBatch dimension (0) and slice the height + dimension (1) in chunks of 'sliceHeight' */ + ops::micro::TensorSlicer in_slice(in_ch_slice.Sub(), height_dimension, + in_slice_height, padding_top, + padding_bottom, overlap); + + /* output tensor is already sliced in the output channel dimension. + out_ch_slice.Sub() is the tensor for the amount of output channels of this + iteration of the weight slice loop. This tensor needs to be further + sliced over the batch and height dimension. */ + ops::micro::TensorSlicer out_slice(out_ch_slice.Sub(), height_dimension, + out_slice_height); + + /* setup the pointers to the local or remote tensor to make the code + * inside the loop easier. */ + mli_tensor* in_ptr = in_is_local ? in_slice.Sub() : &in_local; + mli_tensor* out_ptr = out_is_local ? out_slice.Sub() : &out_local; + + while (!out_slice.Done()) { + if (!out_is_local) { + ops::micro::PrepareLocalTensor(out_slice.Sub(), &out_local); + ops::micro::PrepareLocalTensor(in_slice.Sub(), &in_local); + } + TF_LITE_ENSURE(context, !in_slice.Done()); + cfg_local.padding_top = in_slice.GetPaddingPre(); + cfg_local.padding_bottom = in_slice.GetPaddingPost(); + + // if same input copy as previous iteration, skip the copy of input +#ifdef MLI_2_0 + if ((in_slice.Sub()->data.mem.pi8 != input_buffer_ptr) || + (mli_hlp_count_elem_num(in_slice.Sub(), 0) != input_buffer_size)) { + mli_mov_tensor_sync(in_slice.Sub(), ©_config, in_ptr); + input_buffer_ptr = in_slice.Sub()->data.mem.pi8; + input_buffer_size = mli_hlp_count_elem_num(in_slice.Sub(), 0); + } + +#ifdef MLI_2_0_KRNL_TEST + // Checking conditions here to prevent usage non-contiguous buffer + // memory. + if (mli_weights_shape[weight_out_ch_dimension] != + w_slice.Sub()->shape[3]) { + MicroPrintf("Slicing is not supported with real-time permutation."); + return kTfLiteError; + } + uint8_t dim_order[] = {1, 2, 0, 3}; + ops::micro::change_shape(w_ptr, dim_order); +#endif + + data.p_mli_krn_depthwise_conv2d_sa8_sa8_sa32(in_ptr, w_ptr, b_ptr, + &cfg_local, out_ptr); +#else + if ((in_slice.Sub()->data != input_buffer_ptr) || + (mli_hlp_count_elem_num(in_slice.Sub(), 0) != input_buffer_size)) { + mli_mov_tensor_sync(in_slice.Sub(), ©_config, in_ptr); + input_buffer_ptr = in_slice.Sub()->data; + input_buffer_size = mli_hlp_count_elem_num(in_slice.Sub(), 0); + } + data.p_mli_krn_depthwise_conv2d_sa8_sa8_sa32(in_ptr, w_ptr, b_ptr, + &cfg_local, out_ptr); +#endif + + mli_mov_tensor_sync(out_ptr, ©_config, out_slice.Sub()); + + in_slice.Next(); + out_slice.Next(); + } + w_slice.Next(); + b_slice.Next(); + out_ch_slice.Next(); + in_ch_slice.Next(); + TF_LITE_ENSURE(context, in_slice.Done()); + } + } + return kTfLiteOk; +} + +void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, + TfLiteDepthwiseConvParams* params, + const OpData& data, const TfLiteEvalTensor* input, + const TfLiteEvalTensor* filter, + const TfLiteEvalTensor* bias, + TfLiteEvalTensor* output) { +#if !defined(TF_LITE_STRIP_REFERENCE_IMPL) + DepthwiseParams op_params; + op_params.padding_type = PaddingType::kSame; + op_params.padding_values.width = data.padding.width; + op_params.padding_values.height = data.padding.height; + op_params.stride_width = params->stride_width; + op_params.stride_height = params->stride_height; + op_params.dilation_width_factor = params->dilation_width_factor; + op_params.dilation_height_factor = params->dilation_height_factor; + op_params.depth_multiplier = params->depth_multiplier; + 
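+  // Zero points are folded in as offsets with the usual TFLM sign convention:
+  // the (negated) input zero point is added back to each int8 input value
+  // inside the reference kernel, while the output zero point is added after
+  // requantization, just before clamping to the activation range below.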
+  op_params.input_offset = -data.input_zero_point;
+  op_params.weights_offset = 0;
+  op_params.output_offset = data.output_zero_point;
+  op_params.quantized_activation_min = std::numeric_limits<int8_t>::min();
+  op_params.quantized_activation_max = std::numeric_limits<int8_t>::max();
+
+  reference_integer_ops::DepthwiseConvPerChannel(
+      op_params, data.per_channel_output_multiplier,
+      data.per_channel_output_shift, tflite::micro::GetTensorShape(input),
+      tflite::micro::GetTensorData<int8_t>(input),
+      tflite::micro::GetTensorShape(filter),
+      tflite::micro::GetTensorData<int8_t>(filter),
+      tflite::micro::GetTensorShape(bias),
+      tflite::micro::GetTensorData<int32_t>(bias),
+      tflite::micro::GetTensorShape(output),
+      tflite::micro::GetTensorData<int8_t>(output));
+#else
+  MicroPrintf("Node configuration is not supported by ARC MLI Library.");
+#endif
+}
+
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+  TFLITE_DCHECK(node->user_data != nullptr);
+  TFLITE_DCHECK(node->builtin_data != nullptr);
+
+  auto* params =
+      reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data);
+  const OpData& data = *(static_cast<const OpData*>(node->user_data));
+
+  TfLiteEvalTensor* output =
+      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
+  const TfLiteEvalTensor* input =
+      tflite::micro::GetEvalInput(context, node, kInputTensor);
+  const TfLiteEvalTensor* filter =
+      tflite::micro::GetEvalInput(context, node, kFilterTensor);
+  const TfLiteEvalTensor* bias =
+      (NumInputs(node) == 3)
+          ? tflite::micro::GetEvalInput(context, node, kBiasTensor)
+          : nullptr;
+
+  switch (input->type) {  // Already know in/out types are same.
+    case kTfLiteFloat32:
+#if EI_TFLITE_DISABLE_DEPTHWISE_CONV_2D_IN_F32
+      MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type),
+                  input->type);
+      return kTfLiteError;
+#endif
+      EvalFloat(context, node, params, data, input, filter, bias, output);
+      break;
+    case kTfLiteInt8:
+#if EI_TFLITE_DISABLE_DEPTHWISE_CONV_2D_IN_I8
+      MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type),
+                  input->type);
+      return kTfLiteError;
+#endif
+      if (data.is_mli_applicable) {
+        EvalMliQuantizedPerChannel(context, node, params, data, input, filter,
+                                   bias, output);
+      } else {
+        EvalQuantizedPerChannel(context, node, params, data, input, filter,
+                                bias, output);
+      }
+      break;
+    default:
+      MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type),
+                  input->type);
+      return kTfLiteError;
+  }
+  return kTfLiteOk;
+}
+
+} // namespace
+
+TfLiteRegistration Register_DEPTHWISE_CONV_2D() {
+  return tflite::micro::RegisterOp(Init, Prepare, Eval);
+}
+
+} // namespace tflite
+
+#elif EI_CLASSIFIER_TFLITE_ENABLE_SILABS_MVP == 1
+
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/padding.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnfunctions.h"
+
+#include "sl_mvp_ml_depthwise_conv2d.h"
+
+namespace tflite {
+namespace sl {
+namespace depthwise_conv2d {
+
+constexpr int kInputTensor = 0;
+constexpr int kFilterTensor = 1;
+constexpr int kBiasTensor = 2;
+constexpr int kOutputTensor = 0;
+
+// Depthwise conv is quantized along dimension 3 of filter tensor.
+// https://www.tensorflow.org/lite/performance/quantization_spec +constexpr int kDepthwiseConvQuantizedDimension = 3; + +enum op_support { kMvp, kCmsisNN, kTFLMrefF32, kTFLMrefI8 }; + +struct OpData { + op_support supported; + float activation_min_f32; + float activation_max_f32; + int scratch_buffer_index; + sli_mvp_ml_depthwise_conv2d_s8_params_t op_params; + + // CMSIS-NN per channel output multiplier and shift. + int32_t *per_channel_output_multiplier; + int32_t *per_channel_output_shift; +}; + +inline float16_t normalize_fp16(float f) +{ + return (float16_t)std::min(std::max(f, SLI_MVP_FP16_MIN), SLI_MVP_FP16_MAX); +} + +inline PaddingType RuntimePaddingType(TfLitePadding padding) +{ + switch (padding) { + case TfLitePadding::kTfLitePaddingSame: + return PaddingType::kSame; + case TfLitePadding::kTfLitePaddingValid: + return PaddingType::kValid; + case TfLitePadding::kTfLitePaddingUnknown: + default: + return PaddingType::kNone; + } +} + +TfLiteStatus PopulateConvolutionQuantizationParams( + TfLiteContext* context, + const TfLiteTensor* input, + const TfLiteTensor* filter, + TfLiteTensor* output, + const TfLiteFusedActivation& activation, + int32_t* output_activation_min, int32_t* output_activation_max, + float16_t* per_channel_scalers, int num_channels, float accumulator_multipler) +{ + auto affine_quantization = + reinterpret_cast(filter->quantization.params); + + // Populate multiplier and shift using affine quantization. + const float input_scale = input->params.scale; + const float output_scale = output->params.scale; + const float* filter_scales = affine_quantization->scale->data; + + for (int i = 0; i < num_channels; ++i) { + // If per-tensor quantization parameter is specified, broadcast it along the + // quantization dimension (channels_out). 
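+    // As an illustration with made-up numbers: input_scale = 0.5, a channel's
+    // filter_scale = 0.02 and output_scale = 0.1 give an effective rescale of
+    // 0.5 * 0.02 / 0.1 = 0.1 for that channel; the value is then scaled by
+    // the accumulator multiplier and clamped to the float16 range below.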
+ const float filter_scale = filter_scales[i]; + const float effective_output_scale = (input_scale * filter_scale) / output_scale; + const float acc_output_scale = effective_output_scale * accumulator_multipler; + per_channel_scalers[i] = normalize_fp16(acc_output_scale); + } + + TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized( + context, activation, output, output_activation_min, + output_activation_max)); + + return kTfLiteOk; +} + +void *Init(TfLiteContext* context, const char* buffer, size_t length) +{ + (void)buffer; + (void)length; + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(OpData)); +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) +{ + int scratch_buffer_size = 0; + + TFLITE_DCHECK(node->user_data != nullptr); + TFLITE_DCHECK(node->builtin_data != nullptr); + + OpData* data = static_cast(node->user_data); + const auto params = static_cast(node->builtin_data); + + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor); + const TfLiteTensor* input = GetInput(context, node, kInputTensor); + const TfLiteTensor* filter = GetInput(context, node, kFilterTensor); + TF_LITE_ENSURE(context, input != nullptr); + TF_LITE_ENSURE(context, output != nullptr); + TF_LITE_ENSURE(context, filter != nullptr); + + data->op_params.batches = input->dims->data[0]; + data->op_params.in_channels = input->dims->data[3]; + data->op_params.input_height = input->dims->data[1]; + data->op_params.input_width = input->dims->data[2]; + data->op_params.out_channels = filter->dims->data[kDepthwiseConvQuantizedDimension]; + data->op_params.output_height = output->dims->data[1]; + data->op_params.output_width = output->dims->data[2]; + data->op_params.filter_height = filter->dims->data[1]; + data->op_params.filter_width = filter->dims->data[2]; + data->op_params.input_offset = -input->params.zero_point; + data->op_params.output_offset = output->params.zero_point; + data->op_params.stride_height = params->stride_height; + data->op_params.stride_width = params->stride_width; + data->op_params.dilation_height = params->dilation_height_factor; + data->op_params.dilation_width = params->dilation_width_factor; + data->op_params.padding = params->padding == kTfLitePaddingSame; + + int dummy_height, dummy_width; + const auto padding = ComputePaddingHeightWidth( + params->stride_height, params->stride_width, + params->dilation_height_factor, params->dilation_width_factor, + data->op_params.input_height, data->op_params.input_width, + data->op_params.filter_height, data->op_params.filter_width, + params->padding, + &dummy_height, &dummy_width); + + data->op_params.pad_height = padding.height; + data->op_params.pad_width = padding.width; + + const int num_channels = data->op_params.out_channels; + + if (input->type == kTfLiteInt8) { + if (sli_mvp_ml_depthwise_conv2d_s8_is_supported(&data->op_params)) { + data->supported = kMvp; + + float16_t *bias_data = static_cast(context->AllocatePersistentBuffer( + context, num_channels * sizeof(float16_t))); + if(bias != nullptr) { + data->op_params.bias = bias_data; + int32_t i32_bias; + for(int i = 0; i < num_channels; i++) { + i32_bias = bias->data.i32[i]; + bias_data[i] = float16_t(i32_bias * SLI_MVP_ACCUMULATOR_SCALER); + } + } else { + data->op_params.bias = nullptr; + } + + float16_t *scaler_data = static_cast(context->AllocatePersistentBuffer( + context, num_channels * sizeof(float16_t))); + 
data->op_params.output_scaler = scaler_data; + TF_LITE_ENSURE_STATUS(PopulateConvolutionQuantizationParams( + context, input, filter, output, params->activation, + reinterpret_cast(&data->op_params.output_activation_min), + reinterpret_cast(&data->op_params.output_activation_max), + scaler_data, num_channels, SLI_MVP_ACCUMULATOR_MULTIPLIER)); + + } else { + data->per_channel_output_multiplier = static_cast(context->AllocatePersistentBuffer( + context, num_channels * sizeof(int32_t))); + data->per_channel_output_shift = static_cast(context->AllocatePersistentBuffer( + context, num_channels * sizeof(int32_t))); + + int32_t dummy_output_multiplier; + int dummy_output_shift; + TF_LITE_ENSURE_STATUS(tflite::PopulateConvolutionQuantizationParams( + context, input, filter, bias, output, params->activation, + &dummy_output_multiplier, &dummy_output_shift, + reinterpret_cast(&data->op_params.output_activation_min), + reinterpret_cast(&data->op_params.output_activation_max), + data->per_channel_output_multiplier, + reinterpret_cast(data->per_channel_output_shift), + num_channels)); + + if (data->op_params.dilation_height == 1 && data->op_params.dilation_width == 1) { + data->supported = kCmsisNN; + cmsis_nn_dw_conv_params dw_conv_params; + dw_conv_params.input_offset = data->op_params.input_offset; + dw_conv_params.output_offset = data->op_params.output_offset; + dw_conv_params.stride.h = data->op_params.stride_height; + dw_conv_params.stride.w = data->op_params.stride_width; + dw_conv_params.dilation.h = 1; + dw_conv_params.dilation.w = 1; + dw_conv_params.padding.h = data->op_params.pad_height; + dw_conv_params.padding.w = data->op_params.pad_width; + dw_conv_params.activation.min = data->op_params.output_activation_min; + dw_conv_params.activation.max = data->op_params.output_activation_max; + dw_conv_params.ch_mult = data->op_params.out_channels / data->op_params.in_channels; + + cmsis_nn_dims input_dims; + input_dims.n = data->op_params.batches; + input_dims.h = data->op_params.input_height; + input_dims.w = data->op_params.input_width; + input_dims.c = data->op_params.in_channels; + + cmsis_nn_dims filter_dims; + filter_dims.h = data->op_params.filter_height; + filter_dims.w = data->op_params.filter_width; + + cmsis_nn_dims output_dims; + output_dims.h = data->op_params.output_height; + output_dims.w = data->op_params.output_width; + output_dims.c = data->op_params.out_channels; + + scratch_buffer_size = arm_depthwise_conv_wrapper_s8_get_buffer_size( + &dw_conv_params, &input_dims, &filter_dims, &output_dims); + } else { + data->supported = kTFLMrefI8; + } + } + + } else if (input->type == kTfLiteFloat32) { + data->supported = kTFLMrefF32; + CalculateActivationRange(params->activation, + &data->activation_min_f32, + &data->activation_max_f32); + + } else { + TF_LITE_KERNEL_LOG(context, "Type %s not currently supported.", + TfLiteTypeGetName(input->type)); + return kTfLiteError; + } + + if(scratch_buffer_size > 0) { + TF_LITE_ENSURE_STATUS( + context->RequestScratchBufferInArena( + context, scratch_buffer_size, &data->scratch_buffer_index)); + } else { + data->scratch_buffer_index = -1; + } + + return kTfLiteOk; +} + +TfLiteStatus eval_mvp_int8(TfLiteContext* context, + OpData* data, + const TfLiteEvalTensor* input, + const TfLiteEvalTensor* filter, + TfLiteEvalTensor* output) +{ + data->op_params.input = tflite::micro::GetTensorData(input); + data->op_params.output = tflite::micro::GetTensorData(output); + data->op_params.filter = tflite::micro::GetTensorData(filter); + + 
TF_LITE_ENSURE_EQ(context, SL_STATUS_OK, sli_mvp_ml_depthwise_conv2d_s8(&data->op_params)); + + return kTfLiteOk; +} + +TfLiteStatus eval_cmsis_int8(TfLiteContext* context, + OpData* data, + const TfLiteEvalTensor* input, + const TfLiteEvalTensor* filter, + const TfLiteEvalTensor* bias, + TfLiteEvalTensor* output) +{ + cmsis_nn_dims input_dims; + input_dims.n = data->op_params.batches; + input_dims.h = data->op_params.input_height; + input_dims.w = data->op_params.input_width; + input_dims.c = data->op_params.in_channels; + + cmsis_nn_dims filter_dims; + filter_dims.n = data->op_params.in_channels; + filter_dims.h = data->op_params.filter_height; + filter_dims.w = data->op_params.filter_width; + filter_dims.c = data->op_params.out_channels; + + cmsis_nn_dims bias_dims; + bias_dims.n = 1; + bias_dims.h = 1; + bias_dims.w = 1; + bias_dims.c = data->op_params.out_channels; + + cmsis_nn_dims output_dims; + output_dims.n = data->op_params.batches; + output_dims.h = data->op_params.output_height; + output_dims.w = data->op_params.output_width; + output_dims.c = data->op_params.out_channels; + + cmsis_nn_per_channel_quant_params quant_params; + quant_params.multiplier = data->per_channel_output_multiplier; + quant_params.shift = data->per_channel_output_shift; + + cmsis_nn_dw_conv_params dw_conv_params; + dw_conv_params.input_offset = data->op_params.input_offset; + dw_conv_params.output_offset = data->op_params.output_offset; + dw_conv_params.stride.h = data->op_params.stride_height; + dw_conv_params.stride.w = data->op_params.stride_width; + dw_conv_params.dilation.h = 1; + dw_conv_params.dilation.w = 1; + dw_conv_params.padding.h = data->op_params.pad_height; + dw_conv_params.padding.w = data->op_params.pad_width; + dw_conv_params.activation.min = data->op_params.output_activation_min; + dw_conv_params.activation.max = data->op_params.output_activation_max; + dw_conv_params.ch_mult = data->op_params.out_channels / data->op_params.in_channels; + + cmsis_nn_context ctx; + ctx.buf = nullptr; + ctx.size = 0; + + if (data->scratch_buffer_index > -1) { + ctx.buf = context->GetScratchBuffer(context, data->scratch_buffer_index); + } + TFLITE_DCHECK_EQ(ARM_MATH_SUCCESS, + arm_depthwise_conv_wrapper_s8( + &ctx, &dw_conv_params, &quant_params, + &input_dims, tflite::micro::GetTensorData(input), + &filter_dims, tflite::micro::GetTensorData(filter), + &bias_dims, bias == nullptr ? 
NULL : tflite::micro::GetTensorData(bias), + &output_dims, tflite::micro::GetTensorData(output))); + + return kTfLiteOk; +} + +TfLiteStatus eval_tflm_int8(OpData* data, + const TfLiteEvalTensor* input, + const TfLiteEvalTensor* filter, + const TfLiteEvalTensor* bias, + TfLiteEvalTensor* output) +{ + DepthwiseParams dw_op_params; + + dw_op_params.input_offset = data->op_params.input_offset; + dw_op_params.output_offset = data->op_params.output_offset; + dw_op_params.stride_height = data->op_params.stride_height; + dw_op_params.stride_width = data->op_params.stride_width; + dw_op_params.dilation_height_factor = data->op_params.dilation_height; + dw_op_params.dilation_width_factor = data->op_params.dilation_width; + dw_op_params.padding_values.height = data->op_params.pad_height; + dw_op_params.padding_values.width = data->op_params.pad_width; + dw_op_params.quantized_activation_min = data->op_params.output_activation_min; + dw_op_params.quantized_activation_max = data->op_params.output_activation_max; + dw_op_params.depth_multiplier = data->op_params.out_channels / data->op_params.in_channels; + + reference_integer_ops::DepthwiseConvPerChannel( + dw_op_params, + data->per_channel_output_multiplier, + data->per_channel_output_shift, + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(bias), + bias == nullptr ? nullptr : tflite::micro::GetTensorData(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + + return kTfLiteOk; +} + +TfLiteStatus eval_float(TfLiteDepthwiseConvParams* params, + const OpData* data, + const TfLiteEvalTensor* input, + const TfLiteEvalTensor* filter, + const TfLiteEvalTensor* bias, + TfLiteEvalTensor* output) +{ + DepthwiseParams dw_op_params; + + dw_op_params.padding_type = RuntimePaddingType(params->padding); + dw_op_params.padding_values.width = data->op_params.pad_width; + dw_op_params.padding_values.height = data->op_params.pad_height; + dw_op_params.stride_width = data->op_params.stride_width; + dw_op_params.stride_height = data->op_params.stride_height; + dw_op_params.dilation_width_factor = data->op_params.dilation_width; + dw_op_params.dilation_height_factor = data->op_params.dilation_height; + dw_op_params.float_activation_min = data->activation_min_f32; + dw_op_params.float_activation_max = data->activation_max_f32; + dw_op_params.depth_multiplier = data->op_params.out_channels / data->op_params.in_channels; + + reference_ops::DepthwiseConv(dw_op_params, + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(bias), + bias == nullptr ? nullptr : tflite::micro::GetTensorData(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + return kTfLiteOk; +} + +TfLiteStatus Invoke(TfLiteContext* context, TfLiteNode* node) +{ + TfLiteStatus status = kTfLiteError; + + TFLITE_DCHECK(node->user_data != nullptr); + TFLITE_DCHECK(node->builtin_data != nullptr); + + auto* params = reinterpret_cast(node->builtin_data); + OpData* data = static_cast(node->user_data); + + const auto input = tflite::micro::GetEvalInput(context, node, kInputTensor); + const auto filter = tflite::micro::GetEvalInput(context, node, kFilterTensor); + const auto bias = NumInputs(node) == 3 + ? 
tflite::micro::GetEvalInput(context, node, kBiasTensor) + : nullptr; + auto output = tflite::micro::GetEvalOutput(context, node, kOutputTensor); + + if (data->supported == kMvp) { + status = eval_mvp_int8(context, data, input, filter, output); + + } else if (data->supported == kCmsisNN) { + status = eval_cmsis_int8(context, data, input, filter, bias, output); + + } else if (data->supported == kTFLMrefI8) { + status = eval_tflm_int8(data, input, filter, bias, output); + + } else if (data->supported == kTFLMrefF32) { + #if EI_TFLITE_DISABLE_DEPTHWISE_CONV_2D_IN_F32 + TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", + TfLiteTypeGetName(input->type), input->type); + return kTfLiteError; + #endif + + status = eval_float(params, data, input, filter, bias, output); + } + + return status; +} + +} // namespace depthwise_conv2d +} // namespace sl + +TfLiteRegistration Register_DEPTHWISE_CONV_2D() { + return {/*init=*/sl::depthwise_conv2d::Init, + /*free=*/nullptr, + /*prepare=*/sl::depthwise_conv2d::Prepare, + /*invoke=*/sl::depthwise_conv2d::Invoke, + /*profiling_string=*/nullptr, + /*builtin_code=*/0, + /*custom_name=*/nullptr, + /*version=*/0}; +} + +} // namespace tflite + +#elif EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN == 1 +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/depthwise_conv.h" + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/padding.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +#include + +#if ESP_NN +#include "edge-impulse-sdk/porting/espressif/ESP-NN/include/esp_nn.h" +#endif + +long long dc_total_time = 0; + +namespace tflite { +namespace { + +struct NodeData { + OpDataConv op_data; +#if ESP_NN + int buffer_idx; +#endif +}; + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(NodeData)); +} + +#if ESP_NN +inline void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, + const TfLiteDepthwiseConvParams& params, + const NodeData& data, + const TfLiteEvalTensor* input, + const TfLiteEvalTensor* filter, + const TfLiteEvalTensor* bias, + TfLiteEvalTensor* output) { + const int dilation_width_factor = params.dilation_width_factor; + const int dilation_height_factor = params.dilation_height_factor; + + if (dilation_width_factor == 1 && dilation_height_factor == 1) { + // Get parameters. + RuntimeShape input_shape = tflite::micro::GetTensorShape(input); + RuntimeShape filter_shape = tflite::micro::GetTensorShape(filter); + RuntimeShape output_shape = tflite::micro::GetTensorShape(output); + RuntimeShape bias_shape = tflite::micro::GetTensorShape(bias); + + TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); + + const int8_t *input_data = tflite::micro::GetTensorData(input); + int8_t *output_data = tflite::micro::GetTensorData(output); + + const int depth_multiplier = params.depth_multiplier; + const int32_t input_offset = -data.op_data.input_zero_point; + const int32_t output_offset = data.op_data.output_zero_point; + const int stride_width = params.stride_width; + const int stride_height = params.stride_height; + const int pad_width = data.op_data.padding.width; + const int pad_height = data.op_data.padding.height; + + const int input_height = input_shape.Dims(1); + const int input_width = input_shape.Dims(2); + const int input_depth = input_shape.Dims(3); + const int filter_height = filter_shape.Dims(1); + const int filter_width = filter_shape.Dims(2); + const int output_height = output_shape.Dims(1); + const int output_width = output_shape.Dims(2); + + // Set min and max value of the output. 
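+    // These bounds are computed once at prepare time; with no fused
+    // activation they default to the full int8 range [-128, 127], while a
+    // fused ReLU-family activation narrows them so that the final clamp in
+    // the kernel applies the activation at no extra cost.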
+ const int32_t activation_min = data.op_data.output_activation_min; + const int32_t activation_max = data.op_data.output_activation_max; + + // Consistency check. + TFLITE_DCHECK_LE(activation_min, activation_max); + const int batch_size = MatchingDim(input_shape, 0, output_shape, 0); + const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3); + + TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier); + if (tflite::micro::GetTensorData(bias)) { + TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth); + } + + const int input_size = input_width * input_height * input_depth; + const int output_size = output_width * output_height * output_depth; + void *scratch_buf = NULL; + if (data.buffer_idx > -1) { + scratch_buf = context->GetScratchBuffer(context, data.buffer_idx); + } + + esp_nn_set_depthwise_conv_scratch_buf(scratch_buf); + + data_dims_t input_dims = { + .width = input_width, .height = input_height, + .channels = input_depth, 1 + }; + data_dims_t output_dims = { + .width = output_width, .height = output_height, + .channels = output_depth, 1 + }; + data_dims_t filter_dims = {.width = filter_width, .height = filter_height, 0, 0}; + dw_conv_params_t conv_params = { + .in_offset = input_offset, .out_offset = output_offset, + .ch_mult = depth_multiplier, + .stride = {stride_width, stride_height}, + .padding = {pad_width, pad_height}, .dilation = {0, 0}, + .activation = {activation_min, activation_max} + }; + quant_data_t quant_data = { + .shift = data.op_data.per_channel_output_shift, + .mult = data.op_data.per_channel_output_multiplier + }; + + for (int i_batch = 0; i_batch < batch_size; i_batch++) { + esp_nn_depthwise_conv_s8(&input_dims, input_data + i_batch * input_size, + &filter_dims, tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorData(bias), + &output_dims, output_data + i_batch * output_size, + &conv_params, &quant_data); + } + } else { + reference_integer_ops::DepthwiseConvPerChannel( + DepthwiseConvParamsQuantized(params, data.op_data), + data.op_data.per_channel_output_multiplier, + data.op_data.per_channel_output_shift, + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorData(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } +} +#endif + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + TFLITE_DCHECK(node->builtin_data != nullptr); + + NodeData* data = static_cast(node->user_data); + const TfLiteDepthwiseConvParams& params = + *(static_cast(node->builtin_data)); + + MicroContext* micro_context = GetMicroContext(context); + + TfLiteTensor* input = + micro_context->AllocateTempInputTensor(node, kConvInputTensor); + TF_LITE_ENSURE(context, input != nullptr); + TfLiteTensor* filter = + micro_context->AllocateTempInputTensor(node, kConvWeightsTensor); + TF_LITE_ENSURE(context, filter != nullptr); + TfLiteTensor* bias = + micro_context->AllocateTempInputTensor(node, kConvBiasTensor); + TfLiteTensor* output = + micro_context->AllocateTempOutputTensor(node, kConvOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); + + const int input_width = input->dims->data[2]; + const int input_height = input->dims->data[1]; + const int filter_width = filter->dims->data[2]; + const int filter_height = filter->dims->data[1]; + const int output_width = output->dims->data[2]; + 
const int output_height = output->dims->data[1]; + + // Dynamically allocate per-channel quantization parameters. + const int num_channels = filter->dims->data[kDepthwiseConvQuantizedDimension]; + data->op_data.per_channel_output_multiplier = + static_cast(context->AllocatePersistentBuffer( + context, num_channels * sizeof(int32_t))); + data->op_data.per_channel_output_shift = + static_cast(context->AllocatePersistentBuffer( + context, num_channels * sizeof(int32_t))); + + // All per-channel quantized tensors need valid zero point and scale arrays. + if (input->type == kTfLiteInt8) { + TF_LITE_ENSURE_EQ(context, filter->quantization.type, + kTfLiteAffineQuantization); + + const auto* affine_quantization = + static_cast(filter->quantization.params); + TFLITE_DCHECK(affine_quantization != nullptr); + TFLITE_DCHECK(affine_quantization->scale != nullptr); + TFLITE_DCHECK(affine_quantization->zero_point != nullptr); + + TF_LITE_ENSURE( + context, affine_quantization->scale->size == 1 || + affine_quantization->scale->size == + filter->dims->data[kDepthwiseConvQuantizedDimension]); + + TF_LITE_ENSURE_EQ(context, affine_quantization->scale->size, + affine_quantization->zero_point->size); + } + + TF_LITE_ENSURE_STATUS(CalculateOpDataDepthwiseConv( + context, node, params, input_width, input_height, filter_width, + filter_height, output_width, output_height, input->type, &data->op_data)); + +#if ESP_NN + if (input->type == kTfLiteInt8) { + data_dims_t input_dims = { + .width = input_width, .height = input_height, + .channels = input->dims->data[3], 1 + }; + data_dims_t output_dims = { + .width = output_width, .height = output_height, + .channels = output->dims->data[3], 1 + }; + data_dims_t filter_dims = {.width = filter_width, .height = filter_height, 0, 0}; + dw_conv_params_t conv_params = { + .in_offset = 0, .out_offset = 0, + .ch_mult = params.depth_multiplier, + .stride = {params.stride_width, params.stride_height}, + .padding = {data->op_data.padding.width, data->op_data.padding.height}, + .dilation = {0, 0}, .activation = {-128, 127} + }; + + int scratch_buf_size = esp_nn_get_depthwise_conv_scratch_size( + &input_dims, &filter_dims, &output_dims, &conv_params); + if (scratch_buf_size > 0) { + TF_LITE_ENSURE_STATUS(context->RequestScratchBufferInArena( + context, scratch_buf_size, &data->buffer_idx)); + } else { + data->buffer_idx = -1; + } + } +#endif + + micro_context->DeallocateTempTfLiteTensor(input); + micro_context->DeallocateTempTfLiteTensor(filter); + micro_context->DeallocateTempTfLiteTensor(bias); + micro_context->DeallocateTempTfLiteTensor(output); + + return kTfLiteOk; +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + TFLITE_DCHECK(node->builtin_data != nullptr); + + auto& params = + *(reinterpret_cast(node->builtin_data)); + const NodeData& data = *(static_cast(node->user_data)); + + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kDepthwiseConvOutputTensor); + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kDepthwiseConvInputTensor); + const TfLiteEvalTensor* filter = + tflite::micro::GetEvalInput(context, node, kDepthwiseConvWeightsTensor); + const TfLiteEvalTensor* bias = + (NumInputs(node) == 3) + ? tflite::micro::GetEvalInput(context, node, kDepthwiseConvBiasTensor) + : nullptr; + + long long start_time = esp_timer_get_time(); + switch (input->type) { // Already know in/out types are same. 
+ case kTfLiteFloat32: +#if EI_TFLITE_DISABLE_DEPTHWISE_CONV_2D_IN_F32 + TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", + TfLiteTypeGetName(input->type), input->type); + return kTfLiteError; +#endif + tflite::reference_ops::DepthwiseConv( + DepthwiseConvParamsFloat(params, data.op_data), + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorData(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + break; + case kTfLiteInt8: +#if EI_TFLITE_DISABLE_DEPTHWISE_CONV_2D_IN_I8 + TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", + TfLiteTypeGetName(input->type), input->type); + return kTfLiteError; +#endif +#if ESP_NN + EvalQuantizedPerChannel(context, node, params, data, input, filter, bias, + output); +#else + reference_integer_ops::DepthwiseConvPerChannel( + DepthwiseConvParamsQuantized(params, data.op_data), + data.op_data.per_channel_output_multiplier, + data.op_data.per_channel_output_shift, + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorData(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); +#endif + break; + case kTfLiteUInt8: +#if EI_TFLITE_DISABLE_DEPTHWISE_CONV_2D_IN_U8 + TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", + TfLiteTypeGetName(input->type), input->type); + return kTfLiteError; +#endif + //EvalQuantized(context, node, params, &data, input, filter, bias, output); + reference_ops::DepthwiseConv( + DepthwiseConvParamsQuantized(params, data.op_data), + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorData(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + break; + default: + TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", + TfLiteTypeGetName(input->type), input->type); + return kTfLiteError; + } + long long time_this_instance = esp_timer_get_time() - start_time; + dc_total_time += time_this_instance; + // printf("time this instance: %llu\n", time_this_instance / 1000); + + return kTfLiteOk; +} + +} // namespace + +TfLiteRegistration Register_DEPTHWISE_CONV_2D() { + return tflite::micro::RegisterOp(Init, Prepare, Eval); +} + +} // namespace tflite + +#else +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/
+
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/depthwise_conv.h"
+
+#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/portable_tensor_utils.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
+
+namespace tflite {
+namespace {
+
+void* Init(TfLiteContext* context, const char* buffer, size_t length) {
+  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
+  return context->AllocatePersistentBuffer(context, sizeof(OpDataConv));
+}
+
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+  TFLITE_DCHECK(node->user_data != nullptr);
+  TFLITE_DCHECK(node->builtin_data != nullptr);
+
+  auto& params =
+      *(reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data));
+  const OpDataConv& data = *(static_cast<const OpDataConv*>(node->user_data));
+
+  TfLiteEvalTensor* output =
+      tflite::micro::GetEvalOutput(context, node, kDepthwiseConvOutputTensor);
+  const TfLiteEvalTensor* input =
+      tflite::micro::GetEvalInput(context, node, kDepthwiseConvInputTensor);
+  const TfLiteEvalTensor* filter =
+      tflite::micro::GetEvalInput(context, node, kDepthwiseConvWeightsTensor);
+  const TfLiteEvalTensor* bias =
+      (NumInputs(node) == 3)
+          ? tflite::micro::GetEvalInput(context, node, kDepthwiseConvBiasTensor)
+          : nullptr;
+
+  switch (input->type) {  // Already know in/out types are same.
+    case kTfLiteFloat32: {
+#if EI_TFLITE_DISABLE_DEPTHWISE_CONV_2D_IN_F32
+      MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type),
+                  input->type);
+      return kTfLiteError;
+#endif
+      tflite::reference_ops::DepthwiseConv(
+          DepthwiseConvParamsFloat(params, data),
+          tflite::micro::GetTensorShape(input),
+          tflite::micro::GetTensorData<float>(input),
+          tflite::micro::GetTensorShape(filter),
+          tflite::micro::GetTensorData<float>(filter),
+          tflite::micro::GetTensorShape(bias),
+          tflite::micro::GetOptionalTensorData<float>(bias),
+          tflite::micro::GetTensorShape(output),
+          tflite::micro::GetTensorData<float>(output));
+      break;
+    }
+    case kTfLiteInt8: {
+#if EI_TFLITE_DISABLE_DEPTHWISE_CONV_2D_IN_I8
+      MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type),
+                  input->type);
+      return kTfLiteError;
+#endif
+      switch (filter->type) {
+        case kTfLiteInt4: {
+          int8_t* unpacked_filter_data = static_cast<int8_t*>(
+              context->GetScratchBuffer(context, data.filter_buffer_index));
+          tflite::tensor_utils::UnpackDenseInt4IntoInt8(
+              tflite::micro::GetTensorData<int8_t>(filter),
+              tflite::micro::GetTensorShape(filter).FlatSize(),
+              unpacked_filter_data);
+          reference_integer_ops::DepthwiseConvPerChannel(
+              DepthwiseConvParamsQuantized(params, data),
+              data.per_channel_output_multiplier, data.per_channel_output_shift,
+              tflite::micro::GetTensorShape(input),
+              tflite::micro::GetTensorData<int8_t>(input),
+              tflite::micro::GetTensorShape(filter), unpacked_filter_data,
+              tflite::micro::GetTensorShape(bias),
+              tflite::micro::GetOptionalTensorData<int32_t>(bias),
+              tflite::micro::GetTensorShape(output),
+              tflite::micro::GetTensorData<int8_t>(output));
+          break;
+        }
+        case kTfLiteInt8: {
+          reference_integer_ops::DepthwiseConvPerChannel(
+              DepthwiseConvParamsQuantized(params, data),
+              data.per_channel_output_multiplier, data.per_channel_output_shift,
+              tflite::micro::GetTensorShape(input),
+              tflite::micro::GetTensorData<int8_t>(input),
+              tflite::micro::GetTensorShape(filter),
+              tflite::micro::GetTensorData<int8_t>(filter),
+              tflite::micro::GetTensorShape(bias),
+              tflite::micro::GetOptionalTensorData<int32_t>(bias),
+              tflite::micro::GetTensorShape(output),
+              tflite::micro::GetTensorData<int8_t>(output));
+          break;
+        }
+        default:
+          MicroPrintf("Filter type %s (%d) not supported.",
+                      TfLiteTypeGetName(filter->type), filter->type);
+          return kTfLiteError;
+      }
+      break;
+    }
+    default:
+      MicroPrintf("Input type %s (%d) not supported.",
+                  TfLiteTypeGetName(input->type), input->type);
+      return kTfLiteError;
+  }
+  return kTfLiteOk;
+}
+
+} // namespace
+
+TfLiteRegistration Register_DEPTHWISE_CONV_2D() {
+  return tflite::micro::RegisterOp(Init, DepthwiseConvPrepare, Eval);
+}
+
+} // namespace tflite
+
+#endif
diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/depthwise_conv.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/depthwise_conv.h
new file mode 100644
index 0000000..000e792
--- /dev/null
+++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/depthwise_conv.h
@@ -0,0 +1,80 @@
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_MICRO_KERNELS_DEPTHWISE_CONV_H_ +#define TENSORFLOW_LITE_MICRO_KERNELS_DEPTHWISE_CONV_H_ + +#include + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/conv.h" + +namespace tflite { + +extern const int kDepthwiseConvInputTensor; +extern const int kDepthwiseConvWeightsTensor; +extern const int kDepthwiseConvBiasTensor; +extern const int kDepthwiseConvOutputTensor; +extern const int kDepthwiseConvQuantizedDimension; + +// Returns a DepthwiseParams struct with all the parameters needed for a +// float computation. +DepthwiseParams DepthwiseConvParamsFloat( + const TfLiteDepthwiseConvParams& params, const OpDataConv& data); + +// Returns a DepthwiseParams struct with all the parameters needed for a +// quantized computation. +DepthwiseParams DepthwiseConvParamsQuantized( + const TfLiteDepthwiseConvParams& params, const OpDataConv& data); + +TfLiteStatus CalculateOpDataDepthwiseConv( + TfLiteContext* context, TfLiteNode* node, + const TfLiteDepthwiseConvParams& params, int width, int height, + int filter_width, int filter_height, int out_width, int out_height, + const TfLiteType data_type, OpDataConv* data); + +TfLiteStatus DepthwiseConvPrepare(TfLiteContext* context, TfLiteNode* node); + +// This is the most generic TfLiteRegistration. The actual supported types may +// still be target dependent. The only requirement is that every implementation +// (reference or optimized) must define this function. +TfLiteRegistration Register_DEPTHWISE_CONV_2D(); + +#if defined(CMSIS_NN) +// Returns a TfLiteRegistration struct for kernel variant that only supports +// int8 activations and int8 weights and uses the latency optimized +// implementations. +TfLiteRegistration Register_DEPTHWISE_CONV_2D_INT8(); + +// Returns a TfLiteRegistration struct for kernel variant that only supports +// int16 activations and int8 weights and uses the latency optimized +// implementations. +TfLiteRegistration Register_DEPTHWISE_CONV_2D_INT16(); + +#else +inline TfLiteRegistration Register_DEPTHWISE_CONV_2D_INT8() { + return Register_DEPTHWISE_CONV_2D(); +} + +inline TfLiteRegistration Register_DEPTHWISE_CONV_2D_INT16() { + return Register_DEPTHWISE_CONV_2D(); +} +#endif + +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_KERNELS_DEPTHWISE_CONV_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/depthwise_conv_common.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/depthwise_conv_common.cpp new file mode 100644 index 0000000..5263961 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/depthwise_conv_common.cpp @@ -0,0 +1,213 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/padding.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/depthwise_conv.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" + +namespace tflite { + +const int kDepthwiseConvInputTensor = 0; +const int kDepthwiseConvWeightsTensor = 1; +const int kDepthwiseConvBiasTensor = 2; +const int kDepthwiseConvOutputTensor = 0; + +// DepthwiseConv is quantized along dimension 3: +// https://www.tensorflow.org/lite/performance/quantization_spec +const int kDepthwiseConvQuantizedDimension = 3; + +// Returns a DepthwiseParams struct with all the parameters needed for a +// float computation. +DepthwiseParams DepthwiseConvParamsFloat( + const TfLiteDepthwiseConvParams& params, const OpDataConv& data) { + DepthwiseParams op_params; + CalculateActivationRange(params.activation, &op_params.float_activation_min, + &op_params.float_activation_max); + op_params.padding_type = tflite::micro::RuntimePaddingType(params.padding); + op_params.padding_values.width = data.padding.width; + op_params.padding_values.height = data.padding.height; + op_params.stride_width = params.stride_width; + op_params.stride_height = params.stride_height; + op_params.dilation_width_factor = params.dilation_width_factor; + op_params.dilation_height_factor = params.dilation_height_factor; + op_params.depth_multiplier = params.depth_multiplier; + return op_params; +} + +// Returns a DepthwiseParams struct with all the parameters needed for a +// quantized computation. 
+DepthwiseParams DepthwiseConvParamsQuantized( + const TfLiteDepthwiseConvParams& params, const OpDataConv& data) { + DepthwiseParams op_params; + op_params.input_offset = -data.input_zero_point; + op_params.weights_offset = -data.filter_zero_point; + op_params.output_offset = data.output_zero_point; + op_params.output_multiplier = data.output_multiplier; + op_params.output_shift = -data.output_shift; + op_params.padding_type = tflite::micro::RuntimePaddingType(params.padding); + op_params.padding_values.height = data.padding.height; + op_params.padding_values.width = data.padding.width; + op_params.stride_height = params.stride_height; + op_params.stride_width = params.stride_width; + op_params.dilation_height_factor = params.dilation_height_factor; + op_params.dilation_width_factor = params.dilation_width_factor; + op_params.depth_multiplier = params.depth_multiplier; + op_params.quantized_activation_min = data.output_activation_min; + op_params.quantized_activation_max = data.output_activation_max; + return op_params; +} + +TfLiteStatus CalculateOpDataDepthwiseConv( + TfLiteContext* context, TfLiteNode* node, + const TfLiteDepthwiseConvParams& params, int width, int height, + int filter_width, int filter_height, int out_width, int out_height, + const TfLiteType data_type, OpDataConv* data) { + bool has_bias = node->inputs->size == 3; + // Check number of inputs/outputs + TF_LITE_ENSURE(context, has_bias || node->inputs->size == 2); + TF_LITE_ENSURE_EQ(context, node->outputs->size, 1); + + // Matching GetWindowedOutputSize in TensorFlow. + auto padding = params.padding; + data->padding = ComputePaddingHeightWidth( + params.stride_height, params.stride_width, params.dilation_height_factor, + params.dilation_width_factor, height, width, filter_height, filter_width, + padding, &out_height, &out_width); + + MicroContext* micro_context = GetMicroContext(context); + + TfLiteTensor* input = + micro_context->AllocateTempInputTensor(node, kConvInputTensor); + TF_LITE_ENSURE(context, input != nullptr); + TfLiteTensor* filter = + micro_context->AllocateTempInputTensor(node, kConvWeightsTensor); + TF_LITE_ENSURE(context, filter != nullptr); + TfLiteTensor* bias = + micro_context->AllocateTempInputTensor(node, kConvBiasTensor); + TfLiteTensor* output = + micro_context->AllocateTempOutputTensor(node, kConvOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); + + // Note that quantized inference requires that all tensors have their + // parameters set. This is usually done during quantized training. 
+  if (data_type != kTfLiteFloat32) {
+    int output_channels = filter->dims->data[kDepthwiseConvQuantizedDimension];
+
+    TF_LITE_ENSURE_STATUS(tflite::PopulateConvolutionQuantizationParams(
+        context, input, filter, bias, output, params.activation,
+        &data->output_multiplier, &data->output_shift,
+        &data->output_activation_min, &data->output_activation_max,
+        data->per_channel_output_multiplier, data->per_channel_output_shift,
+        output_channels));
+  }
+
+  data->input_zero_point = input->params.zero_point;
+  data->filter_zero_point = filter->params.zero_point;
+  data->output_zero_point = output->params.zero_point;
+
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(filter);
+  micro_context->DeallocateTempTfLiteTensor(bias);
+  micro_context->DeallocateTempTfLiteTensor(output);
+
+  return kTfLiteOk;
+}
+
+TfLiteStatus DepthwiseConvPrepare(TfLiteContext* context, TfLiteNode* node) {
+  TFLITE_DCHECK(node->user_data != nullptr);
+  TFLITE_DCHECK(node->builtin_data != nullptr);
+
+  OpDataConv* data = static_cast<OpDataConv*>(node->user_data);
+  const auto& params =
+      *(static_cast<const TfLiteDepthwiseConvParams*>(node->builtin_data));
+  MicroContext* micro_context = GetMicroContext(context);
+
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, kDepthwiseConvOutputTensor);
+  TF_LITE_ENSURE(context, output != nullptr);
+  TfLiteTensor* input =
+      micro_context->AllocateTempInputTensor(node, kDepthwiseConvInputTensor);
+  TF_LITE_ENSURE(context, input != nullptr);
+  TfLiteTensor* filter =
+      micro_context->AllocateTempInputTensor(node, kDepthwiseConvWeightsTensor);
+  TF_LITE_ENSURE(context, filter != nullptr);
+
+  const int input_width = input->dims->data[2];
+  const int input_height = input->dims->data[1];
+  const int filter_width = filter->dims->data[2];
+  const int filter_height = filter->dims->data[1];
+  const int output_width = output->dims->data[2];
+  const int output_height = output->dims->data[1];
+
+  // Dynamically allocate per-channel quantization parameters.
+  if (input->type != kTfLiteFloat32) {
+    const int num_channels = filter->dims->data[kDepthwiseConvQuantizedDimension];
+    data->per_channel_output_multiplier =
+        static_cast<int32_t*>(context->AllocatePersistentBuffer(
+            context, num_channels * sizeof(int32_t)));
+    data->per_channel_output_shift =
+        static_cast<int32_t*>(context->AllocatePersistentBuffer(
+            context, num_channels * sizeof(int32_t)));
+  }
+
+  // All per-channel quantized tensors need valid zero point and scale arrays.
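+  // For depthwise conv the filter is quantized along dimension 3, so a filter
+  // of shape [1, H, W, out_channels] is expected to carry either out_channels
+  // scales (per-channel) or a single scale (per-tensor); the checks below
+  // enforce exactly that and that zero points are provided alongside.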
+ if (input->type == kTfLiteInt8) { + TF_LITE_ENSURE_EQ(context, filter->quantization.type, + kTfLiteAffineQuantization); + + const auto* affine_quantization = + static_cast(filter->quantization.params); + TFLITE_DCHECK(affine_quantization != nullptr); + TFLITE_DCHECK(affine_quantization->scale != nullptr); + TFLITE_DCHECK(affine_quantization->zero_point != nullptr); + + TF_LITE_ENSURE( + context, affine_quantization->scale->size == 1 || + affine_quantization->scale->size == + filter->dims->data[kDepthwiseConvQuantizedDimension]); + + TF_LITE_ENSURE_EQ(context, affine_quantization->scale->size, + affine_quantization->zero_point->size); + } + + if (filter->type == kTfLiteInt4) { + int filter_size = + RuntimeShape(filter->dims->size, + reinterpret_cast(filter->dims->data)) + .FlatSize(); + context->RequestScratchBufferInArena(context, filter_size, + &data->filter_buffer_index); + } + + TF_LITE_ENSURE_STATUS(CalculateOpDataDepthwiseConv( + context, node, params, input_width, input_height, filter_width, + filter_height, output_width, output_height, input->type, data)); + + micro_context->DeallocateTempTfLiteTensor(output); + micro_context->DeallocateTempTfLiteTensor(input); + micro_context->DeallocateTempTfLiteTensor(filter); + + return kTfLiteOk; +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/dequantize.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/dequantize.cpp new file mode 100644 index 0000000..c41036e --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/dequantize.cpp @@ -0,0 +1,88 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/dequantize.h" + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/quantize.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/requantize.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/dequantize.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +namespace tflite { + +void* DequantizeInit(TfLiteContext* context, const char* buffer, + size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(DequantizeOpData)); +} + +TfLiteStatus DequantizeEval(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + DequantizeOpData* data = static_cast(node->user_data); + + const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0); + TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0); + + if (output->type == kTfLiteFloat32) { + switch (input->type) { + case kTfLiteInt8: + reference_ops::Dequantize(data->quantization_params, + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + break; + case kTfLiteInt16: + reference_ops::Dequantize(data->quantization_params, + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + break; + case kTfLiteUInt8: + reference_ops::Dequantize(data->quantization_params, + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + break; + default: + MicroPrintf("Input %s, output %s not supported.", + TfLiteTypeGetName(input->type), + TfLiteTypeGetName(output->type)); + return kTfLiteError; + } + } else { + MicroPrintf("Input %s, output %s not supported.", + TfLiteTypeGetName(input->type), + TfLiteTypeGetName(output->type)); + return kTfLiteError; + } + + return kTfLiteOk; +} + +TfLiteRegistration Register_DEQUANTIZE() { + return tflite::micro::RegisterOp(DequantizeInit, DequantizePrepare, + DequantizeEval); +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/dequantize.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/dequantize.h new file mode 100644 index 0000000..ee45f36 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/dequantize.h @@ -0,0 +1,38 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_MICRO_KERNELS_DEQUANTIZE_H_ +#define TENSORFLOW_LITE_MICRO_KERNELS_DEQUANTIZE_H_ + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" + +namespace tflite { + +struct DequantizeOpData { + tflite::DequantizationParams quantization_params; + // The scaling factor from input to output (aka the 'real multiplier') can + // be represented as a fixed point multiplier plus a left shift. + int32_t output_multiplier; + int output_shift; + int32_t output_zero_point; +}; + +TfLiteStatus DequantizePrepare(TfLiteContext* context, TfLiteNode* node); + +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_KERNELS_DEQUANTIZE_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/dequantize_common.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/dequantize_common.cpp new file mode 100644 index 0000000..e8ae297 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/dequantize_common.cpp @@ -0,0 +1,67 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/dequantize.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/quantize.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/requantize.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/dequantize.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" + +namespace tflite { + +TfLiteStatus DequantizePrepare(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + DequantizeOpData* data = static_cast(node->user_data); + + TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + + MicroContext* micro_context = GetMicroContext(context); + + // TODO(b/140515557): Add cached dequant to improve hybrid model performance. 
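+  // A minimal sketch (illustration only, not part of the kernel) of what the
+  // quantization parameters captured below feed into: reference
+  // dequantization maps a quantized value q back to float as
+  // scale * (q - zero_point). With scale = 0.5f and zero_point = -128,
+  // q = -128 dequantizes to 0.0f and q = 127 to 127.5f.
+  auto example_dequantize_one = [](int8_t q, float scale, int32_t zero_point) {
+    return scale * (static_cast<float>(q) - static_cast<float>(zero_point));
+  };
+  (void)example_dequantize_one;  // Unused; the kernel calls reference_ops::Dequantize.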
+ TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, 0); + TF_LITE_ENSURE(context, input != nullptr); + TfLiteTensor* output = micro_context->AllocateTempOutputTensor(node, 0); + TF_LITE_ENSURE(context, output != nullptr); + + TF_LITE_ENSURE(context, input->type == kTfLiteInt8 || + input->type == kTfLiteInt16 || + input->type == kTfLiteUInt8); + TF_LITE_ENSURE(context, output->type == kTfLiteFloat32); + + if (output->type == kTfLiteInt32) { + const double effective_output_scale = + static_cast(input->params.scale) / + static_cast(output->params.scale); + QuantizeMultiplier(effective_output_scale, &data->output_multiplier, + &data->output_shift); + } + + data->quantization_params.zero_point = input->params.zero_point; + data->quantization_params.scale = static_cast(input->params.scale); + data->output_zero_point = output->params.zero_point; + + micro_context->DeallocateTempTfLiteTensor(input); + micro_context->DeallocateTempTfLiteTensor(output); + + return kTfLiteOk; +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/detection_postprocess.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/detection_postprocess.cpp new file mode 100644 index 0000000..2209a58 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/detection_postprocess.cpp @@ -0,0 +1,807 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include +#include +#include + +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/flexbuffers.h" +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h" + +namespace tflite { +namespace { + +/** + * This version of detection_postprocess is specific to TFLite Micro. It + * contains the following differences between the TFLite version: + * + * 1.) Temporaries (temporary tensors) - Micro use instead scratch buffer API. + * 2.) Output dimensions - the TFLite version does not support undefined out + * dimensions. So model must have static out dimensions. 
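+ *
+ * As a rough illustration of difference 1.), the pattern used throughout this
+ * kernel is: Prepare() reserves memory with
+ *   context->RequestScratchBufferInArena(context, size_in_bytes, &buffer_index);
+ * and Eval() retrieves the same memory with
+ *   context->GetScratchBuffer(context, buffer_index);
+ * (size_in_bytes and buffer_index here stand for the sizes and the
+ * scratch-buffer indexes stored in OpData below).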
+ */ + +// Input tensors +constexpr int kInputTensorBoxEncodings = 0; +constexpr int kInputTensorClassPredictions = 1; +constexpr int kInputTensorAnchors = 2; + +// Output tensors +constexpr int kOutputTensorDetectionBoxes = 0; +constexpr int kOutputTensorDetectionClasses = 1; +constexpr int kOutputTensorDetectionScores = 2; +constexpr int kOutputTensorNumDetections = 3; + +constexpr int kNumCoordBox = 4; +constexpr int kBatchSize = 1; + +constexpr int kNumDetectionsPerClass = 100; + +// Object Detection model produces axis-aligned boxes in two formats: +// BoxCorner represents the lower left corner (xmin, ymin) and +// the upper right corner (xmax, ymax). +// CenterSize represents the center (xcenter, ycenter), height and width. +// BoxCornerEncoding and CenterSizeEncoding are related as follows: +// ycenter = y / y_scale * anchor.h + anchor.y; +// xcenter = x / x_scale * anchor.w + anchor.x; +// half_h = 0.5*exp(h/ h_scale)) * anchor.h; +// half_w = 0.5*exp(w / w_scale)) * anchor.w; +// ymin = ycenter - half_h +// ymax = ycenter + half_h +// xmin = xcenter - half_w +// xmax = xcenter + half_w +struct BoxCornerEncoding { + float ymin; + float xmin; + float ymax; + float xmax; +}; + +struct CenterSizeEncoding { + float y; + float x; + float h; + float w; +}; +// We make sure that the memory allocations are contiguous with static_assert. +static_assert(sizeof(BoxCornerEncoding) == sizeof(float) * kNumCoordBox, + "Size of BoxCornerEncoding is 4 float values"); +static_assert(sizeof(CenterSizeEncoding) == sizeof(float) * kNumCoordBox, + "Size of CenterSizeEncoding is 4 float values"); + +struct OpData { + int max_detections; + int max_classes_per_detection; // Fast Non-Max-Suppression + int detections_per_class; // Regular Non-Max-Suppression + float non_max_suppression_score_threshold; + float intersection_over_union_threshold; + int num_classes; + bool use_regular_non_max_suppression; + CenterSizeEncoding scale_values; + + // Scratch buffers indexes + int active_candidate_idx; + int decoded_boxes_idx; + int scores_idx; + int score_buffer_idx; + int keep_scores_idx; + int scores_after_regular_non_max_suppression_idx; + int sorted_values_idx; + int keep_indices_idx; + int sorted_indices_idx; + int buffer_idx; + int selected_idx; + + // Cached tensor scale and zero point values for quantized operations + TfLiteQuantizationParams input_box_encodings; + TfLiteQuantizationParams input_class_predictions; + TfLiteQuantizationParams input_anchors; +}; + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + OpData* op_data = nullptr; + + const uint8_t* buffer_t = reinterpret_cast(buffer); + const flexbuffers::Map& m = flexbuffers::GetRoot(buffer_t, length).AsMap(); + op_data = reinterpret_cast( + context->AllocatePersistentBuffer(context, sizeof(OpData))); + + op_data->max_detections = m["max_detections"].AsInt32(); + op_data->max_classes_per_detection = m["max_classes_per_detection"].AsInt32(); + if (m["detections_per_class"].IsNull()) + op_data->detections_per_class = kNumDetectionsPerClass; + else + op_data->detections_per_class = m["detections_per_class"].AsInt32(); + if (m["use_regular_nms"].IsNull()) + op_data->use_regular_non_max_suppression = false; + else + op_data->use_regular_non_max_suppression = m["use_regular_nms"].AsBool(); + + op_data->non_max_suppression_score_threshold = + m["nms_score_threshold"].AsFloat(); + op_data->intersection_over_union_threshold = m["nms_iou_threshold"].AsFloat(); + 
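+  // A minimal sketch (illustration only) of how the scale values read just
+  // below are used by DecodeCenterSizeBoxes(): each box center is decoded as
+  // y_center = (y / y_scale) * anchor_h + anchor_y, and each half-extent as
+  // half_h = 0.5f * exp(h / h_scale) * anchor_h, matching the
+  // CenterSizeEncoding -> BoxCornerEncoding formulas documented above.
+  auto example_decode_center = [](float y, float y_scale, float anchor_y,
+                                  float anchor_h) {
+    return (y / y_scale) * anchor_h + anchor_y;
+  };
+  (void)example_decode_center;  // Illustrative only.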
op_data->num_classes = m["num_classes"].AsInt32(); + op_data->scale_values.y = m["y_scale"].AsFloat(); + op_data->scale_values.x = m["x_scale"].AsFloat(); + op_data->scale_values.h = m["h_scale"].AsFloat(); + op_data->scale_values.w = m["w_scale"].AsFloat(); + + return op_data; +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + auto* op_data = static_cast(node->user_data); + + MicroContext* micro_context = GetMicroContext(context); + + // Inputs: box_encodings, scores, anchors + TF_LITE_ENSURE_EQ(context, NumInputs(node), 3); + TfLiteTensor* input_box_encodings = + micro_context->AllocateTempInputTensor(node, kInputTensorBoxEncodings); + TfLiteTensor* input_class_predictions = + micro_context->AllocateTempInputTensor(node, + kInputTensorClassPredictions); + TfLiteTensor* input_anchors = + micro_context->AllocateTempInputTensor(node, kInputTensorAnchors); + TF_LITE_ENSURE_EQ(context, NumDimensions(input_box_encodings), 3); + TF_LITE_ENSURE_EQ(context, NumDimensions(input_class_predictions), 3); + TF_LITE_ENSURE_EQ(context, NumDimensions(input_anchors), 2); + + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 4); + const int num_boxes = input_box_encodings->dims->data[1]; + const int num_classes = op_data->num_classes; + + op_data->input_box_encodings.scale = input_box_encodings->params.scale; + op_data->input_box_encodings.zero_point = + input_box_encodings->params.zero_point; + op_data->input_class_predictions.scale = + input_class_predictions->params.scale; + op_data->input_class_predictions.zero_point = + input_class_predictions->params.zero_point; + op_data->input_anchors.scale = input_anchors->params.scale; + op_data->input_anchors.zero_point = input_anchors->params.zero_point; + + // Scratch tensors + context->RequestScratchBufferInArena(context, num_boxes, + &op_data->active_candidate_idx); + context->RequestScratchBufferInArena(context, + num_boxes * kNumCoordBox * sizeof(float), + &op_data->decoded_boxes_idx); + context->RequestScratchBufferInArena( + context, + input_class_predictions->dims->data[1] * + input_class_predictions->dims->data[2] * sizeof(float), + &op_data->scores_idx); + + // Additional buffers + context->RequestScratchBufferInArena(context, num_boxes * sizeof(float), + &op_data->score_buffer_idx); + context->RequestScratchBufferInArena(context, num_boxes * sizeof(float), + &op_data->keep_scores_idx); + context->RequestScratchBufferInArena( + context, op_data->max_detections * num_boxes * sizeof(float), + &op_data->scores_after_regular_non_max_suppression_idx); + context->RequestScratchBufferInArena( + context, op_data->max_detections * num_boxes * sizeof(float), + &op_data->sorted_values_idx); + context->RequestScratchBufferInArena(context, num_boxes * sizeof(int), + &op_data->keep_indices_idx); + context->RequestScratchBufferInArena( + context, op_data->max_detections * num_boxes * sizeof(int), + &op_data->sorted_indices_idx); + int buffer_size = std::max(num_classes, op_data->max_detections); + context->RequestScratchBufferInArena( + context, buffer_size * num_boxes * sizeof(int), &op_data->buffer_idx); + buffer_size = std::min(num_boxes, op_data->max_detections); + context->RequestScratchBufferInArena( + context, buffer_size * num_boxes * sizeof(int), &op_data->selected_idx); + + // Outputs: detection_boxes, detection_scores, detection_classes, + // num_detections + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 4); + + micro_context->DeallocateTempTfLiteTensor(input_box_encodings); + 
micro_context->DeallocateTempTfLiteTensor(input_class_predictions); + micro_context->DeallocateTempTfLiteTensor(input_anchors); + + return kTfLiteOk; +} + +class Dequantizer { + public: + Dequantizer(int zero_point, float scale) + : zero_point_(zero_point), scale_(scale) {} + float operator()(uint8_t x) { + return (static_cast(x) - zero_point_) * scale_; + } + + private: + int zero_point_; + float scale_; +}; + +template +T ReInterpretTensor(const TfLiteEvalTensor* tensor) { + const float* tensor_base = tflite::micro::GetTensorData(tensor); + return reinterpret_cast(tensor_base); +} + +template +T ReInterpretTensor(TfLiteEvalTensor* tensor) { + float* tensor_base = tflite::micro::GetTensorData(tensor); + return reinterpret_cast(tensor_base); +} + +TfLiteStatus DecodeCenterSizeBoxes(TfLiteContext* context, TfLiteNode* node, + OpData* op_data) { + // Parse input tensor boxencodings + const TfLiteEvalTensor* input_box_encodings = + tflite::micro::GetEvalInput(context, node, kInputTensorBoxEncodings); + TF_LITE_ENSURE_EQ(context, input_box_encodings->dims->data[0], kBatchSize); + const int num_boxes = input_box_encodings->dims->data[1]; + TF_LITE_ENSURE(context, input_box_encodings->dims->data[2] >= kNumCoordBox); + const TfLiteEvalTensor* input_anchors = + tflite::micro::GetEvalInput(context, node, kInputTensorAnchors); + + // Decode the boxes to get (ymin, xmin, ymax, xmax) based on the anchors + CenterSizeEncoding box_centersize; + CenterSizeEncoding scale_values = op_data->scale_values; + CenterSizeEncoding anchor; + for (int idx = 0; idx < num_boxes; ++idx) { + switch (input_box_encodings->type) { + // Float + case kTfLiteFloat32: { + // Please see DequantizeBoxEncodings function for the support detail. + const int box_encoding_idx = idx * input_box_encodings->dims->data[2]; + const float* boxes = &(tflite::micro::GetTensorData( + input_box_encodings)[box_encoding_idx]); + box_centersize = *reinterpret_cast(boxes); + anchor = + ReInterpretTensor(input_anchors)[idx]; + break; + } + default: + // Unsupported type. 
+ return kTfLiteError; + } + + float ycenter = static_cast(static_cast(box_centersize.y) / + static_cast(scale_values.y) * + static_cast(anchor.h) + + static_cast(anchor.y)); + + float xcenter = static_cast(static_cast(box_centersize.x) / + static_cast(scale_values.x) * + static_cast(anchor.w) + + static_cast(anchor.x)); + + float half_h = + static_cast(0.5 * + (std::exp(static_cast(box_centersize.h) / + static_cast(scale_values.h))) * + static_cast(anchor.h)); + float half_w = + static_cast(0.5 * + (std::exp(static_cast(box_centersize.w) / + static_cast(scale_values.w))) * + static_cast(anchor.w)); + + float* decoded_boxes = reinterpret_cast( + context->GetScratchBuffer(context, op_data->decoded_boxes_idx)); + auto& box = reinterpret_cast(decoded_boxes)[idx]; + box.ymin = ycenter - half_h; + box.xmin = xcenter - half_w; + box.ymax = ycenter + half_h; + box.xmax = xcenter + half_w; + } + return kTfLiteOk; +} + +void DecreasingPartialArgSort(const float* values, int num_values, + int num_to_sort, int* indices) { + std::iota(indices, indices + num_values, 0); + std::partial_sort(indices, indices + num_to_sort, indices + num_values, + [&values](const int i, const int j) { + return std::tie(values[i], j) > std::tie(values[j], i); + }); +} + +template +void InsertionSort(int* start, int* end, Compare compare) { + for (int* i = start; i != end; ++i) { + std::rotate(std::upper_bound(start, i, *i, compare), i, i + 1); + } +} + +template +void TopDownMerge(int* values, int* scratch, const int half_num_values, + int num_values, Compare compare) { + int left = 0; + int right = half_num_values; + + for (int i = 0; i < num_values; i++) { + if (left >= half_num_values || + (right < num_values && compare(values[right], values[left]))) { + scratch[i] = values[right++]; + } else { + scratch[i] = values[left++]; + } + } + memcpy(values, scratch, num_values * sizeof(int)); +} + +template +void MergeSort(int* values, int* scratch, const int num_values, + Compare compare) { + constexpr int threshold = 20; + + if (num_values < threshold) { + InsertionSort(values, values + num_values, compare); + return; + } + + const int half_num_values = num_values / 2; + + MergeSort(values, scratch, half_num_values, compare); + MergeSort(values + half_num_values, scratch, num_values - half_num_values, + compare); + TopDownMerge(values, scratch, half_num_values, num_values, compare); +} + +void DecreasingArgSort(const float* values, int num_values, int* indices, + int* scratch) { + std::iota(indices, indices + num_values, 0); + + MergeSort(indices, scratch, num_values, [&values](const int i, const int j) { + return values[i] > values[j]; + }); +} + +int SelectDetectionsAboveScoreThreshold(const float* values, int size, + const float threshold, + float* keep_values, int* keep_indices) { + int counter = 0; + for (int i = 0; i < size; i++) { + if (values[i] >= threshold) { + keep_values[counter] = values[i]; + keep_indices[counter] = i; + counter++; + } + } + return counter; +} + +bool ValidateBoxes(const float* decoded_boxes, const int num_boxes) { + for (int i = 0; i < num_boxes; ++i) { + // ymax>=ymin, xmax>=xmin + auto& box = reinterpret_cast(decoded_boxes)[i]; + if (box.ymin >= box.ymax || box.xmin >= box.xmax) { + return false; + } + } + return true; +} + +float ComputeIntersectionOverUnion(const float* decoded_boxes, const int i, + const int j) { + auto& box_i = reinterpret_cast(decoded_boxes)[i]; + auto& box_j = reinterpret_cast(decoded_boxes)[j]; + const float area_i = (box_i.ymax - box_i.ymin) * (box_i.xmax - 
box_i.xmin); + const float area_j = (box_j.ymax - box_j.ymin) * (box_j.xmax - box_j.xmin); + if (area_i <= 0 || area_j <= 0) return 0.0; + const float intersection_ymin = std::max(box_i.ymin, box_j.ymin); + const float intersection_xmin = std::max(box_i.xmin, box_j.xmin); + const float intersection_ymax = std::min(box_i.ymax, box_j.ymax); + const float intersection_xmax = std::min(box_i.xmax, box_j.xmax); + const float intersection_area = + std::max(intersection_ymax - intersection_ymin, 0.0) * + std::max(intersection_xmax - intersection_xmin, 0.0); + return intersection_area / (area_i + area_j - intersection_area); +} + +// NonMaxSuppressionSingleClass() prunes out the box locations with high overlap +// before selecting the highest scoring boxes (max_detections in number) +// It assumes all boxes are good in beginning and sorts based on the scores. +// If lower-scoring box has too much overlap with a higher-scoring box, +// we get rid of the lower-scoring box. +// Complexity is O(N^2) pairwise comparison between boxes +TfLiteStatus NonMaxSuppressionSingleClassHelper( + TfLiteContext* context, TfLiteNode* node, OpData* op_data, + const float* scores, int* selected, int* selected_size, + int max_detections) { + const TfLiteEvalTensor* input_box_encodings = + tflite::micro::GetEvalInput(context, node, kInputTensorBoxEncodings); + const int num_boxes = input_box_encodings->dims->data[1]; + const float non_max_suppression_score_threshold = + op_data->non_max_suppression_score_threshold; + const float intersection_over_union_threshold = + op_data->intersection_over_union_threshold; + // Maximum detections should be positive. + TF_LITE_ENSURE(context, (max_detections >= 0)); + // intersection_over_union_threshold should be positive + // and should be less than 1. + TF_LITE_ENSURE(context, (intersection_over_union_threshold > 0.0f) && + (intersection_over_union_threshold <= 1.0f)); + // Validate boxes + float* decoded_boxes = reinterpret_cast( + context->GetScratchBuffer(context, op_data->decoded_boxes_idx)); + + TF_LITE_ENSURE(context, ValidateBoxes(decoded_boxes, num_boxes)); + + // threshold scores + int* keep_indices = reinterpret_cast( + context->GetScratchBuffer(context, op_data->keep_indices_idx)); + float* keep_scores = reinterpret_cast( + context->GetScratchBuffer(context, op_data->keep_scores_idx)); + int num_scores_kept = SelectDetectionsAboveScoreThreshold( + scores, num_boxes, non_max_suppression_score_threshold, keep_scores, + keep_indices); + int* sorted_indices = reinterpret_cast( + context->GetScratchBuffer(context, op_data->sorted_indices_idx)); + + // Reusing keep_indices for scratch buffer and write back its values + // after the sorting is done. 
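+  // For illustration (not executed by the kernel): given
+  //   float example_scores[3] = {0.1f, 0.9f, 0.4f};
+  //   int example_order[3], example_scratch[3];
+  //   DecreasingArgSort(example_scores, 3, example_order, example_scratch);
+  // example_order ends up as {1, 2, 0}, i.e. positions ordered by descending
+  // score, which is exactly what the call below produces for keep_scores.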
+ DecreasingArgSort(keep_scores, num_scores_kept, sorted_indices, keep_indices); + int counter = 0; + for (int i = 0; i < num_boxes; i++) { + if (scores[i] >= non_max_suppression_score_threshold) { + keep_indices[counter] = i; + counter++; + } + } + + const int num_boxes_kept = num_scores_kept; + const int output_size = std::min(num_boxes_kept, max_detections); + *selected_size = 0; + + int num_active_candidate = num_boxes_kept; + uint8_t* active_box_candidate = reinterpret_cast( + context->GetScratchBuffer(context, op_data->active_candidate_idx)); + + for (int row = 0; row < num_boxes_kept; row++) { + active_box_candidate[row] = 1; + } + for (int i = 0; i < num_boxes_kept; ++i) { + if (num_active_candidate == 0 || *selected_size >= output_size) break; + if (active_box_candidate[i] == 1) { + selected[(*selected_size)++] = keep_indices[sorted_indices[i]]; + active_box_candidate[i] = 0; + num_active_candidate--; + } else { + continue; + } + for (int j = i + 1; j < num_boxes_kept; ++j) { + if (active_box_candidate[j] == 1) { + float intersection_over_union = ComputeIntersectionOverUnion( + decoded_boxes, keep_indices[sorted_indices[i]], + keep_indices[sorted_indices[j]]); + + if (intersection_over_union > intersection_over_union_threshold) { + active_box_candidate[j] = 0; + num_active_candidate--; + } + } + } + } + + return kTfLiteOk; +} + +// This function implements a regular version of Non Maximal Suppression (NMS) +// for multiple classes where +// 1) we do NMS separately for each class across all anchors and +// 2) keep only the highest anchor scores across all classes +// 3) The worst runtime of the regular NMS is O(K*N^2) +// where N is the number of anchors and K the number of +// classes. +TfLiteStatus NonMaxSuppressionMultiClassRegularHelper(TfLiteContext* context, + TfLiteNode* node, + OpData* op_data, + const float* scores) { + const TfLiteEvalTensor* input_box_encodings = + tflite::micro::GetEvalInput(context, node, kInputTensorBoxEncodings); + const TfLiteEvalTensor* input_class_predictions = + tflite::micro::GetEvalInput(context, node, kInputTensorClassPredictions); + TfLiteEvalTensor* detection_boxes = + tflite::micro::GetEvalOutput(context, node, kOutputTensorDetectionBoxes); + TfLiteEvalTensor* detection_classes = tflite::micro::GetEvalOutput( + context, node, kOutputTensorDetectionClasses); + TfLiteEvalTensor* detection_scores = + tflite::micro::GetEvalOutput(context, node, kOutputTensorDetectionScores); + TfLiteEvalTensor* num_detections = + tflite::micro::GetEvalOutput(context, node, kOutputTensorNumDetections); + + const int num_boxes = input_box_encodings->dims->data[1]; + const int num_classes = op_data->num_classes; + const int num_detections_per_class = op_data->detections_per_class; + const int max_detections = op_data->max_detections; + const int num_classes_with_background = + input_class_predictions->dims->data[2]; + // The row index offset is 1 if background class is included and 0 otherwise. + int label_offset = num_classes_with_background - num_classes; + TF_LITE_ENSURE(context, num_detections_per_class > 0); + + // For each class, perform non-max suppression. 
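+  // Sketch of the bookkeeping used below (illustration only): each selected
+  // detection is remembered as one flattened index,
+  //   flat = anchor_index * num_classes_with_background + class_col + label_offset,
+  // which the output loop later inverts as
+  //   anchor_index = flat / num_classes_with_background   (integer division)
+  //   class_index  = flat - anchor_index * num_classes_with_background - label_offset.
+  // For example, with 91 columns (90 classes plus background, label_offset == 1),
+  // anchor 5 / class column 3 is stored as 5 * 91 + 3 + 1 = 459.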
+ float* class_scores = reinterpret_cast( + context->GetScratchBuffer(context, op_data->score_buffer_idx)); + int* box_indices_after_regular_non_max_suppression = reinterpret_cast( + context->GetScratchBuffer(context, op_data->buffer_idx)); + float* scores_after_regular_non_max_suppression = + reinterpret_cast(context->GetScratchBuffer( + context, op_data->scores_after_regular_non_max_suppression_idx)); + + int size_of_sorted_indices = 0; + int* sorted_indices = reinterpret_cast( + context->GetScratchBuffer(context, op_data->sorted_indices_idx)); + float* sorted_values = reinterpret_cast( + context->GetScratchBuffer(context, op_data->sorted_values_idx)); + + for (int col = 0; col < num_classes; col++) { + for (int row = 0; row < num_boxes; row++) { + // Get scores of boxes corresponding to all anchors for single class + class_scores[row] = + *(scores + row * num_classes_with_background + col + label_offset); + } + // Perform non-maximal suppression on single class + int selected_size = 0; + int* selected = reinterpret_cast( + context->GetScratchBuffer(context, op_data->selected_idx)); + TF_LITE_ENSURE_STATUS(NonMaxSuppressionSingleClassHelper( + context, node, op_data, class_scores, selected, &selected_size, + num_detections_per_class)); + // Add selected indices from non-max suppression of boxes in this class + int output_index = size_of_sorted_indices; + for (int i = 0; i < selected_size; i++) { + int selected_index = selected[i]; + + box_indices_after_regular_non_max_suppression[output_index] = + (selected_index * num_classes_with_background + col + label_offset); + scores_after_regular_non_max_suppression[output_index] = + class_scores[selected_index]; + output_index++; + } + // Sort the max scores among the selected indices + // Get the indices for top scores + int num_indices_to_sort = std::min(output_index, max_detections); + DecreasingPartialArgSort(scores_after_regular_non_max_suppression, + output_index, num_indices_to_sort, sorted_indices); + + // Copy values to temporary vectors + for (int row = 0; row < num_indices_to_sort; row++) { + int temp = sorted_indices[row]; + sorted_indices[row] = box_indices_after_regular_non_max_suppression[temp]; + sorted_values[row] = scores_after_regular_non_max_suppression[temp]; + } + // Copy scores and indices from temporary vectors + for (int row = 0; row < num_indices_to_sort; row++) { + box_indices_after_regular_non_max_suppression[row] = sorted_indices[row]; + scores_after_regular_non_max_suppression[row] = sorted_values[row]; + } + size_of_sorted_indices = num_indices_to_sort; + } + + // Allocate output tensors + for (int output_box_index = 0; output_box_index < max_detections; + output_box_index++) { + if (output_box_index < size_of_sorted_indices) { + const int anchor_index = floor( + box_indices_after_regular_non_max_suppression[output_box_index] / + num_classes_with_background); + const int class_index = + box_indices_after_regular_non_max_suppression[output_box_index] - + anchor_index * num_classes_with_background - label_offset; + const float selected_score = + scores_after_regular_non_max_suppression[output_box_index]; + // detection_boxes + float* decoded_boxes = reinterpret_cast( + context->GetScratchBuffer(context, op_data->decoded_boxes_idx)); + ReInterpretTensor(detection_boxes)[output_box_index] = + reinterpret_cast(decoded_boxes)[anchor_index]; + // detection_classes + tflite::micro::GetTensorData(detection_classes)[output_box_index] = + class_index; + // detection_scores + 
tflite::micro::GetTensorData(detection_scores)[output_box_index] = + selected_score; + } else { + ReInterpretTensor( + detection_boxes)[output_box_index] = {0.0f, 0.0f, 0.0f, 0.0f}; + // detection_classes + tflite::micro::GetTensorData(detection_classes)[output_box_index] = + 0.0f; + // detection_scores + tflite::micro::GetTensorData(detection_scores)[output_box_index] = + 0.0f; + } + } + tflite::micro::GetTensorData(num_detections)[0] = + size_of_sorted_indices; + + return kTfLiteOk; +} + +// This function implements a fast version of Non Maximal Suppression for +// multiple classes where +// 1) we keep the top-k scores for each anchor and +// 2) during NMS, each anchor only uses the highest class score for sorting. +// 3) Compared to standard NMS, the worst runtime of this version is O(N^2) +// instead of O(KN^2) where N is the number of anchors and K the number of +// classes. +TfLiteStatus NonMaxSuppressionMultiClassFastHelper(TfLiteContext* context, + TfLiteNode* node, + OpData* op_data, + const float* scores) { + const TfLiteEvalTensor* input_box_encodings = + tflite::micro::GetEvalInput(context, node, kInputTensorBoxEncodings); + const TfLiteEvalTensor* input_class_predictions = + tflite::micro::GetEvalInput(context, node, kInputTensorClassPredictions); + TfLiteEvalTensor* detection_boxes = + tflite::micro::GetEvalOutput(context, node, kOutputTensorDetectionBoxes); + + TfLiteEvalTensor* detection_classes = tflite::micro::GetEvalOutput( + context, node, kOutputTensorDetectionClasses); + TfLiteEvalTensor* detection_scores = + tflite::micro::GetEvalOutput(context, node, kOutputTensorDetectionScores); + TfLiteEvalTensor* num_detections = + tflite::micro::GetEvalOutput(context, node, kOutputTensorNumDetections); + + const int num_boxes = input_box_encodings->dims->data[1]; + const int num_classes = op_data->num_classes; + const int max_categories_per_anchor = op_data->max_classes_per_detection; + const int num_classes_with_background = + input_class_predictions->dims->data[2]; + + // The row index offset is 1 if background class is included and 0 otherwise. 
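+  // For example (illustration only): with 3 real classes plus a background
+  // column, num_classes_with_background == 4 and label_offset below is 1, so
+  // per-anchor scores for the real classes start at column 1; without the
+  // background column the offset is 0.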
+ int label_offset = num_classes_with_background - num_classes; + TF_LITE_ENSURE(context, (max_categories_per_anchor > 0)); + const int num_categories_per_anchor = + std::min(max_categories_per_anchor, num_classes); + float* max_scores = reinterpret_cast( + context->GetScratchBuffer(context, op_data->score_buffer_idx)); + int* sorted_class_indices = reinterpret_cast( + context->GetScratchBuffer(context, op_data->buffer_idx)); + + for (int row = 0; row < num_boxes; row++) { + const float* box_scores = + scores + row * num_classes_with_background + label_offset; + int* class_indices = sorted_class_indices + row * num_classes; + DecreasingPartialArgSort(box_scores, num_classes, num_categories_per_anchor, + class_indices); + max_scores[row] = box_scores[class_indices[0]]; + } + + // Perform non-maximal suppression on max scores + int selected_size = 0; + int* selected = reinterpret_cast( + context->GetScratchBuffer(context, op_data->selected_idx)); + TF_LITE_ENSURE_STATUS(NonMaxSuppressionSingleClassHelper( + context, node, op_data, max_scores, selected, &selected_size, + op_data->max_detections)); + + // Allocate output tensors + int output_box_index = 0; + + for (int i = 0; i < selected_size; i++) { + int selected_index = selected[i]; + + const float* box_scores = + scores + selected_index * num_classes_with_background + label_offset; + const int* class_indices = + sorted_class_indices + selected_index * num_classes; + + for (int col = 0; col < num_categories_per_anchor; ++col) { + int box_offset = num_categories_per_anchor * output_box_index + col; + + // detection_boxes + float* decoded_boxes = reinterpret_cast( + context->GetScratchBuffer(context, op_data->decoded_boxes_idx)); + ReInterpretTensor(detection_boxes)[box_offset] = + reinterpret_cast(decoded_boxes)[selected_index]; + + // detection_classes + tflite::micro::GetTensorData(detection_classes)[box_offset] = + class_indices[col]; + + // detection_scores + tflite::micro::GetTensorData(detection_scores)[box_offset] = + box_scores[class_indices[col]]; + + output_box_index++; + } + } + + tflite::micro::GetTensorData(num_detections)[0] = output_box_index; + return kTfLiteOk; +} + +TfLiteStatus NonMaxSuppressionMultiClass(TfLiteContext* context, + TfLiteNode* node, OpData* op_data) { + // Get the input tensors + const TfLiteEvalTensor* input_box_encodings = + tflite::micro::GetEvalInput(context, node, kInputTensorBoxEncodings); + const TfLiteEvalTensor* input_class_predictions = + tflite::micro::GetEvalInput(context, node, kInputTensorClassPredictions); + const int num_boxes = input_box_encodings->dims->data[1]; + const int num_classes = op_data->num_classes; + + TF_LITE_ENSURE_EQ(context, input_class_predictions->dims->data[0], + kBatchSize); + TF_LITE_ENSURE_EQ(context, input_class_predictions->dims->data[1], num_boxes); + const int num_classes_with_background = + input_class_predictions->dims->data[2]; + + TF_LITE_ENSURE(context, (num_classes_with_background - num_classes <= 1)); + TF_LITE_ENSURE(context, (num_classes_with_background >= num_classes)); + + const float* scores; + switch (input_class_predictions->type) { + case kTfLiteFloat32: + scores = tflite::micro::GetTensorData(input_class_predictions); + break; + default: + // Unsupported type. 
+ return kTfLiteError; + } + + if (op_data->use_regular_non_max_suppression) { + TF_LITE_ENSURE_STATUS(NonMaxSuppressionMultiClassRegularHelper( + context, node, op_data, scores)); + } else { + TF_LITE_ENSURE_STATUS( + NonMaxSuppressionMultiClassFastHelper(context, node, op_data, scores)); + } + + return kTfLiteOk; +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + TF_LITE_ENSURE(context, (kBatchSize == 1)); + auto* op_data = static_cast(node->user_data); + + // These two functions correspond to two blocks in the Object Detection model. + // In future, we would like to break the custom op in two blocks, which is + // currently not feasible because we would like to input quantized inputs + // and do all calculations in float. Mixed quantized/float calculations are + // currently not supported in TFLite. + + // This fills in temporary decoded_boxes + // by transforming input_box_encodings and input_anchors from + // CenterSizeEncodings to BoxCornerEncoding + TF_LITE_ENSURE_STATUS(DecodeCenterSizeBoxes(context, node, op_data)); + + // This fills in the output tensors + // by choosing effective set of decoded boxes + // based on Non Maximal Suppression, i.e. selecting + // highest scoring non-overlapping boxes. + TF_LITE_ENSURE_STATUS(NonMaxSuppressionMultiClass(context, node, op_data)); + + return kTfLiteOk; +} +} // namespace + +TfLiteRegistration* Register_DETECTION_POSTPROCESS() { + static TfLiteRegistration r = tflite::micro::RegisterOp(Init, Prepare, Eval); + return &r; +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/detection_postprocess_flexbuffers_generated_data.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/detection_postprocess_flexbuffers_generated_data.h new file mode 100644 index 0000000..f5b9eae --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/detection_postprocess_flexbuffers_generated_data.h @@ -0,0 +1,25 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_MICRO_KERNELS_FLEXBUFFERS_GENERATED_DATA_H +#define TENSORFLOW_LITE_MICRO_KERNELS_FLEXBUFFERS_GENERATED_DATA_H + +extern const int g_gen_data_size_none_regular_nms; +extern const unsigned char g_gen_data_none_regular_nms[]; + +extern const int g_gen_data_size_regular_nms; +extern const unsigned char g_gen_data_regular_nms[]; + +#endif diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/div.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/div.cpp new file mode 100644 index 0000000..e5fb262 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/div.cpp @@ -0,0 +1,208 @@ +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/div.h" + +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +namespace tflite { +namespace { + +constexpr int kInputTensor1 = 0; +constexpr int kInputTensor2 = 1; +constexpr int kOutputTensor = 0; + +struct OpDataDiv { + // Parameters used in the quantized paths where the output is 8bit + int32_t input1_zero_point; + int32_t input2_zero_point; + int32_t output_zero_point; + int32_t output_activation_min; + int32_t output_activation_max; + + // Parameters used in all quantized paths + int32_t output_multiplier; + int output_shift; +}; + +TfLiteStatus CalculateOpDataDiv(TfLiteContext* context, TfLiteTensor* input1, + TfLiteTensor* input2, TfLiteTensor* output, + TfLiteDivParams* params, OpDataDiv* data) { + TF_LITE_ENSURE_TYPES_EQ(context, input1->type, input2->type); + TF_LITE_ENSURE_TYPES_EQ(context, input1->type, output->type); + + if (output->type == kTfLiteInt8) { + TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized( + context, params->activation, output, &data->output_activation_min, + &data->output_activation_max)); + const double real_multiplier = static_cast( + input1->params.scale / (input2->params.scale * output->params.scale)); + QuantizeMultiplier(real_multiplier, &data->output_multiplier, + &data->output_shift); + data->input1_zero_point = input1->params.zero_point; + data->input2_zero_point = input2->params.zero_point; + data->output_zero_point = output->params.zero_point; + } + + return kTfLiteOk; +} + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(OpDataDiv)); +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + TFLITE_DCHECK(node->builtin_data != nullptr); + + MicroContext* micro_context = GetMicroContext(context); + TfLiteTensor* input1 = + micro_context->AllocateTempInputTensor(node, kInputTensor1); + TF_LITE_ENSURE(context, input1 != nullptr); + TfLiteTensor* input2 = + micro_context->AllocateTempInputTensor(node, kInputTensor2); + TF_LITE_ENSURE(context, input2 != nullptr); + TfLiteTensor* output = + micro_context->AllocateTempOutputTensor(node, kOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); + + OpDataDiv* data = static_cast(node->user_data); + auto* params = reinterpret_cast(node->builtin_data); + + TF_LITE_ENSURE_STATUS( + CalculateOpDataDiv(context, input1, input2, output, params, data)); + + 
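+  // A minimal sketch (illustration only) of the quantized-path scaling set up
+  // by CalculateOpDataDiv() above: the "real" output multiplier is
+  // input1_scale / (input2_scale * output_scale), which QuantizeMultiplier()
+  // splits into the int32 multiplier and shift stored in OpDataDiv. For
+  // example, scales of 0.5, 0.25 and 1.0 give a real multiplier of 2.0.
+  auto example_div_real_multiplier = [](double s1, double s2, double s_out) {
+    return s1 / (s2 * s_out);
+  };
+  (void)example_div_real_multiplier;  // Illustrative only.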
micro_context->DeallocateTempTfLiteTensor(input1); + micro_context->DeallocateTempTfLiteTensor(input2); + micro_context->DeallocateTempTfLiteTensor(output); + return kTfLiteOk; +} + +void EvalDiv(TfLiteContext* context, TfLiteNode* node, TfLiteDivParams* params, + const OpDataDiv* data, const TfLiteEvalTensor* input1, + const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) { + tflite::ArithmeticParams op_params = {}; + +#define TF_LITE_DIV(type, opname, data_type) \ + data_type output_activation_min, output_activation_max; \ + CalculateActivationRange(params->activation, &output_activation_min, \ + &output_activation_max); \ + SetActivationParams(output_activation_min, output_activation_max, \ + &op_params); \ + type::opname(op_params, tflite::micro::GetTensorShape(input1), \ + tflite::micro::GetTensorData(input1), \ + tflite::micro::GetTensorShape(input2), \ + tflite::micro::GetTensorData(input2), \ + tflite::micro::GetTensorShape(output), \ + tflite::micro::GetTensorData(output)) + + bool requires_broadcast = reference_ops::ProcessBroadcastShapes( + tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorShape(input2), &op_params); + + if (requires_broadcast) { + TF_LITE_DIV(reference_ops, BroadcastDivSlow, float); + } else { + TF_LITE_DIV(reference_ops, Div, float); + } +#undef TF_LITE_DIV +} + +TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node, + TfLiteDivParams* params, const OpDataDiv* data, + const TfLiteEvalTensor* input1, + const TfLiteEvalTensor* input2, + TfLiteEvalTensor* output) { + tflite::ArithmeticParams op_params = {}; + +#define TF_LITE_DIV(type, opname, dtype) \ + type::opname(op_params, tflite::micro::GetTensorShape(input1), \ + tflite::micro::GetTensorData(input1), \ + tflite::micro::GetTensorShape(input2), \ + tflite::micro::GetTensorData(input2), \ + tflite::micro::GetTensorShape(output), \ + tflite::micro::GetTensorData(output)) + + if (input1->type == kTfLiteInt8 && input2->type == kTfLiteInt8 && + output->type == kTfLiteInt8) { + SetActivationParams(data->output_activation_min, + data->output_activation_max, &op_params); + op_params.input1_offset = -data->input1_zero_point; + op_params.input2_offset = -data->input2_zero_point; + op_params.output_offset = data->output_zero_point; + op_params.output_multiplier = data->output_multiplier; + op_params.output_shift = data->output_shift; + + bool requires_broadcast = reference_ops::ProcessBroadcastShapes( + tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorShape(input2), &op_params); + + if (requires_broadcast) { + TF_LITE_DIV(reference_ops, BroadcastDivSlow, int8_t); + } else { + TF_LITE_DIV(reference_ops, Div, int8_t); + } +#undef TF_LITE_DIV + } else { + MicroPrintf("Unsupported combination of input and output types in DIV."); + return kTfLiteError; + } + + return kTfLiteOk; +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->builtin_data != nullptr); + auto* params = static_cast(node->builtin_data); + TFLITE_DCHECK(node->user_data != nullptr); + auto* data = static_cast(node->user_data); + + const TfLiteEvalTensor* input1 = + tflite::micro::GetEvalInput(context, node, kInputTensor1); + const TfLiteEvalTensor* input2 = + tflite::micro::GetEvalInput(context, node, kInputTensor2); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + + if (output->type == kTfLiteFloat32) { + EvalDiv(context, node, params, data, input1, input2, output); + } else if (output->type == kTfLiteInt8) { + 
TF_LITE_ENSURE_OK(context, EvalQuantized(context, node, params, data, + input1, input2, output)); + } else { + MicroPrintf( + "DIV only supports FLOAT32, quantized INT8 " + "now, got type %s (%d).", + TfLiteTypeGetName(output->type), output->type); + return kTfLiteError; + } + + return kTfLiteOk; +} + +} // namespace + +TfLiteRegistration Register_DIV() { + return tflite::micro::RegisterOp(Init, Prepare, Eval); +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/elementwise.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/elementwise.cpp new file mode 100644 index 0000000..4ee7f2c --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/elementwise.cpp @@ -0,0 +1,430 @@ +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include + +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h" + +namespace tflite { +namespace ops { +namespace micro { +namespace elementwise { +namespace { + +constexpr int kAbsNameId = 0; +constexpr int kRsrqtNameId = 1; + +const int kElementwiseInputTensor = 0; +const int kElementwiseOutputTensor = 0; + +struct OpDataAbsRsqrt { + int32_t multiplier; + int shift; + int input_offset; + int output_offset; + bool needs_rescale; + TfLiteQuantizationType input_quantization_type; + TfLiteType input_type; +}; + +bool IsNumericSupportedType(const TfLiteType type) { + return type == kTfLiteFloat32; +} + +bool IsLogicalSupportedType(const TfLiteType type) { + return type == kTfLiteBool; +} + +bool IsAbsSupportedType(const TfLiteType type) { + return type == kTfLiteFloat32 || type == kTfLiteInt8 || type == kTfLiteInt16; +} + +bool IsRsqrtSupportedType(const TfLiteType type) { + return type == kTfLiteFloat32 || type == kTfLiteInt8; +} + +inline void SetAbsOutputMultiplier(const float input_scale, + const float output_scale, + int32_t* multiplier, int* shift) { + QuantizeMultiplier(static_cast(input_scale / output_scale), + multiplier, shift); +} + +inline void SetRsqrtOutputMultiplier(const float input_scale, + const float output_scale, + int32_t* multiplier, int* shift) { + const double scale = + 1. 
/ static_cast((std::sqrt(input_scale) * output_scale)); + QuantizeMultiplier(scale, multiplier, shift); +} + +typedef bool (*IsSupportedType)(TfLiteType); +template +TfLiteStatus GenericPrepare(TfLiteContext* context, TfLiteNode* node) { + MicroContext* micro_context = GetMicroContext(context); + TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + TfLiteTensor* input = + micro_context->AllocateTempInputTensor(node, kElementwiseInputTensor); + TF_LITE_ENSURE(context, input != nullptr); + TfLiteTensor* output = + micro_context->AllocateTempOutputTensor(node, kElementwiseOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); + TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type); + if (!IsSupportedType(input->type)) { + MicroPrintf("Input data type %s (%d) is not supported.", + TfLiteTypeGetName(input->type), input->type); + return kTfLiteError; + } + + micro_context->DeallocateTempTfLiteTensor(input); + micro_context->DeallocateTempTfLiteTensor(output); + return kTfLiteOk; +} + +typedef bool (*IsSupportedType)(TfLiteType); +template +TfLiteStatus PrepareAbsRsqrt(TfLiteContext* context, TfLiteNode* node) { + MicroContext* micro_context = GetMicroContext(context); + TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, 0); + TF_LITE_ENSURE(context, input != nullptr); + TfLiteTensor* output = micro_context->AllocateTempOutputTensor(node, 0); + TF_LITE_ENSURE(context, output != nullptr); + TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type); + if (!IsSupportedType(input->type)) { + MicroPrintf("Input data type %s (%d) is not supported.", + TfLiteTypeGetName(input->type), input->type); + return kTfLiteError; + } + + auto* op_data = static_cast(node->user_data); + op_data->input_type = input->type; + + // For int16 type input, we support both quantized and non-quantized + // evaluation. 
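+  // A minimal sketch (illustration only) of the rescaling decided below: for
+  // ABS the real output multiplier is input_scale / output_scale, so when the
+  // two scales match no rescale is needed; for RSQRT it is
+  // 1 / (sqrt(input_scale) * output_scale), as set by SetAbsOutputMultiplier()
+  // and SetRsqrtOutputMultiplier() above. E.g. input_scale == 0.04f and
+  // output_scale == 0.02f give an ABS multiplier of 2.0.
+  auto example_abs_real_multiplier = [](float in_scale, float out_scale) {
+    return static_cast<double>(in_scale / out_scale);
+  };
+  (void)example_abs_real_multiplier;  // Illustrative only.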
+ if (op_nameid == kAbsNameId) { + op_data->input_quantization_type = input->quantization.type; + } + + if (input->type == kTfLiteInt8 || + (input->type == kTfLiteInt16 && + input->quantization.type != kTfLiteNoQuantization)) { + TF_LITE_ENSURE_EQ(context, input->quantization.type, + kTfLiteAffineQuantization); + TF_LITE_ENSURE_EQ(context, output->quantization.type, + kTfLiteAffineQuantization); + const auto* input_params = + reinterpret_cast(input->quantization.params); + const auto* output_params = reinterpret_cast( + output->quantization.params); + TF_LITE_ENSURE(context, input_params != nullptr); + TF_LITE_ENSURE(context, input_params->scale != nullptr); + TF_LITE_ENSURE(context, input_params->scale->size > 0); + TF_LITE_ENSURE(context, input_params->zero_point->size > 0); + TF_LITE_ENSURE(context, output_params != nullptr); + TF_LITE_ENSURE(context, output_params->scale != nullptr); + TF_LITE_ENSURE(context, output_params->scale->size > 0); + TF_LITE_ENSURE(context, output_params->zero_point->size > 0); + op_data->input_offset = input_params->zero_point->data[0]; + op_data->output_offset = output_params->zero_point->data[0]; + if (input->type == kTfLiteInt16) { + TF_LITE_ENSURE_EQ(context, op_data->input_offset, 0); + TF_LITE_ENSURE_EQ(context, op_data->output_offset, 0); + } + const float input_scale = input_params->scale->data[0]; + const float output_scale = output_params->scale->data[0]; + op_data->needs_rescale = input_scale != output_scale; + if (op_nameid == kAbsNameId && op_data->needs_rescale) { + SetAbsOutputMultiplier(input_scale, output_scale, &op_data->multiplier, + &op_data->shift); + } else if (op_nameid == kRsrqtNameId) { + SetRsqrtOutputMultiplier(input_scale, output_scale, &op_data->multiplier, + &op_data->shift); + } + } + micro_context->DeallocateTempTfLiteTensor(input); + micro_context->DeallocateTempTfLiteTensor(output); + return kTfLiteOk; +} + +template +inline TfLiteStatus EvalImplQuantized( + TfLiteContext* context, TfLiteNode* node, + T func(TfLiteContext*, TfLiteNode*, T), + TfLiteStatus validate_input_func(TfLiteContext*, TfLiteNode*, T), + TfLiteType expected_type) { + const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0); + TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0); + TF_LITE_ENSURE_TYPES_EQ(context, input->type, expected_type); + const size_t num_elements = ElementCount(*input->dims); + const T* in_data = tflite::micro::GetTensorData(input); + T* out_data = tflite::micro::GetTensorData(output); + for (size_t i = 0; i < num_elements; ++i) { + if (validate_input_func) { + TF_LITE_ENSURE_OK(context, + validate_input_func(context, node, in_data[i])); + } + out_data[i] = func(context, node, in_data[i]); + } + return kTfLiteOk; +} + +template +inline T AbsHelper(T i) { + return std::abs(i); +} + +template +inline TfLiteStatus EvalImpl(TfLiteContext* context, TfLiteNode* node, + T func(T), TfLiteStatus validate_input_func(T), + TfLiteType expected_type) { + const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0); + TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0); + TF_LITE_ENSURE_TYPES_EQ(context, input->type, expected_type); + const size_t num_elements = ElementCount(*input->dims); + const T* in_data = tflite::micro::GetTensorData(input); + T* out_data = tflite::micro::GetTensorData(output); + for (size_t i = 0; i < num_elements; ++i) { + if (validate_input_func) { + TF_LITE_ENSURE_OK(context, validate_input_func(in_data[i])); + } + out_data[i] = 
func(in_data[i]); + } + return kTfLiteOk; +} + +inline TfLiteStatus EvalNumeric(TfLiteContext* context, TfLiteNode* node, + float float_func(float)) { + return EvalImpl(context, node, float_func, + /*validate_input_func=*/nullptr, kTfLiteFloat32); +} + +inline TfLiteStatus EvalLogical(TfLiteContext* context, TfLiteNode* node, + + bool bool_func(bool)) { + return EvalImpl(context, node, bool_func, + /*validate_input_func=*/nullptr, kTfLiteBool); +} + +void* ElementWiseAbsRsqrtInit(TfLiteContext* context, const char* buffer, + size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(OpDataAbsRsqrt)); +} + +template +inline T AbsEvalQuantized(TfLiteContext* context, TfLiteNode* node, T i) { + const auto* op_data = static_cast(node->user_data); + const int kMin = std::numeric_limits::min(); + const int kMax = std::numeric_limits::max(); + + const int32_t value = std::abs(i - op_data->input_offset); + if (!op_data->needs_rescale) { + return static_cast( + std::min(std::max(static_cast(value + op_data->output_offset), + static_cast(kMin)), + static_cast(kMax))); + } + + const int32_t output = tflite::MultiplyByQuantizedMultiplier( + value, op_data->multiplier, op_data->shift) + + op_data->output_offset; + return static_cast(std::min( + std::max(static_cast(output), static_cast(kMin)), + static_cast(kMax))); +} + +template +inline T RsqrtEvalQuantized(TfLiteContext* context, TfLiteNode* node, T i) { + const auto* op_data = static_cast(node->user_data); + const int kMin = std::numeric_limits::min(); + const int kMax = std::numeric_limits::max(); + + const int32_t value = (i - op_data->input_offset); + const int32_t kShift = 20; // Shift to keep value integer. + if (value == 0) { + // Assume that any value close to 0 represents the max output value. + return static_cast(kMax); + } + int32_t inv_sqrt_multiplier; + int inv_sqrt_shift; + GetInvSqrtQuantizedMultiplierExp(value, kReverseShift, &inv_sqrt_multiplier, + &inv_sqrt_shift); + const int32_t data = tflite::MultiplyByQuantizedMultiplier( + static_cast(1), inv_sqrt_multiplier, inv_sqrt_shift + kShift); + const int32_t output = + tflite::MultiplyByQuantizedMultiplier(data, op_data->multiplier, + op_data->shift - kShift) + + op_data->output_offset; + return static_cast(std::min( + std::max(static_cast(output), static_cast(kMin)), + static_cast(kMax))); +} + +template +TfLiteStatus validate_input_func(TfLiteContext* context, TfLiteNode* node, + T i) { + const auto* op_data = static_cast(node->user_data); + + TF_LITE_ENSURE_MSG(context, i >= op_data->input_offset, + "Rsqrt is only defined for positive values"); + return static_cast(kTfLiteOk); +} + +TfLiteStatus AbsEval(TfLiteContext* context, TfLiteNode* node) { + OpDataAbsRsqrt* op_data = reinterpret_cast(node->user_data); + TfLiteType type = op_data->input_type; + TfLiteQuantizationType input_quantization_type = + op_data->input_quantization_type; + TfLiteStatus eval_result; + + switch (type) { + case kTfLiteFloat32: + eval_result = EvalNumeric(context, node, std::abs); + break; + case kTfLiteInt8: + eval_result = + EvalImplQuantized(context, node, AbsEvalQuantized, + /*validate_input_func=*/nullptr, type); + break; + case kTfLiteInt16: + eval_result = + input_quantization_type == kTfLiteNoQuantization + ? 
EvalImpl(context, node, AbsHelper, + /*validate_input_func=*/nullptr, type) + : EvalImplQuantized(context, node, AbsEvalQuantized, + /*validate_input_func=*/nullptr, + type); + break; + default: + MicroPrintf("Current data type %s is not supported.", + TfLiteTypeGetName(type)); + return kTfLiteError; + break; + } + return eval_result; +} + +TfLiteStatus SinEval(TfLiteContext* context, TfLiteNode* node) { + return EvalNumeric(context, node, std::sin); +} + +TfLiteStatus CosEval(TfLiteContext* context, TfLiteNode* node) { + return EvalNumeric(context, node, std::cos); +} + +TfLiteStatus LogEval(TfLiteContext* context, TfLiteNode* node) { + return EvalNumeric(context, node, std::log); +} + +TfLiteStatus SqrtEval(TfLiteContext* context, TfLiteNode* node) { + return EvalNumeric(context, node, std::sqrt); +} + +TfLiteStatus RsqrtEval(TfLiteContext* context, TfLiteNode* node) { + const auto* op_data = static_cast(node->user_data); + TfLiteType type = op_data->input_type; + switch (type) { + case kTfLiteFloat32: + return EvalImpl( + context, node, [](float f) { return 1.f / std::sqrt(f); }, + /*validate_input_func=*/nullptr, type); + case kTfLiteInt8: + return EvalImplQuantized(context, node, + elementwise::RsqrtEvalQuantized, + elementwise::validate_input_func, type); + + default: + MicroPrintf("Current data type %s is not supported.", + TfLiteTypeGetName(type)); + return kTfLiteError; + } +} + +TfLiteStatus SquareEval(TfLiteContext* context, TfLiteNode* node) { + return EvalNumeric(context, node, [](float f) { return f * f; }); +} + +TfLiteStatus LogicalNotEval(TfLiteContext* context, TfLiteNode* node) { + return EvalLogical(context, node, [](bool v) { return !v; }); +} + +} // namespace +} // namespace elementwise + +TfLiteRegistration Register_ABS() { + return tflite::micro::RegisterOp( + elementwise::ElementWiseAbsRsqrtInit, + elementwise::PrepareAbsRsqrt, + elementwise::AbsEval); +} + +TfLiteRegistration Register_SIN() { + return tflite::micro::RegisterOp( + nullptr, elementwise::GenericPrepare, + elementwise::SinEval); +} + +TfLiteRegistration Register_COS() { + return tflite::micro::RegisterOp( + nullptr, elementwise::GenericPrepare, + elementwise::CosEval); +} + +TfLiteRegistration Register_LOG() { + return tflite::micro::RegisterOp( + nullptr, elementwise::GenericPrepare, + elementwise::LogEval); +} + +TfLiteRegistration Register_SQRT() { + return tflite::micro::RegisterOp( + nullptr, elementwise::GenericPrepare, + elementwise::SqrtEval); +} + +TfLiteRegistration Register_RSQRT() { + return tflite::micro::RegisterOp( + elementwise::ElementWiseAbsRsqrtInit, + elementwise::PrepareAbsRsqrt, + elementwise::RsqrtEval); +} + +TfLiteRegistration Register_SQUARE() { + return tflite::micro::RegisterOp( + nullptr, elementwise::GenericPrepare, + elementwise::SquareEval); +} + +TfLiteRegistration Register_LOGICAL_NOT() { + return tflite::micro::RegisterOp( + nullptr, elementwise::GenericPrepare, + elementwise::LogicalNotEval); +} + +} // namespace micro +} // namespace ops +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/elu.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/elu.cpp new file mode 100644 index 0000000..7581772 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/elu.cpp @@ -0,0 +1,151 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/elu.h" + +#include +#include + +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/cppmath.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +namespace tflite { +namespace { + +// Input/output tensor index. +constexpr int kInputTensor = 0; +constexpr int kOutputTensor = 0; + +// OLD-TODO(b/142762739): We should figure out a multi-threading plan for most +// of the activation ops below. + +struct OpData { + int8_t table[256]; +}; + +using TransformFunc = float (*)(float); + +template +void PopulateLookupTable(const TfLiteTensor* input, const TfLiteTensor* output, + const TransformFunc transform, OpData* data) { + if (sizeof(T) != 1) { + MicroPrintf("Lookup table valid only for 8bit"); + TFLITE_ABORT; + } + + const float inverse_scale = 1 / output->params.scale; + int32_t maxval = std::numeric_limits::max(); + int32_t minval = std::numeric_limits::min(); + for (int32_t val = minval; val <= maxval; ++val) { + const float dequantized = + input->params.scale * (val - input->params.zero_point); + const float transformed = transform(dequantized); + const float rescaled = TfLiteRound(transformed * inverse_scale); + const int32_t quantized = + static_cast(rescaled + output->params.zero_point); + data->table[static_cast(static_cast(val))] = + static_cast(std::max(std::min(maxval, quantized), minval)); + } +} + +// OLD-TODO(b/143696793): move this to optimized_ops. +void EvalUsingLookupTable(const OpData* data, const TfLiteEvalTensor* input, + TfLiteEvalTensor* output) { + const int size = MatchingFlatSize(tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorShape(output)); + int8_t* output_data = tflite::micro::GetTensorData(output); + const int8_t* input_data = tflite::micro::GetTensorData(input); + + for (int i = 0; i < size; ++i) { + output_data[i] = data->table[static_cast(input_data[i])]; + } +} + +TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) { + MicroContext* micro_context = GetMicroContext(context); + + TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + TfLiteTensor* input = + micro_context->AllocateTempInputTensor(node, kInputTensor); + TF_LITE_ENSURE(context, input != nullptr); + TfLiteTensor* output = + micro_context->AllocateTempOutputTensor(node, kOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); + TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type); + + // Use LUT to handle quantized elu path. 
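+  // The 256-entry table is filled once here (CalculateOpData is called from
+  // EluPrepare), so the int8 Eval path below costs one table lookup per element.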
+ if (input->type == kTfLiteInt8) { + OpData* data = static_cast(node->user_data); + TransformFunc transform = [](float value) { + return value < 0.0f ? std::exp(value) - 1.0f : value; + }; + PopulateLookupTable(input, output, transform, data); + } + micro_context->DeallocateTempTfLiteTensor(input); + micro_context->DeallocateTempTfLiteTensor(output); + return kTfLiteOk; +} + +void* EluInit(TfLiteContext* context, const char* buffer, size_t length) { + // This is a builtin op, so we don't use the contents in 'buffer', if any. + // Instead, we allocate a new object to carry information from Prepare() to + // Eval(). + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(OpData)); +} + +TfLiteStatus EluPrepare(TfLiteContext* context, TfLiteNode* node) { + return CalculateOpData(context, node); +} + +TfLiteStatus EluEval(TfLiteContext* context, TfLiteNode* node) { + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + switch (input->type) { + case kTfLiteFloat32: { + reference_ops::Elu(tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + return kTfLiteOk; + } + case kTfLiteInt8: { + const OpData* data = static_cast(node->user_data); + EvalUsingLookupTable(data, input, output); + return kTfLiteOk; + } + default: + MicroPrintf("ELU only supports float32 and int8 currently, got %s.", + TfLiteTypeGetName(input->type)); + return kTfLiteError; + } +} + +} // namespace + +TfLiteRegistration Register_ELU() { + return tflite::micro::RegisterOp(EluInit, EluPrepare, EluEval); +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/ethosu.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/ethosu.cpp new file mode 100644 index 0000000..e2bccde --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/ethosu.cpp @@ -0,0 +1,214 @@ +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
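Editor's sketch: PopulateLookupTable above precomputes the ELU output for every possible int8 input at prepare time. A standalone version of the same construction is shown below; the function name and quantization parameters are illustrative placeholders, not SDK API.

#include <algorithm>
#include <cmath>
#include <cstdint>

// Build a 256-entry int8 ELU table the same way PopulateLookupTable does.
void BuildInt8EluTable(float input_scale, int32_t input_zero_point,
                       float output_scale, int32_t output_zero_point,
                       int8_t table[256]) {
  for (int32_t val = -128; val <= 127; ++val) {
    const float x = input_scale * static_cast<float>(val - input_zero_point);
    const float elu = x < 0.0f ? std::exp(x) - 1.0f : x;  // ELU transform
    const int32_t q =
        static_cast<int32_t>(std::lround(elu / output_scale)) + output_zero_point;
    // Index by the unsigned bit pattern of the int8 input, as the kernel does.
    table[static_cast<uint8_t>(static_cast<int8_t>(val))] =
        static_cast<int8_t>(std::min<int32_t>(127, std::max<int32_t>(-128, q)));
  }
}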
+==============================================================================*/ + +#define FLATBUFFERS_LOCALE_INDEPENDENT 0 +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/flexbuffers.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_context.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +#if EI_CLASSIFIER_TFLITE_ETHOSU_POLYFILL || EI_ETHOS + +#if EI_CLASSIFIER_TFLITE_ETHOSU_POLYFILL +// Modified by Edge Impulse +// Add stub definitions so that EON Compiler can run + +int ethosu_invoke(struct ethosu_driver *drv, + const void *custom_data_ptr, + const int custom_data_size, + const uint64_t *base_addr, + const size_t *base_addr_size, + const int num_base_addr) +{ return 0; } + +// forward declare the struct +struct ethosu_driver; + +struct ethosu_driver *ethosu_reserve_driver(void) { return nullptr; } +void ethosu_release_driver(struct ethosu_driver *drv) {} +#else +#include +#endif +namespace tflite { +namespace { + +constexpr uint8_t CO_TYPE_ETHOSU = 1; + +struct OpData { + int cms_data_size; + int base_addr_idx; + int base_addr_size_idx; +}; + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(OpData)); +} + +void Free(TfLiteContext* context, void* buffer) {} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(context != nullptr); + TF_LITE_ENSURE(context, node->inputs->size > 0); + TFLITE_DCHECK(node->user_data != nullptr); + TF_LITE_ENSURE(context, node->custom_initial_data_size > 0); + + OpData* data = static_cast(node->user_data); + int num_base_addr = node->inputs->size + node->outputs->size; + + // Request arrays for the base address pointers and sizes. + TF_LITE_ENSURE_STATUS(context->RequestScratchBufferInArena( + context, num_base_addr * sizeof(uint64_t), &data->base_addr_idx)); + TF_LITE_ENSURE_STATUS(context->RequestScratchBufferInArena( + context, num_base_addr * sizeof(size_t), &data->base_addr_size_idx)); + + // Get command stream data size. + MicroContext* micro_context = GetMicroContext(context); + TfLiteTensor* tensor = micro_context->AllocateTempInputTensor(node, 0); + data->cms_data_size = tensor->bytes; + micro_context->DeallocateTempTfLiteTensor(tensor); + return kTfLiteOk; +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + TFLITE_DCHECK(context != nullptr); + TFLITE_DCHECK(context->GetScratchBuffer != nullptr); + + // Get base addresses. + TfLiteEvalTensor* tensor; + int i = 0; + int num_tensors = 0; + void* cms_data; + uint8_t co_type; + int result; + const OpData* data = static_cast(node->user_data); + uint64_t* base_addrs = static_cast( + context->GetScratchBuffer(context, data->base_addr_idx)); + size_t* base_addrs_size = static_cast( + context->GetScratchBuffer(context, data->base_addr_size_idx)); + + const uint8_t* custom_data = + static_cast(node->custom_initial_data); + auto root = flexbuffers::GetRoot(custom_data, node->custom_initial_data_size); + co_type = root.AsInt8(); + if (co_type != CO_TYPE_ETHOSU) { + MicroPrintf("CO_TYPE != ETHOSU"); + return kTfLiteError; + } + + // Get command stream data address. 
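+  // Input 0 is the Vela-generated command stream (see the base-address table
+  // further down); its byte size was cached as cms_data_size in Prepare().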
+ tensor = context->GetEvalTensor(context, node->inputs->data[0]); + cms_data = reinterpret_cast(tensor->data.uint8); + + // Get addresses to weights/scratch/input data. + for (i = 1; i < node->inputs->size; ++i) { + tensor = context->GetEvalTensor(context, node->inputs->data[i]); + base_addrs[num_tensors] = + static_cast(reinterpret_cast(tensor->data.uint8)); + size_t byte_size = 1; + for (int k = 0; k < tensor->dims->size; k++) { + byte_size = byte_size * tensor->dims->data[k]; + } + base_addrs_size[num_tensors] = byte_size; + num_tensors++; + } + + // Get addresses to output data. + for (i = 0; i < node->outputs->size; ++i) { + tensor = context->GetEvalTensor(context, node->outputs->data[i]); + base_addrs[num_tensors] = + static_cast(reinterpret_cast(tensor->data.uint8)); + size_t byte_size = 1; + for (int k = 0; k < tensor->dims->size; k++) { + byte_size = byte_size * tensor->dims->data[k]; + } + base_addrs_size[num_tensors] = byte_size; + num_tensors++; + } + + // Ethos-U guarantees that the tensors that require a base pointer are among + // the 8 first tensors + // When Vela optimizes a tflite file it will assign the tensors like this: + // + // +-------+------------------------+ +--------+-------------+ + // | INPUT | Description | | OUTPUT | Description | + // +-------+------------------------+ +--------+-------------+ + // | 0 | Ethos-U command stream | | 0..m | Outputs | + // | 1 | TFLM model | +--------+-------------+ + // | 2 | TFLM arena | + // | 3 | Ethos-U fast scratch | + // | 4..n | Inputs | + // +-------+------------------------+ + // + // This code will assign the NPU base addresses like this: + // + // +--------------+----------------------+ + // | Base address | Description | + // +--------------+----------------------+ + // | 0 | TFLM model | + // | 1 | TFLM arena | + // | 2 | Ethos-U fast scratch | + // | 3..n | Input tensors | + // | n..m | Output tensors | + // +--------------+----------------------+ + // + // The number of base address will be limited to 8. + // + // NOTE! The command stream produced by Vela will access the IFM and OFM + // buffers using base address 1. This means that it is not possible to point + // the input and output tensors outside of the TFLM arena. + num_tensors = std::min(num_tensors, 8); + + struct ethosu_driver* drv = ethosu_reserve_driver(); + result = ethosu_invoke(drv, cms_data, data->cms_data_size, base_addrs, + base_addrs_size, num_tensors); + ethosu_release_driver(drv); + + if (-1 == result) { + return kTfLiteError; + } else { + return kTfLiteOk; + } +} + +} // namespace + +TfLiteRegistration* Register_ETHOSU() { + static TfLiteRegistration r = tflite::micro::RegisterOp(Init, Prepare, Eval); + return &r; +} + +const char* GetString_ETHOSU() { return "ethos-u"; } + +} // namespace tflite + +#else + +// +// This is a stub file for non-Ethos platforms +// +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" + +namespace tflite { + +TfLiteRegistration* Register_ETHOSU() { return nullptr; } + +const char* GetString_ETHOSU() { return ""; } + +} // namespace tflite + +#endif // Ethos flag \ No newline at end of file diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/ethosu.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/ethosu.h new file mode 100644 index 0000000..fd61d65 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/ethosu.h @@ -0,0 +1,28 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_MICRO_KERNELS_ETHOSU_H_ +#define TENSORFLOW_LITE_MICRO_KERNELS_ETHOSU_H_ + +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" + +namespace tflite { + +TfLiteRegistration* Register_ETHOSU(); + +const char* GetString_ETHOSU(); + +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_KERNELS_ETHOSU_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/exp.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/exp.cpp new file mode 100644 index 0000000..c727cb9 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/exp.cpp @@ -0,0 +1,79 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
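Editor's sketch: ethosu.h above exposes Register_ETHOSU(), which returns nullptr on non-Ethos builds, and GetString_ETHOSU(), which returns the custom-op name "ethos-u" that Vela emits. A hedged usage sketch follows; AddCustom is the standard TFLM resolver call, and the micro_mutable_op_resolver.h include path is assumed to follow the same edge-impulse-sdk prefix as the headers above.

#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/ethosu.h"
#include "edge-impulse-sdk/tensorflow/lite/micro/micro_mutable_op_resolver.h"

// Register the Ethos-U custom op under the name Vela emits. The call is
// guarded because Register_ETHOSU() is a nullptr stub on non-Ethos builds.
tflite::MicroMutableOpResolver<1> resolver;

void AddEthosUOpIfAvailable() {
  if (TfLiteRegistration* reg = tflite::Register_ETHOSU()) {
    resolver.AddCustom(tflite::GetString_ETHOSU(), reg);
  }
}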
+==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/exp.h" + +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +namespace tflite { +namespace { + +constexpr int kInputTensor = 0; +constexpr int kOutputTensor = 0; + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + MicroContext* micro_context = GetMicroContext(context); + + TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + TfLiteTensor* input = + micro_context->AllocateTempInputTensor(node, kInputTensor); + TF_LITE_ENSURE(context, input != nullptr); + TfLiteTensor* output = + micro_context->AllocateTempOutputTensor(node, kOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); + TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32); + TF_LITE_ENSURE_TYPES_EQ(context, output->type, input->type); + TF_LITE_ENSURE_EQ(context, output->bytes, input->bytes); + TF_LITE_ENSURE_EQ(context, output->dims->size, input->dims->size); + for (int i = 0; i < output->dims->size; ++i) { + TF_LITE_ENSURE_EQ(context, output->dims->data[i], input->dims->data[i]); + } + micro_context->DeallocateTempTfLiteTensor(input); + micro_context->DeallocateTempTfLiteTensor(output); + + return kTfLiteOk; +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + int flat_size = MatchingFlatSize(tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorShape(output)); + + if (input->type == kTfLiteFloat32) { + reference_ops::Exp(tflite::micro::GetTensorData(input), + static_cast(flat_size), + tflite::micro::GetTensorData(output)); + } else { + MicroPrintf("Type %s (%d) currently not supported by Exp.", + TfLiteTypeGetName(input->type), input->type); + return kTfLiteError; + } + return kTfLiteOk; +} +} // namespace + +TfLiteRegistration Register_EXP() { + return tflite::micro::RegisterOp(nullptr, Prepare, Eval); +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/expand_dims.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/expand_dims.cpp new file mode 100644 index 0000000..f2b638b --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/expand_dims.cpp @@ -0,0 +1,149 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
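Editor's sketch: the EXP kernel above accepts only float32, requires the output to match the input's shape and byte size, and defers to reference_ops::Exp over the flattened buffer. That float path is equivalent to the loop below (illustrative names, not SDK code).

#include <cmath>
#include <cstddef>

// Element-wise exponential over a flattened float tensor, matching what the
// kernel's float32 path computes via reference_ops::Exp.
void ExpFlat(const float* input, size_t flat_size, float* output) {
  for (size_t i = 0; i < flat_size; ++i) {
    output[i] = std::exp(input[i]);
  }
}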
+==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h" + +namespace tflite { +namespace { + +constexpr int kInputTensor = 0; +constexpr int kAxisTensor = 1; +constexpr int kOutputTensor = 0; + +TfLiteStatus GetAxisValueFromTensor(TfLiteContext* context, + const TfLiteTensor* axis, + int32_t* axis_value) { + const int axis_dims = (tflite::GetTensorShape(axis)).DimensionsCount(); + if (axis_dims > 1) { + MicroPrintf("Axis has only one element for Expand_Dims.", axis_dims); + return kTfLiteError; + } + + if (kTfLiteInt32 == (axis->type)) { + const int32_t* axis_ptr = tflite::GetTensorData(axis); + *axis_value = axis_ptr[0]; + return kTfLiteOk; + } else { + MicroPrintf("Axis type %s (%d) not supported by Expand_Dims.", + TfLiteTypeGetName(axis->type), axis->type); + return kTfLiteError; + } +} + +// Verifies that the output tensor's dimension shape is equivalent to inserting +// a dimension of length 1 at the dimension index axis of input's shape as +// defined in https://www.tensorflow.org/api_docs/python/tf/expand_dims. +TfLiteStatus VerifyTensorDim(TfLiteContext* context, const TfLiteTensor* input, + const TfLiteTensor* axis_tensor, + const TfLiteTensor* output) { + int32_t axis_value = 0; + TF_LITE_ENSURE_OK(context, + GetAxisValueFromTensor(context, axis_tensor, &axis_value)); + + tflite::RuntimeShape input_shape = tflite::GetTensorShape(input); + if (axis_value < 0) { + axis_value = input_shape.DimensionsCount() + 1 + axis_value; + } + TF_LITE_ENSURE(context, axis_value <= input_shape.DimensionsCount()); + + // TFLM only supports fixed dimension tensor and assumes that the output shape + // is fully specified in the model. As such, TFLM directly use the pointer to + // the dimension array in the model buffer. 
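+  // Example: for an input of shape {2, 3}, axis 0 must produce {1, 2, 3}
+  // and axis -1 (normalized to 2 above) must produce {2, 3, 1}.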
+ tflite::RuntimeShape output_shape = tflite::GetTensorShape(output); + + TF_LITE_ENSURE(context, output_shape.DimensionsCount() == + input_shape.DimensionsCount() + 1); + for (int i = 0; i < output_shape.DimensionsCount(); ++i) { + if (i < axis_value) { + TF_LITE_ENSURE(context, output_shape.Dims(i) == input_shape.Dims(i)); + } else if (i == axis_value) { + TF_LITE_ENSURE(context, output_shape.Dims(i) == 1); + } else { + TF_LITE_ENSURE(context, output_shape.Dims(i) == input_shape.Dims(i - 1)); + } + } + return kTfLiteOk; +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + MicroContext* micro_context = GetMicroContext(context); + + TF_LITE_ENSURE_EQ(context, NumInputs(node), 2); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + TfLiteTensor* input = + micro_context->AllocateTempInputTensor(node, kInputTensor); + TF_LITE_ENSURE(context, input != nullptr); + TfLiteTensor* axis = + micro_context->AllocateTempInputTensor(node, kAxisTensor); + TF_LITE_ENSURE(context, axis != nullptr); + TfLiteTensor* output = + micro_context->AllocateTempOutputTensor(node, kOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); + output->type = input->type; + if (IsDynamicTensor(axis)) { + MicroPrintf("DynamicTensor is not yet supported by Expand_Dims."); + return kTfLiteError; + } + TF_LITE_ENSURE_OK(context, VerifyTensorDim(context, input, axis, output)); + + micro_context->DeallocateTempTfLiteTensor(input); + micro_context->DeallocateTempTfLiteTensor(axis); + micro_context->DeallocateTempTfLiteTensor(output); + return kTfLiteOk; +} + +template +void memCopyN(T* out, const T* in, const int num_elements) { + for (int i = 0; i < num_elements; ++i) { + out[i] = in[i]; + } +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + const int flat_size = ElementCount(*input->dims); + + switch (input->type) { + case kTfLiteFloat32: { + memCopyN(tflite::micro::GetTensorData(output), + tflite::micro::GetTensorData(input), flat_size); + } break; + case kTfLiteInt8: { + memCopyN(tflite::micro::GetTensorData(output), + tflite::micro::GetTensorData(input), flat_size); + } break; + default: + MicroPrintf( + "Expand_Dims only currently supports int8 and float32, got %d.", + input->type); + return kTfLiteError; + } + return kTfLiteOk; +} +} // namespace + +TfLiteRegistration Register_EXPAND_DIMS() { + return tflite::micro::RegisterOp(nullptr, Prepare, Eval); +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/fill.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/fill.cpp new file mode 100644 index 0000000..202caef --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/fill.cpp @@ -0,0 +1,140 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
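Editor's sketch: VerifyTensorDim above only checks shapes, and Eval then performs a flat element copy, because inserting a length-1 dimension never reorders data. The shape rule it enforces is sketched below as a host-side helper (std::vector is used for brevity; the names are illustrative, not TFLM code).

#include <cstdint>
#include <vector>

// Expected ExpandDims output shape: the input shape with a 1 inserted at
// `axis`, where a negative axis counts from the end of the output shape.
std::vector<int> ExpectedExpandDimsShape(std::vector<int> shape, int32_t axis) {
  if (axis < 0) {
    axis = static_cast<int32_t>(shape.size()) + 1 + axis;
  }
  shape.insert(shape.begin() + axis, 1);
  return shape;
}

// ExpectedExpandDimsShape({2, 3}, 0)  -> {1, 2, 3}
// ExpectedExpandDimsShape({2, 3}, -1) -> {2, 3, 1}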
+==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/fill.h" + +#include + +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +namespace tflite { + +namespace { + +template +TfLiteStatus EnsureEqImpl(TfLiteContext* context, const TfLiteIntArray* array, + const TfLiteTensor* tensor) { + for (int i = 0; i < array->size; ++i) { + TF_LITE_ENSURE_EQ(context, array->data[i], GetTensorData(tensor)[i]); + } + return kTfLiteOk; +} + +// Ensure the equality of an int array and a tensor, which must be +// one-dimensional and of an integer type. +TfLiteStatus EnsureEq(TfLiteContext* context, const TfLiteIntArray* array, + const TfLiteTensor* tensor) { + TF_LITE_ENSURE_EQ(context, NumDimensions(tensor), 1); + const auto tensor_len = tensor->dims->data[0]; + TF_LITE_ENSURE_EQ(context, array->size, tensor_len); + + switch (tensor->type) { + case kTfLiteInt8: + return EnsureEqImpl(context, array, tensor); + case kTfLiteInt16: + return EnsureEqImpl(context, array, tensor); + case kTfLiteInt32: + return EnsureEqImpl(context, array, tensor); + case kTfLiteInt64: + return EnsureEqImpl(context, array, tensor); + default: + MicroPrintf("cannot compare int array to tensor of type %d.", + tensor->type); + return kTfLiteError; + } +} + +constexpr int kDimsTensor = 0; +constexpr int kValueTensor = 1; +constexpr int kOutputTensor = 0; + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + MicroContext* micro_context = GetMicroContext(context); + + // Ensure inputs and outputs exist. + TfLiteTensor* dims = + micro_context->AllocateTempInputTensor(node, kDimsTensor); + TF_LITE_ENSURE(context, dims != nullptr); + TfLiteTensor* value = + micro_context->AllocateTempInputTensor(node, kValueTensor); + TF_LITE_ENSURE(context, value != nullptr); + TfLiteTensor* output = + micro_context->AllocateTempOutputTensor(node, kOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); + + // The value tensor must be a scalar. + TF_LITE_ENSURE_EQ(context, NumDimensions(value), 0); + + // The value type and output type must match. + TF_LITE_ENSURE_EQ(context, value->type, output->type); + + // The dimension of the output tensor is known in model already. + TFLITE_DCHECK(output->dims != nullptr); + + if (dims->data.data != nullptr) { + // When the dims tensor is specified in model already (i.e. is not an + // activation tensor), the dims tensor must match the output tensor shape. + // As a byproduct, ensures the dims tensor is of an integer type. 
+ TF_LITE_ENSURE_OK(context, EnsureEq(context, output->dims, dims)); + } + + micro_context->DeallocateTempTfLiteTensor(dims); + micro_context->DeallocateTempTfLiteTensor(value); + micro_context->DeallocateTempTfLiteTensor(output); + return kTfLiteOk; +} + +template +void FillImpl(const TfLiteEvalTensor* value, TfLiteEvalTensor* output) { + reference_ops::Fill( + micro::GetTensorShape(value), micro::GetTensorData(value), + micro::GetTensorShape(output), micro::GetTensorData(output)); +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + const TfLiteEvalTensor* value = + micro::GetEvalInput(context, node, kValueTensor); + TfLiteEvalTensor* output = micro::GetEvalOutput(context, node, kOutputTensor); + + switch (value->type) { + case kTfLiteFloat32: + FillImpl(value, output); + break; + case kTfLiteInt32: + FillImpl(value, output); + break; + case kTfLiteInt8: + FillImpl(value, output); + break; + default: + MicroPrintf("Fill only currently supports float32 for input 1, got %d.", + TfLiteTypeGetName(value->type)); + return kTfLiteError; + } + + return kTfLiteOk; +} + +} // namespace + +TfLiteRegistration Register_FILL() { + return tflite::micro::RegisterOp(nullptr, Prepare, Eval); +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/floor.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/floor.cpp new file mode 100644 index 0000000..76c1a19 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/floor.cpp @@ -0,0 +1,48 @@ +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
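Editor's sketch: the FILL kernel above validates that the value input is a scalar of the output's type and, when the dims input is a constant, that it matches the output shape already recorded in the model; Eval then broadcasts the scalar. For each supported type (float32, int32, int8) that amounts to the loop below (illustrative names).

// What reference_ops::Fill amounts to: copy the scalar into every element.
template <typename T>
void FillFlat(T value, T* output, int element_count) {
  for (int i = 0; i < element_count; ++i) {
    output[i] = value;
  }
}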
+==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/floor.h" + +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" + +namespace tflite { + +namespace { + +constexpr int kInputTensor = 0; +constexpr int kOutputTensor = 0; + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + reference_ops::Floor(tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + return kTfLiteOk; +} + +} // namespace + +TfLiteRegistration Register_FLOOR() { + return tflite::micro::RegisterOp(nullptr, nullptr, Eval); +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/floor_div.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/floor_div.cpp new file mode 100644 index 0000000..9fc135c --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/floor_div.cpp @@ -0,0 +1,130 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/floor_div.h" + +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/binary_function.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h" + +namespace tflite { +namespace { + +// Input/output tensor index. 
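Editor's sketch: the FLOOR_DIV kernel defined below rejects zero denominators and otherwise applies reference_ops::FloorDiv element-wise, with 4-D broadcasting when the input shapes differ. For float inputs the per-element operation is equivalent to the following; note that it rounds toward negative infinity, unlike C++ integer division, which truncates toward zero.

#include <cmath>

// Per-element FLOOR_DIV for floats: floor of the true quotient.
// FloorDivReference(-7.0f, 2.0f) == -4.0f, whereas -7 / 2 in C++ gives -3.
inline float FloorDivReference(float numerator, float denominator) {
  return std::floor(numerator / denominator);
}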
+constexpr int kInputTensor1 = 0; +constexpr int kInputTensor2 = 1; +constexpr int kOutputTensor = 0; + +TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) { + MicroContext* micro_context = GetMicroContext(context); + + TF_LITE_ENSURE_EQ(context, NumInputs(node), 2); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + + TfLiteTensor* input1 = + micro_context->AllocateTempInputTensor(node, kInputTensor1); + TF_LITE_ENSURE(context, input1 != nullptr); + TfLiteTensor* input2 = + micro_context->AllocateTempInputTensor(node, kInputTensor2); + TF_LITE_ENSURE(context, input2 != nullptr); + TfLiteTensor* output = + micro_context->AllocateTempOutputTensor(node, kOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); + + TF_LITE_ENSURE_TYPES_EQ(context, input1->type, input2->type); + TF_LITE_ENSURE_TYPES_EQ(context, input1->type, output->type); + + micro_context->DeallocateTempTfLiteTensor(input1); + micro_context->DeallocateTempTfLiteTensor(input2); + micro_context->DeallocateTempTfLiteTensor(output); + + return kTfLiteOk; +} + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + return nullptr; +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + return CalculateOpData(context, node); +} + +template +TfLiteStatus EvalFloorDiv(TfLiteContext* context, + const TfLiteEvalTensor* input1, + const TfLiteEvalTensor* input2, + TfLiteEvalTensor* output) { + const T* denominator_data = tflite::micro::GetTensorData(input2); + + // Validate the denominator. + for (int i = 0; i < tflite::ElementCount(*input2->dims); ++i) { + if (std::equal_to()(denominator_data[i], 0)) { + MicroPrintf("Division by 0"); + return kTfLiteError; + } + } + + bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2); + + if (requires_broadcast) { + reference_ops::BroadcastBinaryFunction4DSlow( + tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), denominator_data, + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output), reference_ops::FloorDiv); + } else { + reference_ops::BinaryFunction( + tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), denominator_data, + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output), reference_ops::FloorDiv); + } + + return kTfLiteOk; +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + const TfLiteEvalTensor* input1 = + tflite::micro::GetEvalInput(context, node, kInputTensor1); + const TfLiteEvalTensor* input2 = + tflite::micro::GetEvalInput(context, node, kInputTensor2); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + + switch (input1->type) { + case kTfLiteFloat32: { + return EvalFloorDiv(context, input1, input2, output); + } + default: { + MicroPrintf("Type '%s' is not supported by FLOOR_DIV.", + TfLiteTypeGetName(input1->type)); + return kTfLiteError; + } + } +} + +} // namespace + +TfLiteRegistration Register_FLOOR_DIV() { + return tflite::micro::RegisterOp(Init, Prepare, Eval); +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/floor_mod.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/floor_mod.cpp new file mode 100644 index 0000000..acf4bbc --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/floor_mod.cpp @@ -0,0 +1,128 @@ +/* Copyright 2020 The TensorFlow Authors. 
All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/floor_mod.h" + +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/binary_function.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h" + +// OLD-TODO(b/117523611): We should factor out a binary_op and put binary ops +// there. +namespace tflite { +namespace { + +// Input/output tensor index. +constexpr int kInputTensor1 = 0; +constexpr int kInputTensor2 = 1; +constexpr int kOutputTensor = 0; + +// OLD-TODO(b/117912880): Support quantization. + +TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) { + MicroContext* micro_context = GetMicroContext(context); + + TF_LITE_ENSURE_EQ(context, NumInputs(node), 2); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + + TfLiteTensor* input1 = + micro_context->AllocateTempInputTensor(node, kInputTensor1); + TF_LITE_ENSURE(context, input1 != nullptr); + TfLiteTensor* input2 = + micro_context->AllocateTempInputTensor(node, kInputTensor2); + TF_LITE_ENSURE(context, input2 != nullptr); + TfLiteTensor* output = + micro_context->AllocateTempOutputTensor(node, kOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); + + TF_LITE_ENSURE_TYPES_EQ(context, input1->type, input2->type); + TF_LITE_ENSURE_TYPES_EQ(context, input1->type, output->type); + + micro_context->DeallocateTempTfLiteTensor(input1); + micro_context->DeallocateTempTfLiteTensor(input2); + micro_context->DeallocateTempTfLiteTensor(output); + + return kTfLiteOk; +} + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + return nullptr; +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + return CalculateOpData(context, node); +} + +template +TfLiteStatus EvalFloorMod(TfLiteContext* context, bool requires_broadcast, + const TfLiteEvalTensor* input1, + const TfLiteEvalTensor* input2, + TfLiteEvalTensor* output) { + const T* denominator_data = tflite::micro::GetTensorData(input2); + + if (requires_broadcast) { + reference_ops::BroadcastBinaryFunction4DSlow( + tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), denominator_data, + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output), reference_ops::FloorMod); + } else { + reference_ops::BinaryFunction( + tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), denominator_data, + 
tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output), reference_ops::FloorMod); + } + + return kTfLiteOk; +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + const TfLiteEvalTensor* input1 = + tflite::micro::GetEvalInput(context, node, kInputTensor1); + const TfLiteEvalTensor* input2 = + tflite::micro::GetEvalInput(context, node, kInputTensor2); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + + bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2); + + switch (input1->type) { + case kTfLiteFloat32: { + return EvalFloorMod(context, requires_broadcast, input1, input2, + output); + } + default: { + MicroPrintf("Type '%s' is not supported by FLOOR_MOD.", + TfLiteTypeGetName(input1->type)); + return kTfLiteError; + } + } +} + +} // namespace + +TfLiteRegistration Register_FLOOR_MOD() { + return tflite::micro::RegisterOp(Init, Prepare, Eval); +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/fully_connected.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/fully_connected.cpp new file mode 100644 index 0000000..27ef622 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/fully_connected.cpp @@ -0,0 +1,1809 @@ +// Patched by Edge Impulse to include reference and hardware-accelerated kernels +#include "../../../../classifier/ei_classifier_config.h" +#if 0 == 1 +/* noop */ +#elif EI_CLASSIFIER_TFLITE_ENABLE_CMSIS_NN == 1 +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/fully_connected.h" + +#include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnfunctions.h" +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/portable_tensor_utils.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/fully_connected.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +namespace tflite { +namespace { + +struct OpData { + OpDataFullyConnected reference_op_data; + + // Conv 1x1 that may be invoked in some cases currently need per channel + // quantization. + int32_t* per_channel_output_multiplier; + int32_t* per_channel_output_shift; + + // Index to buffer for optimizations if applicable. 
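+  // Prepare() resets this to -1 and only assigns a real index when CMSIS-NN
+  // reports a non-zero scratch buffer requirement.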
+ int buffer_idx; + + int32_t batches; + int32_t accum_depth; + int32_t output_depth; +}; + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(OpData)); +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + TFLITE_DCHECK(node->builtin_data != nullptr); + + OpData* data = static_cast(node->user_data); + const auto params = + static_cast(node->builtin_data); + + MicroContext* micro_context = GetMicroContext(context); + TfLiteTensor* input = + micro_context->AllocateTempInputTensor(node, kFullyConnectedInputTensor); + TF_LITE_ENSURE(context, input != nullptr); + TfLiteTensor* filter = micro_context->AllocateTempInputTensor( + node, kFullyConnectedWeightsTensor); + TF_LITE_ENSURE(context, filter != nullptr); + TfLiteTensor* bias = + micro_context->AllocateTempInputTensor(node, kFullyConnectedBiasTensor); + TfLiteTensor* output = micro_context->AllocateTempOutputTensor( + node, kFullyConnectedOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); + + TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type); + + const RuntimeShape filter_shape = GetTensorShape(filter); + const RuntimeShape output_shape = GetTensorShape(output); + const int filter_dim_count = filter_shape.DimensionsCount(); + const int output_dim_count = output_shape.DimensionsCount(); + cmsis_nn_dims filter_dims; + filter_dims.n = filter_shape.Dims(filter_dim_count - 1); + filter_dims.h = 1; + filter_dims.w = 1; + filter_dims.c = output_shape.Dims(output_dim_count - 1); + + data->accum_depth = filter_shape.Dims(filter_dim_count - 1); + data->batches = FlatSizeSkipDim(output_shape, output_dim_count - 1); + data->output_depth = output_shape.Dims(output_dim_count - 1); + + // Set buffer index to a reset value + data->buffer_idx = -1; + TF_LITE_ENSURE_STATUS(CalculateOpDataFullyConnected( + context, params->activation, input->type, input, filter, bias, output, + &(data->reference_op_data))); + + int32_t buf_size = 0; + + if (input->type == kTfLiteInt16) { +#if EI_TFLITE_DISABLE_FULLY_CONNECTED_IN_I16 + MicroPrintf("Filter data type %s currently not supported.", + TfLiteTypeGetName(input->type)); + return kTfLiteError; +#endif + TF_LITE_ENSURE_EQ(context, input->params.zero_point, 0); + TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0); + buf_size = arm_fully_connected_s16_get_buffer_size(&filter_dims); + } else if (input->type == kTfLiteInt8) { +#if EI_TFLITE_DISABLE_FULLY_CONNECTED_IN_I8 + MicroPrintf("Filter data type %s currently not supported.", + TfLiteTypeGetName(input->type)); + return kTfLiteError; +#endif + const RuntimeShape input_shape = GetTensorShape(input); + + TFLITE_DCHECK_GE(output_dim_count, 2); + TFLITE_DCHECK_LE(output_dim_count, 4); + +#if EI_TFLITE_DISABLE_CONV_2D_IN_I8 + buf_size = arm_fully_connected_s8_get_buffer_size(&filter_dims); +#else + if (output_dim_count > 2 && data->accum_depth % 4 == 0) { + data->per_channel_output_multiplier = + static_cast(context->AllocatePersistentBuffer( + context, data->output_depth * sizeof(int32_t))); + data->per_channel_output_shift = + static_cast(context->AllocatePersistentBuffer( + context, data->output_depth * sizeof(int32_t))); + + cmsis_nn_dims input_dims; + input_dims.n = data->batches; + input_dims.h = 1; + input_dims.w = 1; + input_dims.c = data->accum_depth; + + buf_size = arm_convolve_1x1_s8_fast_get_buffer_size(&input_dims); + } else { + 
buf_size = arm_fully_connected_s8_get_buffer_size(&filter_dims); + } +#endif + } + + if (filter->type == kTfLiteInt4) { + int filter_size = + RuntimeShape(filter->dims->size, + reinterpret_cast(filter->dims->data)) + .FlatSize(); + context->RequestScratchBufferInArena( + context, filter_size, &data->reference_op_data.filter_buffer_index); + } + + if (buf_size > 0) { + TF_LITE_ENSURE_STATUS(context->RequestScratchBufferInArena( + context, buf_size, &data->buffer_idx)); + } + + micro_context->DeallocateTempTfLiteTensor(output); + micro_context->DeallocateTempTfLiteTensor(input); + micro_context->DeallocateTempTfLiteTensor(filter); + if (bias != nullptr) { + micro_context->DeallocateTempTfLiteTensor(bias); + } + + return kTfLiteOk; +} + +void PopulateCommonParams(TfLiteContext* context, + cmsis_nn_per_tensor_quant_params* const quant_params, + cmsis_nn_dims* const input_dims, + cmsis_nn_dims* const filter_dims, + cmsis_nn_dims* const bias_dims, + cmsis_nn_dims* const output_dims, + cmsis_nn_context* const ctx, const OpData& data) { + quant_params->multiplier = data.reference_op_data.output_multiplier; + quant_params->shift = data.reference_op_data.output_shift; + + input_dims->n = data.batches; + input_dims->h = 1; + input_dims->w = 1; + input_dims->c = data.accum_depth; + + filter_dims->n = data.accum_depth; + filter_dims->h = 1; + filter_dims->w = 1; + filter_dims->c = data.output_depth; + + bias_dims->n = 1; + bias_dims->h = 1; + bias_dims->w = 1; + bias_dims->c = data.output_depth; + + output_dims->n = data.batches; + output_dims->h = 1; + output_dims->w = 1; + output_dims->c = data.output_depth; + + ctx->buf = nullptr; + ctx->size = 0; + if (data.buffer_idx > -1) { + ctx->buf = context->GetScratchBuffer(context, data.buffer_idx); + } +} + +TfLiteStatus EvalQuantizedInt8(TfLiteContext* context, TfLiteNode* node, + const OpData& data, + const TfLiteEvalTensor* input, + const TfLiteEvalTensor* filter, + const TfLiteEvalTensor* bias, + TfLiteEvalTensor* output) { + const RuntimeShape output_shape = tflite::micro::GetTensorShape(output); + const int output_dim_count = output_shape.DimensionsCount(); + TFLITE_DCHECK_GE(output_dim_count, 2); + TFLITE_DCHECK_LE(output_dim_count, 4); + + cmsis_nn_per_tensor_quant_params quant_params; + cmsis_nn_dims input_dims; + cmsis_nn_dims filter_dims; + cmsis_nn_dims bias_dims; + cmsis_nn_dims output_dims; + cmsis_nn_context ctx; + + PopulateCommonParams(context, &quant_params, &input_dims, &filter_dims, + &bias_dims, &output_dims, &ctx, data); + + const int32_t* bias_data = + tflite::micro::GetOptionalTensorData(bias); + +#if EI_TFLITE_DISABLE_CONV_2D_IN_I8 + cmsis_nn_fc_params fc_params; + fc_params.input_offset = -data.reference_op_data.input_zero_point; + fc_params.output_offset = data.reference_op_data.output_zero_point; + fc_params.filter_offset = 0; + fc_params.activation.min = data.reference_op_data.output_activation_min; + fc_params.activation.max = data.reference_op_data.output_activation_max; + + TF_LITE_ENSURE_EQ( + context, + arm_fully_connected_s8( + &ctx, &fc_params, &quant_params, &input_dims, + tflite::micro::GetTensorData(input), &filter_dims, + tflite::micro::GetTensorData(filter), &bias_dims, bias_data, + &output_dims, tflite::micro::GetTensorData(output)), + ARM_CMSIS_NN_SUCCESS); +#else + + if (output_dim_count > 2 && data.accum_depth % 4 == 0) { + cmsis_nn_conv_params conv_params; + conv_params.dilation.h = 1; + conv_params.dilation.w = 1; + conv_params.input_offset = -data.reference_op_data.input_zero_point; + 
conv_params.output_offset = data.reference_op_data.output_zero_point; + conv_params.stride.h = 1; + conv_params.stride.w = 1; + conv_params.padding.h = 0; + conv_params.padding.w = 0; + conv_params.activation.min = data.reference_op_data.output_activation_min; + conv_params.activation.max = data.reference_op_data.output_activation_max; + + cmsis_nn_per_channel_quant_params per_channel_quant_params; + per_channel_quant_params.multiplier = + const_cast(data.per_channel_output_multiplier); + per_channel_quant_params.shift = + const_cast(data.per_channel_output_shift); + + for (int i = 0; i < data.output_depth; i++) { + per_channel_quant_params.multiplier[i] = quant_params.multiplier; + per_channel_quant_params.shift[i] = quant_params.shift; + } + + TF_LITE_ENSURE_EQ( + context, + arm_convolve_1x1_s8_fast( + &ctx, &conv_params, &per_channel_quant_params, &input_dims, + tflite::micro::GetTensorData(input), &filter_dims, + tflite::micro::GetTensorData(filter), &bias_dims, bias_data, + &output_dims, tflite::micro::GetTensorData(output)), + ARM_CMSIS_NN_SUCCESS); + } else { + cmsis_nn_fc_params fc_params; + fc_params.input_offset = -data.reference_op_data.input_zero_point; + fc_params.output_offset = data.reference_op_data.output_zero_point; + fc_params.filter_offset = 0; + fc_params.activation.min = data.reference_op_data.output_activation_min; + fc_params.activation.max = data.reference_op_data.output_activation_max; + + TF_LITE_ENSURE_EQ( + context, + arm_fully_connected_s8( + &ctx, &fc_params, &quant_params, &input_dims, + tflite::micro::GetTensorData(input), &filter_dims, + tflite::micro::GetTensorData(filter), &bias_dims, bias_data, + &output_dims, tflite::micro::GetTensorData(output)), + ARM_CMSIS_NN_SUCCESS); + } +#endif + + return kTfLiteOk; +} + +TfLiteStatus EvalQuantizedInt16(TfLiteContext* context, TfLiteNode* node, + const OpData& data, + const TfLiteEvalTensor* input, + const TfLiteEvalTensor* filter, + const TfLiteEvalTensor* bias, + TfLiteEvalTensor* output) { + cmsis_nn_per_tensor_quant_params quant_params; + cmsis_nn_dims input_dims; + cmsis_nn_dims filter_dims; + cmsis_nn_dims bias_dims; + cmsis_nn_dims output_dims; + cmsis_nn_context ctx; + + PopulateCommonParams(context, &quant_params, &input_dims, &filter_dims, + &bias_dims, &output_dims, &ctx, data); + + const int64_t* bias_data = + tflite::micro::GetOptionalTensorData(bias); + + cmsis_nn_fc_params fc_params; + fc_params.input_offset = -data.reference_op_data.input_zero_point; + fc_params.output_offset = data.reference_op_data.output_zero_point; + fc_params.filter_offset = 0; + fc_params.activation.min = data.reference_op_data.output_activation_min; + fc_params.activation.max = data.reference_op_data.output_activation_max; + + TF_LITE_ENSURE_EQ( + context, + arm_fully_connected_s16( + &ctx, &fc_params, &quant_params, &input_dims, + tflite::micro::GetTensorData(input), &filter_dims, + tflite::micro::GetTensorData(filter), &bias_dims, bias_data, + &output_dims, tflite::micro::GetTensorData(output)), + ARM_CMSIS_NN_SUCCESS); + + return kTfLiteOk; +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->builtin_data != nullptr); + const auto* params = + static_cast(node->builtin_data); + + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kFullyConnectedInputTensor); + const TfLiteEvalTensor* filter = + tflite::micro::GetEvalInput(context, node, kFullyConnectedWeightsTensor); + const TfLiteEvalTensor* bias = + tflite::micro::GetEvalInput(context, node, 
kFullyConnectedBiasTensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kFullyConnectedOutputTensor); + + TFLITE_DCHECK(node->user_data != nullptr); + const OpData& data = *(static_cast(node->user_data)); + + TfLiteEvalTensor filter_int8 = tflite::micro::MakeUnpackedInt4Tensor( + context, data.reference_op_data.filter_buffer_index, filter); + + // Checks in Prepare ensure input, output and filter types are all the same. + switch (input->type) { + case kTfLiteFloat32: { +#if EI_TFLITE_DISABLE_FULLY_CONNECTED_IN_F32 + MicroPrintf("Type %s (%d) not supported.", + TfLiteTypeGetName(input->type), input->type); + return kTfLiteError; +#endif + const float* bias_data = + tflite::micro::GetOptionalTensorData(bias); + tflite::reference_ops::FullyConnected( + FullyConnectedParamsFloat(params->activation), + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(bias), bias_data, + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + break; + } + case kTfLiteInt8: { + switch (filter_int8.type) { + case kTfLiteInt8: +#if EI_TFLITE_DISABLE_FULLY_CONNECTED_IN_I8 + MicroPrintf("Filter data type %s currently not supported.", + TfLiteTypeGetName(filter->type)); + return kTfLiteError; +#endif + return EvalQuantizedInt8(context, node, data, input, &filter_int8, + bias, output); + default: + MicroPrintf("Filter Type %s (%d) not supported.", + TfLiteTypeGetName(filter->type), filter->type); + return kTfLiteError; + } + break; + } + case kTfLiteInt16: { +#if EI_TFLITE_DISABLE_FULLY_CONNECTED_IN_I16 + MicroPrintf("Filter data type %s currently not supported.", + TfLiteTypeGetName(filter->type)); + return kTfLiteError; +#endif + return EvalQuantizedInt16(context, node, data, input, filter, bias, + output); + } + default: { + MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type), + input->type); + return kTfLiteError; + } + } + return kTfLiteOk; +} + +// Note that the current function names are not ideal at all (this EvalInt8 +// function internally calls EvalQuantizedInt8, and there is similar name +// aliasing in the Eval function too). We will be attempting to have a more +// descriptive naming convention but holding off on that for now, since the +// renaming might be coupled with reducing code duplication and some additional +// refactoring. +TfLiteStatus EvalInt8(TfLiteContext* context, TfLiteNode* node) { + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kFullyConnectedInputTensor); + const TfLiteEvalTensor* filter = + tflite::micro::GetEvalInput(context, node, kFullyConnectedWeightsTensor); + const TfLiteEvalTensor* bias = + tflite::micro::GetEvalInput(context, node, kFullyConnectedBiasTensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kFullyConnectedOutputTensor); + + TFLITE_DCHECK(node->user_data != nullptr); + const OpData& data = *(static_cast(node->user_data)); + + // Checks in Prepare ensure input, output and filter types are all the same. 
+ if (input->type != kTfLiteInt8) { + MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type), + input->type); + return kTfLiteError; + } + + TfLiteEvalTensor filter_int8 = tflite::micro::MakeUnpackedInt4Tensor( + context, data.reference_op_data.filter_buffer_index, filter); + + return EvalQuantizedInt8(context, node, data, input, &filter_int8, bias, + output); +} + +TfLiteStatus EvalInt16(TfLiteContext* context, TfLiteNode* node) { + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kFullyConnectedInputTensor); + const TfLiteEvalTensor* filter = + tflite::micro::GetEvalInput(context, node, kFullyConnectedWeightsTensor); + const TfLiteEvalTensor* bias = + tflite::micro::GetEvalInput(context, node, kFullyConnectedBiasTensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kFullyConnectedOutputTensor); + + TFLITE_DCHECK(node->user_data != nullptr); + const OpData& data = *(static_cast(node->user_data)); + + // Checks in Prepare ensure input, output and filter types are all the same. + if (input->type != kTfLiteInt16) { + MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type), + input->type); + return kTfLiteError; + } + + return EvalQuantizedInt16(context, node, data, input, filter, bias, output); +} + +} // namespace + +TfLiteRegistration Register_FULLY_CONNECTED() { + return tflite::micro::RegisterOp(Init, Prepare, Eval); +} + +TfLiteRegistration Register_FULLY_CONNECTED_INT8() { + return tflite::micro::RegisterOp(Init, Prepare, EvalInt8); +} + +TfLiteRegistration Register_FULLY_CONNECTED_INT16() { + return tflite::micro::RegisterOp(Init, Prepare, EvalInt16); +} + +} // namespace tflite + +#elif EI_CLASSIFIER_TFLITE_ENABLE_ARC == 1 +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
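Editor's sketch: in the CMSIS-NN build above, Register_FULLY_CONNECTED dispatches on the input type on every invoke, while Register_FULLY_CONNECTED_INT8 and Register_FULLY_CONNECTED_INT16 bind EvalInt8/EvalInt16 directly and reject any other input type. A hedged usage sketch follows; passing a registration to AddFullyConnected is the usual TFLM resolver pattern, and the exact resolver API of this SDK copy is an assumption.

#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/fully_connected.h"
#include "edge-impulse-sdk/tensorflow/lite/micro/micro_mutable_op_resolver.h"

// For a model whose FULLY_CONNECTED nodes are known to be int8-quantized,
// registering the int8-specific kernel skips the per-invoke type switch.
tflite::MicroMutableOpResolver<1> resolver;

void AddInt8FullyConnected() {
  resolver.AddFullyConnected(tflite::Register_FULLY_CONNECTED_INT8());
}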
+==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/fully_connected.h" + +#include "mli_api.h" // NOLINT +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/mli_slicers.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/mli_tf_utils.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/scratch_buf_mgr.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/scratch_buffers.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +namespace tflite { +namespace { + +struct OpData { + // The scaling factor from input to output (aka the 'real multiplier') can + // be represented as a fixed point multiplier plus a left shift. + int32_t output_multiplier; + int output_shift; + // The range of the fused activation layer. For example for kNone and + // uint8_t these would be 0 and 255. + int32_t output_activation_min; + int32_t output_activation_max; + // The index of the temporary tensor where the quantized inputs are cached. + int input_quantized_index; + // Cached tensor zero point values for quantized operations. + int32_t input_zero_point; + int32_t filter_zero_point; + int32_t output_zero_point; + + // The result of checking if MLI optimized version of tensors can be used. + bool is_mli_applicable; + + // Tensors in MLI format. 
+ mutable ops::micro::MliTensorInterface mli_in; + mutable ops::micro::MliTensorInterface mli_weights; + mutable ops::micro::MliTensorInterface mli_bias; + mutable ops::micro::MliTensorInterface mli_out; + +#ifdef MLI_2_0 + mli_fully_connected_cfg* cfg; +#endif +}; + +constexpr int kInputTensor = 0; +constexpr int kWeightsTensor = 1; +constexpr int kBiasTensor = 2; +constexpr int kOutputTensor = 0; + +bool IsMliApplicable(TfLiteContext* context, const TfLiteTensor* input, + const TfLiteTensor* filter, const TfLiteTensor* bias, + const TfLiteFullyConnectedParams* params, + int32_t output_activation_min, + int32_t output_activation_max) { + // MLI optimized version only supports int8_t datatype and no fused Relu and + // symmetric per-tensor quantization of weights (not per-axis) + bool ret_val = + (filter->type == kTfLiteInt8) && (input->type == kTfLiteInt8) && + (bias->type == kTfLiteInt32) && +#ifndef MLI_2_0 + (params->activation == kTfLiteActNone || + (output_activation_min == -128 && output_activation_max == 127)) && +#endif + (filter->params.zero_point == 0); + return ret_val; +} + +TfLiteStatus CalculateOpData(TfLiteContext* context, + const TfLiteFullyConnectedParams* params, + TfLiteType data_type, const TfLiteTensor* input, + const TfLiteTensor* filter, + const TfLiteTensor* bias, TfLiteTensor* output, + OpData* data) { + TfLiteStatus status = kTfLiteOk; +#if !defined(TF_LITE_STRIP_REFERENCE_IMPL) + if (data_type != kTfLiteFloat32 && !data->is_mli_applicable) { + double real_multiplier = 0.0; + TF_LITE_ENSURE_STATUS(GetQuantizedConvolutionMultipler( + context, input, filter, bias, output, &real_multiplier)); + int exponent; + QuantizeMultiplier(real_multiplier, &data->output_multiplier, &exponent); + data->output_shift = -exponent; + TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized( + context, params->activation, output, &data->output_activation_min, + &data->output_activation_max)); + } +#endif + return status; +} + +} // namespace + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(OpData)); +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + TFLITE_DCHECK(node->builtin_data != nullptr); + + OpData* data = static_cast(node->user_data); + const auto params = + static_cast(node->builtin_data); + + MicroContext* micro_context = GetMicroContext(context); + + TfLiteTensor* input = + micro_context->AllocateTempInputTensor(node, kInputTensor); + TfLiteTensor* filter = + micro_context->AllocateTempInputTensor(node, kWeightsTensor); + TfLiteTensor* bias = micro_context->AllocateTempInputTensor(node, kBiasTensor); + TfLiteTensor* output = micro_context->AllocateTempOutputTensor(node, kOutputTensor); + + TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type); + TF_LITE_ENSURE_MSG(context, input->type == filter->type, + "Hybrid models are not supported on TFLite Micro."); + + data->input_zero_point = input->params.zero_point; + data->filter_zero_point = filter->params.zero_point; + data->output_zero_point = output->params.zero_point; + + TfLiteStatus status = CalculateOpData(context, params, input->type, input, + filter, bias, output, data); + + data->is_mli_applicable = + IsMliApplicable(context, input, filter, bias, params, + data->output_activation_min, data->output_activation_max); + + if (input->type == kTfLiteInt8 && data->is_mli_applicable) { +#if 
EI_TFLITE_DISABLE_FULLY_CONNECTED_IN_I8 + TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", + TfLiteTypeGetName(output->type), output->type); + return kTfLiteError; +#endif + data->mli_in = ops::micro::MliTensorInterface(static_cast( + context->AllocatePersistentBuffer(context, sizeof(mli_tensor)))); + data->mli_weights = ops::micro::MliTensorInterface(static_cast( + context->AllocatePersistentBuffer(context, sizeof(mli_tensor)))); + data->mli_bias = ops::micro::MliTensorInterface(static_cast( + context->AllocatePersistentBuffer(context, sizeof(mli_tensor)))); + data->mli_out = ops::micro::MliTensorInterface(static_cast( + context->AllocatePersistentBuffer(context, sizeof(mli_tensor)))); + + ops::micro::ConvertToMliTensor(input, &data->mli_in); + ops::micro::ConvertToMliTensor(filter, &data->mli_weights); + ops::micro::ConvertToMliTensor(bias, &data->mli_bias); +#ifdef MLI_2_0 + ops::micro::AdjustBiasTensor(&data->mli_bias, &data->mli_in, + &data->mli_weights); +#endif + ops::micro::ConvertToMliTensor(output, &data->mli_out); + +#ifdef MLI_2_0 + if (data->output_activation_min == -128 && + data->output_activation_max == 127) { + data->cfg->relu.type = MLI_RELU_NONE; + } else if (params->activation == kTfLiteActRelu) { + data->cfg->relu.type = MLI_RELU_GEN; + } else if (params->activation == kTfLiteActRelu6) { + data->cfg->relu.type = MLI_RELU_6; + } else if (params->activation == kTfLiteActReluN1To1) { + data->cfg->relu.type = MLI_RELU_1; + } else { + data->cfg->relu.type = MLI_RELU_NONE; + } +#endif + + /* The input tensor can have more than 2 dimensions. for the compute this + doesn't make any difference because all the inputs or a batch entry will + be used anyway. because the MLI kernel doesn't recognize the multiple + dimensions, the tensor shape is casted to a {batchnum, inputsize} shape. 
*/ + data->mli_in.Shape()[0] = data->mli_out.Shape()[0]; +#if defined(MLI_2_0) && !defined(MLI_2_0_KRNL_TEST) + data->mli_in.Shape()[1] = data->mli_weights.Shape()[0]; +#else + data->mli_in.Shape()[1] = data->mli_weights.Shape()[1]; +#endif + data->mli_in.Shape()[2] = 0; + data->mli_in.Shape()[3] = 0; + data->mli_in.MemStride()[0] = data->mli_in.Shape()[1]; + data->mli_in.MemStride()[1] = 0; + *data->mli_in.Rank() = 2; + } + + micro_context->DeallocateTempTfLiteTensor(input); + micro_context->DeallocateTempTfLiteTensor(filter); + micro_context->DeallocateTempTfLiteTensor(bias); + micro_context->DeallocateTempTfLiteTensor(output); + return status; +} + +TfLiteStatus EvalMliQuantizedInt8(TfLiteContext* context, TfLiteNode* node, + const TfLiteFullyConnectedParams* params, + const OpData& data, + const TfLiteEvalTensor* input, + const TfLiteEvalTensor* filter, + const TfLiteEvalTensor* bias, + TfLiteEvalTensor* output) { + ops::micro::MliTensorAttachBuffer(input, &data.mli_in); + ops::micro::MliTensorAttachBuffer(filter, &data.mli_weights); + ops::micro::MliTensorAttachBuffer(bias, &data.mli_bias); + ops::micro::MliTensorAttachBuffer(output, &data.mli_out); + + // Tensors for data in fast (local) memory and config to copy data from + // external to local memory + mli_tensor weights_local = *data.mli_weights.MliTensor(); + mli_tensor bias_local = *data.mli_bias.MliTensor(); + mli_tensor in_local = *data.mli_in.MliTensor(); + mli_tensor out_local = *data.mli_out.MliTensor(); + + ops::micro::MliTensorInterface weights_local_interface(&weights_local); + ops::micro::MliTensorInterface bias_local_interface(&bias_local); + ops::micro::MliTensorInterface in_local_interface(&in_local); + ops::micro::MliTensorInterface out_local_interface(&out_local); + + mli_mov_cfg_t copy_config; + mli_mov_cfg_for_copy(©_config); +#if defined(MLI_2_0) && !defined(MLI_2_0_KRNL_TEST) + const int weight_out_dimension = 1; +#else + const int weight_out_dimension = 0; +#endif + // bias has only 1 dimension + const int bias_out_ch_dimension = 0; + const int out_tensor_dimension = 1; + const int input_size_dimension = 1; + int slice_size = data.mli_weights.Shape()[weight_out_dimension]; + + /* allocate the local buffers, and compute the slice size */ + TF_LITE_ENSURE_STATUS( + ops::micro::get_arc_scratch_buffer_for_fully_connect_tensors( + context, &in_local_interface, &weights_local_interface, + &bias_local_interface, &out_local_interface)); + TF_LITE_ENSURE_STATUS(ops::micro::arc_scratch_buffer_calc_slice_size_weights( + &weights_local_interface, &bias_local_interface, weight_out_dimension, + &slice_size)); + + int max_out_slice_size = *out_local_interface.DataCapacity() / + mli_hlp_tensor_element_size(&out_local); + + if (slice_size > max_out_slice_size) slice_size = max_out_slice_size; + + /* is_local indicates that the tensor is already in local memory, + so in that case the original tensor can be used, + and there is no need to copy it to the local tensor*/ + const bool in_is_local = + in_local_interface.Data() == data.mli_in.Data(); + const bool out_is_local = + out_local_interface.Data() == data.mli_out.Data(); + const bool b_is_local = + bias_local_interface.Data() == data.mli_bias.Data(); +#ifndef MLI_2_0_KRNL_TEST + const bool w_is_local = + weights_local_interface.Data() == data.mli_weights.Data(); +#endif + +#if defined(MLI_2_0) && !defined(MLI_2_0_KRNL_TEST) + ops::micro::TensorSlicer w_slice(data.mli_weights.MliTensor(), + weight_out_dimension, slice_size, 0, 0, 0, + true); +#else + ops::micro::TensorSlicer 
w_slice(data.mli_weights.MliTensor(), + weight_out_dimension, slice_size); +#endif + ops::micro::TensorSlicer b_slice(data.mli_bias.MliTensor(), + bias_out_ch_dimension, slice_size); + ops::micro::TensorSlicer out_ch_slice(data.mli_out.MliTensor(), + out_tensor_dimension, slice_size, 0, 0, + 0, true); + +#ifdef MLI_2_0_KRNL_TEST + mli_tensor* w_ptr = &weights_local; +#else + mli_tensor* w_ptr = w_is_local ? w_slice.Sub() : &weights_local; +#endif + mli_tensor* b_ptr = b_is_local ? b_slice.Sub() : &bias_local; + + void* input_buffer_ptr = NULL; + + while (!w_slice.Done()) { +#if defined(MLI_2_0) && !defined(MLI_2_0_KRNL_TEST) + w_ptr->el_params.sa.scale.mem.pi16 = NULL; + b_ptr->el_params.sa.scale.mem.pi16 = NULL; +#endif + +#ifndef MLI_2_0_KRNL_TEST + mli_mov_tensor_sync(w_slice.Sub(), ©_config, w_ptr); +#endif + mli_mov_tensor_sync(b_slice.Sub(), ©_config, b_ptr); + + // Slice the input over the batches (one at a time with the size of a + // complete input) + ops::micro::TensorSlicer in_slice( + data.mli_in.MliTensor(), input_size_dimension, + data.mli_in.Shape()[input_size_dimension]); + + /* output tensor is already sliced in the output size dimension. + out_ch_slice.Sub() is the tensor for the amount of output size of this + iteration of the weight slice loop. This tensor needs to be further + sliced over the batch */ + ops::micro::TensorSlicer out_slice(out_ch_slice.Sub(), out_tensor_dimension, + slice_size); + + /* setup the pointers to the local or remote tensor to make the code + * inside the loop easier. */ + mli_tensor* in_ptr = in_is_local ? in_slice.Sub() : &in_local; + mli_tensor* out_ptr = out_is_local ? out_slice.Sub() : &out_local; + +#ifdef MLI_2_0_KRNL_TEST + /* Permute weights tensor to the HWCN layout */ + // Assertion here to prevent usage non-contiguous buffer memory. 
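// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the SDK sources): the TensorSlicer loop in
// this function processes the weight matrix in output-channel slices so that
// every slice fits the fast local (CCM) scratch memory before the MLI kernel
// runs on it. Stripped of the MLI types, the pattern looks roughly like this;
// all names below are made up for the illustration.
#include <algorithm>
#include <cstdint>
#include <cstring>

void SlicedMatVecSketch(const int8_t* weights, int out_ch, int in_depth,
                        const int8_t* input, int32_t* output,
                        int8_t* local_buf, int local_buf_bytes) {
  // How many weight rows (output channels) fit into the local buffer at once.
  const int rows_per_slice = std::max(1, local_buf_bytes / in_depth);
  for (int row0 = 0; row0 < out_ch; row0 += rows_per_slice) {
    const int rows = std::min(rows_per_slice, out_ch - row0);
    // Copy one weight slice into fast memory; mli_mov_tensor_sync() plays this
    // role for real MLI tensors.
    std::memcpy(local_buf, weights + row0 * in_depth,
                static_cast<size_t>(rows) * in_depth);
    // Compute only the output channels covered by this slice.
    for (int r = 0; r < rows; ++r) {
      int32_t acc = 0;
      for (int c = 0; c < in_depth; ++c) {
        acc += static_cast<int32_t>(local_buf[r * in_depth + c]) *
               static_cast<int32_t>(input[c]);
      }
      output[row0 + r] = acc;
    }
  }
}
// ---------------------------------------------------------------------------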
+ if (data.mli_out.Shape()[out_tensor_dimension] != + out_slice.Sub()->shape[0]) { + MicroPrintf("Slicing is not supported with real-time permutation."); + return kTfLiteError; + } + mli_permute_cfg permute_cfg = {{1, 0, 2, 3}}; + ops::micro::permute_weights(data.mli_weights.MliTensor(), &permute_cfg, + w_ptr, &out_ptr->data); +#endif + + while (!out_slice.Done()) { + if (!out_is_local) { + ops::micro::PrepareLocalTensor(out_slice.Sub(), &out_local); + ops::micro::PrepareLocalTensor(in_slice.Sub(), &in_local); + } + // if same input copy as previous iteration, skip the copy of input +#ifdef MLI_2_0 + if (in_slice.Sub()->data.mem.pi8 != input_buffer_ptr) { + mli_mov_tensor_sync(in_slice.Sub(), ©_config, in_ptr); + input_buffer_ptr = in_slice.Sub()->data.mem.pi8; + } + mli_fully_connected_cfg cfg; + cfg.relu.type = MLI_RELU_NONE; + mli_krn_fully_connected_sa8_sa8_sa32(in_ptr, w_ptr, b_ptr, &cfg, out_ptr); +#else + if (in_slice.Sub()->data != input_buffer_ptr) { + mli_mov_tensor_sync(in_slice.Sub(), ©_config, in_ptr); + input_buffer_ptr = in_slice.Sub()->data; + } + mli_krn_fully_connected_sa8_sa8_sa32(in_ptr, w_ptr, b_ptr, out_ptr); +#endif + + mli_mov_tensor_sync(out_ptr, ©_config, out_slice.Sub()); + + in_slice.Next(); + out_slice.Next(); + } + w_slice.Next(); + b_slice.Next(); + out_ch_slice.Next(); + } + return kTfLiteOk; +} + +TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node, + const OpData& data, const TfLiteEvalTensor* input, + const TfLiteEvalTensor* filter, + const TfLiteEvalTensor* bias, + TfLiteEvalTensor* output) { +#if !defined(TF_LITE_STRIP_REFERENCE_IMPL) + tflite::FullyConnectedParams op_params; + op_params.input_offset = -data.input_zero_point; + op_params.weights_offset = -data.filter_zero_point; + op_params.output_offset = data.output_zero_point; + op_params.output_multiplier = data.output_multiplier; + op_params.output_shift = -data.output_shift; + op_params.quantized_activation_min = data.output_activation_min; + op_params.quantized_activation_max = data.output_activation_max; + +#define TF_LITE_FULLY_CONNECTED(output_data_type) \ + reference_ops::FullyConnected( \ + op_params, tflite::micro::GetTensorShape(input), \ + tflite::micro::GetTensorData(input), \ + tflite::micro::GetTensorShape(filter), \ + tflite::micro::GetTensorData(filter), \ + tflite::micro::GetTensorShape(bias), \ + tflite::micro::GetTensorData(bias), \ + tflite::micro::GetTensorShape(output), \ + tflite::micro::GetTensorData(output)) + + switch (output->type) { + case kTfLiteUInt8: + #if EI_TFLITE_DISABLE_FULLY_CONNECTED_OUT_U8 + MicroPrintf("Type %s currently not supported.", + TfLiteTypeGetName(filter->type)); + return kTfLiteError; + #endif + + TF_LITE_FULLY_CONNECTED(uint8_t); + break; + case kTfLiteInt16: + #if EI_TFLITE_DISABLE_FULLY_CONNECTED_OUT_I16 + MicroPrintf("Type %s currently not supported.", + TfLiteTypeGetName(filter->type)); + return kTfLiteError; + #endif + + TF_LITE_FULLY_CONNECTED(int16_t); + break; + default: + MicroPrintf("Type %s (%d) not supported.", + TfLiteTypeGetName(output->type), output->type); + return kTfLiteError; + + return kTfLiteOk; +#else + MicroPrintf("Node configuration is not supported by ARC MLI Library."); + return kTfLiteError; +#endif + } +} + +TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node, + TfLiteFusedActivation activation, + const TfLiteEvalTensor* input, + const TfLiteEvalTensor* filter, + const TfLiteEvalTensor* bias, TfLiteEvalTensor* output) { +#if !defined(TF_LITE_STRIP_REFERENCE_IMPL) + float output_activation_min, 
output_activation_max; + CalculateActivationRange(activation, &output_activation_min, + &output_activation_max); + tflite::FullyConnectedParams op_params; + op_params.float_activation_min = output_activation_min; + op_params.float_activation_max = output_activation_max; + tflite::reference_ops::FullyConnected( + op_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetOptionalTensorData(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + return kTfLiteOk; +#else + MicroPrintf("Type %s (%d) is not supported by ARC MLI Library.", + TfLiteTypeGetName(input->type), input->type); + return kTfLiteError; +#endif +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->builtin_data != nullptr); + const auto* params = + static_cast(node->builtin_data); + + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + const TfLiteEvalTensor* filter = + tflite::micro::GetEvalInput(context, node, kWeightsTensor); + const TfLiteEvalTensor* bias = + tflite::micro::GetEvalInput(context, node, kBiasTensor); + + TFLITE_DCHECK(node->user_data != nullptr); + const OpData& data = *(static_cast(node->user_data)); + + // Checks in Prepare ensure input, output and filter types are all the same. + switch (input->type) { + case kTfLiteFloat32: + #if EI_TFLITE_DISABLE_FULLY_CONNECTED_IN_F32 + MicroPrintf("Type %s (%d) not supported.", + TfLiteTypeGetName(input->type), input->type); + return kTfLiteError; + #endif + + return EvalFloat(context, node, params->activation, input, filter, bias, + output); + case kTfLiteInt8: + #if EI_TFLITE_DISABLE_FULLY_CONNECTED_IN_I8 + MicroPrintf("Type %s (%d) not supported.", + TfLiteTypeGetName(input->type), input->type); + return kTfLiteError; + #endif + + if (data.is_mli_applicable) { + return EvalMliQuantizedInt8(context, node, params, data, input, filter, + bias, output); + } else { + return EvalQuantized(context, node, data, input, filter, bias, output); + } + + case kTfLiteUInt8: + #if EI_TFLITE_DISABLE_FULLY_CONNECTED_IN_U8 + MicroPrintf("Type %s (%d) not supported.", + TfLiteTypeGetName(input->type), input->type); + return kTfLiteError; + #endif + + return EvalQuantized(context, node, data, input, filter, bias, output); + + default: + MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type), + input->type); + return kTfLiteError; + } + return kTfLiteOk; +} + +TfLiteRegistration Register_FULLY_CONNECTED() { + return tflite::micro::RegisterOp(Init, Prepare, Eval); +} + +} // namespace tflite + +#elif EI_CLASSIFIER_TFLITE_ENABLE_SILABS_MVP == 1 +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/fully_connected.h" + +#include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnfunctions.h" +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include 
"edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/fully_connected.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "sl_mvp_ml_fully_connected.h" + +namespace tflite { +namespace sl { +namespace fully_connected { + +struct OpData { + int32_t output_multiplier; + int output_shift; + sli_mvp_ml_fully_connected_s8_params_t op_params; + float16_t *bias_fp16; + bool use_mvp; +}; + +constexpr int kInputTensor = 0; +constexpr int kWeightsTensor = 1; +constexpr int kBiasTensor = 2; +constexpr int kOutputTensor = 0; + +// TODO(b/169801227): This global struct is needed for the linker to drop unused +// code (for example, by using Register_FULLY_CONNECTED_INT8 instead of +// Register_FULLY_CONNECTED). +TfLiteRegistration fully_connected_registration; + +sli_shape_t dims2shape(const TfLiteIntArray *dim) +{ + TFLITE_DCHECK(dim->size <= 4); + + sli_shape_t shape = {0}; + for (int i = 0; i < dim->size; i++) { + shape.dim[i] = dim->data[i]; + } + return shape; +} + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(OpData)); +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + TFLITE_DCHECK(node->builtin_data != nullptr); + + OpData* data = static_cast(node->user_data); + TfLiteFullyConnectedParams* params = + reinterpret_cast(node->builtin_data); + const TfLiteTensor* input = GetInput(context, node, kInputTensor); + const TfLiteTensor* weight = GetInput(context, node, kWeightsTensor); + const TfLiteTensor* bias = GetInput(context, node, kBiasTensor); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + int32_t output_min; + int32_t output_max; + float16_t *bias_data = nullptr; + int bias_len = 0; + + TF_LITE_ENSURE(context, input != nullptr); + TF_LITE_ENSURE(context, output != nullptr); + + if (!(input->type == kTfLiteFloat32 || input->type == kTfLiteInt8)) { + // Unsupported datatype used by model + return kTfLiteError; + } + + if (bias) { + RuntimeShape bias_shape = GetTensorShape(bias); + bias_len = bias_shape.FlatSize(); + } + + if (input->type == kTfLiteInt8) { + TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized( + context, params->activation, output, &output_min, &output_max)); + + double real_multiplier = 0.0; + TF_LITE_ENSURE_STATUS(GetQuantizedConvolutionMultipler( + context, input, weight, bias, output, &real_multiplier)); + + data->op_params.input = GetTensorData(input); + data->op_params.input_shape = dims2shape(input->dims); + data->op_params.input_offset = -input->params.zero_point; + data->op_params.weight = GetTensorData(weight); + data->op_params.weight_shape = dims2shape(weight->dims); + data->op_params.weight_offset = -weight->params.zero_point; + data->op_params.bias = nullptr; + data->op_params.bias_length = bias_len; + data->op_params.output = GetTensorData(output); + data->op_params.output_shape = dims2shape(output->dims); + data->op_params.output_offset = output->params.zero_point; + data->op_params.output_multiplier = sli_mvp_ml_fully_connected_output_multiplier(real_multiplier); + data->op_params.activation_min = static_cast(output_min); + data->op_params.activation_max = static_cast(output_max); + + data->use_mvp = sli_mvp_ml_fully_connected_s8_is_supported(&data->op_params); + + if (data->use_mvp && bias) { + // Convert int32_t to float16_t as the 
MVP does not support loading int32 values. + const int32_t *bias_src = GetTensorData(bias); + bias_data = static_cast(context->AllocatePersistentBuffer(context, bias_len * sizeof(float16_t))); + if (bias_data == nullptr) { + return kTfLiteError; + } + sl_status_t status = sli_mvp_ml_fully_connected_bias_convert(bias_src, bias_data, bias_len); + if (status != SL_STATUS_OK) { + return kTfLiteError; + } + data->op_params.bias = bias_data; + } + + if (!data->use_mvp) { + // In this case we have to convert the output scale factor to a + // value in the TensorFlow fixed point format (Q.31 + shift) + int exponent; + QuantizeMultiplier(real_multiplier, &data->output_multiplier, &exponent); + data->output_shift = -exponent; + } + } + + return kTfLiteOk; +} + +TfLiteStatus EvalQuantizedInt8_MVP(TfLiteContext* context, TfLiteNode* node, + const OpData& data, + const TfLiteEvalTensor* input, + const TfLiteEvalTensor* filter, + const TfLiteEvalTensor* bias, + TfLiteEvalTensor* output) { + sli_mvp_ml_fully_connected_s8_params_t *params = const_cast(&data.op_params); + params->input = tflite::micro::GetTensorData(input); + params->output = tflite::micro::GetTensorData(output); + + sl_status_t result = sli_mvp_ml_fully_connected_s8(params); + if (result == SL_STATUS_OK) { + return kTfLiteOk; + } else { + return kTfLiteError; + } +} + +TfLiteStatus EvalQuantizedInt8(TfLiteContext* context, TfLiteNode* node, + const OpData& data, + const TfLiteEvalTensor* input, + const TfLiteEvalTensor* filter, + const TfLiteEvalTensor* bias, + TfLiteEvalTensor* output) { + if (data.use_mvp && input->type == kTfLiteInt8) { + return EvalQuantizedInt8_MVP(context, node, data, input, filter, bias, output); + } + + // The 'if' condition can be removed when null handling of bias is added to + // arm_fully_connected_s8 + if (nullptr != tflite::micro::GetTensorData(bias)) { + const RuntimeShape output_shape = tflite::micro::GetTensorShape(output); + TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 2); + const int batches = output_shape.Dims(0); + const int output_depth = output_shape.Dims(1); + const RuntimeShape filter_shape = tflite::micro::GetTensorShape(filter); + const int filter_dim_count = filter_shape.DimensionsCount(); + const int accum_depth = filter_shape.Dims(filter_dim_count - 1); + const RuntimeShape input_shape = tflite::micro::GetTensorShape(input); + + cmsis_nn_fc_params fc_params; + fc_params.input_offset = data.op_params.input_offset; + fc_params.output_offset = data.op_params.output_offset; + fc_params.filter_offset = data.op_params.weight_offset; + fc_params.activation.min = data.op_params.activation_min; + fc_params.activation.max = data.op_params.activation_max; + + cmsis_nn_per_tensor_quant_params quant_params; + quant_params.multiplier = data.output_multiplier; + // TODO(b/138810107): Figure out whether output shift should be inverted + quant_params.shift = -data.output_shift; + + cmsis_nn_dims input_dims; + input_dims.n = batches; + input_dims.h = 1; + input_dims.w = 1; + input_dims.c = accum_depth; + + cmsis_nn_dims filter_dims; + filter_dims.n = accum_depth; + filter_dims.h = 1; + filter_dims.w = 1; + filter_dims.c = output_depth; + + cmsis_nn_dims bias_dims; + bias_dims.n = 1; + bias_dims.h = 1; + bias_dims.w = 1; + bias_dims.c = output_depth; + + cmsis_nn_dims output_dims; + output_dims.n = batches; + output_dims.h = 1; + output_dims.w = 1; + output_dims.c = output_depth; + + cmsis_nn_context ctx; + ctx.buf = nullptr; + ctx.size = 0; + + TF_LITE_ENSURE_EQ( + context, + arm_fully_connected_s8( + &ctx, 
&fc_params, &quant_params, &input_dims, + tflite::micro::GetTensorData(input), &filter_dims, + tflite::micro::GetTensorData(filter), &bias_dims, + tflite::micro::GetTensorData(bias), &output_dims, + tflite::micro::GetTensorData(output)), + ARM_MATH_SUCCESS); + } else { + tflite::FullyConnectedParams op_params; + op_params.input_offset = data.op_params.input_offset; + op_params.weights_offset = data.op_params.weight_offset; + op_params.output_offset = data.op_params.output_offset; + op_params.output_multiplier = data.output_multiplier; + // TODO(b/138810107): Figure out whether output shift should be inverted + op_params.output_shift = -data.output_shift; + op_params.quantized_activation_min = data.op_params.activation_min; + op_params.quantized_activation_max = data.op_params.activation_max; + + reference_integer_ops::FullyConnected( + op_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorData(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } + return kTfLiteOk; +} + +TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node, + TfLiteFusedActivation activation, + const TfLiteEvalTensor* input, + const TfLiteEvalTensor* filter, + const TfLiteEvalTensor* bias, TfLiteEvalTensor* output) { + float output_activation_min, output_activation_max; + CalculateActivationRange(activation, &output_activation_min, + &output_activation_max); + tflite::FullyConnectedParams op_params; + op_params.float_activation_min = output_activation_min; + op_params.float_activation_max = output_activation_max; + tflite::reference_ops::FullyConnected( + op_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetOptionalTensorData(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + return kTfLiteOk; +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->builtin_data != nullptr); + const auto* params = + static_cast(node->builtin_data); + + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + const TfLiteEvalTensor* filter = + tflite::micro::GetEvalInput(context, node, kWeightsTensor); + const TfLiteEvalTensor* bias = + tflite::micro::GetEvalInput(context, node, kBiasTensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + + TFLITE_DCHECK(node->user_data != nullptr); + const OpData& data = *(static_cast(node->user_data)); + + switch (input->type) { + case kTfLiteFloat32: + #if EI_TFLITE_DISABLE_FULLY_CONNECTED_IN_F32 + TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", + TfLiteTypeGetName(input->type), input->type); + return kTfLiteError; + #endif + + return EvalFloat(context, node, params->activation, input, filter, bias, + output); + case kTfLiteInt8: + #if EI_TFLITE_DISABLE_FULLY_CONNECTED_IN_I8 + TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", + TfLiteTypeGetName(input->type), input->type); + return kTfLiteError; + #endif + + return EvalQuantizedInt8(context, node, data, input, filter, bias, + output); + + default: + TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", + TfLiteTypeGetName(input->type), input->type); + return kTfLiteError; + } + 
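// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the SDK sources): both the CMSIS-NN call
// and the reference fallback above compute, per output element, an int32
// accumulation followed by a requantization with the multiplier/shift pair
// prepared in Prepare(). A scalar version of that arithmetic, using plain
// 64-bit rounding instead of TFLite's exact rounding-doubling helpers, looks
// like this; the function name is made up for the illustration. output_shift
// follows the TFLite convention (negative means shift right) and the usual
// case of a net right shift is assumed.
#include <algorithm>
#include <cstdint>

int8_t QuantizedFullyConnectedElementSketch(
    const int8_t* input_row, const int8_t* weight_row, int accum_depth,
    int32_t bias, int32_t input_offset, int32_t weight_offset,
    int32_t output_multiplier, int output_shift, int32_t output_offset,
    int32_t act_min, int32_t act_max) {
  int32_t acc = bias;
  for (int c = 0; c < accum_depth; ++c) {
    acc += (static_cast<int32_t>(input_row[c]) + input_offset) *
           (static_cast<int32_t>(weight_row[c]) + weight_offset);
  }
  // Requantize: scale the accumulator by output_multiplier * 2^output_shift,
  // where output_multiplier is a Q31 fixed-point value.
  int64_t scaled = static_cast<int64_t>(acc) * output_multiplier;
  const int total_shift = 31 - output_shift;
  scaled += int64_t{1} << (total_shift - 1);  // round half up
  const int32_t out =
      static_cast<int32_t>(scaled >> total_shift) + output_offset;
  return static_cast<int8_t>(std::min(act_max, std::max(act_min, out)));
}
// ---------------------------------------------------------------------------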
return kTfLiteOk; +} + +// Note that the current function names are not ideal at all (this EvalInt8 +// function internally calls EvalQuantizedInt8, and there is similar name +// aliasing in the Eval function too). We will be attempting to have a more +// descriptive naming convention but holding off on that for now, since the +// renaming might be coupled with reducing code duplication and some additional +// refactoring. +TfLiteStatus EvalInt8(TfLiteContext* context, TfLiteNode* node) { + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + const TfLiteEvalTensor* filter = + tflite::micro::GetEvalInput(context, node, kWeightsTensor); + const TfLiteEvalTensor* bias = + tflite::micro::GetEvalInput(context, node, kBiasTensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + + TFLITE_DCHECK(node->user_data != nullptr); + const OpData& data = *(static_cast(node->user_data)); + + // Checks in Prepare ensure input, output and filter types are all the same. + if (input->type != kTfLiteInt8) { + TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", + TfLiteTypeGetName(input->type), input->type); + return kTfLiteError; + } + + return EvalQuantizedInt8(context, node, data, input, filter, bias, output); +} + +} // namespace fully_connected +} // namespace sl + +TfLiteRegistration Register_FULLY_CONNECTED() { + return {/*init*/sl::fully_connected::Init, + /*free*/nullptr, + /*prepare*/sl::fully_connected::Prepare, + /*invoke*/sl::fully_connected::Eval, + /*profiling_string*/nullptr, + /*builtin_code*/0, + /*custom_name*/nullptr, + /*version*/0}; +} + +TfLiteRegistration Register_FULLY_CONNECTED_INT8() { + return {/*init*/sl::fully_connected::Init, + /*free*/nullptr, + /*prepare*/sl::fully_connected::Prepare, + /*invoke*/sl::fully_connected::EvalInt8, + /*profiling_string*/nullptr, + /*builtin_code*/0, + /*custom_name*/nullptr, + /*version*/0}; +} + +} // namespace tflite + +#elif EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN == 1 +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/fully_connected.h" + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/fully_connected.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +#if ESP_NN +#include "edge-impulse-sdk/porting/espressif/ESP-NN/include/esp_nn.h" +#endif + +#include + +long long fc_total_time = 0; + +namespace tflite { +namespace { + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, + sizeof(OpDataFullyConnected)); +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + MicroContext* micro_context = GetMicroContext(context); + + TFLITE_DCHECK(node->user_data != nullptr); + TFLITE_DCHECK(node->builtin_data != nullptr); + + auto* data = static_cast(node->user_data); + const auto params = + static_cast(node->builtin_data); + + TfLiteTensor* input = + micro_context->AllocateTempInputTensor(node, kFullyConnectedInputTensor); + TF_LITE_ENSURE(context, input != nullptr); + TfLiteTensor* filter = micro_context->AllocateTempInputTensor( + node, kFullyConnectedWeightsTensor); + TF_LITE_ENSURE(context, filter != nullptr); + TfLiteTensor* bias = + micro_context->AllocateTempInputTensor(node, kFullyConnectedBiasTensor); + TfLiteTensor* output = micro_context->AllocateTempOutputTensor( + node, kFullyConnectedOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); + + TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type); + TF_LITE_ENSURE_MSG(context, input->type == filter->type, + "Hybrid models are not supported on TFLite Micro."); + + TF_LITE_ENSURE_OK(context, CalculateOpDataFullyConnected( + context, params->activation, input->type, + input, filter, bias, output, data)); + + micro_context->DeallocateTempTfLiteTensor(input); + micro_context->DeallocateTempTfLiteTensor(filter); + if (bias != nullptr) { + micro_context->DeallocateTempTfLiteTensor(bias); + } + micro_context->DeallocateTempTfLiteTensor(output); + return kTfLiteOk; +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->builtin_data != nullptr); + const auto* params = + static_cast(node->builtin_data); + + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kFullyConnectedInputTensor); + const TfLiteEvalTensor* filter = + tflite::micro::GetEvalInput(context, node, kFullyConnectedWeightsTensor); + const TfLiteEvalTensor* bias = + tflite::micro::GetEvalInput(context, node, kFullyConnectedBiasTensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kFullyConnectedOutputTensor); + + TFLITE_DCHECK(node->user_data != nullptr); + const auto& data = + *(static_cast(node->user_data)); + + long long start_time = esp_timer_get_time(); + // Checks in Prepare ensure 
input, output and filter types are all the same. + switch (input->type) { + case kTfLiteFloat32: { +#if EI_TFLITE_DISABLE_FULLY_CONNECTED_IN_F32 + TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", + TfLiteTypeGetName(input->type), input->type); + return kTfLiteError; +#endif + tflite::reference_ops::FullyConnected( + FullyConnectedParamsFloat(params->activation), + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetOptionalTensorData(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + break; + } + + case kTfLiteInt8: { +#if EI_TFLITE_DISABLE_FULLY_CONNECTED_IN_I8 + TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", + TfLiteTypeGetName(input->type), input->type); + return kTfLiteError; +#endif + const int32_t* bias_data = + nullptr != bias ? tflite::micro::GetTensorData(bias) + : nullptr; +#if ESP_NN + const RuntimeShape& filter_shape = tflite::micro::GetTensorShape(filter); + const RuntimeShape& output_shape = tflite::micro::GetTensorShape(output); + const int filter_dim_count = filter_shape.DimensionsCount(); + const int batches = output_shape.Dims(0); + const int output_depth = output_shape.Dims(1); + TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2)); + const int accum_depth = filter_shape.Dims(filter_dim_count - 1); + + const int8_t *input_data = tflite::micro::GetTensorData(input); + int8_t *output_data = tflite::micro::GetTensorData(output); + const int8_t *filter_data = tflite::micro::GetTensorData(filter); + + for (int b = 0; b < batches; ++b) { + esp_nn_fully_connected_s8(input_data, -data.input_zero_point, + accum_depth, + filter_data, -data.filter_zero_point, + bias_data, output_data, output_depth, + data.output_zero_point, + data.output_shift, data.output_multiplier, + data.output_activation_min, + data.output_activation_max); + input_data += accum_depth; + output_data += output_depth; + } +#else + tflite::reference_integer_ops::FullyConnected( + FullyConnectedParamsQuantized(data), + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(bias), bias_data, + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); +#endif + break; + } + + case kTfLiteUInt8: { +#if EI_TFLITE_DISABLE_FULLY_CONNECTED_IN_U8 + TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", + TfLiteTypeGetName(input->type), input->type); + return kTfLiteError; +#endif + tflite::reference_ops::FullyConnected( + FullyConnectedParamsQuantized(data), + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetOptionalTensorData(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + break; + } + default: { + TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", + TfLiteTypeGetName(input->type), input->type); + return kTfLiteError; + } + } + fc_total_time += esp_timer_get_time() - start_time; + return kTfLiteOk; +} + +} // namespace + +TfLiteRegistration Register_FULLY_CONNECTED() { + return tflite::micro::RegisterOp(Init, Prepare, Eval); +} + +} // namespace tflite + +#else +/* Copyright 2022 The TensorFlow Authors. 
All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/fully_connected.h" + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/portable_tensor_utils.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/fully_connected.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +namespace tflite { +namespace { + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, + sizeof(OpDataFullyConnected)); +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + MicroContext* micro_context = GetMicroContext(context); + + TFLITE_DCHECK(node->user_data != nullptr); + TFLITE_DCHECK(node->builtin_data != nullptr); + + auto* data = static_cast(node->user_data); + const auto params = + static_cast(node->builtin_data); + + TfLiteTensor* input = + micro_context->AllocateTempInputTensor(node, kFullyConnectedInputTensor); + TF_LITE_ENSURE(context, input != nullptr); + TfLiteTensor* filter = micro_context->AllocateTempInputTensor( + node, kFullyConnectedWeightsTensor); + TF_LITE_ENSURE(context, filter != nullptr); + TfLiteTensor* bias = + micro_context->AllocateTempInputTensor(node, kFullyConnectedBiasTensor); + TfLiteTensor* output = micro_context->AllocateTempOutputTensor( + node, kFullyConnectedOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); + TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type); + + if (filter->type == kTfLiteInt4) { + int filter_size = + RuntimeShape(filter->dims->size, + reinterpret_cast(filter->dims->data)) + .FlatSize(); + context->RequestScratchBufferInArena(context, filter_size, + &data->filter_buffer_index); + } + + TF_LITE_ENSURE_OK(context, CalculateOpDataFullyConnected( + context, params->activation, input->type, + input, filter, bias, output, data)); + + micro_context->DeallocateTempTfLiteTensor(input); + micro_context->DeallocateTempTfLiteTensor(filter); + if (bias != nullptr) { + micro_context->DeallocateTempTfLiteTensor(bias); + } + micro_context->DeallocateTempTfLiteTensor(output); + return kTfLiteOk; +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->builtin_data != nullptr); + const auto* params = + static_cast(node->builtin_data); + + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kFullyConnectedInputTensor); + const TfLiteEvalTensor* filter = + tflite::micro::GetEvalInput(context, node, kFullyConnectedWeightsTensor); + const TfLiteEvalTensor* bias = + 
tflite::micro::GetEvalInput(context, node, kFullyConnectedBiasTensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kFullyConnectedOutputTensor); + + TFLITE_DCHECK(node->user_data != nullptr); + + const auto& data = + *(static_cast(node->user_data)); + + // Checks in Prepare ensure input, output and filter types are all the same. + switch (input->type) { + case kTfLiteFloat32: { +#if EI_TFLITE_DISABLE_FULLY_CONNECTED_IN_F32 + MicroPrintf("Type %s (%d) not supported.", + TfLiteTypeGetName(input->type), input->type); + return kTfLiteError; +#endif + tflite::reference_ops::FullyConnected( + FullyConnectedParamsFloat(params->activation), + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetOptionalTensorData(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + break; + } + + case kTfLiteInt8: { +#if EI_TFLITE_DISABLE_FULLY_CONNECTED_IN_I8 + MicroPrintf("Type %s (%d) not supported.", + TfLiteTypeGetName(input->type), input->type); + return kTfLiteError; +#endif + switch (filter->type) { + case kTfLiteInt4: { + int8_t* unpacked_filter_data = static_cast( + context->GetScratchBuffer(context, data.filter_buffer_index)); + tflite::tensor_utils::UnpackDenseInt4IntoInt8( + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(filter).FlatSize(), + unpacked_filter_data); + tflite::reference_integer_ops::FullyConnected( + FullyConnectedParamsQuantized(data), + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(filter), unpacked_filter_data, + tflite::micro::GetTensorShape(bias), + tflite::micro::GetOptionalTensorData(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + break; + } + case kTfLiteInt8: { + tflite::reference_integer_ops::FullyConnected( + FullyConnectedParamsQuantized(data), + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetOptionalTensorData(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + break; + } + default: { + MicroPrintf("Filter type %s (%d) not supported.", + TfLiteTypeGetName(filter->type), input->type); + return kTfLiteError; + } + } + break; + } + + case kTfLiteInt16: { + switch (filter->type) { + case kTfLiteInt8: { + tflite::reference_integer_ops::FullyConnected( + FullyConnectedParamsQuantized(data), + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetOptionalTensorData(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + break; + } + default: { + MicroPrintf("Filter type %s (%d) not supported.", + TfLiteTypeGetName(filter->type), input->type); + return kTfLiteError; + } + } + break; + } + + default: { + MicroPrintf("Input type %s (%d) not supported.", + TfLiteTypeGetName(input->type), input->type); + return kTfLiteError; + } + } + return kTfLiteOk; +} + +} // namespace + +TfLiteRegistration Register_FULLY_CONNECTED() { + return tflite::micro::RegisterOp(Init, Prepare, Eval); +} + +} // namespace tflite + +#endif diff --git 
a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/fully_connected.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/fully_connected.h new file mode 100644 index 0000000..b245abe --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/fully_connected.h @@ -0,0 +1,112 @@ +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_MICRO_KERNELS_FULLY_CONNECTED_H_ +#define TENSORFLOW_LITE_MICRO_KERNELS_FULLY_CONNECTED_H_ + +#include + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" + +namespace tflite { + +struct OpDataFullyConnected { + // The scaling factor from input to output (aka the 'real multiplier') can + // be represented as a fixed point multiplier plus a left shift. + int32_t output_multiplier; + int output_shift; + // The range of the fused activation layer. For example for kNone and + // uint8_t these would be 0 and 255. + int32_t output_activation_min; + int32_t output_activation_max; + // The index of the temporary tensor where the quantized inputs are cached. + int input_quantized_index; + // Cached zero point values of tensors. + int32_t input_zero_point; + int32_t filter_zero_point; + int32_t output_zero_point; + +// TODO(b/258710417): enable by default once optimized fully-connected works for +// all targets. +#if !defined(HEXAGON) + // A buffer used to store unpacked filter values. This is used if the source + // tensor is of n-bit precision that cannot be easily processed by kernels. + int filter_buffer_index; +#endif +}; + +extern const int kFullyConnectedInputTensor; +extern const int kFullyConnectedWeightsTensor; +extern const int kFullyConnectedBiasTensor; +extern const int kFullyConnectedOutputTensor; + +// Returns a FullyConnectedParams struct with all the parameters needed for a +// float computation. +FullyConnectedParams FullyConnectedParamsFloat( + TfLiteFusedActivation activation); + +// Returns a FullyConnectedParams struct with all the parameters needed for a +// quantized computation. +FullyConnectedParams FullyConnectedParamsQuantized( + const OpDataFullyConnected& op_data); + +TfLiteStatus CalculateOpDataFullyConnected( + TfLiteContext* context, TfLiteFusedActivation activation, + TfLiteType data_type, const TfLiteTensor* input, const TfLiteTensor* filter, + const TfLiteTensor* bias, TfLiteTensor* output, OpDataFullyConnected* data); + +// This is the most generic TfLiteRegistration. The actual supported types may +// still be target dependent. The only requirement is that every implementation +// (reference or optimized) must define this function. 
+TfLiteRegistration Register_FULLY_CONNECTED(); + +#if defined(CMSIS_NN) || defined(HEXAGON) || defined(EI_CLASSIFIER_TFLITE_ENABLE_SILABS_MVP) +// Returns a TfLiteRegistration struct for kernel variant that only supports +// int8. +TfLiteRegistration Register_FULLY_CONNECTED_INT8(); + +#else +// Note that while this block gets used for both reference and optimized kernels +// that do not have any specialized implementations, the only goal here is to +// define fallback implementation that allow reference kernels to still be used +// from applications that call a more specific kernel variant. + +inline TfLiteRegistration Register_FULLY_CONNECTED_INT8() { + return Register_FULLY_CONNECTED(); +} + +#endif + +#if defined(CMSIS_NN) +// Returns a TfLiteRegistration struct for kernel variant that only supports +// int16. +TfLiteRegistration Register_FULLY_CONNECTED_INT16(); + +#else +// Note that while this block gets used for both reference and optimized kernels +// that do not have any specialized implementations, the only goal here is to +// define fallback implementation that allow reference kernels to still be used +// from applications that call a more specific kernel variant. + +inline TfLiteRegistration Register_FULLY_CONNECTED_INT16() { + return Register_FULLY_CONNECTED(); +} + +#endif + +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_KERNELS_FULLY_CONNECTED_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/fully_connected_common.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/fully_connected_common.cpp new file mode 100644 index 0000000..d38ea3e --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/fully_connected_common.cpp @@ -0,0 +1,83 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/fully_connected.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/fully_connected.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" + +namespace tflite { + +const int kFullyConnectedInputTensor = 0; +const int kFullyConnectedWeightsTensor = 1; +const int kFullyConnectedBiasTensor = 2; +const int kFullyConnectedOutputTensor = 0; + +FullyConnectedParams FullyConnectedParamsQuantized( + const OpDataFullyConnected& op_data) { + FullyConnectedParams op_params; + op_params.input_offset = -op_data.input_zero_point; + op_params.weights_offset = -op_data.filter_zero_point; + op_params.output_offset = op_data.output_zero_point; + op_params.output_multiplier = op_data.output_multiplier; + op_params.output_shift = op_data.output_shift; + op_params.quantized_activation_min = op_data.output_activation_min; + op_params.quantized_activation_max = op_data.output_activation_max; + return op_params; +} + +FullyConnectedParams FullyConnectedParamsFloat( + TfLiteFusedActivation activation) { + FullyConnectedParams op_params; + CalculateActivationRange(activation, &op_params.float_activation_min, + &op_params.float_activation_max); + return op_params; +} + +TfLiteStatus CalculateOpDataFullyConnected( + TfLiteContext* context, TfLiteFusedActivation activation, + TfLiteType data_type, const TfLiteTensor* input, const TfLiteTensor* filter, + const TfLiteTensor* bias, TfLiteTensor* output, + OpDataFullyConnected* data) { + if (data_type != kTfLiteFloat32) { + double real_multiplier = 0.0; + TF_LITE_ENSURE_STATUS(GetQuantizedConvolutionMultipler( + context, input, filter, bias, output, &real_multiplier)); + QuantizeMultiplier(real_multiplier, &data->output_multiplier, + &data->output_shift); + + data->input_zero_point = input->params.zero_point; + // Filter weights will always be symmetric quantized since we only support + // int8 quantization. See + // https://github.com/tensorflow/tensorflow/issues/44912 for additional + // context. + TFLITE_DCHECK(filter->params.zero_point == 0); + data->filter_zero_point = filter->params.zero_point; + data->output_zero_point = output->params.zero_point; + + return CalculateActivationRangeQuantized(context, activation, output, + &data->output_activation_min, + &data->output_activation_max); + } + return kTfLiteOk; +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/gather.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/gather.cpp new file mode 100644 index 0000000..4fb05d8 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/gather.cpp @@ -0,0 +1,226 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TF_LITE_STATIC_MEMORY
+
+#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h"
+
+namespace tflite {
+namespace {
+
+constexpr int kInputTensor = 0;
+constexpr int kInputPositions = 1;
+constexpr int kOutputTensor = 0;
+
+template <typename InputT, typename CoordsT = int32_t>
+TfLiteStatus Gather(const TfLiteGatherParams* params,
+                    const TfLiteEvalTensor* input,
+                    const TfLiteEvalTensor* coords, TfLiteEvalTensor* output) {
+  const InputT* input_data = tflite::micro::GetTensorData<InputT>(input);
+  const CoordsT* coords_data = tflite::micro::GetTensorData<CoordsT>(coords);
+  InputT* output_data = tflite::micro::GetTensorData<InputT>(output);
+  const TfLiteIntArray* input_dims = input->dims;
+  const int input_dims_size = input_dims->size;
+  int axis = params->axis;
+  if (axis < 0) {
+    axis += input_dims_size;
+  }
+  TFLITE_DCHECK_GE(axis, 0);
+  TFLITE_DCHECK_LT(axis, input_dims_size);
+
+  int batch_dims = params->batch_dims;
+  // batch_dims should be in range: [-rank(coords), rank(coords)].
+  // Negative batch_dims is added with rank of coords.
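// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the SDK sources): with batch_dims == 0 the
// copy loop further below degenerates to "pick rows along `axis`". For
// example, with input shape {3, 2}, coords = {2, 0} and axis = 0, the output
// has shape {2, 2} and each iteration memcpy's one inner_size-sized row. A
// minimal standalone version of that case; the function name is made up for
// the illustration.
#include <cassert>
#include <cstdint>
#include <cstring>
#include <vector>

std::vector<float> GatherRowsSketch(const std::vector<float>& input,
                                    int num_rows, int row_len,
                                    const std::vector<int32_t>& coords) {
  std::vector<float> output(coords.size() * row_len);
  for (size_t i = 0; i < coords.size(); ++i) {
    // coords[i] selects which input row lands at output row i (mirrors the
    // TFLITE_DCHECK bounds checks in the kernel).
    assert(coords[i] >= 0 && coords[i] < num_rows);
    std::memcpy(output.data() + i * row_len,
                input.data() + coords[i] * row_len, row_len * sizeof(float));
  }
  return output;
}
// e.g. GatherRowsSketch({0, 1, 2, 3, 4, 5}, /*num_rows=*/3, /*row_len=*/2,
//                       {2, 0}) returns {4, 5, 0, 1}.
// ---------------------------------------------------------------------------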
+ const TfLiteIntArray* coords_dims = coords->dims; + const int coords_dims_size = coords_dims->size; + if (batch_dims < 0) { + batch_dims += coords_dims_size; + } + TFLITE_DCHECK_GE(batch_dims, 0); + TFLITE_DCHECK_LT(batch_dims, input_dims_size); + TFLITE_DCHECK_LE(batch_dims, coords_dims_size); + TFLITE_DCHECK_GE(axis, batch_dims); + for (int i = 0; i < batch_dims; ++i) { + TFLITE_DCHECK_EQ(input_dims->data[i], coords_dims->data[i]); + } + + const int axis_size = input_dims->data[axis]; + + int batch_size = 1; + for (int i = 0; i < batch_dims; ++i) { + batch_size *= input_dims->data[i]; + } + int outer_size = 1; + for (int i = batch_dims; i < axis; ++i) { + outer_size *= input_dims->data[i]; + } + int inner_size = 1; + for (int i = axis + 1; i < input_dims_size; ++i) { + inner_size *= input_dims->data[i]; + } + int coord_size = 1; + for (int i = batch_dims; i < coords_dims_size; ++i) { + coord_size *= coords_dims->data[i]; + } + + for (int batch = 0; batch < batch_size; ++batch) { + for (int outer = 0; outer < outer_size; ++outer) { + for (int coord = 0; coord < coord_size; ++coord) { + TFLITE_DCHECK_GE(coords_data[coord], 0); + TFLITE_DCHECK_LT(coords_data[coord], axis_size); + std::memcpy(output_data + + (((batch * outer_size) + outer) * coord_size + coord) * + inner_size, + input_data + (((batch * outer_size) + outer) * axis_size + + coords_data[batch * coord_size + coord]) * + inner_size, + sizeof(InputT) * inner_size); + } + } + } + return kTfLiteOk; +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + MicroContext* micro_context = GetMicroContext(context); + + TF_LITE_ENSURE_EQ(context, NumInputs(node), 2); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + + const auto* params = + reinterpret_cast(node->builtin_data); + TfLiteTensor* input = + micro_context->AllocateTempInputTensor(node, kInputTensor); + TF_LITE_ENSURE(context, input != nullptr); + TfLiteTensor* coords = + micro_context->AllocateTempInputTensor(node, kInputPositions); + TF_LITE_ENSURE(context, coords != nullptr); + TfLiteTensor* output = + micro_context->AllocateTempOutputTensor(node, kOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); + + switch (coords->type) { + case kTfLiteInt32: + break; + default: + MicroPrintf("Positions of type '%s' are not supported by gather.", + TfLiteTypeGetName(coords->type)); + return kTfLiteError; + break; + } + + // Assign to output the input type. + output->type = input->type; + + // Check conditions for different types. + switch (input->type) { + case kTfLiteFloat32: + case kTfLiteInt8: + break; + default: + MicroPrintf("Type '%s' is not supported by gather.", + TfLiteTypeGetName(input->type)); + return kTfLiteError; + break; + } + + int axis = params->axis; + if (axis < 0) { + axis += NumDimensions(input); + } + TF_LITE_ENSURE(context, 0 <= axis && axis < NumDimensions(input)); + + int batch_dims = params->batch_dims; + // batch_dims should be in range: [-rank(coords), rank(coords)]. + // Negative batch_dims is added with rank of coords. + if (batch_dims < 0) { + batch_dims += NumDimensions(coords); + } + TF_LITE_ENSURE(context, batch_dims <= axis); + TF_LITE_ENSURE(context, 0 <= batch_dims && batch_dims < NumDimensions(input)); + TF_LITE_ENSURE(context, batch_dims <= NumDimensions(coords)); + for (int i = 0; i < batch_dims; ++i) { + TF_LITE_ENSURE_EQ(context, input->dims->data[i], coords->dims->data[i]); + } + + // GATHER updates the output tensor dimensions, but TfLiteTensor in the + // MicroInterpreter is a temporary allocation. 
We must therefore relocate the + // dims from the FlatBuffer to the persistant storage arena. + TfLiteEvalTensor* output_eval = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + TF_LITE_ENSURE_OK(context, tflite::micro::CreateWritableTensorDimsWithCopy( + context, output, output_eval)); + + TfLiteIntArray* output_shape = output->dims; + output_shape->size = + NumDimensions(input) + NumDimensions(coords) - 1 - batch_dims; + int output_index = 0; + for (int i = 0; i < axis; ++i) { + output_shape->data[output_index++] = input->dims->data[i]; + } + for (int i = batch_dims; i < coords->dims->size; ++i) { + output_shape->data[output_index++] = coords->dims->data[i]; + } + for (int i = axis + 1; i < input->dims->size; ++i) { + output_shape->data[output_index++] = input->dims->data[i]; + } + + micro_context->DeallocateTempTfLiteTensor(input); + micro_context->DeallocateTempTfLiteTensor(coords); + micro_context->DeallocateTempTfLiteTensor(output); + + return kTfLiteOk; +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + const auto* params = + reinterpret_cast(node->builtin_data); + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + const TfLiteEvalTensor* coords = + tflite::micro::GetEvalInput(context, node, kInputPositions); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + + if (coords->type == kTfLiteInt32) { + switch (input->type) { + case kTfLiteFloat32: + return Gather(params, input, coords, output); + break; + case kTfLiteInt8: + return Gather(params, input, coords, output); + break; + default: + MicroPrintf("Type '%s' is not supported by gather.", + TfLiteTypeGetName(input->type)); + return kTfLiteError; + break; + } + } + return kTfLiteOk; +} +} // namespace + +TfLiteRegistration Register_GATHER() { + return tflite::micro::RegisterOp(nullptr, Prepare, Eval); +} + +} // namespace tflite +#endif // TF_LITE_STATIC_MEMORY \ No newline at end of file diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/gather_nd.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/gather_nd.cpp new file mode 100644 index 0000000..5e4b261 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/gather_nd.cpp @@ -0,0 +1,212 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h" + +namespace tflite { +namespace { + +constexpr int kParams = 0; +constexpr int kIndices = 1; +constexpr int kOutputTensor = 0; +constexpr int MAX_INDICES_ND = 5; + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + MicroContext* micro_context = GetMicroContext(context); + + TF_LITE_ENSURE_EQ(context, NumInputs(node), 2); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + + TfLiteTensor* params = micro_context->AllocateTempInputTensor(node, kParams); + TF_LITE_ENSURE(context, params != nullptr); + TfLiteTensor* indices = + micro_context->AllocateTempInputTensor(node, kIndices); + TF_LITE_ENSURE(context, indices != nullptr); + TfLiteTensor* output = + micro_context->AllocateTempOutputTensor(node, kOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); + + switch (params->type) { + case kTfLiteFloat32: + case kTfLiteInt8: + break; + default: + MicroPrintf("Params of type '%s' are not supported by gather_nd.", + TfLiteTypeGetName(params->type)); + return kTfLiteError; + break; + } + switch (indices->type) { + case kTfLiteInt32: + break; + default: + MicroPrintf("Indices of type '%s' are not supported by gather_nd.", + TfLiteTypeGetName(indices->type)); + return kTfLiteError; + } + + const int params_rank = NumDimensions(params); + const int indices_rank = NumDimensions(indices); + const int indices_nd = SizeOfDimension(indices, indices_rank - 1); + if (params_rank < 1) { + MicroPrintf("Params must be at least a vector."); + return kTfLiteError; + } + if (indices_rank < 1) { + MicroPrintf("Indices must be at least a vector."); + return kTfLiteError; + } + if (indices_nd > params_rank) { + MicroPrintf("Index innermost dimension length must be <= params rank."); + return kTfLiteError; + } + if (indices_nd > MAX_INDICES_ND) { + MicroPrintf("Index innermost dimension length must not exceed %d.", + MAX_INDICES_ND); + return kTfLiteError; + } + + // Assign to output the input type. + output->type = params->type; + + // The tensor output dims must be relocated + // from the FlatBuffer to the persistant storage arena. + TfLiteEvalTensor* output_eval = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + TF_LITE_ENSURE_OK(context, tflite::micro::CreateWritableTensorDimsWithCopy( + context, output, output_eval)); + + // TFLM gather_nd does not create the output tensor, but it needs to ensure + // that the output shape is correct. 
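+  // As a hypothetical shape walk-through: params of shape [4, 5, 6] with
+  // indices of shape [2, 3, 2] gives indices_nd == 2 and an output shape of
+  // [2, 3, 6], i.e. the rule spelled out next.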
The result shape is + // indices.shape[:-1] + params.shape[indices.shape[-1]:] + TfLiteIntArray* output_shape = output->dims; + int output_index = 0; + for (int i = 0; i < indices_rank - 1; ++i) { + output_shape->data[output_index++] = indices->dims->data[i]; + } + for (int i = indices_nd; i < params_rank; ++i) { + output_shape->data[output_index++] = params->dims->data[i]; + } + output_shape->size = output_index; + + micro_context->DeallocateTempTfLiteTensor(params); + micro_context->DeallocateTempTfLiteTensor(indices); + micro_context->DeallocateTempTfLiteTensor(output); + return kTfLiteOk; +} + +template +TfLiteStatus GatherNd(const TfLiteEvalTensor* params, + const TfLiteEvalTensor* indices, + TfLiteEvalTensor* output) { + const int indices_dims = indices->dims->size; + const int indices_nd = indices->dims->data[indices_dims - 1]; + const int params_dims = params->dims->size; + const IndicesT* index_data = tflite::micro::GetTensorData(indices); + const ParamsT* param_data = tflite::micro::GetTensorData(params); + ParamsT* output_data = tflite::micro::GetTensorData(output); + + int n_slices = 1; + for (int i = 0; i < indices_dims - 1; ++i) { + n_slices *= indices->dims->data[i]; + } + + // If indices[-1] == params.rank, fetch single elements. + // If indices[-1] < params.rank, fetch slices. + int slice_size = 1; + for (int i = indices_nd; i < params_dims; ++i) { + slice_size *= params->dims->data[i]; + } + + int params_flat_size = ElementCount(*params->dims); + int remain_flat_size = params_flat_size; + + // Number of elements per dimension + int dims_to_count[MAX_INDICES_ND]; + for (int i = 0; i < indices_nd; ++i) { + dims_to_count[i] = remain_flat_size / params->dims->data[i]; + remain_flat_size = dims_to_count[i]; + } + + for (int i = 0; i < n_slices; ++i) { + int from_pos = 0; + for (int j = 0; j < indices_nd; ++j) { + int offset = i * indices_nd + j; + IndicesT index = index_data[offset]; + from_pos += index * dims_to_count[j]; + } + if (from_pos < 0 || from_pos + slice_size > params_flat_size) { + return kTfLiteError; + } + std::memcpy(output_data + i * slice_size, param_data + from_pos, + sizeof(ParamsT) * slice_size); + } + return kTfLiteOk; +} + +template +TfLiteStatus EvalGatherNd(TfLiteContext* context, + const TfLiteEvalTensor* params, + const TfLiteEvalTensor* indices, + TfLiteEvalTensor* output) { + TfLiteStatus status = kTfLiteError; + switch (params->type) { + case kTfLiteFloat32: + status = GatherNd(params, indices, output); + break; + case kTfLiteInt8: + status = GatherNd(params, indices, output); + break; + default: + MicroPrintf("Params type '%s' are not supported by gather_nd.", + TfLiteTypeGetName(params->type)); + return kTfLiteError; + } + if (status != kTfLiteOk) { + MicroPrintf("gather_nd index out of bounds"); + } + return status; +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + const TfLiteEvalTensor* params = + tflite::micro::GetEvalInput(context, node, kParams); + const TfLiteEvalTensor* indices = + tflite::micro::GetEvalInput(context, node, kIndices); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + + switch (indices->type) { + case kTfLiteInt32: + return EvalGatherNd(context, params, indices, output); + break; + default: + MicroPrintf("Indices of type '%s' are not supported by gather_nd.", + TfLiteTypeGetName(indices->type)); + return kTfLiteError; + } +} +} // namespace + +TfLiteRegistration Register_GATHER_ND() { + return tflite::micro::RegisterOp(nullptr, Prepare, Eval); +} + +} // 
namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/hard_swish.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/hard_swish.cpp new file mode 100644 index 0000000..0f8a718 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/hard_swish.cpp @@ -0,0 +1,75 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/hard_swish.h" + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/hard_swish.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h" + +namespace tflite { +namespace { +void* HardSwishInit(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(HardSwishParams)); +} + +TfLiteStatus HardSwishEval(TfLiteContext* context, TfLiteNode* node) { + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kHardSwishInputTensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kHardSwishOutputTensor); + HardSwishParams* params = static_cast(node->user_data); + + switch (input->type) { + case kTfLiteFloat32: { + tflite::reference_ops::HardSwish( + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } break; + case kTfLiteInt8: { + tflite::reference_ops::HardSwish( + *params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } break; + default: { + MicroPrintf("Unsupported type %s", TfLiteTypeGetName(input->type)); + return kTfLiteError; + } + } + return kTfLiteOk; +} + +} // namespace + +TfLiteRegistration Register_HARD_SWISH() { + return tflite::micro::RegisterOp(HardSwishInit, tflite::HardSwishPrepare, + HardSwishEval); +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/hard_swish.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/hard_swish.h new file mode 100644 index 0000000..cb34f13 
--- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/hard_swish.h @@ -0,0 +1,30 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_MICRO_KERNELS_HARD_SWISH_H_ +#define TENSORFLOW_LITE_MICRO_KERNELS_HARD_SWISH_H_ + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" + +namespace tflite { + +extern const int kHardSwishInputTensor; +extern const int kHardSwishOutputTensor; + +TfLiteStatus HardSwishPrepare(TfLiteContext* context, TfLiteNode* node); +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_KERNELS_HARD_SWISH_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/hard_swish_common.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/hard_swish_common.cpp new file mode 100644 index 0000000..1b82154 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/hard_swish_common.cpp @@ -0,0 +1,86 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/hard_swish.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/hard_swish.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h" + +namespace tflite { + +const int kHardSwishInputTensor = 0; +const int kHardSwishOutputTensor = 0; + +TfLiteStatus HardSwishPrepare(TfLiteContext* context, TfLiteNode* node) { + MicroContext* micro_context = GetMicroContext(context); + + TFLITE_DCHECK(node->user_data != nullptr); + TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + + TfLiteTensor* input = + micro_context->AllocateTempInputTensor(node, kHardSwishInputTensor); + TF_LITE_ENSURE(context, input != nullptr); + TfLiteTensor* output = + micro_context->AllocateTempOutputTensor(node, kHardSwishOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); + + if (input->type == kTfLiteInt8) { + HardSwishParams* params = static_cast(node->user_data); + + params->input_zero_point = input->params.zero_point; + params->output_zero_point = output->params.zero_point; + + const float input_scale = input->params.scale; + const float hires_input_scale = (1.0f / 128.0f) * input_scale; + const float reluish_scale = 3.0f / 32768.0f; + const float output_scale = output->params.scale; + + const double output_multiplier = + static_cast(hires_input_scale / output_scale); + int32_t output_multiplier_fixedpoint_int32; + QuantizeMultiplier(output_multiplier, &output_multiplier_fixedpoint_int32, + ¶ms->output_multiplier_exponent); + DownScaleInt32ToInt16Multiplier( + output_multiplier_fixedpoint_int32, + ¶ms->output_multiplier_fixedpoint_int16); + + TF_LITE_ENSURE(context, params->output_multiplier_exponent <= 0); + + const double reluish_multiplier = + static_cast(hires_input_scale / reluish_scale); + int32_t reluish_multiplier_fixedpoint_int32; + QuantizeMultiplier(reluish_multiplier, &reluish_multiplier_fixedpoint_int32, + ¶ms->reluish_multiplier_exponent); + DownScaleInt32ToInt16Multiplier( + reluish_multiplier_fixedpoint_int32, + ¶ms->reluish_multiplier_fixedpoint_int16); + } + + micro_context->DeallocateTempTfLiteTensor(input); + micro_context->DeallocateTempTfLiteTensor(output); + + return kTfLiteOk; +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/if.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/if.cpp new file mode 100644 index 0000000..afa9920 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/if.cpp @@ -0,0 +1,121 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
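+// (A numeric sketch for HardSwishPrepare above, with hypothetical quantization
+// parameters: input_scale = 0.1f and output_scale = 0.05f give
+// hires_input_scale = 0.1f / 128 and
+// output_multiplier = hires_input_scale / output_scale = 0.015625 = 0.5 * 2^-5,
+// so QuantizeMultiplier yields a fixed-point significand of roughly 0.5 with
+// output_multiplier_exponent == -5, satisfying the <= 0 check.)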
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <stddef.h>
+
+#include <cstring>
+
+#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_context.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_graph.h"
+#include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h"
+
+namespace tflite {
+
+namespace {
+
+struct OpData {
+  int then_subgraph_index;
+  int else_subgraph_index;
+};
+
+void* Init(TfLiteContext* context, const char* buffer, size_t length) {
+  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
+  return context->AllocatePersistentBuffer(context, sizeof(OpData));
+}
+
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  OpData* op_data = reinterpret_cast<OpData*>(node->user_data);
+  const auto* params =
+      reinterpret_cast<const TfLiteIfParams*>(node->builtin_data);
+  op_data->then_subgraph_index = params->then_subgraph_index;
+  op_data->else_subgraph_index = params->else_subgraph_index;
+
+  TF_LITE_ENSURE(context, node->inputs->size > 0);
+
+  // The first input is the condition.
+  tflite::MicroContext* micro_context = tflite::GetMicroContext(context);
+  TfLiteTensor* cond = micro_context->AllocateTempInputTensor(node, 0);
+
+  TF_LITE_ENSURE(context, cond != nullptr);
+  TF_LITE_ENSURE_EQ(context, cond->type, kTfLiteBool);
+  TF_LITE_ENSURE_EQ(context, NumElements(cond), 1);
+
+  micro_context->DeallocateTempTfLiteTensor(cond);
+
+  // The first input of the node is the condition. The rest of the inputs are
+  // passed to the branch subgraphs. Therefore, the number of subgraph inputs
+  // will be the number of node inputs - 1.
+  size_t num_inputs = node->inputs->size - 1;
+  size_t num_outputs = node->outputs->size;
+
+  MicroGraph& graph_info = micro_context->graph();
+
+  TF_LITE_ENSURE(context,
+                 op_data->then_subgraph_index < graph_info.NumSubgraphs());
+  TF_LITE_ENSURE(context,
+                 op_data->else_subgraph_index < graph_info.NumSubgraphs());
+
+  TF_LITE_ENSURE_EQ(context, num_inputs,
+                    graph_info.NumSubgraphInputs(op_data->then_subgraph_index));
+  TF_LITE_ENSURE_EQ(
+      context, num_outputs,
+      graph_info.NumSubgraphOutputs(op_data->then_subgraph_index));
+
+  return kTfLiteOk;
+}
+
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+  const OpData* op_data = reinterpret_cast<const OpData*>(node->user_data);
+
+  tflite::MicroContext* micro_context = tflite::GetMicroContext(context);
+  TfLiteTensor* cond = micro_context->AllocateTempInputTensor(node, 0);
+
+  TF_LITE_ENSURE(context, cond != nullptr);
+  bool cond_value = cond->data.b[0];
+  micro_context->DeallocateTempTfLiteTensor(cond);
+
+  MicroGraph* graph_info = &micro_context->graph();
+  // Currently we copy the input / output between the subgraphs.
+  int active_branch_subgraph_index =
+      cond_value ?
op_data->then_subgraph_index : op_data->else_subgraph_index; + + TF_LITE_ENSURE_OK(context, + tflite::micro::CopyOpInputsToSubgraphInputs( + context, node, graph_info, active_branch_subgraph_index, + /*first_tensor_idx=*/1)); + + TF_LITE_ENSURE_OK(context, + graph_info->InvokeSubgraph(active_branch_subgraph_index)); + + TF_LITE_ENSURE_OK( + context, tflite::micro::CopySubgraphOutputsToOpOutputs( + context, node, graph_info, active_branch_subgraph_index)); + + return kTfLiteOk; +} + +} // namespace. + +TfLiteRegistration Register_IF() { + return tflite::micro::RegisterOp(Init, Prepare, Eval); +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_runner.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_runner.cpp new file mode 100644 index 0000000..e731f4e --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_runner.cpp @@ -0,0 +1,121 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_runner.h" + +#include "edge-impulse-sdk/tensorflow/lite/micro/single_arena_buffer_allocator.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_arena_constants.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/test_helpers.h" + +namespace tflite { +namespace micro { + +// TODO(b/161841696): Consider moving away from global arena buffers: +constexpr int KernelRunner::kKernelRunnerBufferSize_; +uint8_t KernelRunner::kKernelRunnerBuffer_[]; + +void ClearBufferApi(TfLiteContext* context_) { + context_->GetScratchBuffer = nullptr; + context_->GetExternalContext = nullptr; + context_->AllocatePersistentBuffer = nullptr; + context_->RequestScratchBufferInArena = nullptr; +} + +KernelRunner::KernelRunner(const TfLiteRegistration& registration, + TfLiteTensor* tensors, int tensors_size, + TfLiteIntArray* inputs, TfLiteIntArray* outputs, + void* builtin_data, TfLiteIntArray* intermediates) + : registration_(registration), + allocator_(SingleArenaBufferAllocator::Create(kKernelRunnerBuffer_, + kKernelRunnerBufferSize_)), + mock_micro_graph_(allocator_), + fake_micro_context_(tensors, allocator_, &mock_micro_graph_) { + // Prepare TfLiteContext: + context_.impl_ = static_cast(&fake_micro_context_); + context_.ReportError = MicroContextReportOpError; + context_.recommended_num_threads = 1; + context_.GetTensor = MicroContextGetTensor; + context_.GetEvalTensor = MicroContextGetEvalTensor; + tflite::micro::ClearBufferApi(&context_); + context_.AllocatePersistentBuffer = MicroContextAllocatePersistentBuffer; + + context_.recommended_num_threads = 0; + + // Prepare TfLiteNode: + node_.inputs = inputs; + node_.outputs = outputs; + node_.builtin_data = builtin_data; + node_.intermediates = intermediates; +} + +bool KernelRunner::ValidateTempBufferDeallocated() { + 
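+  // Kernels are expected to pair every AllocateTempInputTensor /
+  // AllocateTempOutputTensor call with DeallocateTempTfLiteTensor; the fake
+  // micro context keeps count, and this simply reports whether any temporary
+  // tensors are still outstanding.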
return fake_micro_context_.IsAllTempTfLiteTensorDeallocated(); +} + +TfLiteStatus KernelRunner::InitAndPrepare(const char* init_data, + size_t length) { + if (registration_.init) { + tflite::micro::ClearBufferApi(&context_); + context_.AllocatePersistentBuffer = MicroContextAllocatePersistentBuffer; + node_.user_data = registration_.init(&context_, init_data, length); + } + + TF_LITE_ENSURE(&context_, ValidateTempBufferDeallocated()); + + if (registration_.prepare) { + tflite ::micro::ClearBufferApi(&context_); + context_.AllocatePersistentBuffer = MicroContextAllocatePersistentBuffer; + context_.RequestScratchBufferInArena = + MicroContextRequestScratchBufferInArena; + context_.GetExternalContext = MicroContextGetExternalContext; + TF_LITE_ENSURE_STATUS(registration_.prepare(&context_, &node_)); + } + + TF_LITE_ENSURE(&context_, ValidateTempBufferDeallocated()); + + return kTfLiteOk; +} + +TfLiteStatus KernelRunner::Invoke() { + tflite::micro::ClearBufferApi(&context_); + context_.GetScratchBuffer = MicroContextGetScratchBuffer; + + if (registration_.invoke == nullptr) { + MicroPrintf("TfLiteRegistration missing invoke function pointer!"); + return kTfLiteError; + } + + TF_LITE_ENSURE_STATUS(registration_.invoke(&context_, &node_)); + + TF_LITE_ENSURE(&context_, ValidateTempBufferDeallocated()); + + return kTfLiteOk; +} + +TfLiteStatus KernelRunner::Free() { + tflite::micro::ClearBufferApi(&context_); + context_.GetScratchBuffer = MicroContextGetScratchBuffer; + + if (registration_.free == nullptr) { + MicroPrintf("TfLiteRegistration missing free function pointer!"); + return kTfLiteError; + } + + registration_.free(&context_, node_.user_data); + return kTfLiteOk; +} +} // namespace micro +} // namespace tflite \ No newline at end of file diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_runner.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_runner.h new file mode 100644 index 0000000..cf3c690 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_runner.h @@ -0,0 +1,81 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_RUNNER_H_ +#define TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_RUNNER_H_ + +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/single_arena_buffer_allocator.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/fake_micro_context.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/mock_micro_graph.h" + +namespace tflite { +namespace micro { + +// Helper class to perform a simulated kernel (i.e. TfLiteRegistration) +// lifecycle (init, prepare, invoke). All internal allocations are handled by +// this class. 
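+// For example (setup of the TfLiteTensor array and TfLiteIntArray in/out lists
+// is omitted and the names are illustrative):
+//   KernelRunner runner(Register_HARD_SWISH(), tensors, kTensorCount,
+//                       inputs_array, outputs_array, /*builtin_data=*/nullptr);
+//   if (runner.InitAndPrepare() == kTfLiteOk) {
+//     runner.Invoke();  // results land in the caller-provided output tensor
+//   }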
Simply pass in the registration, list of required tensors, inputs +// array, outputs array, and any pre-builtin data. Calling Invoke() will +// automatically walk the kernel and outputs will be ready on the TfLiteTensor +// output provided during construction. +class KernelRunner { + public: + KernelRunner(const TfLiteRegistration& registration, TfLiteTensor* tensors, + int tensors_size, TfLiteIntArray* inputs, + TfLiteIntArray* outputs, void* builtin_data, + TfLiteIntArray* intermediates = nullptr); + + // Calls init and prepare on the kernel (i.e. TfLiteRegistration) struct. Any + // exceptions will be DebugLog'd and returned as a status code. + TfLiteStatus InitAndPrepare(const char* init_data = nullptr, + size_t length = 0); + + // Calls init, prepare, and invoke on a given TfLiteRegistration pointer. + // After successful invoke, results will be available in the output tensor as + // passed into the constructor of this class. + TfLiteStatus Invoke(); + + // Calls Free on a given TfLiteRegistration pointer(if it's implemented). + // After successful Free, kTfLiteOk status will be returned. If Free is not + // implemented for a given kernel kTfLiteError will be returned. + TfLiteStatus Free(); + + // Returns a pointer to the internal MockMicroGraph which KernelRunner uses + // to stub out MicroGraph methods and track invocations on each subgraph. + MockMicroGraph* GetMockGraph() { return &mock_micro_graph_; } + + // Returns true if all temp buffer in tests are deallocated. + // TODO(b/209453859): move this function to private after deallocation checks + // are enabled for all kernel tests. + bool ValidateTempBufferDeallocated(); + + private: + static constexpr int kKernelRunnerBufferSize_ = 10000; + static uint8_t kKernelRunnerBuffer_[kKernelRunnerBufferSize_]; + + TfLiteContext context_ = {}; + TfLiteNode node_ = {}; + const TfLiteRegistration& registration_; + + SingleArenaBufferAllocator* allocator_; + MockMicroGraph mock_micro_graph_; + FakeMicroContext fake_micro_context_; +}; + +} // namespace micro +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_RUNNER_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h new file mode 100644 index 0000000..616e7ff --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h @@ -0,0 +1,145 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_UTIL_H_ +#define TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_UTIL_H_ + +#include + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_context.h" + +namespace tflite { +namespace micro { + +TfLiteRegistration RegisterOp( + void* (*init)(TfLiteContext* context, const char* buffer, size_t length), + TfLiteStatus (*prepare)(TfLiteContext* context, TfLiteNode* node), + TfLiteStatus (*invoke)(TfLiteContext* context, TfLiteNode* node), + void (*free)(TfLiteContext* context, void* buffer) = nullptr); + +// Prints out n bytes in a int8_t buffer as hex +void PrintNBytes(const int8_t* tensor_data, int n_bytes, + const char* prefix = nullptr); + +// Prints out the the n bytes in a TfLiteEvalTensor as hex +void PrintNBytes(const TfLiteEvalTensor* tensor, int n_bytes, + const char* prefix = nullptr); + +// Prints out the the n bytes in a TfLiteTensor as hex +void PrintNBytes(const TfLiteTensor* tensor, int n_bytes, + const char* prefix = nullptr); + +// Returns a mutable tensor for a given input index. is_variable must be checked +// during prepare when the full TfLiteTensor is available. +TfLiteEvalTensor* GetMutableEvalInput(const TfLiteContext* context, + const TfLiteNode* node, int index); + +// Returns the TfLiteEvalTensor struct for a given input index in a node. +const TfLiteEvalTensor* GetEvalInput(const TfLiteContext* context, + const TfLiteNode* node, int index); + +// Returns the TfLiteEvalTensor struct for a given output index in a node. +TfLiteEvalTensor* GetEvalOutput(const TfLiteContext* context, + const TfLiteNode* node, int index); + +// Returns data for a TfLiteEvalTensor struct that are expected to exist. +template +T* GetTensorData(TfLiteEvalTensor* tensor) { + TFLITE_DCHECK(tensor != nullptr); + return reinterpret_cast(tensor->data.raw); +} + +// Returns const data for a TfLiteEvalTensor struct that are expected to exist. +template +const T* GetTensorData(const TfLiteEvalTensor* tensor) { + TFLITE_DCHECK(tensor != nullptr); + return reinterpret_cast(tensor->data.raw); +} + +// Returns data for a TfLiteEvalTensor struct that could be null. +template +T* GetOptionalTensorData(TfLiteEvalTensor* tensor) { + return tensor == nullptr ? nullptr : reinterpret_cast(tensor->data.raw); +} + +// Returns const data for a TfLiteEvalTensor struct that could be null. +template +const T* GetOptionalTensorData(const TfLiteEvalTensor* tensor) { + return tensor == nullptr ? nullptr + : reinterpret_cast(tensor->data.raw); +} + +// Returns the shape of a TfLiteEvalTensor struct. +const RuntimeShape GetTensorShape(const TfLiteEvalTensor* tensor); + +// Return true if the given tensors have the same shape. +bool HaveSameShapes(const TfLiteEvalTensor* input1, + const TfLiteEvalTensor* input2); + +PaddingType RuntimePaddingType(TfLitePadding padding); + +// Relocate tensor dims from FlatBuffer to the persistent storage arena. +// The old dims data is copied to the new storage area. +// The tensor and eval_tensor must be the same tensor. +// Only use during Prepare phase. 
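+// A typical Prepare-time sequence, mirroring the kernels in this package
+// (kOutputTensor and new_batch are illustrative names):
+//   TfLiteEvalTensor* output_eval =
+//       tflite::micro::GetEvalOutput(context, node, kOutputTensor);
+//   TF_LITE_ENSURE_OK(context, CreateWritableTensorDimsWithCopy(
+//                                  context, output, output_eval));
+//   output->dims->data[0] = new_batch;  // dims are now safely writable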
+TfLiteStatus CreateWritableTensorDimsWithCopy(TfLiteContext* context, + TfLiteTensor* tensor, + TfLiteEvalTensor* eval_tensor); + +// Copy all op input tensors to op output tensors. Requires all op input tensor +// shapes and types to be identical to op output tensor shapes and types. +TfLiteStatus CopyOpInputsToOpOutputs(TfLiteContext* context, TfLiteNode* node); + +// Copy all op input tensors to subgraph input tensors. Requires all op input +// tensor shapes and types to be identical to subgraph input tensor shapes and +// types. +TfLiteStatus CopyOpInputsToSubgraphInputs(TfLiteContext* context, + TfLiteNode* node, + MicroGraph* graph_info, + int subgraph_idx, + int first_tensor_idx); + +// Copy all op output tensors to subgraph input tensors. Requires all op output +// tensor shapes and types to be identical to subgraph input tensor shapes and +// types. +TfLiteStatus CopyOpOutputsToSubgraphInputs(TfLiteContext* context, + TfLiteNode* node, + MicroGraph* graph_info, + int subgraph_idx); + +// Copy all subgraph output tensors to op outputs. Requires all subgraph output +// tensor shapes and types to be identical to op output tensor shapes and types. +TfLiteStatus CopySubgraphOutputsToOpOutputs(TfLiteContext* context, + TfLiteNode* node, + MicroGraph* graph_info, + int subgraph_idx); + +// If tensor is INT4, make a new TfLiteEvalTensor with data unpacked into +// a scratch buffer. The returned tensor will have the kTfLiteInt8 type. +// Assume scratch buffer is previously requested in Prepare, and +// scratch_buffer_index can be used to retrieve that buffer. +// If the tensor is not INT4, a shallow copy is returned. +TfLiteEvalTensor MakeUnpackedInt4Tensor(TfLiteContext* context, + int scratch_buffer_index, + const TfLiteEvalTensor* tensor); +} // namespace micro +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_UTIL_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util_micro.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util_micro.cpp new file mode 100644 index 0000000..73ab130 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util_micro.cpp @@ -0,0 +1,280 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
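+// (On MakeUnpackedInt4Tensor, declared at the end of the header above: a
+// kernel would normally request the scratch buffer during Prepare, e.g.
+//   context->RequestScratchBufferInArena(context, /*bytes=*/num_elements,
+//                                        &data->unpack_buffer_index);
+// and pass that index in from Eval; num_elements and unpack_buffer_index are
+// illustrative names only.)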
+==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" + +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/portable_tensor_utils.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +namespace tflite { +namespace micro { + +namespace { + +int ValidateTensorIndexing(const TfLiteContext* context, int index, + int max_size, const int* tensor_indices) { + if (index >= 0 && index < max_size) { + const int tensor_index = tensor_indices[index]; + if (tensor_index != kTfLiteOptionalTensor) { + return tensor_index; + } + } + return -1; +} + +} // namespace + +TfLiteRegistration RegisterOp( + void* (*init)(TfLiteContext* context, const char* buffer, size_t length), + TfLiteStatus (*prepare)(TfLiteContext* context, TfLiteNode* node), + TfLiteStatus (*invoke)(TfLiteContext* context, TfLiteNode* node), + void (*free)(TfLiteContext* context, void* buffer)) { + return {/*init=*/init, + /*free=*/free, + /*prepare=*/prepare, + /*invoke=*/invoke, + /*profiling_string=*/nullptr, + /*builtin_code=*/0, + /*custom_name=*/nullptr, + /*version=*/0, + /*registration_external=*/nullptr}; +} + +// Returns a mutable tensor for a given input index. is_variable must be checked +// during prepare when the full TfLiteTensor is available. +TfLiteEvalTensor* GetMutableEvalInput(const TfLiteContext* context, + const TfLiteNode* node, int index) { + TFLITE_DCHECK(context != nullptr); + TFLITE_DCHECK(node != nullptr); + const int tensor_index = ValidateTensorIndexing( + context, index, node->inputs->size, node->inputs->data); + + if (tensor_index < 0) { + return nullptr; + } + + return context->GetEvalTensor(context, node->inputs->data[index]); +} + +// Returns the TfLiteEvalTensor struct for a given input index in a node. +const TfLiteEvalTensor* GetEvalInput(const TfLiteContext* context, + const TfLiteNode* node, int index) { + return GetMutableEvalInput(context, node, index); +} + +// Returns the TfLiteEvalTensor struct for a given output index in a node. +TfLiteEvalTensor* GetEvalOutput(const TfLiteContext* context, + const TfLiteNode* node, int index) { + TFLITE_DCHECK(context != nullptr); + TFLITE_DCHECK(node != nullptr); + return context->GetEvalTensor(context, node->outputs->data[index]); +} + +bool HaveSameShapes(const TfLiteEvalTensor* input1, + const TfLiteEvalTensor* input2) { + TFLITE_DCHECK(input1 != nullptr); + TFLITE_DCHECK(input2 != nullptr); + return TfLiteIntArrayEqual(input1->dims, input2->dims); +} + +const RuntimeShape GetTensorShape(const TfLiteEvalTensor* tensor) { + if (tensor == nullptr || tensor->dims == nullptr) { + return RuntimeShape(); + } + TfLiteIntArray* dims = tensor->dims; + const int dims_size = dims->size; + const int32_t* dims_data = reinterpret_cast(dims->data); + return RuntimeShape(dims_size, dims_data); +} + +PaddingType RuntimePaddingType(TfLitePadding padding) { + switch (padding) { + case TfLitePadding::kTfLitePaddingSame: + return PaddingType::kSame; + case TfLitePadding::kTfLitePaddingValid: + return PaddingType::kValid; + case TfLitePadding::kTfLitePaddingUnknown: + default: + return PaddingType::kNone; + } +} + +// Relocate tensor dims from FlatBuffer to the persistent storage arena. +// The old dims data is copied to the new storage area. +// The tensor and eval_tensor must be the same tensor. +// Only use during Prepare phase. 
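+// Note on the allocation below: TfLiteIntArrayGetSizeInBytes(ranks) covers the
+// TfLiteIntArray size field plus `ranks` dimension entries, so the
+// element-by-element copy that follows preserves the complete original shape.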
+TfLiteStatus CreateWritableTensorDimsWithCopy(TfLiteContext* context,
+                                              TfLiteTensor* tensor,
+                                              TfLiteEvalTensor* eval_tensor) {
+  TF_LITE_ENSURE(context, tensor != nullptr);
+  TF_LITE_ENSURE(context, eval_tensor != nullptr);
+  TF_LITE_ENSURE(context, context->AllocatePersistentBuffer != nullptr);
+  int ranks = tensor->dims->size;
+  size_t alloc_size = TfLiteIntArrayGetSizeInBytes(ranks);
+  TfLiteIntArray* new_dims = static_cast<TfLiteIntArray*>(
+      context->AllocatePersistentBuffer(context, alloc_size));
+  TfLiteIntArray* old_dims = tensor->dims;
+  new_dims->size = ranks;
+  tensor->dims = new_dims;
+  eval_tensor->dims = new_dims;
+  for (int i = 0; i < ranks; i++) {
+    new_dims->data[i] = old_dims->data[i];
+  }
+
+  return kTfLiteOk;
+}
+
+// Verify that both tensors have the same type and size, then return the size
+// of both tensors in bytes if they are the same, or -1 if they are different.
+size_t ValidateAndGetTensorSizes(const TfLiteEvalTensor* tensor1,
+                                 const TfLiteEvalTensor* tensor2) {
+  TFLITE_DCHECK(tensor1->type == tensor2->type);
+  size_t tensor1_size = 0;
+  size_t tensor2_size = 0;
+  TfLiteEvalTensorByteLength(tensor1, &tensor1_size);
+  TfLiteEvalTensorByteLength(tensor2, &tensor2_size);
+  return (tensor1_size == tensor2_size) ? tensor1_size : -1;
+}
+
+TfLiteStatus CopyOpInputsToOpOutputs(TfLiteContext* context, TfLiteNode* node) {
+  TF_LITE_ENSURE(context, node->inputs->size == node->outputs->size);
+  for (int i = 0; i < node->inputs->size; i++) {
+    const TfLiteEvalTensor* input =
+        tflite::micro::GetEvalInput(context, node, i);
+    TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, i);
+    int bytes = ValidateAndGetTensorSizes(input, output);
+    TF_LITE_ENSURE(context, bytes >= 0);
+    memcpy(output->data.raw, input->data.raw, bytes);
+  }
+  return kTfLiteOk;
+}
+
+// Args:
+//   1. int8_t tensor_data - int8_t buffer of unknown size whose data you'd
+//      like to print
+//   2. int n_bytes - a small int representing the number of bytes you want to
+//      print to debug output. It should always be <= tensor_data's size.
+//   3. prefix - optional message you'd like to print before printing bytes
+//
+// Purpose:
+//   Takes in the parameters above and prints n_bytes bytes from the
+//   tensor_data buffer. This can be used to debug the output of a model and
+//   its op.
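+//
+// For instance, PrintNBytes(output, 8, "conv out: ") would emit the first
+// eight bytes of `output` as hex on the debug console (the arguments shown
+// are illustrative).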
+ +void PrintNBytes(const int8_t* tensor_data, int n_bytes, const char* prefix) { + if (prefix != nullptr) { + MicroPrintf("%s", prefix); + } + + for (int i = 0; i < n_bytes; ++i) { + MicroPrintf(" %x", tensor_data[i]); + } + MicroPrintf("\n"); +} + +// same as the PrintNBytes above but the buffer needs to be extracted out of the +// TfLiteEvalTensor* +void PrintNBytes(const TfLiteEvalTensor* tensor, int n_bytes, + const char* prefix) { + const int8_t* tensor_data = tflite::micro::GetTensorData(tensor); + PrintNBytes(tensor_data, n_bytes, prefix); +} + +// same as the PrintNBytes above but the buffer needs to be extracted out of the +// TfLiteEvalTensor* +void PrintNBytes(const TfLiteTensor* tensor, int n_bytes, const char* prefix) { + const int8_t* tensor_data = tflite::GetTensorData(tensor); + PrintNBytes(tensor_data, n_bytes, prefix); +} + +TfLiteStatus CopyOpInputsToSubgraphInputs(TfLiteContext* context, + TfLiteNode* node, + MicroGraph* graph_info, + int subgraph_idx, + int first_tensor_idx) { + TF_LITE_ENSURE(context, + static_cast(node->inputs->size - first_tensor_idx) == + graph_info->NumSubgraphInputs(subgraph_idx)); + for (int i = 0; i < node->inputs->size - first_tensor_idx; i++) { + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, i + first_tensor_idx); + TfLiteEvalTensor* subgraph_input = + graph_info->GetSubgraphInput(subgraph_idx, i); + int bytes = ValidateAndGetTensorSizes(input, subgraph_input); + TF_LITE_ENSURE(context, bytes >= 0); + memcpy(subgraph_input->data.raw, input->data.raw, bytes); + } + return kTfLiteOk; +} + +TfLiteStatus CopyOpOutputsToSubgraphInputs(TfLiteContext* context, + TfLiteNode* node, + MicroGraph* graph_info, + int subgraph_idx) { + TF_LITE_ENSURE(context, static_cast(node->outputs->size) == + graph_info->NumSubgraphInputs(subgraph_idx)); + for (int i = 0; i < node->outputs->size; i++) { + TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, i); + TfLiteEvalTensor* subgraph_input = + graph_info->GetSubgraphInput(subgraph_idx, i); + int bytes = ValidateAndGetTensorSizes(output, subgraph_input); + TF_LITE_ENSURE(context, bytes >= 0); + memcpy(subgraph_input->data.raw, output->data.raw, bytes); + } + return kTfLiteOk; +} + +TfLiteStatus CopySubgraphOutputsToOpOutputs(TfLiteContext* context, + TfLiteNode* node, + MicroGraph* graph_info, + int subgraph_idx) { + TF_LITE_ENSURE(context, static_cast(node->outputs->size) == + graph_info->NumSubgraphOutputs(subgraph_idx)); + for (int i = 0; i < node->outputs->size; i++) { + TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, i); + TfLiteEvalTensor* subgraph_output = + graph_info->GetSubgraphOutput(subgraph_idx, i); + int bytes = ValidateAndGetTensorSizes(output, subgraph_output); + TF_LITE_ENSURE(context, bytes >= 0); + memcpy(output->data.raw, subgraph_output->data.raw, bytes); + } + return kTfLiteOk; +} + +TfLiteEvalTensor MakeUnpackedInt4Tensor(TfLiteContext* context, + int scratch_buffer_index, + const TfLiteEvalTensor* tensor) { + if (tensor->type != kTfLiteInt4) { + return *tensor; + } + + TfLiteEvalTensor new_tensor; + new_tensor.data.data = static_cast( + context->GetScratchBuffer(context, scratch_buffer_index)); + new_tensor.dims = tensor->dims; + new_tensor.type = kTfLiteInt8; + tflite::tensor_utils::UnpackDenseInt4IntoInt8( + tflite::micro::GetTensorData(tensor), + tflite::micro::GetTensorShape(tensor).FlatSize(), + tflite::micro::GetTensorData(&new_tensor)); + return new_tensor; +} + +} // namespace micro +} // namespace tflite diff 
--git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/l2_pool_2d.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/l2_pool_2d.cpp new file mode 100644 index 0000000..8cd1e7e --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/l2_pool_2d.cpp @@ -0,0 +1,142 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include + +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/pooling.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/padding.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +namespace tflite { +namespace { + +// Input/output tensor index. +constexpr int kInputTensor = 0; +constexpr int kOutputTensor = 0; + +// required rank for input/output tensor shape +constexpr int kTensorShapeRank = 4; + +// input/output tensor shape rank associations +enum { kBatchRank = 0, kHeightRank, kWidthRank, kChannelRank }; + +TfLiteStatus L2Prepare(TfLiteContext* context, TfLiteNode* node) { + MicroContext* micro_context = GetMicroContext(context); + + auto* params = static_cast(node->builtin_data); + + TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + TfLiteTensor* output = + micro_context->AllocateTempOutputTensor(node, kOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); + TfLiteTensor* input = + micro_context->AllocateTempInputTensor(node, kInputTensor); + TF_LITE_ENSURE(context, input != nullptr); + TF_LITE_ENSURE_EQ(context, NumDimensions(input), kTensorShapeRank); + TF_LITE_ENSURE_EQ(context, NumDimensions(output), kTensorShapeRank); + TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type); + + int batches = SizeOfDimension(input, kBatchRank); + int height = SizeOfDimension(input, kHeightRank); + int width = SizeOfDimension(input, kWidthRank); + int channels_out = SizeOfDimension(input, kChannelRank); + + // Matching GetWindowedOutputSize in TensorFlow. + auto padding = params->padding; + int out_width, out_height; + + params->computed.padding = ComputePaddingHeightWidth( + params->stride_height, params->stride_width, 1, 1, height, width, + params->filter_height, params->filter_width, padding, &out_height, + &out_width); + + // We currently don't have a quantized implementation of L2Pool + TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32); + + // We must update the output tensor dimensions. + // The dims storage is expected to be the same area in memory + // for both TfLiteTensor and TfLiteEvalTensor. This is important + // because TfLiteTensor in the MicroInterpreter is a temporary + // allocation. 
For the KernelRunner interpreter, TfLiteEvalTensor + // is a temporary allocation. We must therefore relocate the dims + // from the FlatBuffer to the persistant storage arena. + TfLiteEvalTensor* output_eval = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + TF_LITE_ENSURE_OK(context, tflite::micro::CreateWritableTensorDimsWithCopy( + context, output, output_eval)); + output->dims->data[kBatchRank] = batches; + output->dims->data[kHeightRank] = out_height; + output->dims->data[kWidthRank] = out_width; + output->dims->data[kChannelRank] = channels_out; + + micro_context->DeallocateTempTfLiteTensor(output); + micro_context->DeallocateTempTfLiteTensor(input); + + return kTfLiteOk; +} + +void L2EvalFloat(const TfLitePoolParams& params, const TfLiteEvalTensor& input, + tflite::PoolParams* op_params, TfLiteEvalTensor* output) { + float activation_min, activation_max; + CalculateActivationRange(params.activation, &activation_min, &activation_max); + + op_params->float_activation_min = activation_min; + op_params->float_activation_max = activation_max; + reference_ops::L2Pool(*op_params, tflite::micro::GetTensorShape(&input), + tflite::micro::GetTensorData(&input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); +} + +TfLiteStatus L2Eval(TfLiteContext* context, TfLiteNode* node) { + auto* params = static_cast(node->builtin_data); + + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + + tflite::PoolParams op_params; + op_params.stride_height = params->stride_height; + op_params.stride_width = params->stride_width; + op_params.filter_height = params->filter_height; + op_params.filter_width = params->filter_width; + op_params.padding_values.height = params->computed.padding.height; + op_params.padding_values.width = params->computed.padding.width; + + switch (input->type) { // Already know in/out types are same. + case kTfLiteFloat32: + L2EvalFloat(*params, *input, &op_params, output); + break; + default: + MicroPrintf("L2_POOL_2D only supports float32 currently, got %s.", + TfLiteTypeGetName(input->type)); + return kTfLiteError; + } + return kTfLiteOk; +} + +} // namespace + +TfLiteRegistration Register_L2_POOL_2D() { + return tflite::micro::RegisterOp(nullptr, L2Prepare, L2Eval); +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/l2norm.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/l2norm.cpp new file mode 100644 index 0000000..ede02db --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/l2norm.cpp @@ -0,0 +1,148 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
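+// (A quick check of the L2_POOL_2D sizing above, using an illustrative 32x32
+// input, 2x2 filter and stride 2: kTfLitePaddingValid gives
+// out = (32 - 2) / 2 + 1 = 16, while kTfLitePaddingSame gives
+// out = ceil(32 / 2) = 16 with zero required padding, matching what
+// ComputePaddingHeightWidth reports.)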
+==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/portable_tensor.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/l2normalization.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/l2normalization.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +namespace tflite { +namespace ops { +namespace micro { +namespace l2norm { + +namespace { + +// This file has two implementation of L2Norm. +enum KernelType { + kReference, + kGenericOptimized, +}; + +constexpr int kInputTensor = 0; +constexpr int kOutputTensor = 0; + +} // namespace + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + TFLITE_DCHECK(node->builtin_data != nullptr); + + auto* params = reinterpret_cast(node->builtin_data); + L2NormalizationParams* data = + static_cast(node->user_data); + + TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + + MicroContext* micro_context = GetMicroContext(context); + + TfLiteTensor* input = + micro_context->AllocateTempInputTensor(node, kInputTensor); + TF_LITE_ENSURE(context, input != nullptr); + TfLiteTensor* output = + micro_context->AllocateTempOutputTensor(node, kOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); + TF_LITE_ENSURE(context, NumDimensions(input) <= 4); + + TF_LITE_ENSURE(context, + output->type == kTfLiteFloat32 || output->type == kTfLiteInt8); + TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type); + + if (output->type == kTfLiteInt8) { + data->input_zero_point = input->params.zero_point; + } else if (output->type == kTfLiteFloat32) { + data->input_zero_point = 0; + } + + // Our implementations don't currently support activations. + TF_LITE_ENSURE_EQ(context, params->activation, kTfLiteActNone); + + micro_context->DeallocateTempTfLiteTensor(input); + micro_context->DeallocateTempTfLiteTensor(output); + return kTfLiteOk; +} + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, + sizeof(L2NormalizationParams)); +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + const L2NormalizationParams& data = + *(static_cast(node->user_data)); + + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + + // TODO(b/143912164): instead of hardcode the epsilon here, we should read it + // from tensorflow, i.e., adding a params. + // We don't compute epsilon for quantized kernel: + // + // epsilon_float = (epsilon_quant - zp) * scale + // so + // espsilon_quant = epsilon_float / scale + zp + // We know epsilon_float is just a very small number to avoid division by + // zero error, and scale is > 1, so the integer value of epsilon for quant + // is just dominated by the zero point. + // Also, GetInvSqrtQuantizedMultiplierExp handles the scenario where the sum + // of input value squared is zero case well. + // So we don't even need to do handle the epsilon for quantized kernel case. 
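// Illustrative sketch, not part of the kernel sources above: the float L2
// normalization that the epsilon discussed above guards, written out for one row
// of `depth` values. The norm is clamped from below by epsilon so an all-zero row
// does not divide by zero; the helper name is an assumption.
#include <algorithm>
#include <cmath>

static void L2NormalizeRow(const float* in, int depth, float epsilon,
                           float* out) {
  float sum_of_squares = 0.0f;
  for (int i = 0; i < depth; ++i) {
    sum_of_squares += in[i] * in[i];
  }
  const float norm = std::max(std::sqrt(sum_of_squares), epsilon);
  for (int i = 0; i < depth; ++i) {
    out[i] = in[i] / norm;  // each row ends up with unit L2 norm (or stays zero)
  }
}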
+ const float epsilon = 1e-6f; + if (output->type == kTfLiteFloat32) { + reference_ops::L2Normalization(data, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output), + epsilon); + } else if (output->type == kTfLiteInt8) { + const auto input_shape = tflite::micro::GetTensorShape(input); + const auto output_shape = tflite::micro::GetTensorShape(output); + const int trailing_dim = input_shape.DimensionsCount() - 1; + const int depth = + MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim); + const int outer_size = + MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape); + reference_integer_ops::L2Normalization( + data.input_zero_point, outer_size, depth, + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorData(output)); + } else { + MicroPrintf("Output type is %s, requires float.", + TfLiteTypeGetName(output->type)); + return kTfLiteError; + } + + return kTfLiteOk; +} + +} // namespace l2norm + +TfLiteRegistration Register_L2NORM_REF() { + return tflite::micro::RegisterOp(l2norm::Init, l2norm::Prepare, l2norm::Eval); +} + +TfLiteRegistration Register_L2_NORMALIZATION() { return Register_L2NORM_REF(); } + +} // namespace micro +} // namespace ops +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/leaky_relu.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/leaky_relu.cpp new file mode 100644 index 0000000..042528d --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/leaky_relu.cpp @@ -0,0 +1,95 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
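// Illustrative sketch, not part of the kernel sources above: how the int8 path
// views an N-D tensor. The trailing dimension is the normalization axis (`depth`)
// and all other dimensions are folded into `outer_size`, so the kernel simply
// loops over outer_size rows of depth elements. The struct and helper below are
// assumptions used only to make that flattening concrete.
struct FlattenedView {
  int outer_size;  // product of all dimensions except the last
  int depth;       // size of the trailing (normalization) dimension
};

static FlattenedView FlattenForL2Norm(const int* dims, int num_dims) {
  FlattenedView view{1, dims[num_dims - 1]};
  for (int d = 0; d < num_dims - 1; ++d) {
    view.outer_size *= dims[d];
  }
  return view;  // e.g. dims {1, 2, 3, 4} -> outer_size = 6, depth = 4
}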
+==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/leaky_relu.h" + +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/leaky_relu.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +namespace tflite { + +template +void QuantizeLeakyRelu(const LeakyReluOpData& data, + const TfLiteEvalTensor* input, + TfLiteEvalTensor* output) { + LeakyReluParams op_params = {}; + + op_params.input_offset = data.input_zero_point; + op_params.output_offset = data.output_zero_point; + op_params.output_multiplier_alpha = data.output_multiplier_alpha; + op_params.output_shift_alpha = data.output_shift_alpha; + op_params.output_multiplier_identity = data.output_multiplier_identity; + op_params.output_shift_identity = data.output_shift_identity; + reference_ops::QuantizeLeakyRelu(op_params, + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); +} + +void* LeakyReluInit(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(LeakyReluOpData)); +} + +TfLiteStatus LeakyReluEval(TfLiteContext* context, TfLiteNode* node) { + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + const LeakyReluOpData& data = *static_cast(node->user_data); + + switch (input->type) { + case kTfLiteFloat32: { + LeakyReluParams op_params = {}; + const auto* params = + static_cast(node->builtin_data); + + op_params.alpha = params->alpha; + reference_ops::LeakyRelu(op_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + return kTfLiteOk; + } break; + case kTfLiteInt8: { + QuantizeLeakyRelu(data, input, output); + return kTfLiteOk; + } break; + case kTfLiteInt16: { + QuantizeLeakyRelu(data, input, output); + return kTfLiteOk; + } break; + default: + MicroPrintf("Only float32, int8 are supported by LEAKY_RELU, got %s.", + TfLiteTypeGetName(input->type)); + return kTfLiteError; + } + + return kTfLiteError; +} + +TfLiteRegistration Register_LEAKY_RELU() { + return tflite::micro::RegisterOp(LeakyReluInit, LeakyReluPrepare, + LeakyReluEval); +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/leaky_relu.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/leaky_relu.h new file mode 100644 index 0000000..fe43060 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/leaky_relu.h @@ -0,0 +1,43 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
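// Illustrative sketch, not part of the kernel sources above: the float path of
// LEAKY_RELU reduces to the element-wise function below. The int8/int16 paths
// compute the same thing in fixed point, using one (multiplier, shift) pair for
// the identity branch (x >= 0) and another for the alpha branch (x < 0), together
// with the input/output zero points. The helper name is an assumption.
static void LeakyReluFloat(const float* in, int count, float alpha, float* out) {
  for (int i = 0; i < count; ++i) {
    out[i] = in[i] >= 0.0f ? in[i] : alpha * in[i];
  }
}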
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_MICRO_KERNELS_LEAKY_RELU_H_ +#define TENSORFLOW_LITE_MICRO_KERNELS_LEAKY_RELU_H_ + +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" + +namespace tflite { + +// Input/output tensor index. +extern const int kInputTensor; +extern const int kOutputTensor; + +struct LeakyReluOpData { + // quantization parameters + int32_t output_multiplier_alpha; + int32_t output_shift_alpha; + int32_t output_multiplier_identity; + int32_t output_shift_identity; + int32_t input_zero_point; + int32_t output_zero_point; +}; + +TfLiteStatus CalculateOpDataLeakyRelu(TfLiteContext* context, TfLiteNode* node); + +TfLiteStatus LeakyReluPrepare(TfLiteContext* context, TfLiteNode* node); + +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_KERNELS_LEAKY_RELU_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/leaky_relu_common.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/leaky_relu_common.cpp new file mode 100644 index 0000000..b71b743 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/leaky_relu_common.cpp @@ -0,0 +1,78 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/leaky_relu.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/leaky_relu.h" + +namespace tflite { + +// Input/output tensor index. 
+const int kInputTensor = 0; +const int kOutputTensor = 0; + +TfLiteStatus CalculateOpDataLeakyRelu(TfLiteContext* context, + TfLiteNode* node) { + MicroContext* micro_context = GetMicroContext(context); + + TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + TfLiteTensor* input = + micro_context->AllocateTempInputTensor(node, kInputTensor); + TF_LITE_ENSURE(context, input != nullptr); + TfLiteTensor* output = + micro_context->AllocateTempOutputTensor(node, kOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); + TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type); + + if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) { + LeakyReluOpData* data = static_cast(node->user_data); + const auto* params = + static_cast(node->builtin_data); + + data->input_zero_point = input->params.zero_point; + data->output_zero_point = output->params.zero_point; + + int output_shift_alpha; + double alpha_multiplier = static_cast( + input->params.scale * params->alpha / output->params.scale); + QuantizeMultiplier(alpha_multiplier, &data->output_multiplier_alpha, + &output_shift_alpha); + data->output_shift_alpha = static_cast(output_shift_alpha); + + int output_shift_identity; + double identity_multiplier = + static_cast(input->params.scale / output->params.scale); + QuantizeMultiplier(identity_multiplier, &data->output_multiplier_identity, + &output_shift_identity); + data->output_shift_identity = static_cast(output_shift_identity); + } + + micro_context->DeallocateTempTfLiteTensor(input); + micro_context->DeallocateTempTfLiteTensor(output); + + return kTfLiteOk; +} + +TfLiteStatus LeakyReluPrepare(TfLiteContext* context, TfLiteNode* node) { + return CalculateOpDataLeakyRelu(context, node); +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/log_softmax.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/log_softmax.cpp new file mode 100644 index 0000000..4cfccb2 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/log_softmax.cpp @@ -0,0 +1,148 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
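// Illustrative sketch, not part of the kernel sources above: roughly what
// QuantizeMultiplier does with the real-valued ratios computed in
// CalculateOpDataLeakyRelu (input_scale * alpha / output_scale and
// input_scale / output_scale). The value is split into a Q31 fixed-point mantissa
// and a power-of-two exponent, so the kernel can later rescale with an integer
// multiply plus a shift. Edge-case handling in the real helper (negative shifts
// clamping, saturation) is trimmed here for brevity.
#include <cmath>
#include <cstdint>

static void DecomposeMultiplier(double real_multiplier,
                                int32_t* quantized_multiplier, int* shift) {
  if (real_multiplier == 0.0) {
    *quantized_multiplier = 0;
    *shift = 0;
    return;
  }
  const double fraction = std::frexp(real_multiplier, shift);  // in [0.5, 1)
  int64_t q = static_cast<int64_t>(std::round(fraction * (1ll << 31)));
  if (q == (1ll << 31)) {  // rounding overflowed the mantissa: renormalize
    q /= 2;
    ++(*shift);
  }
  *quantized_multiplier = static_cast<int32_t>(q);
}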
+==============================================================================*/ +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/log_softmax.h" + +#include +#include + +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +namespace tflite { +namespace { + +// used only with quantized data +struct LogSoftmaxOpData { + int32_t input_multiplier; + int32_t input_left_shift; + int32_t reverse_scaling_divisor; + int32_t reverse_scaling_right_shift; + int diff_min; + size_t outer_size; // number of tensor elements skipping computation axis + size_t depth; // number of tensor elements on computation axis +}; + +// input/output tensor index +constexpr int kInputTensor = 0; +constexpr int kOutputTensor = 0; + +TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) { + MicroContext* micro_context = GetMicroContext(context); + + TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + TfLiteTensor* input = + micro_context->AllocateTempInputTensor(node, kInputTensor); + TF_LITE_ENSURE(context, input != nullptr); + TfLiteTensor* output = + micro_context->AllocateTempOutputTensor(node, kOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); + TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type); + + TF_LITE_ENSURE(context, HaveSameShapes(input, output)); + + if (input->type == kTfLiteInt8) { + node->user_data = + context->AllocatePersistentBuffer(context, sizeof(LogSoftmaxOpData)); + auto data = static_cast(node->user_data); + + // quantization datum + constexpr int32_t kOutputZeroPoint = 127; + constexpr float kOutputScale = 16.0 / 256; + constexpr double kBeta = 1.0; + constexpr int kScaledDiffIntegerBits = 5; + + TF_LITE_ENSURE(context, output->params.scale == kOutputScale); + TF_LITE_ENSURE(context, output->params.zero_point == kOutputZeroPoint); + + int input_left_shift; + int reverse_scaling_right_shift; + tflite::PreprocessLogSoftmaxScalingExp( + kBeta, static_cast(input->params.scale), kScaledDiffIntegerBits, + &data->input_multiplier, &input_left_shift, + &data->reverse_scaling_divisor, &reverse_scaling_right_shift); + data->input_left_shift = static_cast(input_left_shift); + data->reverse_scaling_right_shift = + static_cast(-reverse_scaling_right_shift); + // diff_min has a negative value, and is used to limit the maximum magnitude + // of the diffs, which are <= 0. 
+ data->diff_min = + -tflite::CalculateInputRadius(kScaledDiffIntegerBits, input_left_shift); + + RuntimeShape input_shape = GetTensorShape(input); + const int trailing_dim = input_shape.DimensionsCount() - 1; + data->outer_size = + static_cast(FlatSizeSkipDim(input_shape, trailing_dim)); + data->depth = static_cast(input_shape.Dims(trailing_dim)); + } + + micro_context->DeallocateTempTfLiteTensor(input); + micro_context->DeallocateTempTfLiteTensor(output); + return kTfLiteOk; +} + +TfLiteStatus LogSoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) { + return CalculateOpData(context, node); +} + +TfLiteStatus LogSoftmaxEval(TfLiteContext* context, TfLiteNode* node) { + const LogSoftmaxOpData* data = + static_cast(node->user_data); + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + switch (input->type) { + case kTfLiteFloat32: { + SoftmaxParams op_params = {}; + reference_ops::LogSoftmax(op_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + return kTfLiteOk; + } + case kTfLiteInt8: { + SoftmaxParams op_params = {}; + op_params.input_multiplier = data->input_multiplier; + op_params.input_left_shift = data->input_left_shift; + op_params.reverse_scaling_divisor = data->reverse_scaling_divisor; + op_params.reverse_scaling_right_shift = data->reverse_scaling_right_shift; + op_params.diff_min = data->diff_min; + reference_ops::LogSoftmax(op_params, data->outer_size, data->depth, + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + return kTfLiteOk; + } + default: + MicroPrintf("LOG_SOFTMAX only supports float32, int8, got %s.", + TfLiteTypeGetName(input->type)); + return kTfLiteError; + } +} + +} // namespace + +TfLiteRegistration Register_LOG_SOFTMAX() { + return tflite::micro::RegisterOp(nullptr, LogSoftmaxPrepare, LogSoftmaxEval); +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/logical.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/logical.cpp new file mode 100644 index 0000000..2b38501 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/logical.cpp @@ -0,0 +1,44 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
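// Illustrative sketch, not part of the kernel sources above: the float branch of
// LOG_SOFTMAX computes, per row,
//   log_softmax(x_i) = x_i - max(x) - log(sum_j exp(x_j - max(x))).
// Subtracting the row maximum first keeps exp() from overflowing; the int8 branch
// does the equivalent in fixed point using the multipliers, shifts, and diff_min
// prepared in CalculateOpData. The helper name is an assumption.
#include <algorithm>
#include <cmath>

static void LogSoftmaxRow(const float* in, int depth, float* out) {
  float max_val = in[0];
  for (int i = 1; i < depth; ++i) max_val = std::max(max_val, in[i]);
  float sum_exp = 0.0f;
  for (int i = 0; i < depth; ++i) sum_exp += std::exp(in[i] - max_val);
  const float log_sum = std::log(sum_exp);
  for (int i = 0; i < depth; ++i) out[i] = in[i] - max_val - log_sum;
}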
+==============================================================================*/ +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/logical.h" + +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/binary_function.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" + +namespace tflite { +namespace { + +TfLiteStatus LogicalOrEval(TfLiteContext* context, TfLiteNode* node) { + return LogicalImpl(context, node, LogicalOr); +} + +TfLiteStatus LogicalAndEval(TfLiteContext* context, TfLiteNode* node) { + return LogicalImpl(context, node, LogicalAnd); +} + +} // namespace + +TfLiteRegistration Register_LOGICAL_OR() { + return tflite::micro::RegisterOp(nullptr, nullptr, LogicalOrEval); +} + +TfLiteRegistration Register_LOGICAL_AND() { + return tflite::micro::RegisterOp(nullptr, nullptr, LogicalAndEval); +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/logical.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/logical.h new file mode 100644 index 0000000..8dadde4 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/logical.h @@ -0,0 +1,35 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_MICRO_KERNELS_LOGICAL_H_ +#define TENSORFLOW_LITE_MICRO_KERNELS_LOGICAL_H_ + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" + +namespace tflite { +// Input/output tensor index. +extern const int kLogicalInputTensor1; +extern const int kLogicalInputTensor2; +extern const int kLogicalOutputTensor; + +TfLiteStatus LogicalImpl(TfLiteContext* context, TfLiteNode* node, + bool (*func)(bool, bool)); + +bool LogicalOr(bool x, bool y); +bool LogicalAnd(bool x, bool y); + +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_KERNELS_LOGICAL_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/logical_common.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/logical_common.cpp new file mode 100644 index 0000000..1586d2f --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/logical_common.cpp @@ -0,0 +1,63 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/binary_function.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/logical.h" + +namespace tflite { + +// Input/output tensor index. +const int kLogicalInputTensor1 = 0; +const int kLogicalInputTensor2 = 1; +const int kLogicalOutputTensor = 0; + +TfLiteStatus LogicalImpl(TfLiteContext* context, TfLiteNode* node, + bool (*func)(bool, bool)) { + const TfLiteEvalTensor* input1 = + tflite::micro::GetEvalInput(context, node, kLogicalInputTensor1); + const TfLiteEvalTensor* input2 = + tflite::micro::GetEvalInput(context, node, kLogicalInputTensor2); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kLogicalOutputTensor); + + if (tflite::micro::HaveSameShapes(input1, input2)) { + reference_ops::BinaryFunction( + tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output), func); + } else { + reference_ops::BroadcastBinaryFunction4DSlow( + tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output), func); + } + + return kTfLiteOk; +} + +bool LogicalOr(bool x, bool y) { return x || y; } + +bool LogicalAnd(bool x, bool y) { return x && y; } + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/logistic.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/logistic.cpp new file mode 100644 index 0000000..82579ea --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/logistic.cpp @@ -0,0 +1,111 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
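// Illustrative sketch, not part of the kernel sources above: the same-shape path
// of LogicalImpl boils down to applying a bool(*)(bool, bool) function pointer
// element by element; LOGICAL_OR and LOGICAL_AND differ only in which function is
// passed in. The broadcast path does the same thing while walking the two inputs
// with (up to 4-D) broadcast indexing. The helper name is an assumption.
static void ApplyElementwise(const bool* in1, const bool* in2, int count,
                             bool* out, bool (*func)(bool, bool)) {
  for (int i = 0; i < count; ++i) {
    out[i] = func(in1[i], in2[i]);
  }
}

// Possible usage:
//   ApplyElementwise(a, b, n, result, [](bool x, bool y) { return x || y; });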
+==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h" + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/logistic.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/logistic.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +namespace tflite { +namespace { + +void* LogisticInit(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(OpDataLogistic)); +} + +TfLiteStatus LogisticEval(TfLiteContext* context, TfLiteNode* node) { + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kLogisticInputTensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kLogisticOutputTensor); + + TFLITE_DCHECK(node->user_data != nullptr); + OpDataLogistic* data = static_cast(node->user_data); + + if (input->type == kTfLiteFloat32) { + switch (output->type) { + case kTfLiteFloat32: { + reference_ops::Logistic(tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + return kTfLiteOk; + } + default: + MicroPrintf("Input %s, output %s not supported.", + TfLiteTypeGetName(input->type), + TfLiteTypeGetName(output->type)); + return kTfLiteError; + } + } else if (input->type == kTfLiteInt16) { + switch (output->type) { + case kTfLiteInt16: { + reference_integer_ops::Logistic( + data->input_multiplier, data->input_left_shift, + NumElements(input->dims), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorData(output)); + return kTfLiteOk; + } + default: + MicroPrintf("Input %s, output %s not supported.", + TfLiteTypeGetName(input->type), + TfLiteTypeGetName(output->type)); + return kTfLiteError; + } + } else if (input->type == kTfLiteInt8) { + switch (output->type) { + case kTfLiteInt8: { + reference_integer_ops::Logistic( + data->input_zero_point, data->input_range_radius, + data->input_multiplier, data->input_left_shift, + NumElements(input->dims), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorData(output)); + return kTfLiteOk; + } + default: + MicroPrintf("Input %s, output %s not supported.", + TfLiteTypeGetName(input->type), + TfLiteTypeGetName(output->type)); + return kTfLiteError; + } + } else { + // TODO(b/141211002): Also support other data types once we have supported + // temporary tensors in TFLM. 
+ MicroPrintf("Input %s, output %s not supported.", + TfLiteTypeGetName(input->type), + TfLiteTypeGetName(output->type)); + return kTfLiteError; + } + return kTfLiteOk; +} + +} // namespace + +TfLiteRegistration Register_LOGISTIC() { + return tflite::micro::RegisterOp(LogisticInit, LogisticPrepare, LogisticEval); +} +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/logistic.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/logistic.h new file mode 100644 index 0000000..43325e1 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/logistic.h @@ -0,0 +1,42 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_MICRO_KERNELS_LOGISTIC_H_ +#define TENSORFLOW_LITE_MICRO_KERNELS_LOGISTIC_H_ + +#include + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" + +namespace tflite { +extern const int kLogisticInputTensor; +extern const int kLogisticOutputTensor; + +struct OpDataLogistic { + int32_t input_zero_point; + int32_t input_range_radius; + int32_t input_multiplier; + int input_left_shift; +}; + +TfLiteStatus CalculateArithmeticOpDataLogistic(TfLiteContext* context, + TfLiteNode* node, + OpDataLogistic* data); + +TfLiteStatus LogisticPrepare(TfLiteContext* context, TfLiteNode* node); + +} // namespace tflite +#endif // TENSORFLOW_LITE_MICRO_KERNELS_LOGISTIC_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/logistic_common.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/logistic_common.cpp new file mode 100644 index 0000000..9f27a91 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/logistic_common.cpp @@ -0,0 +1,119 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
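// Illustrative sketch, not part of the kernel sources above: what the int8 branch
// of LOGISTIC computes, expressed in float for clarity. Dequantize with the input
// zero point and scale, apply 1 / (1 + exp(-x)), then requantize into the fixed
// int8 output format. The -128 output zero point is what LogisticPrepare checks;
// the 1/256 output scale is the usual TFLite convention and is an assumption here,
// as is the helper name.
#include <algorithm>
#include <cmath>
#include <cstdint>

static int8_t LogisticInt8Reference(int8_t q_in, int32_t in_zero_point,
                                    float in_scale) {
  const float x = (static_cast<int32_t>(q_in) - in_zero_point) * in_scale;
  const float y = 1.0f / (1.0f + std::exp(-x));  // in (0, 1)
  const int32_t q_out = static_cast<int32_t>(std::lround(y * 256.0f)) - 128;
  return static_cast<int8_t>(std::min(127, std::max(-128, q_out)));
}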
+==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/logistic.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/logistic.h" + +namespace tflite { +const int kLogisticInputTensor = 0; +const int kLogisticOutputTensor = 0; + +TfLiteStatus CalculateArithmeticOpDataLogistic(TfLiteContext* context, + TfLiteNode* node, + OpDataLogistic* data) { + MicroContext* micro_context = GetMicroContext(context); + + TfLiteTensor* input = + micro_context->AllocateTempInputTensor(node, kLogisticInputTensor); + TF_LITE_ENSURE(context, input != nullptr); + TfLiteTensor* output = + micro_context->AllocateTempOutputTensor(node, kLogisticOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); + + TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type); + if (input->type == kTfLiteInt8) { + TF_LITE_ENSURE_EQ(context, output->params.zero_point, + std::numeric_limits::min()); + + static constexpr int kInputIntegerBits = 4; + const double input_real_multiplier = + static_cast(input->params.scale) * + static_cast(1 << (31 - kInputIntegerBits)); + + data->input_zero_point = input->params.zero_point; + + const double q = std::frexp(input_real_multiplier, &data->input_left_shift); + data->input_multiplier = static_cast(TfLiteRound(q * (1ll << 31))); + + data->input_range_radius = + CalculateInputRadius(kInputIntegerBits, data->input_left_shift, 31); + } + + if (input->type == kTfLiteInt16) { + static constexpr int kInputIntegerBits = 3; + static constexpr int kOutputFractionalBits = 15; + + // See comments in TanhPrepare about requiring zero_point==0 + // and a power-of-two ("POT") scale. + + TF_LITE_ENSURE_EQ(context, input->params.zero_point, 0); + TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0); + + int input_scale_log2_rounded; + bool param_scale_pot = + CheckedLog2(input->params.scale, &input_scale_log2_rounded); + + data->input_left_shift = + (15 - kInputIntegerBits) + input_scale_log2_rounded; + param_scale_pot &= (data->input_left_shift == 0); + + if (param_scale_pot) { + data->input_multiplier = 0; + } else { + // Calculate multiplier to change input scale to 1/(3*4096) + // as required by the table lookup. 
+ // In this scaling +/-2^17 represents +/-10.7 + double multiplier = + static_cast(input->params.scale) * 4096.0 * 3.0; + + data->input_left_shift = 0; + + while (multiplier <= 32767.0 / 2.0 && data->input_left_shift <= 30) { + data->input_left_shift++; + multiplier = multiplier * 2.0; + } + + data->input_multiplier = static_cast(multiplier); + } + + int output_scale_log2_rounded; + TF_LITE_ENSURE( + context, CheckedLog2(output->params.scale, &output_scale_log2_rounded)); + TF_LITE_ENSURE_EQ(context, output_scale_log2_rounded, + -kOutputFractionalBits); + } + + micro_context->DeallocateTempTfLiteTensor(input); + micro_context->DeallocateTempTfLiteTensor(output); + return kTfLiteOk; +} + +TfLiteStatus LogisticPrepare(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + OpDataLogistic* data = static_cast(node->user_data); + + return CalculateArithmeticOpDataLogistic(context, node, data); +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/lstm_eval.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/lstm_eval.cpp new file mode 100644 index 0000000..037caf7 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/lstm_eval.cpp @@ -0,0 +1,222 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
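// Illustrative sketch, not part of the kernel sources above: the int16 path
// requires the tensor scale to be a power of two, which is what CheckedLog2
// verifies while also reporting the rounded log2. A minimal version of that check
// (the helper name and tolerance are assumptions):
#include <cmath>

static bool IsPowerOfTwoScale(float scale, int* log2_result) {
  const float log2_value = std::log2(scale);
  *log2_result = static_cast<int>(std::round(log2_value));
  // A pure power-of-two scale has an (almost exactly) integral log2.
  return std::fabs(log2_value - static_cast<float>(*log2_result)) < 1e-4f;
}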
+==============================================================================*/ +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/lstm_eval.h" + +#include + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/fully_connected.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/mul.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/logistic.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/mul.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/tanh.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" + +namespace tflite { +namespace lstm_internal { + +const int32_t kInt16Max = std::numeric_limits::max(); +const int32_t kInt16Min = std::numeric_limits::min(); + +void AddElementWise(const int16_t* input_1, const int16_t* input_2, int n_batch, + int n_input, int16_t* output) { + for (int batch = 0; batch < n_batch; ++batch) { + for (int i = 0; i < n_input; ++i) { + const int index = batch * n_input + i; + int32_t sum = input_1[index] + input_2[index]; + const int32_t sum_clamped = std::min(kInt16Max, std::max(kInt16Min, sum)); + output[index] = static_cast(sum_clamped); + } + } +} + +void AddElementWise(const float* input_1, const float* input_2, int n_batch, + int n_input, float* output) { + for (int batch = 0; batch < n_batch; ++batch) { + for (int i = 0; i < n_input; ++i) { + const int index = batch * n_input + i; + output[index] = input_1[index] + input_2[index]; + } + } +} + +void Sigmoid(const RuntimeShape& data_shape, int16_t* data) { + reference_integer_ops::Logistic( + 0 /*data->input_multiplier*/, 0 /*data->input_left_shift */, + data_shape.FlatSize() /*NumElements(input->dims)*/, + data /* tflite::micro::GetTensorData(input) */, + data /*tflite::micro::GetTensorData(output) */); +} + +void Sigmoid(const RuntimeShape& data_shape, float* data) { + reference_ops::Logistic(data_shape, data, data_shape, data); +} + +void Tanh(int32_t cell_state_scale_power, const RuntimeShape& input_data_shape, + int16_t* input_data, const RuntimeShape& output_data_shape, + int16_t* output_data) { + int32_t tanh_input_left_shift = (15 + cell_state_scale_power) - 3; + if (tanh_input_left_shift < 0) /* handling negative shift value */ + { + int32_t i; + tanh_input_left_shift = -tanh_input_left_shift; + for (i = 0; i < input_data_shape.FlatSize(); i++) { + input_data[i] = input_data[i] >> tanh_input_left_shift; + } + tanh_input_left_shift = 0; + } + reference_integer_ops::Tanh(0, tanh_input_left_shift, input_data_shape, + input_data, output_data_shape, output_data); +} + +void Tanh(int32_t cell_state_scale_power, const RuntimeShape& input_data_shape, + float* input_data, const RuntimeShape& output_data_shape, + float* output_data) { + reference_ops::Tanh(input_data_shape, input_data, output_data_shape, + output_data); +} + +// Input and output have the same shape in LSTM +void Mul(const RuntimeShape& shape, const ArithmeticParams& params, + const int16_t* input1_data, const int16_t* input2_data, + int8_t* output_data) { + return reference_integer_ops::MulElementwise( + shape.FlatSize(), params, input1_data, input2_data, output_data); +} + +// Input and output have the same 
shape in LSTM +void Mul(const RuntimeShape& shape, const ArithmeticParams& params, + const int16_t* input1_data, const int16_t* input2_data, + int16_t* output_data) { + return reference_integer_ops::MulElementwise( + shape.FlatSize(), params, input1_data, input2_data, output_data); +} + +// Input and output have the same shape in LSTM +void Mul(const RuntimeShape& shape, const ArithmeticParams& params, + const float* input1_data, const float* input2_data, + float* output_data) { + return reference_ops::Mul(params, shape, input1_data, shape, input2_data, + shape, output_data); +} + +void FullyConnected(const FullyConnectedParams& params, + const RuntimeShape& input_shape, const int8_t* input_data, + const RuntimeShape& filter_shape, const int8_t* filter_data, + const RuntimeShape& bias_shape, const int32_t* bias_data, + const RuntimeShape& output_shape, int16_t* output_data) { + return tflite::reference_integer_ops::FullyConnected( + params, input_shape, input_data, filter_shape, filter_data, bias_shape, + bias_data, output_shape, output_data); +} + +void FullyConnected(const FullyConnectedParams& params, + const RuntimeShape& input_shape, const int16_t* input_data, + const RuntimeShape& filter_shape, const int8_t* filter_data, + const RuntimeShape& bias_shape, const int64_t* bias_data, + const RuntimeShape& output_shape, int16_t* output_data) { + return tflite::reference_integer_ops::FullyConnected( + params, input_shape, input_data, filter_shape, filter_data, bias_shape, + bias_data, output_shape, output_data); +} + +void FullyConnected(const FullyConnectedParams& params, + const RuntimeShape& input_shape, const float* input_data, + const RuntimeShape& filter_shape, const float* filter_data, + const RuntimeShape& bias_shape, const float* bias_data, + const RuntimeShape& output_shape, float* output_data) { + return tflite::reference_ops::FullyConnected( + params, input_shape, input_data, filter_shape, filter_data, bias_shape, + bias_data, output_shape, output_data); +} + +void Clipping(const int v_size, const CellStateInfo& cell_state_info, + int16_t* vector) { + for (int i = 0; i < v_size; i++) { + vector[i] = + std::max(std::min(cell_state_info.quantized_cell_clip, vector[i]), + static_cast(-cell_state_info.quantized_cell_clip)); + } +} + +void Clipping(const int v_size, const CellStateInfo& cell_state_info, + float* vector) { + for (int i = 0; i < v_size; i++) { + vector[i] = std::max(std::min(cell_state_info.cell_clip, vector[i]), + -cell_state_info.cell_clip); + } +} + +// Increment the data offset so the sigle time step invocation call can access +// the corresponding input/output tensor data at the time step +void LstmStepManager::UpdateTime() { + current_time_ += 1; + TFLITE_DCHECK_LE(current_time_, size_info_.time_steps); + // default as one batch per inference + int input_step = size_info_.input_dimension; + int output_step = size_info_.state_dimension; + // time major: batch inference + if (size_info_.time_major) { + input_step = input_step * size_info_.batch_size; + output_step = output_step * size_info_.batch_size; + } + + input_offset_ += input_step; + output_offset_ += output_step; +} + +// Increment the data offset so the sigle time step invocation call can access +// the corresponding hidden/cell state tensor data at the time step (for single +// batch inference only) +void LstmStepManager::UpdateBatch() { + current_batch_ += 1; + TFLITE_DCHECK_LE(current_batch_, size_info_.batch_size); + // batch inference for time major: no action needed + if (size_info_.time_major) { + 
return; + } + // otherwise: singe batch inference, go to the next batch + hidden_state_offset_ += size_info_.state_dimension; + cell_state_offset_ += size_info_.state_dimension; +} + +// Input shape for each single time LSTM invocation. +// Multi-batch for time_major input +RuntimeShape LstmStepManager::InputShape() const { + int batch_size = 1; + if (size_info_.time_major) { + batch_size = size_info_.batch_size; + } + const int dims[2] = {batch_size, size_info_.input_dimension}; + const int32_t* dims_data = reinterpret_cast(dims); + return RuntimeShape(2, dims_data); +} + +// State shape (both hidden and cell) for each single time LSTM invocation. +// Multi-batch for time_major input +RuntimeShape LstmStepManager::StateShape() const { + int batch_size = 1; + if (size_info_.time_major) { + batch_size = size_info_.batch_size; + } + const int dims[2] = {batch_size, size_info_.state_dimension}; + const int32_t* dims_data = reinterpret_cast(dims); + return RuntimeShape(2, dims_data); +} + +} // namespace lstm_internal +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/lstm_eval.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/lstm_eval.h new file mode 100644 index 0000000..fcdbfe8 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/lstm_eval.h @@ -0,0 +1,417 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Functions to perform integer evaulation for standard LSTM (e.g., defined in +// the keras lstm layer, no peephole etc.). 
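// Illustrative sketch, not part of the kernel sources above: the offset
// bookkeeping of LstmStepManager::UpdateTime(), reduced to its core. For
// time-major input ([time, batch, feature]) one time step advances the pointers by
// batch_size * dimension so all batches are processed together; for batch-major
// input ([batch, time, feature]) each step advances by a single dimension and the
// batches are walked one at a time (UpdateBatch then shifts the state offsets).
// The struct and helper names are assumptions.
struct StepOffsets {
  int input_offset = 0;
  int output_offset = 0;
};

static void AdvanceOneTimeStep(bool time_major, int batch_size, int input_dim,
                               int state_dim, StepOffsets* offsets) {
  const int input_step = time_major ? input_dim * batch_size : input_dim;
  const int output_step = time_major ? state_dim * batch_size : state_dim;
  offsets->input_offset += input_step;
  offsets->output_offset += output_step;
}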
Currently used by the 16 bits +// activation case only + +#ifndef TENSORFLOW_LITE_MICRO_KERNELS_LSTM_EVAL_GENERAL_H_ +#define TENSORFLOW_LITE_MICRO_KERNELS_LSTM_EVAL_GENERAL_H_ +#include +#include + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/lstm_shared.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +namespace tflite { +// Since LSTM includes multiple intermediate stages, introducing the internal +// namespace to expose them for testing +namespace lstm_internal { + +void Sigmoid(const RuntimeShape& data_shape, int16_t* data); + +void Sigmoid(const RuntimeShape& data_shape, float* data); + +void Tanh(int32_t cell_state_scale_power, const RuntimeShape& input_data_shape, + int16_t* input_data, const RuntimeShape& output_data_shape, + int16_t* output_data); + +void Tanh(int32_t cell_state_scale_power, const RuntimeShape& input_data_shape, + float* input_data, const RuntimeShape& output_data_shape, + float* output_data); + +void Mul(const RuntimeShape& shape, const ArithmeticParams& params, + const int16_t* input1_data, const int16_t* input2_data, + int8_t* output_data); + +void Mul(const RuntimeShape& shape, const ArithmeticParams& params, + const int16_t* input1_data, const int16_t* input2_data, + int16_t* output_data); + +void Mul(const RuntimeShape& shape, const ArithmeticParams& params, + const float* input1_data, const float* input2_data, + float* output_data); + +void FullyConnected(const FullyConnectedParams& params, + const RuntimeShape& input_shape, const int8_t* input_data, + const RuntimeShape& filter_shape, const int8_t* filter_data, + const RuntimeShape& bias_shape, const int32_t* bias_data, + const RuntimeShape& output_shape, int16_t* output_data); + +void FullyConnected(const FullyConnectedParams& params, + const RuntimeShape& input_shape, const int16_t* input_data, + const RuntimeShape& filter_shape, const int8_t* filter_data, + const RuntimeShape& bias_shape, const int64_t* bias_data, + const RuntimeShape& output_shape, int16_t* output_data); + +void FullyConnected(const FullyConnectedParams& params, + const RuntimeShape& input_shape, const float* input_data, + const RuntimeShape& filter_shape, const float* filter_data, + const RuntimeShape& bias_shape, const float* bias_data, + const RuntimeShape& output_shape, float* output_data); + +void AddElementWise(const int16_t* input_1, const int16_t* input_2, int n_batch, + int n_input, int16_t* output); + +void AddElementWise(const float* input_1, const float* input_2, int n_batch, + int n_input, float* output); + +void Clipping(const int v_size, const CellStateInfo& cell_state_info, + int16_t* vector); + +void Clipping(const int v_size, const CellStateInfo& cell_state_info, + float* vector); + +// Manages the slice position (offset), slice length (sliced tensor shape), +// and update rules for input/output/hidden state/cell state tensors at each +// time step. +class LstmStepManager { + public: + LstmStepManager() = delete; + // Does not take any ownership, and all pointers must refer to valid objects + // that outlive the one constructed. 
+ explicit LstmStepManager(const LstmSizeInfo* size_info) + : size_info_(*size_info) {} + + void UpdateTime(); + void UpdateBatch(); + + void ResetTime() { current_time_ = 0; } + RuntimeShape InputShape() const; + RuntimeShape StateShape() const; + + int InputOffset() const { return input_offset_; } + int OutputOffset() const { return output_offset_; } + int HiddenStateOffset() const { return hidden_state_offset_; } + int CellStateOffset() const { return cell_state_offset_; } + + private: + int current_time_ = 0; + int current_batch_ = 0; + int input_offset_ = 0; + int output_offset_ = 0; + int hidden_state_offset_ = 0; + int cell_state_offset_ = 0; + // Sizeinfo is from LstmOpData, which reside in the memory arena + // (guarante to outlast LSTMStepManager, which reside in stack) + const LstmSizeInfo& size_info_; +}; + +// Calculates a single LSTM gate. +// Implements the following formula: +// gate = activate(FC(input) + FC(recurrent)) +// Activation is sigmoid except for the "cell" gate (configurable, usually tanh) +template +void CalculateLstmGate( + const LstmStepManager& step_info, const GateParameters& gate_params, + // Input FC + const TfLiteEvalTensor* input, const TfLiteEvalTensor* input_weight, + const TfLiteEvalTensor* input_bias, + // Recurrent FC + const TfLiteEvalTensor* recurrent, const TfLiteEvalTensor* recurrent_weight, + const TfLiteEvalTensor* recurrent_bias, + // Output + CellType* gate_output, + // Scratch arrays + CellType* fc_output_buffer, const TfLiteFusedActivation activation) { + const auto gate_output_shape = step_info.StateShape(); + // Check offset validity to avoid memory overflow + TFLITE_DCHECK_LE(step_info.InputOffset() + step_info.InputShape().FlatSize(), + tflite::micro::GetTensorShape(input).FlatSize()); + TFLITE_DCHECK_LE( + step_info.HiddenStateOffset() + step_info.StateShape().FlatSize(), + tflite::micro::GetTensorShape(recurrent).FlatSize()); + + // Input FC + FullyConnected(gate_params.input_fc_params, step_info.InputShape(), + tflite::micro::GetTensorData(input) + + step_info.InputOffset(), + micro::GetTensorShape(input_weight), + tflite::micro::GetTensorData(input_weight), + tflite::micro::GetTensorShape(input_bias), + tflite::micro::GetOptionalTensorData(input_bias), + gate_output_shape, gate_output); + + // Recurrent FC + FullyConnected(gate_params.recurrent_fc_params, step_info.StateShape(), + tflite::micro::GetTensorData(recurrent) + + step_info.HiddenStateOffset(), + tflite::micro::GetTensorShape(recurrent_weight), + tflite::micro::GetTensorData(recurrent_weight), + tflite::micro::GetTensorShape(recurrent_bias), + tflite::micro::GetOptionalTensorData(recurrent_bias), + gate_output_shape, fc_output_buffer); + + AddElementWise(gate_output, fc_output_buffer, + /*n_batch=*/gate_output_shape.DimsData()[0], + /*n_state=*/gate_output_shape.DimsData()[1], gate_output); + // Apply activation + switch (activation) { + case kTfLiteActSigmoid: + Sigmoid(gate_output_shape, gate_output); + break; + case kTfLiteActTanh: { + // Set the scale power to -12 to avoid shift + Tanh(/*cell_state_scale_power=*/-12, gate_output_shape, gate_output, + gate_output_shape, gate_output); + } break; + default: + // Only Sigmoid or Tanh is used. 
+ TFLITE_ASSERT_FALSE; + } +} + +// Update the cell state using the output from the forget gate, input gate, and +// cell gate Formula: updated_cell_state = forget_gate_output*cell_state + +// input_gate_output * cell_gate_output, where * denotes element wise +// multiplication +template +void UpdateLstmCell(const LstmStepManager& step_info, + TfLiteEvalTensor* cell_state, + // Gate outputs + CellType* forget_gate_output, + const CellType* input_gate_output, + const CellType* cell_gate_output, + // Mul parameters + const ArithmeticParams& forget_cell_mul_params, + const ArithmeticParams& input_mul_params, + const CellStateInfo& cell_state_info, CellType* buffer) { + // Check offset validity to avoid memory overflow + TFLITE_DCHECK_LE( + step_info.CellStateOffset() + step_info.StateShape().FlatSize(), + tflite::micro::GetTensorShape(cell_state).FlatSize()); + + auto cell_state_shape = step_info.StateShape(); + // Forget Gate x Cell State + Mul(cell_state_shape, forget_cell_mul_params, forget_gate_output, + tflite::micro::GetTensorData(cell_state) + + step_info.CellStateOffset(), + tflite::micro::GetTensorData(cell_state) + + step_info.CellStateOffset()); + // Input Gate x Cell Gate + Mul(cell_state_shape, input_mul_params, input_gate_output, cell_gate_output, + buffer); + + // Update the cell state + AddElementWise(tflite::micro::GetTensorData(cell_state) + + step_info.CellStateOffset(), + buffer, + /*n_batch=*/cell_state_shape.DimsData()[0], + /*n_state=*/cell_state_shape.DimsData()[1], + tflite::micro::GetTensorData(cell_state) + + step_info.CellStateOffset()); + + if (cell_state_info.cell_clip > 0) { + Clipping(cell_state_shape.FlatSize(), cell_state_info, + tflite::micro::GetTensorData(cell_state) + + step_info.CellStateOffset()); + } +} + +// Update the hidden state of the LSTM kernel using the following formula: +// updated_hidden_state = Tanh(updated_cell_state) * output_gate_output, * means +// element wise multiplication +template +void UpdateLstmHidden(const LstmStepManager& step_info, + TfLiteEvalTensor* cell_state, + TfLiteEvalTensor* hidden_state, + const CellType* output_gate_output, + const ArithmeticParams& mul_params, + int32_t cell_state_scale_power, CellType* buffer) { + // Check offset validity to avoid memory overflow + TFLITE_DCHECK_LE( + step_info.CellStateOffset() + step_info.StateShape().FlatSize(), + tflite::micro::GetTensorShape(cell_state).FlatSize()); + TFLITE_DCHECK_LE( + step_info.HiddenStateOffset() + step_info.StateShape().FlatSize(), + tflite::micro::GetTensorShape(hidden_state).FlatSize()); + + auto cell_state_shape = step_info.StateShape(); + CellType* cell_state_data = + tflite::micro::GetTensorData(cell_state) + + step_info.CellStateOffset(); + // Tanh(cell_state) + Tanh(cell_state_scale_power, cell_state_shape, cell_state_data, + cell_state_shape, buffer); + // Update the hidden state + Mul(cell_state_shape, mul_params, buffer, output_gate_output, + tflite::micro::GetTensorData(hidden_state) + + step_info.HiddenStateOffset()); +} + +template +void LstmStep(const LstmStepManager& step_info, const OpDataLSTM& op_data, + LSTMKernelContents& kernel_content, + LSTMBuffers& buffers) { + /*Step1: Calculate gate outputs to prepare cell state update*/ + CellType* gate_internal_buffer = buffers.buffer3; + CellType* forget_gate_output = buffers.buffer0; + CalculateLstmGate( + step_info, op_data.forget_gate_parameters, + // Input FC + kernel_content.GetInternalTensor(tflite::kLstmInputTensor), + 
kernel_content.GetInternalTensor(tflite::kLstmInputToForgetWeightsTensor), + kernel_content.GetInternalTensor(tflite::kLstmForgetGateBiasTensor), + // Recurrent FC + kernel_content.HiddenStateTensor(), + kernel_content.GetInternalTensor( + tflite::kLstmRecurrentToForgetWeightsTensor), + /*recurrent_bias*/ nullptr, + // Output + forget_gate_output, + // Scratch arrays + gate_internal_buffer, kTfLiteActSigmoid); + + // Input Gate calculation; + CellType* input_gate_output = buffers.buffer1; + CalculateLstmGate( + step_info, op_data.input_gate_parameters, + // Input FC + kernel_content.GetInternalTensor(tflite::kLstmInputTensor), + kernel_content.GetInternalTensor(tflite::kLstmInputToInputWeightsTensor), + kernel_content.GetInternalTensor(tflite::kLstmInputGateBiasTensor), + // Recurrent FC + kernel_content.HiddenStateTensor(), + kernel_content.GetInternalTensor( + tflite::kLstmRecurrentToInputWeightsTensor), + /*recurrent_bias*/ nullptr, + // Output + input_gate_output, + // Scratch arrays + gate_internal_buffer, kTfLiteActSigmoid); + + // Cell Gate calculation + CellType* cell_gate_output = buffers.buffer2; + CalculateLstmGate( + step_info, op_data.cell_gate_parameters, + // Input FC + kernel_content.GetInternalTensor(tflite::kLstmInputTensor), + kernel_content.GetInternalTensor(tflite::kLstmInputToCellWeightsTensor), + kernel_content.GetInternalTensor(tflite::kLstmCellGateBiasTensor), + // Recurrent FC + kernel_content.HiddenStateTensor(), + kernel_content.GetInternalTensor( + tflite::kLstmRecurrentToCellWeightsTensor), + /*recurrent_bias*/ nullptr, + // Output + cell_gate_output, + // Scratch arrays + gate_internal_buffer, op_data.cell_gate_nonlinear_type); + + /*Step2: update the cell state */ + const InterGateParameters& inter_gate_params = op_data.inter_gate_parameters; + CellType* updated_input_buffer = buffers.buffer1; // reuse buffer + + UpdateLstmCell(step_info, kernel_content.CellStateTensor(), + forget_gate_output, input_gate_output, + cell_gate_output, + inter_gate_params.forget_cell_mul_params, + inter_gate_params.input_mul_params, + op_data.cell_state_info, updated_input_buffer); + + /*Step3: update the hidden state */ + CellType* output_gate_output = buffers.buffer1; // reuse buffer + CalculateLstmGate( + step_info, op_data.output_gate_parameters, + // Input FC + kernel_content.GetInternalTensor(tflite::kLstmInputTensor), + kernel_content.GetInternalTensor(tflite::kLstmInputToOutputWeightsTensor), + kernel_content.GetInternalTensor(tflite::kLstmOutputGateBiasTensor), + // Recurrent FC + kernel_content.HiddenStateTensor(), + kernel_content.GetInternalTensor( + tflite::kLstmRecurrentToOutputWeightsTensor), + /*recurrent_bias*/ nullptr, + // Output + output_gate_output, + // Scratch arrays + gate_internal_buffer, kTfLiteActSigmoid); + + CellType* tanh_activated_cell_buffer = buffers.buffer0; // reuse buffer + tflite::lstm_internal::UpdateLstmHidden( + step_info, kernel_content.CellStateTensor(), + kernel_content.HiddenStateTensor(), output_gate_output, + inter_gate_params.output_mul_params, + op_data.cell_state_info.cell_state_scale_power, + tanh_activated_cell_buffer); + + /*Step4: copy the update the hidden state to output*/ + // Check offset validity to avoid memory overflow + TFLITE_DCHECK_LE( + step_info.OutputOffset() + step_info.StateShape().FlatSize(), + tflite::micro::GetTensorShape(kernel_content.output_tensor).FlatSize()); + // record the output (from the updated hidden state) + ActivationType* output_ptr = tflite::micro::GetTensorData( + 
kernel_content.output_tensor);
+  const auto* hidden_state = kernel_content.HiddenStateTensor();
+  std::memcpy(output_ptr + step_info.OutputOffset(),
+              tflite::micro::GetTensorData(hidden_state) +
+                  step_info.HiddenStateOffset(),
+              step_info.StateShape().FlatSize() * sizeof(ActivationType));
+}
+
+}  // namespace lstm_internal
+
+// Evaluate the LSTM kernel with (potentially) multiple time steps and
+// multi-batch input.
+// Since
+template
+TfLiteStatus EvalLstm(const OpDataLSTM& op_data,
+                      LSTMKernelContents& kernel_content,
+                      LSTMBuffers& buffers) {
+  lstm_internal::LstmStepManager step_info(&op_data.size_info);
+  const auto& size_info = op_data.size_info;
+  // time is the first dimension, enabling batch computation
+  if (size_info.time_major) {
+    for (int t = 0; t < size_info.time_steps; t++) {
+      lstm_internal::LstmStep(
+          step_info, op_data, kernel_content, buffers);
+      // prepare for the next time step
+      step_info.UpdateTime();
+    }
+  } else {
+    // batch first: the input cannot be processed as one batch, so run
+    // single-batch inference
+    for (int b = 0; b < size_info.batch_size; b++) {
+      for (int t = 0; t < size_info.time_steps; t++) {
+        lstm_internal::LstmStep(
+            step_info, op_data, kernel_content, buffers);
+        // prepare for the next time step
+        step_info.UpdateTime();
+      }
+      // prepare for the next batch
+      step_info.UpdateBatch();
+      step_info.ResetTime();
+    }
+  }
+  return kTfLiteOk;
+}
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_MICRO_KERNELS_LSTM_EVAL_16ACT_H_
diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/lstm_eval_test.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/lstm_eval_test.h
new file mode 100644
index 0000000..cfaec49
--- /dev/null
+++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/lstm_eval_test.h
@@ -0,0 +1,817 @@
+/* Copyright 2023 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_MICRO_KERNELS_LSTM_EVAL_TEST_H_
+#define TENSORFLOW_LITE_MICRO_KERNELS_LSTM_EVAL_TEST_H_
+
+#include
+#include
+
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/lstm_eval.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/testdata/lstm_test_data.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/test_helpers.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/testing/micro_test.h"
+
+namespace tflite {
+namespace testing {
+
+/*Helper Functions (mainly about mimicking the kernel preparation)*/
+
+// Create fully connected parameters using quantization settings of input and
+// weight tensors.
+// Since TfLiteContext is not available during the kernel test, here we mimic +// (put into stack memory) CalculateOpDataFullyConnected in +// tensorflow/lite/micro/kernels/fully_connected_common.cc +template +tflite::FullyConnectedParams CreateFCParams( + const TensorQuantizationParameters& input_quant_params, + const TensorQuantizationParameters& weight_quant_params, + const float nonlinear_activation_input_scale) { + OpDataFullyConnected data; + const double input_product_scale = + input_quant_params.scale * weight_quant_params.scale; + double effective_scale = + input_product_scale / + static_cast(nonlinear_activation_input_scale); + + QuantizeMultiplier(effective_scale, &data.output_multiplier, + &data.output_shift); + + data.input_zero_point = input_quant_params.zero_point; + + data.filter_zero_point = 0; // symmetrically quantized + data.output_zero_point = 0; // symmetrically quantized + + data.output_activation_min = std::numeric_limits::min(); + data.output_activation_max = std::numeric_limits::max(); + + return tflite::FullyConnectedParamsQuantized(data); +} + +inline tflite::FullyConnectedParams CreateFCParamsFloat() { + FullyConnectedParams op_params; + CalculateActivationRange(kTfLiteActNone, &op_params.float_activation_min, + &op_params.float_activation_max); + return op_params; +} + +// Wrapper function to create gate parameters for the four internal LSTM gates +template +tflite::GateParameters CreateGateParams( + const TensorQuantizationParameters& input_quant_params, + const TensorQuantizationParameters& hidden_state_quant_params, + const GateQuantizationParameters& gate_quantization_settings, + const float nonlinear_activation_input_scale) { + tflite::GateParameters gate_params = {}; + gate_params.input_fc_params = CreateFCParams( + input_quant_params, gate_quantization_settings.activation_weight, + nonlinear_activation_input_scale); + gate_params.recurrent_fc_params = CreateFCParams( + hidden_state_quant_params, gate_quantization_settings.recurrent_weight, + nonlinear_activation_input_scale); + return gate_params; +} + +inline tflite::GateParameters CreateGateParamsFloat() { + tflite::GateParameters gate_params = {}; + gate_params.input_fc_params = CreateFCParamsFloat(); + gate_params.recurrent_fc_params = CreateFCParamsFloat(); + return gate_params; +} +// Create parameters for element wise multiplication that happens in a) cell +// state update ; b) hidden state update +// Note that all the output of gates are symmetrically quantized so only scales +// are required for input. However, during the hidden state update phase, the +// output is the updated hidden state, which is asymmetrically quantized. 
Thus +// output may require zero point +template +tflite::ArithmeticParams CreateInterGateMulParams(const float input1_scale, + const float input2_scale, + const float output_scale, + const int output_zp = 0) { + tflite::ArithmeticParams op_params = {}; + op_params.quantized_activation_min = std::numeric_limits::min(); + op_params.quantized_activation_max = std::numeric_limits::max(); + op_params.input1_offset = 0; + op_params.input2_offset = 0; + op_params.output_offset = output_zp; + + const double input_product_scale = + static_cast(input1_scale) * static_cast(input2_scale); + double effective_scale = + input_product_scale / static_cast(output_scale); + + QuantizeMultiplier(effective_scale, &op_params.output_multiplier, + &op_params.output_shift); + return op_params; +} + +inline tflite::ArithmeticParams CreateInterGateMulParamsFloat() { + tflite::ArithmeticParams op_params = {}; + CalculateActivationRange(kTfLiteActNone, &op_params.float_activation_min, + &op_params.float_activation_max); + return op_params; +} + +// Create the additional information about the cell state, which include: +// cell_state_scale_power: used in integer nonlinear function (e.g., tanh) +// quantized_cell_clip: quantized cell clip range +CellStateInfo CreateLstmCellStateInfo(const float cell_state_scale, + const float cell_clip) { + CellStateInfo cell_state_info; + // cell_state_scale_power: 2^-cell_state_scale_power = cell state scale + int buffer; + tflite::CheckedLog2(cell_state_scale, &buffer); + cell_state_info.cell_state_scale_power = buffer; + // Cell state specifics + cell_state_info.cell_clip = cell_clip; + cell_state_info.quantized_cell_clip = static_cast( + std::min(std::max(static_cast(cell_clip) / + static_cast(cell_state_scale), + -32768.0), + 32767.0)); + return cell_state_info; +} + +// Create LSTMKernelContents from LstmNodeContent by copying TfLiteEvalTensor +// pointers +template +LSTMKernelContents CreateLSTMKernelContent( + LstmNodeContent& + node_contents) { + LSTMKernelContents kernel_content; + // Point to correct tensors + kernel_content.internal_tensors[kLstmInputTensor] = + node_contents.GetEvalTensor(kLstmInputTensor); + kernel_content.internal_tensors[kLstmInputToInputWeightsTensor] = + node_contents.GetEvalTensor(kLstmInputToInputWeightsTensor); + kernel_content.internal_tensors[kLstmInputToForgetWeightsTensor] = + node_contents.GetEvalTensor(kLstmInputToForgetWeightsTensor); + kernel_content.internal_tensors[kLstmInputToCellWeightsTensor] = + node_contents.GetEvalTensor(kLstmInputToCellWeightsTensor); + kernel_content.internal_tensors[kLstmInputToOutputWeightsTensor] = + node_contents.GetEvalTensor(kLstmInputToOutputWeightsTensor); + kernel_content.internal_tensors[kLstmRecurrentToInputWeightsTensor] = + node_contents.GetEvalTensor(kLstmRecurrentToInputWeightsTensor); + kernel_content.internal_tensors[kLstmRecurrentToForgetWeightsTensor] = + node_contents.GetEvalTensor(kLstmRecurrentToForgetWeightsTensor); + kernel_content.internal_tensors[kLstmRecurrentToCellWeightsTensor] = + node_contents.GetEvalTensor(kLstmRecurrentToCellWeightsTensor); + kernel_content.internal_tensors[kLstmRecurrentToOutputWeightsTensor] = + node_contents.GetEvalTensor(kLstmRecurrentToOutputWeightsTensor); + kernel_content.internal_tensors[kLstmInputGateBiasTensor] = + node_contents.GetEvalTensor(kLstmInputGateBiasTensor); + kernel_content.internal_tensors[kLstmForgetGateBiasTensor] = + node_contents.GetEvalTensor(kLstmForgetGateBiasTensor); + kernel_content.internal_tensors[kLstmCellGateBiasTensor] = + 
node_contents.GetEvalTensor(kLstmCellGateBiasTensor); + kernel_content.internal_tensors[kLstmOutputGateBiasTensor] = + node_contents.GetEvalTensor(kLstmOutputGateBiasTensor); + kernel_content.internal_tensors[kLstmOutputStateTensor] = + node_contents.GetEvalTensor(kLstmOutputStateTensor); + kernel_content.internal_tensors[kLstmOutputGateBiasTensor] = + node_contents.GetEvalTensor(kLstmOutputGateBiasTensor); + kernel_content.internal_tensors[kLstmCellStateTensor] = + node_contents.GetEvalTensor(kLstmCellStateTensor); + // Not used internal tensors + kernel_content.internal_tensors[kLstmCellToInputWeightsTensor] = nullptr; + kernel_content.internal_tensors[kLstmCellToForgetWeightsTensor] = nullptr; + kernel_content.internal_tensors[kLstmCellToOutputWeightsTensor] = nullptr; + kernel_content.internal_tensors[kLstmProjectionWeightsTensor] = nullptr; + kernel_content.internal_tensors[kLstmProjectionBiasTensor] = nullptr; + kernel_content.internal_tensors[kLstmInputLayerNormCoefficientsTensor] = + nullptr; + kernel_content.internal_tensors[kLstmForgetLayerNormCoefficientsTensor] = + nullptr; + kernel_content.internal_tensors[kLstmInputLayerNormCoefficientsTensor] = + nullptr; + kernel_content.internal_tensors[kLstmCellLayerNormCoefficientsTensor] = + nullptr; + kernel_content.internal_tensors[kLstmOutputLayerNormCoefficientsTensor] = + nullptr; + // Output tensor + kernel_content.output_tensor = node_contents.OutputEvalTensor(); + return kernel_content; +} + +// Deduce the size information (Batch (B), Time Steps (T), Input dimension (I), +// State dimension (S)) that defines the LSTM using the input and hidden state +// tensor +LstmSizeInfo CreateLstmSizeInfo( + const bool time_major, const TfLiteIntArray* input_tensor_shape, + const TfLiteIntArray* hidden_state_tensor_shape) { + LstmSizeInfo size_info; + size_info.time_major = time_major; + size_info.batch_size = + time_major ? input_tensor_shape->data[1] : input_tensor_shape->data[0]; + size_info.time_steps = + time_major ? input_tensor_shape->data[0] : input_tensor_shape->data[1]; + size_info.input_dimension = input_tensor_shape->data[2]; + size_info.state_dimension = hidden_state_tensor_shape->data[1]; + return size_info; +} + +// Create the LstmOpData using the LstmNodeContent and +// NodeQuantizationParameters (defined in test_data/lstm_test_data) During the +// actual inference phase, OpDataLSTM is created using information from the +// flatbuffer file. 
The test divide the complete LSTM node information into +// LstmNodeContent and NodeQuantizationParameters for easy construction +// purposes +template +OpDataLSTM CreateLstmOpData( + LstmNodeContent& + node_contents) { + const auto& builtin_data = node_contents.BuiltinData(); + const auto& quantization_settings = node_contents.QuantizationSettings(); + OpDataLSTM op_data; + + op_data.cell_gate_nonlinear_type = builtin_data.activation; + op_data.size_info = + CreateLstmSizeInfo(builtin_data.time_major, + node_contents.GetEvalTensor(kLstmInputTensor)->dims, + node_contents.HiddenStateEvalTensor()->dims); + + op_data.cell_state_info = CreateLstmCellStateInfo( + quantization_settings.cell_state.scale, builtin_data.cell_clip); + + // Gate Parameters + op_data.forget_gate_parameters = CreateGateParams( + quantization_settings.input, quantization_settings.hidden_state, + quantization_settings.forget_gate, + quantization_settings.nonlinear_activation_input_scale); + op_data.input_gate_parameters = CreateGateParams( + quantization_settings.input, quantization_settings.hidden_state, + quantization_settings.input_gate, + quantization_settings.nonlinear_activation_input_scale); + op_data.cell_gate_parameters = CreateGateParams( + quantization_settings.input, quantization_settings.hidden_state, + quantization_settings.cell_gate, + quantization_settings.nonlinear_activation_input_scale); + op_data.output_gate_parameters = CreateGateParams( + quantization_settings.input, quantization_settings.hidden_state, + quantization_settings.output_gate, + quantization_settings.nonlinear_activation_input_scale); + // Inter gate multiplication parameters + op_data.inter_gate_parameters.forget_cell_mul_params = + CreateInterGateMulParams( + quantization_settings.nonlinear_activation_output_scale, + quantization_settings.cell_state.scale, + quantization_settings.cell_state.scale); + op_data.inter_gate_parameters.input_mul_params = + CreateInterGateMulParams( + quantization_settings.nonlinear_activation_output_scale, + quantization_settings.nonlinear_activation_output_scale, + quantization_settings.cell_state.scale); + op_data.inter_gate_parameters.output_mul_params = + CreateInterGateMulParams( + quantization_settings.nonlinear_activation_output_scale, + quantization_settings.nonlinear_activation_output_scale, + quantization_settings.hidden_state.scale, + quantization_settings.hidden_state.zero_point); + return op_data; +} + +template +OpDataLSTM CreateLstmOpDataFloat( + LstmNodeContent& node_contents) { + const auto& builtin_data = node_contents.BuiltinData(); + OpDataLSTM op_data; + + op_data.cell_gate_nonlinear_type = builtin_data.activation; + op_data.size_info = + CreateLstmSizeInfo(builtin_data.time_major, + node_contents.GetEvalTensor(kLstmInputTensor)->dims, + node_contents.HiddenStateEvalTensor()->dims); + op_data.cell_state_info.cell_clip = builtin_data.cell_clip; + op_data.cell_state_info.quantized_cell_clip = 0; // No quantization + op_data.cell_state_info.cell_state_scale_power = 0; // No quantization + + // Gate Parameters + op_data.forget_gate_parameters = CreateGateParamsFloat(); + op_data.input_gate_parameters = CreateGateParamsFloat(); + op_data.cell_gate_parameters = CreateGateParamsFloat(); + op_data.output_gate_parameters = CreateGateParamsFloat(); + // Inter gate multiplication parameters + op_data.inter_gate_parameters.forget_cell_mul_params = + CreateInterGateMulParamsFloat(); + op_data.inter_gate_parameters.input_mul_params = + CreateInterGateMulParamsFloat(); + 
op_data.inter_gate_parameters.output_mul_params = + CreateInterGateMulParamsFloat(); + return op_data; +} + +/*Test Functions Below Here*/ +template +void ValidateResultGoldens(const T* golden, const T* output_data, + const int output_len, const float tolerance) { + for (int i = 0; i < output_len; ++i) { + TF_LITE_MICRO_EXPECT_NEAR(golden[i], output_data[i], tolerance); + } +} + +template +void TestCalculateLstmGateFloat(const TfLiteEvalTensor* input, + const TfLiteEvalTensor* input_weight, + const TfLiteEvalTensor* input_bias, + // Recurrent FC + const TfLiteEvalTensor* recurrent, + const TfLiteEvalTensor* recurrent_weight, + const TfLiteEvalTensor* recurrent_bias, + // Result comparison + TfLiteFusedActivation nonlinear_type, + const float* expected_vals, float tolerance) { + float gate_output[batch_size * state_dimension] = {}; + float fc_output_buffer[batch_size * state_dimension] = {}; + + tflite::GateParameters gate_params = CreateGateParamsFloat(); + + // Create step information: only one time step, no need to update + auto size_info = tflite::testing::CreateLstmSizeInfo( + /*time_major*/ false, input->dims, recurrent->dims); + // revise time_major = true to enable batch inference + size_info.time_major = true; + tflite::lstm_internal::LstmStepManager step_info(&size_info); + + tflite::lstm_internal::CalculateLstmGate( + step_info, gate_params, + // Input FC + input, input_weight, input_bias, + // Recurrent FC + recurrent, recurrent_weight, recurrent_bias, + // Output + gate_output, + // Scratch arrays + fc_output_buffer, nonlinear_type); + + ValidateResultGoldens(expected_vals, gate_output, + batch_size * state_dimension, tolerance); +} + +template +void TestCalculateLstmGateInteger( + const TfLiteEvalTensor* input, const TfLiteEvalTensor* input_weight, + const TfLiteEvalTensor* input_bias, + // Recurrent FC + const TfLiteEvalTensor* recurrent, const TfLiteEvalTensor* recurrent_weight, + const TfLiteEvalTensor* recurrent_bias, + // Quantization settings + const NodeQuantizationParameters& node_quantization_settings, + const GateQuantizationParameters& gate_quantization_settings, + // Result comparison + TfLiteFusedActivation nonlinear_type, const float* expected_vals, + float tolerance) { + CellType gate_output[batch_size * state_dimension] = {}; + CellType fc_output_buffer[batch_size * state_dimension] = {}; + + tflite::GateParameters gate_params = CreateGateParams( + node_quantization_settings.input, node_quantization_settings.hidden_state, + gate_quantization_settings, + node_quantization_settings.nonlinear_activation_input_scale); + + // Create step information: only one time step, no need to update + auto size_info = tflite::testing::CreateLstmSizeInfo( + /*time_major*/ false, input->dims, recurrent->dims); + // revise time_major = true to enable batch inference + size_info.time_major = true; + tflite::lstm_internal::LstmStepManager step_info(&size_info); + + // only int8 weight is supported now + tflite::lstm_internal::CalculateLstmGate( + step_info, gate_params, + // Input FC + input, input_weight, input_bias, + // Recurrent FC + recurrent, recurrent_weight, recurrent_bias, + // Output + gate_output, + // Scratch arrays + fc_output_buffer, nonlinear_type); + + float gate_output_float[batch_size * state_dimension] = {}; + Dequantize(gate_output, batch_size * state_dimension, + node_quantization_settings.nonlinear_activation_output_scale, 0, + gate_output_float); + + ValidateResultGoldens(expected_vals, gate_output_float, + batch_size * state_dimension, tolerance); +} + 
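+// Editorial sketch (not part of the TFLM test helpers above; the function
+// name and parameters are illustrative only): the plain float arithmetic that
+// the cell update tests below are checked against, i.e.
+//   updated_cell = forget_gate * cell + input_gate * cell_gate (element wise)
+// followed by the optional clip, as documented for UpdateLstmCell.
+inline void ReferenceCellUpdateSketch(const float* forget_gate,
+                                      const float* input_gate,
+                                      const float* cell_gate, float cell_clip,
+                                      int num_elements, float* cell_state) {
+  for (int i = 0; i < num_elements; ++i) {
+    float updated =
+        forget_gate[i] * cell_state[i] + input_gate[i] * cell_gate[i];
+    if (cell_clip > 0.0f) {
+      // Same clipping convention as CellStateInfo::cell_clip
+      updated = std::min(std::max(updated, -cell_clip), cell_clip);
+    }
+    cell_state[i] = updated;
+  }
+}
+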
+template +void TestUpdateLstmCellFloat( + const GateOutputCheckData& gate_output_data, + LstmNodeContent& node_content, + const float tolerance) { + float buffer[batch_size * state_dimension] = {}; + + auto forget_cell_mul_params = CreateInterGateMulParamsFloat(); + auto input_mul_params = CreateInterGateMulParamsFloat(); + + auto cell_state = node_content.CellStateEvalTensor(); + // Create step information: only one time step, no need to update + auto size_info = tflite::testing::CreateLstmSizeInfo( + /*time_major*/ false, + node_content.GetEvalTensor(tflite::kLstmInputTensor)->dims, + node_content.HiddenStateEvalTensor()->dims); + // revise time_major = true to enable batch inference + size_info.time_major = true; + tflite::lstm_internal::LstmStepManager step_info(&size_info); + + // copy the data since it will be updated + float forget_gate[batch_size * state_dimension] = {}; + std::memcpy(forget_gate, gate_output_data.expected_forget_gate_output, + batch_size * state_dimension * sizeof(float)); + + CellStateInfo cell_state_info; + cell_state_info.cell_clip = node_content.BuiltinData().cell_clip; + // Call the function to be tested + tflite::lstm_internal::UpdateLstmCell( + step_info, cell_state, forget_gate, + gate_output_data.expected_input_gate_output, + gate_output_data.expected_cell_gate_output, forget_cell_mul_params, + input_mul_params, cell_state_info, buffer); + + ValidateResultGoldens(gate_output_data.expected_updated_cell, + tflite::micro::GetTensorData(cell_state), + batch_size * state_dimension, tolerance); +} + +template +void TestUpdateLstmCellInteger( + const GateOutputCheckData& gate_output_data, + LstmNodeContent& node_content, + const float tolerance) { + const auto& quantization_settings = node_content.QuantizationSettings(); + CellType quantized_forget_gate[batch_size * state_dimension] = {}; + tflite::Quantize(gate_output_data.expected_forget_gate_output, + quantized_forget_gate, batch_size * state_dimension, + quantization_settings.nonlinear_activation_output_scale, 0); + + CellType quantized_input_gate[batch_size * state_dimension] = {}; + tflite::Quantize(gate_output_data.expected_input_gate_output, + quantized_input_gate, batch_size * state_dimension, + quantization_settings.nonlinear_activation_output_scale, 0); + + CellType quantized_cell_gate[batch_size * state_dimension] = {}; + tflite::Quantize(gate_output_data.expected_cell_gate_output, + quantized_cell_gate, batch_size * state_dimension, + quantization_settings.nonlinear_activation_output_scale, 0); + + CellType buffer[batch_size * state_dimension] = {}; + + auto forget_cell_mul_params = CreateInterGateMulParams( + quantization_settings.nonlinear_activation_output_scale, + quantization_settings.cell_state.scale, + quantization_settings.cell_state.scale); + auto input_mul_params = CreateInterGateMulParams( + quantization_settings.nonlinear_activation_output_scale, + quantization_settings.nonlinear_activation_output_scale, + quantization_settings.cell_state.scale); + + auto cell_state_info = + CreateLstmCellStateInfo(quantization_settings.cell_state.scale, + node_content.BuiltinData().cell_clip); + + auto cell_state = node_content.CellStateEvalTensor(); + // Create step information: only one time step, no need to update + auto size_info = tflite::testing::CreateLstmSizeInfo( + /*time_major*/ false, + node_content.GetEvalTensor(tflite::kLstmInputTensor)->dims, + node_content.HiddenStateEvalTensor()->dims); + // revise time_major = true to enable batch inference + size_info.time_major = true; + 
tflite::lstm_internal::LstmStepManager step_info(&size_info); + + // Call the function to be tested + tflite::lstm_internal::UpdateLstmCell( + step_info, cell_state, quantized_forget_gate, quantized_input_gate, + quantized_cell_gate, forget_cell_mul_params, input_mul_params, + cell_state_info, buffer); + + float cell_state_float[batch_size * state_dimension] = {}; + Dequantize(tflite::micro::GetTensorData(cell_state), + batch_size * state_dimension, + quantization_settings.cell_state.scale, + quantization_settings.cell_state.zero_point, cell_state_float); + + ValidateResultGoldens(gate_output_data.expected_updated_cell, + cell_state_float, batch_size * state_dimension, + tolerance); +} + +template +void TestUpdateLstmHiddenFloat( + const GateOutputCheckData& gate_output_data, + LstmNodeContent& node_content, + const float tolerance) { + float buffer[batch_size * state_dimension] = {}; + + auto mul_params = CreateInterGateMulParamsFloat(); + + int32_t cell_state_scale_power = 0; + + // Create step information: only one time step, no need to update + auto size_info = tflite::testing::CreateLstmSizeInfo( + /*time_major*/ false, + node_content.GetEvalTensor(tflite::kLstmInputTensor)->dims, + node_content.HiddenStateEvalTensor()->dims); + // revise time_major = true to enable batch inference + size_info.time_major = true; + tflite::lstm_internal::LstmStepManager step_info(&size_info); + + auto cell_state = node_content.CellStateEvalTensor(); + auto hidden_state = node_content.HiddenStateEvalTensor(); + + tflite::lstm_internal::UpdateLstmHidden( + step_info, cell_state, hidden_state, + gate_output_data.expected_output_gate_output, mul_params, + cell_state_scale_power, buffer); + + ValidateResultGoldens(gate_output_data.expected_updated_hidden, + tflite::micro::GetTensorData(hidden_state), + batch_size * state_dimension, tolerance); +} + +template +void TestUpdateLstmHiddenInteger( + const GateOutputCheckData& gate_output_data, + LstmNodeContent& node_content, + const float tolerance) { + const auto& quantization_settings = node_content.QuantizationSettings(); + CellType quantized_output_gate[batch_size * state_dimension] = {}; + tflite::Quantize(gate_output_data.expected_output_gate_output, + quantized_output_gate, batch_size * state_dimension, + quantization_settings.nonlinear_activation_output_scale, 0); + + CellType buffer[batch_size * state_dimension] = {}; + + auto mul_params = CreateInterGateMulParams( + quantization_settings.nonlinear_activation_output_scale, + quantization_settings.nonlinear_activation_output_scale, + quantization_settings.hidden_state.scale, + quantization_settings.hidden_state.zero_point); + + int cell_state_scale_power_buffer; + tflite::CheckedLog2(quantization_settings.cell_state.scale, + &cell_state_scale_power_buffer); + int32_t cell_state_scale_power = cell_state_scale_power_buffer; + + // Create step information: only one time step, no need to update + auto size_info = tflite::testing::CreateLstmSizeInfo( + /*time_major*/ false, + node_content.GetEvalTensor(tflite::kLstmInputTensor)->dims, + node_content.HiddenStateEvalTensor()->dims); + // revise time_major = true to enable batch inference + size_info.time_major = true; + tflite::lstm_internal::LstmStepManager step_info(&size_info); + + auto cell_state = node_content.CellStateEvalTensor(); + auto hidden_state = node_content.HiddenStateEvalTensor(); + + tflite::lstm_internal::UpdateLstmHidden( + step_info, cell_state, hidden_state, quantized_output_gate, mul_params, + cell_state_scale_power, buffer); + + float 
hidden_state_float[batch_size * state_dimension] = {}; + Dequantize(tflite::micro::GetTensorData(hidden_state), + batch_size * state_dimension, + quantization_settings.hidden_state.scale, + quantization_settings.hidden_state.zero_point, hidden_state_float); + + ValidateResultGoldens(gate_output_data.expected_updated_hidden, + hidden_state_float, batch_size * state_dimension, + tolerance); +} + +template +void TestLstmStepFloat( + const GateOutputCheckData& gate_output_data, + const float hidden_state_tolerance, const float cell_state_tolerance, + /*can not be const, state will be updated*/ + LstmNodeContent& node_contents) { + // Mimicking the kernel preparation phase, node_contents approximate the + LSTMKernelContents kernel_content = CreateLSTMKernelContent(node_contents); + LSTMBuffers buffers; + // Scratch buffers on the stack + float buffer0[batch_size * state_dimension] = {}; + buffers.buffer0 = buffer0; + float buffer1[batch_size * state_dimension] = {}; + buffers.buffer1 = buffer1; + float buffer2[batch_size * state_dimension] = {}; + buffers.buffer2 = buffer2; + float buffer3[batch_size * state_dimension] = {}; + buffers.buffer3 = buffer3; + + OpDataLSTM op_data = CreateLstmOpDataFloat(node_contents); + // set time_major to true to test batch inference + op_data.size_info.time_major = true; + tflite::lstm_internal::LstmStepManager step_info(&op_data.size_info); + tflite::lstm_internal::LstmStep( + step_info, op_data, kernel_content, buffers); + + ValidateResultGoldens( + gate_output_data.expected_updated_hidden, + tflite::micro::GetTensorData(kernel_content.HiddenStateTensor()), + batch_size * state_dimension, hidden_state_tolerance); + ValidateResultGoldens( + gate_output_data.expected_updated_cell, + tflite::micro::GetTensorData(kernel_content.CellStateTensor()), + batch_size * state_dimension, cell_state_tolerance); +} + +template +void TestLstmStepInteger( + const GateOutputCheckData& gate_output_data, + const float hidden_state_tolerance, const float cell_state_tolerance, + /*can not be const, state will be updated*/ + LstmNodeContent& + node_contents) { + // Mimicking the kernel preparation phase, node_contents approximate the + LSTMKernelContents kernel_content = CreateLSTMKernelContent(node_contents); + LSTMBuffers buffers; + + // Scratch buffers on the stack + CellType buffer0[batch_size * state_dimension] = {}; + buffers.buffer0 = buffer0; + CellType buffer1[batch_size * state_dimension] = {}; + buffers.buffer1 = buffer1; + CellType buffer2[batch_size * state_dimension] = {}; + buffers.buffer2 = buffer2; + CellType buffer3[batch_size * state_dimension] = {}; + buffers.buffer3 = buffer3; + + OpDataLSTM op_data = CreateLstmOpData(node_contents); + // set time_major to true to test batch inference + op_data.size_info.time_major = true; + tflite::lstm_internal::LstmStepManager step_info(&op_data.size_info); + tflite::lstm_internal::LstmStep(step_info, op_data, kernel_content, + buffers); + + const auto& quantization_settings = node_contents.QuantizationSettings(); + float dequantized_hidden_state[batch_size * state_dimension] = {}; + Dequantize( + tflite::micro::GetTensorData( + kernel_content.HiddenStateTensor()), + batch_size * state_dimension, quantization_settings.hidden_state.scale, + quantization_settings.hidden_state.zero_point, dequantized_hidden_state); + + float dequantized_cell_state[batch_size * state_dimension] = {}; + Dequantize( + tflite::micro::GetTensorData(kernel_content.CellStateTensor()), + batch_size * state_dimension, 
quantization_settings.cell_state.scale, + quantization_settings.cell_state.zero_point, dequantized_cell_state); + + ValidateResultGoldens(gate_output_data.expected_updated_hidden, + dequantized_hidden_state, batch_size * state_dimension, + hidden_state_tolerance); + ValidateResultGoldens(gate_output_data.expected_updated_cell, + dequantized_cell_state, batch_size * state_dimension, + cell_state_tolerance); +} + +template +void TestEvalLstmFloat( + const LstmEvalCheckData< + batch_size * time_steps * input_dimension, batch_size * state_dimension, + batch_size * state_dimension * time_steps>& eval_check_data, + const float hidden_state_tolerance, const float cell_state_tolerance, + LstmNodeContent& node_contents) { + // Mimicking the kernel preparation phase, node_contents approximate the node + LSTMKernelContents kernel_content = CreateLSTMKernelContent(node_contents); + // Scratch buffers on the stack + LSTMBuffers buffers; + float buffer0[batch_size * state_dimension] = {}; + buffers.buffer0 = buffer0; + float buffer1[batch_size * state_dimension] = {}; + buffers.buffer1 = buffer1; + float buffer2[batch_size * state_dimension] = {}; + buffers.buffer2 = buffer2; + float buffer3[batch_size * state_dimension] = {}; + buffers.buffer3 = buffer3; + + OpDataLSTM op_data = CreateLstmOpDataFloat(node_contents); + + tflite::EvalLstm(op_data, kernel_content, + buffers); + + ValidateResultGoldens(eval_check_data.expected_hidden_state, + node_contents.GetHiddenStateData(), + batch_size * state_dimension, hidden_state_tolerance); + + ValidateResultGoldens(eval_check_data.expected_cell_state, + node_contents.GetCellStateData(), + batch_size * state_dimension, cell_state_tolerance); + + ValidateResultGoldens(eval_check_data.expected_output, + node_contents.GetOutputData(), + batch_size * state_dimension, hidden_state_tolerance); +} + +template +void TestEvalLstmInteger( + const LstmEvalCheckData< + batch_size * time_steps * input_dimension, batch_size * state_dimension, + batch_size * state_dimension * time_steps>& eval_check_data, + const float hidden_state_tolerance, const float cell_state_tolerance, + LstmNodeContent& + node_contents) { + // Mimicking the kernel preparation phase, node_contents approximate the node + LSTMKernelContents kernel_content = CreateLSTMKernelContent(node_contents); + // Scratch buffers on the stack + LSTMBuffers buffers; + CellType buffer0[batch_size * state_dimension] = {}; + buffers.buffer0 = buffer0; + CellType buffer1[batch_size * state_dimension] = {}; + buffers.buffer1 = buffer1; + CellType buffer2[batch_size * state_dimension] = {}; + buffers.buffer2 = buffer2; + CellType buffer3[batch_size * state_dimension] = {}; + buffers.buffer3 = buffer3; + + OpDataLSTM op_data = CreateLstmOpData(node_contents); + + tflite::EvalLstm( + op_data, kernel_content, buffers); + + const auto& quantization_settings = node_contents.QuantizationSettings(); + float dequantized_hidden_state[batch_size * state_dimension] = {}; + Dequantize(node_contents.GetHiddenStateData(), batch_size * state_dimension, + quantization_settings.hidden_state.scale, + quantization_settings.hidden_state.zero_point, + dequantized_hidden_state); + + ValidateResultGoldens(eval_check_data.expected_hidden_state, + dequantized_hidden_state, batch_size * state_dimension, + hidden_state_tolerance); + + float dequantized_cell_state[batch_size * state_dimension] = {}; + Dequantize(node_contents.GetCellStateData(), batch_size * state_dimension, + quantization_settings.cell_state.scale, + 
quantization_settings.cell_state.zero_point, + dequantized_cell_state); + ValidateResultGoldens(eval_check_data.expected_cell_state, + dequantized_cell_state, batch_size * state_dimension, + cell_state_tolerance); + + float dequantized_output[batch_size * state_dimension * time_steps] = {}; + Dequantize(node_contents.GetOutputData(), + batch_size * state_dimension * time_steps, + quantization_settings.output.scale, + quantization_settings.output.zero_point, dequantized_output); + ValidateResultGoldens(eval_check_data.expected_output, dequantized_output, + batch_size * state_dimension, hidden_state_tolerance); +} + +} // namespace testing +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_KERNELS_LSTM_EVAL_TEST_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/lstm_shared.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/lstm_shared.h new file mode 100644 index 0000000..54020f6 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/lstm_shared.h @@ -0,0 +1,150 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_MICRO_KERNELS_LSTM_SHARED_H_ +#define TENSORFLOW_LITE_MICRO_KERNELS_LSTM_SHARED_H_ + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" + +namespace tflite { + +// Input Tensors of size {n_batch, n_input} +constexpr int kLstmInputTensor = 0; + +// Input weight tensors of size: {n_cell, n_input} +constexpr int kLstmInputToInputWeightsTensor = 1; // Optional +constexpr int kLstmInputToForgetWeightsTensor = 2; +constexpr int kLstmInputToCellWeightsTensor = 3; +constexpr int kLstmInputToOutputWeightsTensor = 4; + +// Recurrent weight tensors of size {n_cell, n_output} +constexpr int kLstmRecurrentToInputWeightsTensor = 5; // Optional +constexpr int kLstmRecurrentToForgetWeightsTensor = 6; +constexpr int kLstmRecurrentToCellWeightsTensor = 7; +constexpr int kLstmRecurrentToOutputWeightsTensor = 8; + +// Peephole weights tensors of size {n_cell}, representing a diagonal matrix. +constexpr int kLstmCellToInputWeightsTensor = 9; // Optional +constexpr int kLstmCellToForgetWeightsTensor = 10; // Optional +constexpr int kLstmCellToOutputWeightsTensor = 11; // Optional + +// Gates bias tensors of size {n_cell} +constexpr int kLstmInputGateBiasTensor = 12; // Optional +constexpr int kLstmForgetGateBiasTensor = 13; +constexpr int kLstmCellGateBiasTensor = 14; +constexpr int kLstmOutputGateBiasTensor = 15; + +// Projection weight tensor of size {n_output, n_cell} +constexpr int kLstmProjectionWeightsTensor = 16; // Optional +// Projection bias tensor of size {n_output} +constexpr int kLstmProjectionBiasTensor = 17; // Optional + +// These state tensors are defined as variable tensors, and will be modified by +// this op. 
+constexpr int kLstmOutputStateTensor = 18;
+constexpr int kLstmCellStateTensor = 19;
+
+// Layer norm coefficient tensors of size {n_cell}, representing a diagonal
+// matrix.
+constexpr int kLstmInputLayerNormCoefficientsTensor = 20;   // Optional
+constexpr int kLstmForgetLayerNormCoefficientsTensor = 21;  // Optional
+constexpr int kLstmCellLayerNormCoefficientsTensor = 22;    // Optional
+constexpr int kLstmOutputLayerNormCoefficientsTensor = 23;  // Optional
+
+// Output tensors.
+constexpr int kLstmOutputTensor = 0;
+
+// Parameters for the two fully connected computations inside each gate
+struct GateParameters {
+  FullyConnectedParams input_fc_params;
+  FullyConnectedParams recurrent_fc_params;
+};
+
+// Parameters for the element wise multiplications between gate outputs
+struct InterGateParameters {
+  ArithmeticParams forget_cell_mul_params;
+  ArithmeticParams input_mul_params;
+  ArithmeticParams output_mul_params;
+};
+
+// Size information about the LSTM kernel, which is deduced from tensors stored
+// in the flatbuffer file.
+struct LstmSizeInfo {
+  bool time_major;
+  int batch_size;
+  int time_steps;
+  int input_dimension;
+  int state_dimension;
+};
+
+// Contains information about the cell state tensor
+struct CellStateInfo {
+  float cell_clip;
+  // Clipping range for the cell state; only a 16-bit cell state is supported
+  // (could be generalized through templatization)
+  int16_t quantized_cell_clip;
+  // 2^-cell_state_scale_power = cell state scale, required by the integer tanh
+  // computation
+  int32_t cell_state_scale_power;
+};
+
+// Contains the required computation information for LSTM kernel evaluation.
+// Specifically, it includes shape and quantization settings for the LSTM
+// internal operations. Formatted to support operations defined in
+// tensorflow/lite/kernels/internal/reference/integer_ops.
+// Should be constructed during the preparation phase.
+struct OpDataLSTM {
+  LstmSizeInfo size_info;
+  CellStateInfo cell_state_info;
+  TfLiteFusedActivation cell_gate_nonlinear_type;
+  GateParameters forget_gate_parameters;
+  GateParameters input_gate_parameters;
+  GateParameters cell_gate_parameters;
+  GateParameters output_gate_parameters;
+  InterGateParameters inter_gate_parameters;
+  int buffer_indices[4];  // TFLM only
+};
+
+// Provides an interface to access the internal tensors and buffers used for
+// LSTM invocation. Constructed during the invocation phase.
+struct LSTMKernelContents {
+ public:
+  // Internal tensors, fixed (const). See lstm_shared.h for tensor names.
+  const TfLiteEvalTensor* GetInternalTensor(const int tensor_index) const {
+    return internal_tensors[tensor_index];
+  }
+  // Variable tensors (will be changed, cannot be const)
+  TfLiteEvalTensor* HiddenStateTensor() {
+    return internal_tensors[kLstmOutputStateTensor];
+  }
+  TfLiteEvalTensor* CellStateTensor() {
+    return internal_tensors[kLstmCellStateTensor];
+  }
+  // Node internal tensors with indices defined at the beginning of the file
+  TfLiteEvalTensor* internal_tensors[24];
+  TfLiteEvalTensor* output_tensor;
+};
+
+template
+struct LSTMBuffers {
+  // TFLM buffers require buffer indices from LstmOpData.
+ CellType* buffer0; + CellType* buffer1; + CellType* buffer2; + CellType* buffer3; +}; + +} // namespace tflite +#endif // TENSORFLOW_LITE_MICRO_KERNELS_LSTM_SHARED_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/maximum_minimum.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/maximum_minimum.cpp new file mode 100644 index 0000000..c003e68 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/maximum_minimum.cpp @@ -0,0 +1,122 @@ +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/maximum_minimum.h" + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +namespace tflite { + +namespace { + +// This file has a reference implementation of TFMaximum/TFMinimum. +enum KernelType { + kReference, +}; + +constexpr int kInputTensor1 = 0; +constexpr int kInputTensor2 = 1; +constexpr int kOutputTensor = 0; + +struct OpContext { + OpContext(TfLiteContext* context, TfLiteNode* node) { + input1 = tflite::micro::GetEvalInput(context, node, kInputTensor1); + input2 = tflite::micro::GetEvalInput(context, node, kInputTensor2); + output = tflite::micro::GetEvalOutput(context, node, kOutputTensor); + } + const TfLiteEvalTensor* input1; + const TfLiteEvalTensor* input2; + TfLiteEvalTensor* output; +}; + +struct MaximumOp { + template + static data_type op(data_type el1, data_type el2) { + return el1 > el2 ? el1 : el2; + } +}; + +struct MinimumOp { + template + static data_type op(data_type el1, data_type el2) { + return el1 < el2 ? 
el1 : el2; + } +}; + +template +void TFLiteOperation(TfLiteContext* context, TfLiteNode* node, + const OpContext& op_context) { + reference_ops::MaximumMinimumBroadcastSlow( + tflite::micro::GetTensorShape(op_context.input1), + tflite::micro::GetTensorData(op_context.input1), + tflite::micro::GetTensorShape(op_context.input2), + tflite::micro::GetTensorData(op_context.input2), + tflite::micro::GetTensorShape(op_context.output), + tflite::micro::GetTensorData(op_context.output), + op_type::template op); +} + +template +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + OpContext op_context(context, node); + + if (kernel_type == kReference) { + switch (op_context.output->type) { + case kTfLiteFloat32: + TFLiteOperation(context, node, op_context); + break; + case kTfLiteInt8: + TFLiteOperation(context, node, op_context); + break; + case kTfLiteInt32: + TFLiteOperation(context, node, op_context); + break; + case kTfLiteInt64: + TFLiteOperation(context, node, op_context); + break; + default: + MicroPrintf("Type %s (%d) is not supported by Maximum/Minimum.", + TfLiteTypeGetName(op_context.output->type), + op_context.output->type); + return kTfLiteError; + } + } else { + MicroPrintf("Kernel type not supported by Maximum/Minimum."); + return kTfLiteError; + } + return kTfLiteOk; +} + +} // namespace + +TfLiteRegistration Register_MAXIMUM() { + return tflite::micro::RegisterOp(nullptr, nullptr, + Eval); +} + +TfLiteRegistration Register_MINIMUM() { + return tflite::micro::RegisterOp(nullptr, nullptr, + Eval); +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/micro_ops.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/micro_ops.h new file mode 100644 index 0000000..fd28a32 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/micro_ops.h @@ -0,0 +1,152 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_MICRO_KERNELS_MICRO_OPS_H_ +#define TENSORFLOW_LITE_MICRO_KERNELS_MICRO_OPS_H_ + +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" + +// Forward declaration of all micro op kernel registration methods. These +// registrations are included with the standard `BuiltinOpResolver`. +// +// This header is particularly useful in cases where only a subset of ops are +// needed. In such cases, the client can selectively add only the registrations +// their model requires, using a custom `(Micro)MutableOpResolver`. Selective +// registration in turn allows the linker to strip unused kernels. + +namespace tflite { + +// TFLM is incrementally moving towards a flat tflite namespace +// (https://abseil.io/tips/130). Any new ops (or cleanup of existing ops should +// have their Register function declarations in the tflite namespace. 
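+//
+// Illustrative usage sketch (editorial example only; the op set, model, and
+// arena names below are assumptions, not part of this header):
+//
+//   static tflite::MicroMutableOpResolver<3> resolver;
+//   resolver.AddFullyConnected();
+//   resolver.AddSoftmax();
+//   resolver.AddUnidirectionalSequenceLSTM();
+//   tflite::MicroInterpreter interpreter(model, resolver, tensor_arena,
+//                                        kTensorArenaSize);
+//   interpreter.AllocateTensors();
+//
+// Registering only the kernels a model needs lets the linker strip the rest.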
+ +TfLiteRegistration Register_ADD(); +TfLiteRegistration Register_ADD_N(); +TfLiteRegistration Register_ARG_MAX(); +TfLiteRegistration Register_ARG_MIN(); +TfLiteRegistration Register_ASSIGN_VARIABLE(); +TfLiteRegistration Register_AVERAGE_POOL_2D(); +TfLiteRegistration Register_BATCH_MATMUL(); +TfLiteRegistration Register_BATCH_TO_SPACE_ND(); +TfLiteRegistration Register_BROADCAST_ARGS(); +TfLiteRegistration Register_BROADCAST_TO(); +TfLiteRegistration Register_CALL_ONCE(); +TfLiteRegistration Register_CAST(); +TfLiteRegistration Register_CEIL(); +TfLiteRegistration Register_COMPLEX_ABS(); +// TODO(b/160234179): Change custom OPs to also return by value. +TfLiteRegistration* Register_CIRCULAR_BUFFER(); +TfLiteRegistration Register_CONCATENATION(); +TfLiteRegistration Register_CONV_2D(); +TfLiteRegistration Register_CUMSUM(); +TfLiteRegistration Register_DEPTH_TO_SPACE(); +TfLiteRegistration Register_DEPTHWISE_CONV_2D(); +TfLiteRegistration Register_DEQUANTIZE(); +TfLiteRegistration Register_DIV(); +TfLiteRegistration Register_ELU(); +TfLiteRegistration Register_EQUAL(); +TfLiteRegistration* Register_ETHOSU(); +TfLiteRegistration Register_EXP(); +TfLiteRegistration Register_EXPAND_DIMS(); +TfLiteRegistration Register_FILL(); +TfLiteRegistration Register_FLOOR(); +TfLiteRegistration Register_FLOOR_DIV(); +TfLiteRegistration Register_FLOOR_MOD(); +TfLiteRegistration Register_FULLY_CONNECTED(); +#ifndef TF_LITE_STATIC_MEMORY +TfLiteRegistration Register_GATHER(); +#endif // TF_LITE_STATIC_MEMORY +TfLiteRegistration Register_GATHER_ND(); +TfLiteRegistration Register_GREATER(); +TfLiteRegistration Register_GREATER_EQUAL(); +TfLiteRegistration Register_HARD_SWISH(); +TfLiteRegistration Register_IMAG(); +TfLiteRegistration Register_IF(); +TfLiteRegistration Register_L2_POOL_2D(); +TfLiteRegistration Register_LEAKY_RELU(); +TfLiteRegistration Register_LESS(); +TfLiteRegistration Register_LESS_EQUAL(); +TfLiteRegistration Register_LOG_SOFTMAX(); +TfLiteRegistration Register_LOGICAL_AND(); +TfLiteRegistration Register_LOGICAL_OR(); +TfLiteRegistration Register_LOGISTIC(); +TfLiteRegistration Register_MAX_POOL_2D(); +TfLiteRegistration Register_MAXIMUM(); +TfLiteRegistration Register_MEAN(); +TfLiteRegistration Register_MINIMUM(); +TfLiteRegistration Register_MIRROR_PAD(); +TfLiteRegistration Register_MUL(); +TfLiteRegistration Register_NEG(); +TfLiteRegistration Register_NOT_EQUAL(); +TfLiteRegistration Register_PACK(); +TfLiteRegistration Register_PAD(); +TfLiteRegistration Register_PADV2(); +TfLiteRegistration Register_PRELU(); +TfLiteRegistration Register_QUANTIZE(); +TfLiteRegistration Register_READ_VARIABLE(); +TfLiteRegistration Register_REAL(); +TfLiteRegistration Register_REDUCE_MAX(); +TfLiteRegistration Register_REDUCE_MIN(); +TfLiteRegistration Register_RELU(); +TfLiteRegistration Register_RELU6(); +TfLiteRegistration Register_RESIZE_BILINEAR(); +TfLiteRegistration Register_RFFT2D(); +#ifndef TF_LITE_STATIC_MEMORY +TfLiteRegistration Register_SELECT(); +TfLiteRegistration Register_SELECT_V2(); +#endif // TF_LITE_STATIC_MEMORY +TfLiteRegistration Register_SHAPE(); +TfLiteRegistration Register_SLICE(); +TfLiteRegistration Register_SOFTMAX(); +TfLiteRegistration Register_SPACE_TO_BATCH_ND(); +TfLiteRegistration Register_SPACE_TO_DEPTH(); +TfLiteRegistration Register_SPLIT_V(); +TfLiteRegistration Register_SQUARED_DIFFERENCE(); +TfLiteRegistration Register_SQUEEZE(); +TfLiteRegistration Register_STRIDED_SLICE(); +TfLiteRegistration Register_SUB(); +TfLiteRegistration Register_SUM(); 
+TfLiteRegistration Register_SVDF(); +TfLiteRegistration Register_TRANSPOSE(); +TfLiteRegistration Register_TRANSPOSE_CONV(); +// TODO(b/230666079): resolve conflict with xtensa implementation +TfLiteRegistration Register_UNIDIRECTIONAL_SEQUENCE_LSTM(); +TfLiteRegistration Register_VAR_HANDLE(); +TfLiteRegistration Register_WHILE(); +TfLiteRegistration Register_ZEROS_LIKE(); + +namespace ops { +namespace micro { + +TfLiteRegistration Register_ABS(); +TfLiteRegistration Register_COS(); +TfLiteRegistration Register_LOG(); +TfLiteRegistration Register_LOGICAL_NOT(); +TfLiteRegistration Register_RESHAPE(); +TfLiteRegistration Register_RESIZE_NEAREST_NEIGHBOR(); +TfLiteRegistration Register_ROUND(); +TfLiteRegistration Register_RSQRT(); +TfLiteRegistration Register_SIN(); +TfLiteRegistration Register_SPLIT(); +TfLiteRegistration Register_SQRT(); +TfLiteRegistration Register_SQUARE(); +TfLiteRegistration Register_UNPACK(); +TfLiteRegistration Register_L2_NORMALIZATION(); +TfLiteRegistration Register_TANH(); + +} // namespace micro +} // namespace ops +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_KERNELS_MICRO_OPS_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/micro_tensor_utils.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/micro_tensor_utils.cpp new file mode 100644 index 0000000..14be12c --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/micro_tensor_utils.cpp @@ -0,0 +1,67 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/micro_tensor_utils.h" + +#include +#include +#include +#include +#include +#include + +#include "edge-impulse-sdk/third_party/gemmlowp/fixedpoint/fixedpoint.h" // from @gemmlowp +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/cppmath.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h" + +namespace tflite { + +// Apply sigmoid to elements of a vector. 
+void PortableApplySigmoidToVector(const float* vector, int v_size, + float* result) { + for (int v = 0; v < v_size; v++) { + result[v] = 1.0f / (1.0f + std::exp(-vector[v])); + } +} + +void PortableApplyTanhToVector(const float* vector, int v_size, float* result) { + for (int v = 0; v < v_size; v++) { + result[v] = std::tanh(vector[v]); + } +} + +void PortableApplyActivationToVector(const float* vector, int v_size, + TfLiteFusedActivation activation, + float* result) { + switch (activation) { + case kTfLiteActNone: + return; + case kTfLiteActRelu: + return tflite::tensor_utils::ApplyReluToVector(vector, v_size, result); + case kTfLiteActReluN1To1: + return tflite::tensor_utils::ApplyRelu1ToVector(vector, v_size, result); + case kTfLiteActRelu6: + return tflite::tensor_utils::ApplyRelu6ToVector(vector, v_size, result); + case kTfLiteActTanh: + return PortableApplyTanhToVector(vector, v_size, result); + case kTfLiteActSignBit: + return tflite::tensor_utils::ApplySignbitToVector(vector, v_size, result); + case kTfLiteActSigmoid: + return PortableApplySigmoidToVector(vector, v_size, result); + } +} + +} // namespace tflite \ No newline at end of file diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/micro_tensor_utils.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/micro_tensor_utils.h new file mode 100644 index 0000000..fb3d97f --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/micro_tensor_utils.h @@ -0,0 +1,56 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// This file and the associated .cc file is branched from +// tensorflow/lite/kernels/internal/reference_portable_tensor_utils* +// TFLM needs to create its own because the original files are coupled with +// the tensor_utils module, which we cannot reuse due to its use of the +// Eigen library. + +#ifndef TENSORFLOW_LITE_MICRO_KERNELS_MICRO_TENSOR_UTILS_H_ +#define TENSORFLOW_LITE_MICRO_KERNELS_MICRO_TENSOR_UTILS_H_ + +#include +#include +#include + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/portable_tensor_utils.h" + +#if defined(_MSC_VER) +#define __restrict__ __restrict +#endif + +namespace tflite { + +// Not all backends support CpuBackendContext usage, so forward declare to avoid +// pulling in its implementation. +// TODO(b/230666277): consider removing this since micro does not utilize it +class CpuBackendContext; + +// Apply sigmoid to elements of a vector. +void PortableApplySigmoidToVector(const float* vector, int v_size, + float* result); +// Apply tanh to elements of a vector +void PortableApplyTanhToVector(const float* vector, int v_size, float* result); +// Apply appropriate activation function to elements of a vector. 
+void PortableApplyActivationToVector(const float* vector, int v_size, + TfLiteFusedActivation activation, + float* result); + +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_KERNELS_MICRO_TENSOR_UTILS_H_ \ No newline at end of file diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/mirror_pad.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/mirror_pad.cpp new file mode 100644 index 0000000..c409fcc --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/mirror_pad.cpp @@ -0,0 +1,215 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" + +namespace tflite { +namespace { + +struct OpDataMirrorPad { + int input_dims; + int output_size; + int offset; + int output_dims_num_elements_buffer_index; + int input_dims_num_elements_buffer_index; +}; + +// Helper method that fills the left and right pads. +template +inline void GetPadding(const T* data, int offset, int64_t* left_pad, + int64_t* right_pad) { + *left_pad = static_cast(*(data + offset * 2)); + *right_pad = static_cast(*(data + offset * 2 + 1)); +} + +// Given dimension index and the left/right padding. +// Returns the corresponding dimension in the input array. +inline int GetInputDimension(int padded_dimension, int left_pad, int right_pad, + int input_dim_size, int offset) { + if (padded_dimension < left_pad) { + const int original_ind = left_pad + offset - 1; + return original_ind - (std::min(padded_dimension, original_ind - offset)); + } + padded_dimension -= left_pad; + if (padded_dimension >= input_dim_size) { + padded_dimension -= input_dim_size; + const int original_ind = input_dim_size - (1 + offset); + return original_ind - std::min(padded_dimension, original_ind); + } + return padded_dimension; +} + +// Given and index in output array, returns the index of the value +// in input array. 
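+// Worked 1-D example (illustrative): for input {1, 2, 3} with paddings
+// {2, 2}, REFLECT mode (offset == 1) maps padded positions 0..6 to input
+// indices {2, 1, 0, 1, 2, 1, 0}, i.e. output {3, 2, 1, 2, 3, 2, 1};
+// SYMMETRIC mode (offset == 0) maps them to {1, 0, 0, 1, 2, 2, 1}, i.e.
+// output {2, 1, 1, 2, 3, 3, 2}.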
+int GetFlatIndex(int index, int num_dims, + const TfLiteEvalTensor* padding_matrix, + const TfLiteIntArray* input_dims, + int* output_dims_num_elements, int* input_dims_num_elements, + const int offset) { + int flat_index = 0; + int64_t left_pad = 0, right_pad = 0, dimension_index, index_in_input; + + for (int i = 0; i < num_dims; ++i) { + switch (padding_matrix->type) { + case kTfLiteInt32: + GetPadding(padding_matrix->data.i32, i, &left_pad, &right_pad); + break; + case kTfLiteInt64: + GetPadding(padding_matrix->data.i64, i, &left_pad, &right_pad); + break; + default: + break; + } + dimension_index = index / output_dims_num_elements[i]; + + index_in_input = GetInputDimension(dimension_index, left_pad, right_pad, + input_dims->data[i], offset); + + flat_index += index_in_input * (input_dims_num_elements)[i]; + index %= output_dims_num_elements[i]; + } + + return flat_index; +} + +template +void MirrorPad(const TfLiteEvalTensor* padding_matrix, + const TfLiteIntArray* input_dims, int* output_dims_num_elements, + int* input_dims_num_elements, const T* input_data, + T* output_data, const int offset, const int num_dims, + const int output_size) { + for (int i = 0; i < output_size; ++i) { + output_data[i] = input_data[GetFlatIndex( + i, num_dims, padding_matrix, input_dims, output_dims_num_elements, + input_dims_num_elements, offset)]; + } +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + TfLiteStatus status = kTfLiteOk; + const OpDataMirrorPad* data = + static_cast(node->user_data); + + const TfLiteEvalTensor* input_tensor = + tflite::micro::GetEvalInput(context, node, 0); + const TfLiteEvalTensor* padding_matrix = + tflite::micro::GetEvalInput(context, node, 1); + + TfLiteEvalTensor* output_tensor = + tflite::micro::GetEvalOutput(context, node, 0); + const int input_dims = data->input_dims; + const int output_size = data->output_size; + + int* input_dims_num_elements = (int*)context->GetScratchBuffer( + context, data->input_dims_num_elements_buffer_index); + int* output_dims_num_elements = (int*)context->GetScratchBuffer( + context, data->output_dims_num_elements_buffer_index); + + for (int i = 0; i < input_dims; i++) { + output_dims_num_elements[i] = 1; + input_dims_num_elements[i] = 1; + } + + for (int i = input_dims - 2; i >= 0; i--) { + output_dims_num_elements[i] = + output_dims_num_elements[i + 1] * output_tensor->dims->data[i + 1]; + + input_dims_num_elements[i] = + input_dims_num_elements[i + 1] * input_tensor->dims->data[i + 1]; + } + + switch (output_tensor->type) { + case kTfLiteFloat32: { + MirrorPad(padding_matrix, input_tensor->dims, output_dims_num_elements, + input_dims_num_elements, + tflite::micro::GetTensorData(input_tensor), + tflite::micro::GetTensorData(output_tensor), + data->offset, input_dims, output_size); + break; + } + case kTfLiteInt8: { + MirrorPad(padding_matrix, input_tensor->dims, output_dims_num_elements, + input_dims_num_elements, + tflite::micro::GetTensorData(input_tensor), + tflite::micro::GetTensorData(output_tensor), + data->offset, input_dims, output_size); + break; + } + default: + status = kTfLiteError; + break; + } + +#undef TF_LITE_MIRROR_PAD + + return status; +} + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(OpDataMirrorPad)); +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + MicroContext* micro_context = 
GetMicroContext(context); + + TFLITE_DCHECK(node->user_data != nullptr); + OpDataMirrorPad* data = static_cast(node->user_data); + + TfLiteTensor* input_tensor = micro_context->AllocateTempInputTensor(node, 0); + TfLiteTensor* padding_matrix = + micro_context->AllocateTempInputTensor(node, 1); + TfLiteTensor* output_tensor = + micro_context->AllocateTempOutputTensor(node, 0); + + TF_LITE_ENSURE_EQ(context, NumDimensions(padding_matrix), 2); + TF_LITE_ENSURE_EQ(context, SizeOfDimension(padding_matrix, 0), + NumDimensions(input_tensor)); + auto* params = + reinterpret_cast(node->builtin_data); + if (params == nullptr) { + return kTfLiteError; + } + + data->offset = + params->mode != TfLiteMirrorPaddingMode::kTfLiteMirrorPaddingReflect ? 0 + : 1; + data->input_dims = NumDimensions(input_tensor); + data->output_size = NumElements(output_tensor); + + TF_LITE_ENSURE_STATUS(context->RequestScratchBufferInArena( + context, data->input_dims * sizeof(int), + &data->output_dims_num_elements_buffer_index)); + TF_LITE_ENSURE_STATUS(context->RequestScratchBufferInArena( + context, data->input_dims * sizeof(int), + &data->input_dims_num_elements_buffer_index)); + + micro_context->DeallocateTempTfLiteTensor(input_tensor); + micro_context->DeallocateTempTfLiteTensor(padding_matrix); + micro_context->DeallocateTempTfLiteTensor(output_tensor); + return kTfLiteOk; +} + +} // namespace + +TfLiteRegistration Register_MIRROR_PAD() { + return tflite::micro::RegisterOp(Init, Prepare, Eval); +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/mli_function_specializations.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/mli_function_specializations.h new file mode 100644 index 0000000..1b7b038 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/mli_function_specializations.h @@ -0,0 +1,145 @@ +// Patched by Edge Impulse to include reference and hardware-accelerated kernels +#include "../../../../classifier/ei_classifier_config.h" +#if EI_CLASSIFIER_TFLITE_ENABLE_ARC == 1 +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "mli_api.h" // NOLINT + +namespace tflite { + +// Convolution specialized function. 
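+// The selectors in this header return a pointer to the most specialized MLI
+// kernel that matches the filter geometry (1x1, 3x3 or 5x5) and fall back to
+// the generic kernel otherwise. Hedged usage sketch for an MLI 2.0 build
+// (`in`, `weights`, `bias` and `out` are caller-prepared mli_tensor pointers
+// and `cfg` a filled-in mli_conv2d_cfg; these names are not from this file):
+//
+//   conv_func_ptr conv = mli_krn_conv2d_hwcn(weights);
+//   mli_status status = conv(in, weights, bias, &cfg, out);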
+typedef mli_status (*conv_func_ptr)(const mli_tensor* /*in*/, + const mli_tensor* /*weights*/, + const mli_tensor* /*bias*/, + const mli_conv2d_cfg* /*cfg*/, + mli_tensor* /*out*/); + +#ifdef MLI_2_0 +conv_func_ptr __attribute__((weak)) +mli_krn_conv2d_hwcn(const mli_tensor* weights) { + int filter_w = weights->shape[KRNL_W_DIM_HWCN]; + int filter_h = weights->shape[KRNL_H_DIM_HWCN]; + + if (filter_w == 1 && filter_h == 1) { + return mli_krn_conv2d_hwcn_sa8_sa8_sa32_k1x1; + } else if (filter_w == 3 && filter_h == 3) { + return mli_krn_conv2d_hwcn_sa8_sa8_sa32_k3x3; + } else if (filter_w == 5 && filter_h == 5) { + return mli_krn_conv2d_hwcn_sa8_sa8_sa32_k5x5; + } else { + return mli_krn_conv2d_hwcn_sa8_sa8_sa32; + } +} +#else +conv_func_ptr __attribute__((weak)) +mli_krn_conv2d_hwcn(const mli_tensor* weights, const mli_conv2d_cfg* cfg) { + return mli_krn_conv2d_nhwc_sa8_sa8_sa32; +} +#endif + +// Depthwise convolution specialized function. +typedef mli_status (*depthwise_func_ptr)(const mli_tensor* /*in*/, + const mli_tensor* /*weights*/, + const mli_tensor* /*bias*/, + const mli_conv2d_cfg* /*cfg*/, + mli_tensor* /*out*/); + +#ifdef MLI_2_0 +depthwise_func_ptr __attribute__((weak)) +mli_krn_depthwise_conv2d(const mli_tensor* weights) { + int filter_w = weights->shape[KRNL_DW_W_DIM_HW1N]; + int filter_h = weights->shape[KRNL_DW_H_DIM_HW1N]; + + if (filter_w == 3 && filter_h == 3) { + return mli_krn_depthwise_conv2d_hwcn_sa8_sa8_sa32_k3x3; + } else if (filter_w == 5 && filter_h == 5) { + return mli_krn_depthwise_conv2d_hwcn_sa8_sa8_sa32_k5x5; + } else { + return mli_krn_depthwise_conv2d_hwcn_sa8_sa8_sa32; + } +} +#else +depthwise_func_ptr __attribute__((weak)) +mli_krn_depthwise_conv2d(const mli_tensor* weights, const mli_conv2d_cfg* cfg) { + return mli_krn_depthwise_conv2d_hwcn_sa8_sa8_sa32; +} +#endif + +#ifdef MLI_2_0 +depthwise_func_ptr __attribute__((weak)) +mli_krn_group_conv2d(const mli_tensor* weights) { + int filter_w = weights->shape[KRNL_DW_W_DIM_HW1N]; + int filter_h = weights->shape[KRNL_DW_H_DIM_HW1N]; + + if (filter_w == 3 && filter_h == 3) { + return mli_krn_group_conv2d_hwcn_sa8_sa8_sa32_k3x3; + } else if (filter_w == 5 && filter_h == 5) { + return mli_krn_group_conv2d_hwcn_sa8_sa8_sa32_k5x5; + } else { + return mli_krn_group_conv2d_hwcn_sa8_sa8_sa32; + } +} +#endif + +// Pooling specialized functions. 
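+// Same pattern as the convolution selectors above, keyed on the pooling
+// kernel size (2x2 or 3x3) taken from the mli_pool_cfg. Hedged sketch with
+// caller-prepared `in`, `cfg` and `out` (names assumed for illustration):
+//
+//   pooling_func_ptr pool = mli_krn_maxpool(&cfg);
+//   mli_status status = pool(in, &cfg, out);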
+typedef mli_status (*pooling_func_ptr)(const mli_tensor* /*in*/, + const mli_pool_cfg* /*cfg*/, + mli_tensor* /*out*/); + +#ifdef MLI_2_0 +pooling_func_ptr __attribute__((weak)) +mli_krn_avepool(const mli_pool_cfg* cfg) { + int filter_w = cfg->kernel_width; + int filter_h = cfg->kernel_height; + + if (filter_w == 2 && filter_h == 2) { + return mli_krn_avepool_hwc_sa8_k2x2; + } else if (filter_w == 3 && filter_h == 3) { + return mli_krn_avepool_hwc_sa8_k3x3; + } else { + return mli_krn_avepool_hwc_sa8; + } +} +#else +pooling_func_ptr __attribute__((weak)) +mli_krn_avepool(const mli_pool_cfg* cfg) { + return mli_krn_avepool_hwc_sa8; +} +#endif + +#ifdef MLI_2_0 +pooling_func_ptr __attribute__((weak)) +mli_krn_maxpool(const mli_pool_cfg* cfg) { + int filter_w = cfg->kernel_width; + int filter_h = cfg->kernel_height; + + if (filter_w == 2 && filter_h == 2) { + return mli_krn_maxpool_hwc_sa8_k2x2; + } else if (filter_w == 3 && filter_h == 3) { + return mli_krn_maxpool_hwc_sa8_k3x3; + } else { + return mli_krn_maxpool_hwc_sa8; + } +} +#else +pooling_func_ptr __attribute__((weak)) +mli_krn_maxpool(const mli_pool_cfg* cfg) { + return mli_krn_maxpool_hwc_sa8; +} +#endif + +} // namespace tflite +#endif // EI_CLASSIFIER_TFLITE_ENABLE_ARC == 1 diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/mli_interface.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/mli_interface.cpp new file mode 100644 index 0000000..bbd5e3a --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/mli_interface.cpp @@ -0,0 +1,160 @@ +// Patched by Edge Impulse to include reference and hardware-accelerated kernels +#include "../../../../classifier/ei_classifier_config.h" +#if EI_CLASSIFIER_TFLITE_ENABLE_ARC == 1 +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "mli_interface.h" // NOLINT + +#include + +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +namespace tflite { +namespace ops { +namespace micro { + +#ifndef MLI_2_0 +template <> +int8_t* MliTensorInterface::Data(void) { + TFLITE_DCHECK(tensor_->el_type == MLI_EL_ASYM_I8); + return static_cast(tensor_->data); +} + +template <> +int32_t* MliTensorInterface::Data(void) { + TFLITE_DCHECK(tensor_->el_type == MLI_EL_ASYM_I32); + return static_cast(tensor_->data); +} + +template <> +int32_t* MliTensorInterface::Scale(void) { + return &tensor_->el_params.asym.scale.i32; +} + +template <> +int32_t** MliTensorInterface::Scale(void) { + return &tensor_->el_params.asym.scale.pi32; +} + +template <> +void MliTensorInterface::SetData(int8_t* data, uint32_t capacity) const { + TFLITE_DCHECK(tensor_->el_type == MLI_EL_ASYM_I8); + tensor_->data = data; + tensor_->capacity = capacity; +} + +template <> +void MliTensorInterface::SetData(int32_t* data, uint32_t capacity) const { + TFLITE_DCHECK(tensor_->el_type == MLI_EL_ASYM_I32); + tensor_->data = data; + tensor_->capacity = capacity; +} + +mli_tensor* MliTensorInterface::MliTensor(void) { return tensor_; } + +const mli_tensor* MliTensorInterface::MliTensor(void) const { + return static_cast( + const_cast(this)->MliTensor()); +} + +uint32_t* MliTensorInterface::Rank(void) { return &tensor_->rank; } + +const uint32_t* MliTensorInterface::DataCapacity(void) const { + return &tensor_->capacity; +} + +mli_element_type* MliTensorInterface::ElType(void) { return &tensor_->el_type; } + +template <> +int16_t* MliTensorInterface::ZeroPoint(void) { + return &tensor_->el_params.asym.zero_point.i16; +} + +template <> +int16_t** MliTensorInterface::ZeroPoint(void) { + return &tensor_->el_params.asym.zero_point.pi16; +} + +uint32_t* MliTensorInterface::ZeroPointCapacity(void) { return nullptr; } + +int32_t* MliTensorInterface::Dim(void) { return &tensor_->el_params.asym.dim; } + +uint32_t* MliTensorInterface::ScaleCapacity(void) { return nullptr; } + +template <> +int8_t* MliTensorInterface::ScaleFracBits(void) { + return &tensor_->el_params.asym.scale_frac_bits; +} + +uint32_t* MliTensorInterface::ScaleFracBitsCapacity(void) { return nullptr; } + +int32_t* MliTensorInterface::MemStride(void) { return tensor_->mem_stride; } + +uint32_t* MliTensorInterface::Shape(void) { return tensor_->shape; } + +const uint32_t* MliTensorInterface::Shape(void) const { + return static_cast( + const_cast(this)->Shape()); +} + +void MliTensorInterface::SetScale(float fscale) { + int exp; + frexpf(fscale, &exp); + int frac_bits = 31 - exp; + int32_t iscale = (int32_t)((1ll << frac_bits) * fscale + 0.5f); + *(this->ScaleFracBits()) = frac_bits; + *(this->Scale()) = (int32_t)iscale; +} + +void MliTensorInterface::SetScalePerChannel(float* fscale, + const int num_channels) { + int min_frac_bits; + for (int i = 0; i < num_channels; i++) { + int exp; + frexpf(fscale[i], &exp); + int cur_frac_bits = 31 - exp; + if (i == 0) { + min_frac_bits = cur_frac_bits; + } else { + min_frac_bits = + min_frac_bits < cur_frac_bits ? 
min_frac_bits : cur_frac_bits; + } + } + *this->ScaleFracBits() = min_frac_bits; + + for (int i = 0; i < num_channels; i++) { + int32_t iscale = (int32_t)((1ll << min_frac_bits) * fscale[i] + 0.5f); + (*this->Scale())[i] = iscale; + } +} + +void MliTensorInterface::SetElType(TfLiteType type) { + if (type == kTfLiteInt8) { + *this->ElType() = MLI_EL_ASYM_I8; + } else if (type == kTfLiteInt32) { + *this->ElType() = MLI_EL_ASYM_I32; + } else { + MicroPrintf("Wrong data type. Expected int8_t or int32_t."); + TFLITE_ABORT; + } +} +#endif + +} // namespace micro +} // namespace ops +} // namespace tflite + +#endif // EI_CLASSIFIER_TFLITE_ENABLE_ARC == 1 diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/mli_interface.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/mli_interface.h new file mode 100644 index 0000000..e08f84a --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/mli_interface.h @@ -0,0 +1,80 @@ +// Patched by Edge Impulse to include reference and hardware-accelerated kernels +#include "../../../../classifier/ei_classifier_config.h" +#if EI_CLASSIFIER_TFLITE_ENABLE_ARC == 1 +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_MICRO_KERNELS_ARC_MLI_INTERFACE_H_ +#define TENSORFLOW_LITE_MICRO_KERNELS_ARC_MLI_INTERFACE_H_ + +#include "mli_api.h" // NOLINT +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +namespace tflite { +namespace ops { +namespace micro { + +// Abstracts access to mli_tensor fields to use different versions of MLI +// Library (1.x and 2.x) +// Example: +// ops::micro::MliTensorInterface mli_in = +// ops::micro::MliTensorInterface(static_cast( +// context->AllocatePersistentBuffer(context, sizeof(mli_tensor)))); + +class MliTensorInterface { + public: + // Make sure that lifetime of MliTensorInterface instance isn't bigger than + // related mli_tensor. 
+ MliTensorInterface(mli_tensor* tensor) : tensor_(tensor){}; + MliTensorInterface() = default; + ~MliTensorInterface() = default; + + template + T* Data(); + template + T Scale(); + template + T ZeroPoint(); + template + T ScaleFracBits(); + mli_tensor* MliTensor(); + const mli_tensor* MliTensor() const; + int32_t* Dim(); + uint32_t* Rank(); + uint32_t* Shape(); + const uint32_t* Shape() const; + const uint32_t* DataCapacity() const; + uint32_t* ScaleCapacity(); + mli_element_type* ElType(); + uint32_t* ScaleFracBitsCapacity(); + int32_t* MemStride(); + uint32_t* ZeroPointCapacity(); + + template + void SetData(T* data, uint32_t capacity) const; + void SetScale(float fscale); + void SetScalePerChannel(float* fscale, const int num_channels); + void SetElType(TfLiteType type); + + private: + mli_tensor* tensor_; +}; + +} // namespace micro +} // namespace ops +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_KERNELS_ARC_MLI_SLICERS_H_ + +#endif // EI_CLASSIFIER_TFLITE_ENABLE_ARC == 1 diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/mli_slicers.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/mli_slicers.cpp new file mode 100644 index 0000000..8b65e38 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/mli_slicers.cpp @@ -0,0 +1,131 @@ +// Patched by Edge Impulse to include reference and hardware-accelerated kernels +#include "../../../../classifier/ei_classifier_config.h" +#if EI_CLASSIFIER_TFLITE_ENABLE_ARC == 1 +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "mli_slicers.h" // NOLINT + +#include + +namespace tflite { +namespace ops { +namespace micro { + +TensorSlicer::TensorSlicer(const mli_tensor* full_tensor, int slice_dim, + int slice_size, int padding_pre, int padding_post, + int overlap, bool interleave_mode) + : full_tensor_(full_tensor), + sub_tensor_{}, + sub_cfg_{}, + done_(false), + sliceDim_(slice_dim), + pad_pre_(padding_pre), + pad_post_(padding_post), + overlap_(overlap) { + /* In the interleave mode, the slicing happens from the deepest dimension up + to the slice_dim for example in an HWC layout this can mode can be used to + slice in the C dimenstion. in this mode the data is not contiguous in memory + anymore */ + if (interleave_mode) { + for (int i = 0; i < static_cast(full_tensor->rank); i++) { + if (i > slice_dim) { + sub_cfg_.size[i] = 1; + } else if (i == slice_dim) { + sub_cfg_.size[i] = slice_size; + } else { + sub_cfg_.size[i] = full_tensor->shape[i]; + } + } + sub_cfg_.sub_tensor_rank = full_tensor->rank; + + } else { + /* In the not interleaved mode, the slicing happens from the outer most + dimension up to the slice_dim for example in an HWC layout this mode can be + used to slice in the H dimension. 
in this mode the data of the slice is + still contiguous in memory (if that was the case in the input tensor */ + for (int i = 0; i < static_cast(full_tensor->rank); i++) { + if (i < slice_dim) { + sub_cfg_.size[i] = 1; + } else if (i == slice_dim) { + sub_cfg_.size[i] = slice_size; + } else { + sub_cfg_.size[i] = full_tensor->shape[i]; + } + } + sub_cfg_.sub_tensor_rank = full_tensor->rank - slice_dim; + } + + ComputeSubTensor(); +} + +void TensorSlicer::ComputeSubTensor(void) { + // subtsr_cfg_ is used to keep track of the iteration. + // A copy is created to update it with the correct clipping and padding for + // the current slice + mli_sub_tensor_cfg cfg_new = sub_cfg_; + + // begin and end spans the complete input region including padding areas. + const int begin = (int)sub_cfg_.offset[sliceDim_] - pad_pre_; + // end is clipped to the end of the full input region. this is needed for + // cases where the last slice is smaller than the rest. + const int end = std::min(begin + sub_cfg_.size[sliceDim_] + overlap_, + full_tensor_->shape[sliceDim_] + pad_post_); + // The start coordinate of the subtensor is clipped to zero + cfg_new.offset[sliceDim_] = std::max(begin, 0); + // and the stop coordinate is clipped to the size of the full tensor + const int stop_coord = + std::min(end, static_cast(full_tensor_->shape[sliceDim_])); + // compute the size of the subtensor + cfg_new.size[sliceDim_] = stop_coord - cfg_new.offset[sliceDim_]; + + // compute the padding configuration for the current slice. + actual_padding_pre = cfg_new.offset[sliceDim_] - begin; + actual_padding_post = end - stop_coord; + + mli_hlp_create_subtensor(full_tensor_, &cfg_new, &sub_tensor_); +} + +void TensorSlicer::Next(void) { + for (int i = full_tensor_->rank - 1; i >= 0; i--) { + sub_cfg_.offset[i] += sub_cfg_.size[i]; + if (sub_cfg_.offset[i] >= full_tensor_->shape[i]) { + // wrap + sub_cfg_.offset[i] = 0; + // and continue to the next dimension, if no next dimension we are done. + if (i == 0) done_ = true; + continue; + } else { + // carry is false, so break from the loop + break; + } + } + + if (!done_) ComputeSubTensor(); +} + +bool TensorSlicer::Done(void) { return done_; } + +int TensorSlicer::GetPaddingPre(void) { return actual_padding_pre; } + +int TensorSlicer::GetPaddingPost(void) { return actual_padding_post; } + +mli_tensor* TensorSlicer::Sub(void) { return &sub_tensor_; } + +} // namespace micro +} // namespace ops +} // namespace tflite + +#endif // EI_CLASSIFIER_TFLITE_ENABLE_ARC == 1 diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/mli_slicers.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/mli_slicers.h new file mode 100644 index 0000000..fa22020 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/mli_slicers.h @@ -0,0 +1,61 @@ +// Patched by Edge Impulse to include reference and hardware-accelerated kernels +#include "../../../../classifier/ei_classifier_config.h" +#if EI_CLASSIFIER_TFLITE_ENABLE_ARC == 1 +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_MICRO_KERNELS_ARC_MLI_SLICERS_H_ +#define TENSORFLOW_LITE_MICRO_KERNELS_ARC_MLI_SLICERS_H_ + +#include "mli_api.h" // NOLINT +namespace tflite { +namespace ops { +namespace micro { + +class TensorSlicer { + public: + TensorSlicer(const mli_tensor* full_tensor, int slice_dim, int slice_size, + int padding_pre = 0, int padding_post = 0, int overlap = 0, + bool interleave_mode = false); + ~TensorSlicer() = default; + + void Next(); + bool Done(); + int GetPaddingPre(); + int GetPaddingPost(); + + mli_tensor* Sub(); + + // Default constructor is deleted + TensorSlicer() = delete; + + private: + const mli_tensor* full_tensor_; + mli_tensor sub_tensor_; + mli_sub_tensor_cfg sub_cfg_; + bool done_; + int sliceDim_; + int pad_pre_, pad_post_, overlap_; + int actual_padding_pre, actual_padding_post; + + void ComputeSubTensor(); +}; + +} // namespace micro +} // namespace ops +} // namespace tflite +#endif // TENSORFLOW_LITE_MICRO_KERNELS_ARC_MLI_SLICERS_H_ + +#endif // EI_CLASSIFIER_TFLITE_ENABLE_ARC == 1 diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/mli_tf_utils.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/mli_tf_utils.h new file mode 100644 index 0000000..4179c74 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/mli_tf_utils.h @@ -0,0 +1,315 @@ +// Patched by Edge Impulse to include reference and hardware-accelerated kernels +#include "../../../../classifier/ei_classifier_config.h" +#if EI_CLASSIFIER_TFLITE_ENABLE_ARC == 1 +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_MICRO_KERNELS_ARC_MLI_TF_UTILS_H_ +#define TENSORFLOW_LITE_MICRO_KERNELS_ARC_MLI_TF_UTILS_H_ + +#include "mli_api.h" // NOLINT +#include "mli_interface.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +#define KRNL_C_DIM_NHWC 0 // output channels + +namespace tflite { +namespace ops { +namespace micro { + +inline void ConvertToMliTensorData(const TfLiteTensor* tfT, + MliTensorInterface* mliT, + bool is_bias_tensor) { + // Data is NULL until MliTensorAttachBuffer is called. + mliT->SetElType(tfT->type); + if (tfT->type == kTfLiteInt8) { + mliT->SetData(nullptr, tfT->bytes); + } else if (tfT->type == kTfLiteInt32) { + mliT->SetData(nullptr, tfT->bytes); + } else { + MicroPrintf("Wrong data type. Expected int8_t or int32_t."); + TFLITE_ABORT; + } + const int32_t dims_count = GetTensorShape(tfT).DimensionsCount(); + *mliT->Rank() = is_bias_tensor ? 
1 : dims_count; + + int mli_tensor_memstride = 1; + if (is_bias_tensor) { + mliT->Shape()[0] = GetTensorShape(tfT).Dims(dims_count - 1); + mliT->MemStride()[0] = mli_tensor_memstride; + } else { + for (int i = dims_count - 1; i >= 0; --i) { + mliT->Shape()[i] = GetTensorShape(tfT).Dims(i); + mliT->MemStride()[i] = mli_tensor_memstride; + mli_tensor_memstride *= GetTensorShape(tfT).Dims(i); + } + } +} + +inline void ConvertToMliQuantParams(const TfLiteTensor* tfT, + MliTensorInterface* mliT) { + *mliT->Dim() = -1; +#ifdef MLI_2_0 + *mliT->ZeroPointCapacity() = 0; +#endif + *mliT->ZeroPoint() = tfT->params.zero_point; + float fscale = tfT->params.scale; + mliT->SetScale(fscale); +} + +inline void ConvertToMliQuantParamsPerChannel(const TfLiteTensor* tfT, + MliTensorInterface* mliT, + bool is_bias_tensor) { + // mli tensor scale and zero_point arrays should be allocated at this point +#ifdef MLI_2_0 + TFLITE_DCHECK_NE(*mliT->Scale(), 0); + TFLITE_DCHECK_NE(*mliT->ZeroPoint(), 0); +#else + TFLITE_DCHECK_NE(*mliT->Scale(), 0); + TFLITE_DCHECK_NE(*mliT->ZeroPoint(), 0); +#endif + + // get per channel quantization parameters + const auto* affine_quantization = + reinterpret_cast(tfT->quantization.params); + int32_t quantized_dimension = + is_bias_tensor ? 0 : affine_quantization->quantized_dimension; + const int num_channels = mliT->Shape()[quantized_dimension]; + + *mliT->Dim() = quantized_dimension; + + // set capacities +#ifdef MLI_2_0 + *mliT->ScaleFracBitsCapacity() = num_channels * sizeof(int8_t); + *mliT->ScaleCapacity() = num_channels * sizeof(int16_t); + *mliT->ZeroPointCapacity() = num_channels * sizeof(int16_t); +#endif + float* fscale = affine_quantization->scale->data; + mliT->SetScalePerChannel(fscale, num_channels); + +#ifdef MLI_2_0 + int16_t* zero_point = *mliT->ZeroPoint(); + for (int i = 0; i < num_channels; i++) { + zero_point[i] = tfT->params.zero_point; + } +#endif +} + +template +inline void MliTensorAttachBuffer(const TfLiteEvalTensor*, + const MliTensorInterface*); + +template <> +inline void MliTensorAttachBuffer(const TfLiteEvalTensor* tfT, + const MliTensorInterface* mliT) { + // "const_cast" here used to attach const data buffer to the initially + // non-const mli_tensor. This is required by current implementation of MLI + // backend and planned for redesign due to this and some other aspects. + mliT->SetData( + const_cast(tflite::micro::GetTensorData(tfT)), + *mliT->DataCapacity()); +} + +template <> +inline void MliTensorAttachBuffer(const TfLiteEvalTensor* tfT, + const MliTensorInterface* mliT) { + // "const_cast" here used to attach const data buffer to the initially + // non-const mli_tensor. This is required by current implementation of MLI + // backend and planned for redesign due to this and some other aspects. 
+ mliT->SetData( + const_cast(tflite::micro::GetTensorData(tfT)), + *mliT->DataCapacity()); +} + +inline void ConvertToMliTensor(const TfLiteTensor* tfT, + MliTensorInterface* mliT) { + ConvertToMliTensorData(tfT, mliT, false); + ConvertToMliQuantParams(tfT, mliT); +} + +inline void ConvertToMliTensorPerChannel(const TfLiteTensor* tfT, + MliTensorInterface* mliT, + bool is_bias_tensor) { + ConvertToMliTensorData(tfT, mliT, is_bias_tensor); + ConvertToMliQuantParamsPerChannel(tfT, mliT, is_bias_tensor); +} + +inline void PrepareLocalTensor(mli_tensor* tensor, mli_tensor* tensor_local) { +#ifdef MLI_2_0 + int8_t* local_data = tensor_local->data.mem.pi8; + *tensor_local = *tensor; + tensor_local->data.mem.pi8 = local_data; +#else + int8_t* local_data = static_cast(tensor_local->data); + *tensor_local = *tensor; + tensor_local->data = local_data; +#endif +} + +inline void AdjustBiasTensor(MliTensorInterface* bias, MliTensorInterface* in, + MliTensorInterface* weights) { + int32_t quantized_dimension = *bias->Dim(); + const int num_channels = + quantized_dimension < 0 ? 1 : bias->Shape()[quantized_dimension]; + for (int i = 0; i < num_channels; i++) { + int32_t adjusted_bias_scale = + (*in->Scale()) * (*weights->Scale())[i]; + int in_shift = *in->ScaleFracBits(); + int w_shift = (*weights->ScaleFracBits())[i]; + int b_shift = (*bias->ScaleFracBits())[i]; + int bias_shift = in_shift + w_shift - b_shift; + (*bias->Scale())[i] = + (int16_t)(adjusted_bias_scale >> bias_shift); + } +} + +#ifdef MLI_2_0_KRNL_TEST +// Reorder an array according to given indexes. If backward is true, order of +// index array must be reversed. +inline static void reorder(uint32_t* arr, const uint8_t index[], + bool backward) { + uint32_t temp[MLI_MAX_RANK]; + for (int8_t i = 0; i < MLI_MAX_RANK; i++) { + if (backward) + temp[index[i]] = arr[i]; + else + temp[i] = arr[index[i]]; + } + for (int8_t i = 0; i < MLI_MAX_RANK; i++) { + arr[i] = temp[i]; + } +} + +// Change shape of mli tensor and recalculate mem strides. +inline void change_shape(mli_tensor* mliT, const uint8_t dim_order[]) { + reorder(mliT->shape, dim_order, false); + + // Calculate strides for new layout + int mli_tensor_memstride = 1; + for (int shape_idx = mliT->rank - 1; shape_idx >= 0; --shape_idx) { + mliT->mem_stride[shape_idx] = mli_tensor_memstride; + mli_tensor_memstride *= mliT->shape[shape_idx]; + } +} + +inline void permute_weights(const mli_tensor* weights_src, + const mli_permute_cfg* permute_cfg, + mli_tensor* weights_dst, + mli_data_container* buffer_data) { + mli_tensor buffer = {}; + buffer.el_params = weights_dst->el_params; + buffer.data = *buffer_data; + // Compare weights tensor size and avaliable buffer capacity. + int buffer_size = buffer_data->capacity; + int weights_size = mli_hlp_count_elem_num(weights_src, 0) * + mli_hlp_tensor_element_size(weights_src); + + // Need to change shape of distanation weights buffer according to permute + // dimensions order to calculate slice sizes + change_shape(weights_dst, permute_cfg->perm_dim); + + if (buffer_size >= weights_size) { + mli_mov_cfg_t copy_config; + mli_mov_cfg_for_copy(©_config); + mli_mov_tensor_sync(weights_src, ©_config, &buffer); + mli_krn_permute_sa8(&buffer, permute_cfg, weights_dst); + } else { + // Weights shape is NHWC and output (buffer) shape is HWC where N_w = C_o. + // Buffer size (H_o * W_o) must be more or equal then the weights size (H_w + // * W_w * C_w). So, this is the reason, why buffer size (output tensor) is + // divided by channel shape. 
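+// Illustrative numbers (assumed, not taken from the original sources): for
+// sa8 weights (one byte per element), a 2048-byte buffer and 16 output
+// channels leave a per-slice budget of 2048 / 16 = 128 elements to be
+// distributed over the remaining dimensions.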
+ uint32_t slice_size = buffer_size / weights_src->shape[KRNL_C_DIM_NHWC]; + + mli_mov_cfg_t copy_config = {}; + uint32_t src_offsets[] = {0, 0, 0, 0}; + uint32_t src_sizes[] = {0, 0, 0, 0}; + int dst_mem_stride[] = {0, 0, 0, 0}; + + mli_tensor weights_dst_sub_tensor; + mli_sub_tensor_cfg sub_tensor_cfg = {}; + sub_tensor_cfg.sub_tensor_rank = weights_src->rank; + + // Calculate dimensions for slice accroding to buffer capacity. + // Now, after calling change_shape() function, dst weights buffer has the + // MLI layout (HWCN). This means, the innermost dimension (N) of dst weights + // tensor is equal to the innermost dimension of output tensor (N). + sub_tensor_cfg.size[weights_dst->rank - 1] = + src_sizes[weights_dst->rank - 1] = weights_src->shape[KRNL_C_DIM_NHWC]; + // Now need to calculate other shapes for weights slice. Total slice size is + // H*W*C*N, so to calculate sizes for each axis, avaliable slice size is + // divided by shape for each axis. + uint32_t slice_size_left = slice_size; + for (uint32_t i = 0; i < weights_dst->rank - 1; i++) { + sub_tensor_cfg.size[i] = src_sizes[i] = + slice_size_left / weights_dst->shape[i] > 0 ? weights_dst->shape[i] + : slice_size_left; + slice_size_left /= weights_dst->shape[i]; + slice_size_left = slice_size_left > 0 ? slice_size_left : 1; + } + // Need to reorder src tensor sizes because it is still in TFLM format + // (NHWC) and src_sizes array calculated as (HWCN). + reorder(src_sizes, permute_cfg->perm_dim, true); + + sub_tensor_cfg.offset[KRNL_C_DIM_HWCN] = src_offsets[KRNL_H_DIM_HWCN] = 0; + sub_tensor_cfg.offset[KRNL_H_DIM_HWCN] = src_offsets[KRNL_W_DIM_HWCN] = 0; + sub_tensor_cfg.offset[KRNL_W_DIM_HWCN] = src_offsets[KRNL_D_DIM_HWCN] = 0; + sub_tensor_cfg.offset[KRNL_D_DIM_HWCN] = src_offsets[KRNL_C_DIM_HWCN] = 0; + do { + do { + do { + do { + mli_mov_cfg_for_slice(©_config, (int*)src_offsets, + (int*)src_sizes, dst_mem_stride); + mli_mov_tensor_sync(weights_src, ©_config, &buffer); + + mli_hlp_create_subtensor(weights_dst, &sub_tensor_cfg, + &weights_dst_sub_tensor); + mli_krn_permute_sa8(&buffer, permute_cfg, &weights_dst_sub_tensor); + + // For each axis, it is necessary to recalculate the offsets and + // slice sizes. 
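+// Note on the index pairing below: source offsets/sizes are kept in TFLM
+// (NHWC) order while the destination sub-tensor offsets are in MLI (HWCN)
+// order, so dst offset 2 (C) tracks src offset 3 (C), dst 1 (W) tracks
+// src 2 (W), dst 0 (H) tracks src 1 (H) and dst 3 (N) tracks src 0 (N).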
+ sub_tensor_cfg.offset[2] = src_offsets[3] += src_sizes[3]; + src_sizes[3] = + std::min(src_sizes[3], weights_src->shape[3] - src_offsets[3]); + } while (src_offsets[3] < weights_src->shape[3]); + + sub_tensor_cfg.offset[1] = src_offsets[2] += src_sizes[2]; + src_sizes[2] = + std::min(src_sizes[2], weights_src->shape[2] - src_offsets[2]); + } while (src_offsets[2] < weights_src->shape[2]); + + sub_tensor_cfg.offset[0] = src_offsets[1] += src_sizes[1]; + src_sizes[1] = + std::min(src_sizes[1], weights_src->shape[1] - src_offsets[1]); + } while (src_offsets[1] < weights_src->shape[1]); + + sub_tensor_cfg.offset[3] = src_offsets[0] += src_sizes[0]; + src_sizes[0] = + std::min(src_sizes[0], weights_src->shape[0] - src_offsets[0]); + } while (src_offsets[0] < weights_src->shape[0]); + } +} +#endif + +} // namespace micro +} // namespace ops +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_KERNELS_ARC_MLI_TF_UTILS_H_ + +#endif // EI_CLASSIFIER_TFLITE_ENABLE_ARC == 1 diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/mul.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/mul.cpp new file mode 100644 index 0000000..9f00d2e --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/mul.cpp @@ -0,0 +1,387 @@ +// Patched by Edge Impulse to include reference and hardware-accelerated kernels +#include "../../../../classifier/ei_classifier_config.h" +#if 0 == 1 +/* noop */ +#elif EI_CLASSIFIER_TFLITE_ENABLE_CMSIS_NN == 1 +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/mul.h" + +#include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnfunctions.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/mul.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/mul.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +namespace tflite { +namespace { + +void EvalQuantized(TfLiteContext* context, TfLiteNode* node, + const OpDataMul* data, const TfLiteEvalTensor* input1, + const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) { + tflite::ArithmeticParams op_params = {}; + + op_params.quantized_activation_min = data->output_activation_min; + op_params.quantized_activation_max = data->output_activation_max; + op_params.float_activation_max = data->output_activation_max_f32; + op_params.input1_offset = -data->input1_zero_point; + op_params.input2_offset = -data->input2_zero_point; + op_params.output_offset = data->output_zero_point; + op_params.output_multiplier = data->output_multiplier; + op_params.output_shift = data->output_shift; + + bool need_broadcast = reference_ops::ProcessBroadcastShapes( + tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorShape(input2), &op_params); + + if (need_broadcast) { + if (input1->type == kTfLiteInt8) { + reference_integer_ops::BroadcastMul4DSlow( + op_params, tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } else if (input1->type == kTfLiteInt16) { + reference_integer_ops::BroadcastMul4DSlow( + op_params, tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } + + } else { + if (input1->type == kTfLiteInt8) { + arm_elementwise_mul_s8( + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorData(input2), op_params.input1_offset, + op_params.input2_offset, tflite::micro::GetTensorData(output), + op_params.output_offset, op_params.output_multiplier, + op_params.output_shift, op_params.quantized_activation_min, + op_params.quantized_activation_max, + MatchingElementsSize(tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorShape(output))); + } else if (input1->type == kTfLiteInt16) { + arm_elementwise_mul_s16( + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorData(input2), + op_params.input1_offset, op_params.input2_offset, + tflite::micro::GetTensorData(output), + op_params.output_offset, op_params.output_multiplier, + op_params.output_shift, op_params.quantized_activation_min, + op_params.quantized_activation_max, + MatchingElementsSize(tflite::micro::GetTensorShape(input1), + 
tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorShape(output))); + } + } +} + +} // namespace + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->builtin_data != nullptr); + auto* params = reinterpret_cast(node->builtin_data); + + TFLITE_DCHECK(node->user_data != nullptr); + const OpDataMul* data = static_cast(node->user_data); + + const TfLiteEvalTensor* input1 = + tflite::micro::GetEvalInput(context, node, kMulInput1Tensor); + const TfLiteEvalTensor* input2 = + tflite::micro::GetEvalInput(context, node, kMulInput2Tensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kMulOutputTensor); + + switch (input1->type) { + case kTfLiteInt8: + EvalQuantized(context, node, data, input1, input2, output); + break; + case kTfLiteInt16: + EvalQuantized(context, node, data, input1, input2, output); + break; + case kTfLiteInt32: + EvalMulQuantizedReference(context, node, data, input1, input2, output); + break; + case kTfLiteFloat32: + EvalMulFloatReference(context, node, params, data, input1, input2, + output); + break; + default: + MicroPrintf("Type %s (%d) not supported.", + TfLiteTypeGetName(input1->type), input1->type); + return kTfLiteError; + } + + return kTfLiteOk; +} + +TfLiteStatus EvalInt8(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->builtin_data != nullptr); + TFLITE_DCHECK(node->user_data != nullptr); + + const OpDataMul* data = static_cast(node->user_data); + const TfLiteEvalTensor* input1 = + tflite::micro::GetEvalInput(context, node, kMulInput1Tensor); + const TfLiteEvalTensor* input2 = + tflite::micro::GetEvalInput(context, node, kMulInput2Tensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kMulOutputTensor); + TFLITE_DCHECK(input1->type == kTfLiteInt8); + + EvalQuantized(context, node, data, input1, input2, output); + + return kTfLiteOk; +} + +TfLiteStatus EvalInt16(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->builtin_data != nullptr); + TFLITE_DCHECK(node->user_data != nullptr); + + const OpDataMul* data = static_cast(node->user_data); + const TfLiteEvalTensor* input1 = + tflite::micro::GetEvalInput(context, node, kMulInput1Tensor); + const TfLiteEvalTensor* input2 = + tflite::micro::GetEvalInput(context, node, kMulInput2Tensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kMulOutputTensor); + TFLITE_DCHECK(input1->type == kTfLiteInt16); + + EvalQuantized(context, node, data, input1, input2, output); + + return kTfLiteOk; +} + +TfLiteRegistration Register_MUL() { + return tflite::micro::RegisterOp(MulInit, MulPrepare, Eval); +} + +TfLiteRegistration Register_MUL_INT8() { + return tflite::micro::RegisterOp(MulInit, MulPrepare, EvalInt8); +} + +TfLiteRegistration Register_MUL_INT16() { + return tflite::micro::RegisterOp(MulInit, MulPrepare, EvalInt16); +} + +} // namespace tflite + +#elif EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN == 1 +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/mul.h" + +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/mul.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/mul.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +#if ESP_NN +#include "edge-impulse-sdk/porting/espressif/ESP-NN/include/esp_nn.h" +#endif + +#include + +long long mul_total_time = 0; + +namespace tflite { +#if ESP_NN +void MulEvalQuantized(TfLiteContext* context, TfLiteNode* node, + const OpDataMul* data, const TfLiteEvalTensor* input1, + const TfLiteEvalTensor* input2, + TfLiteEvalTensor* output) { + tflite::ArithmeticParams op_params = {}; + op_params.quantized_activation_min = data->output_activation_min; + op_params.quantized_activation_max = data->output_activation_max; + op_params.float_activation_max = data->output_activation_max_f32; + op_params.input1_offset = -data->input1_zero_point; + op_params.input2_offset = -data->input2_zero_point; + op_params.output_offset = data->output_zero_point; + op_params.output_multiplier = data->output_multiplier; + op_params.output_shift = data->output_shift; + + bool need_broadcast = reference_ops::ProcessBroadcastShapes( + tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorShape(input2), &op_params); + + if (need_broadcast) { + reference_integer_ops::BroadcastMul4DSlow( + op_params, tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } else { + const int8_t *input1_data = tflite::micro::GetTensorData(input1); + const int8_t *input2_data = tflite::micro::GetTensorData(input2); + int8_t *out_data = tflite::micro::GetTensorData(output); + + esp_nn_mul_elementwise_s8(input1_data, input2_data, op_params.input1_offset, + op_params.input2_offset, out_data, op_params.output_offset, + op_params.output_multiplier, op_params.output_shift, + op_params.quantized_activation_min, op_params.quantized_activation_max, + MatchingElementsSize(tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorShape(output))); + } +} +#endif + +TfLiteStatus MulEval(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->builtin_data != nullptr); + auto* params = reinterpret_cast(node->builtin_data); + + TFLITE_DCHECK(node->user_data != nullptr); + const OpDataMul* data = static_cast(node->user_data); + + const TfLiteEvalTensor* input1 = + tflite::micro::GetEvalInput(context, node, kMulInput1Tensor); + const TfLiteEvalTensor* input2 = + tflite::micro::GetEvalInput(context, node, kMulInput2Tensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, 
kMulOutputTensor); + + long long start_time = esp_timer_get_time(); + switch (input1->type) { + case kTfLiteInt8: +#if ESP_NN + MulEvalQuantized(context, node, data, input1, input2, output); +#else + EvalMulQuantizedReference(context, node, data, input1, input2, output); +#endif + break; + case kTfLiteInt32: + EvalMulQuantizedReference(context, node, data, input1, input2, output); + break; + case kTfLiteFloat32: + EvalMulFloatReference(context, node, params, data, input1, input2, + output); + break; + default: + MicroPrintf("Type %s (%d) not supported.", + TfLiteTypeGetName(input1->type), input1->type); + return kTfLiteError; + } + mul_total_time += esp_timer_get_time() - start_time; + return kTfLiteOk; +} + +TfLiteRegistration Register_MUL() { + return tflite::micro::RegisterOp(MulInit, MulPrepare, MulEval); +} + +} // namespace tflite + +#else +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/mul.h" + +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/mul.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/mul.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +namespace tflite { + +TfLiteStatus MulEval(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->builtin_data != nullptr); + auto* params = reinterpret_cast(node->builtin_data); + + TFLITE_DCHECK(node->user_data != nullptr); + const OpDataMul* data = static_cast(node->user_data); + + const TfLiteEvalTensor* input1 = + tflite::micro::GetEvalInput(context, node, kMulInput1Tensor); + const TfLiteEvalTensor* input2 = + tflite::micro::GetEvalInput(context, node, kMulInput2Tensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kMulOutputTensor); + + switch (input1->type) { + case kTfLiteInt8: + case kTfLiteInt16: + case kTfLiteInt32: + EvalMulQuantizedReference(context, node, data, input1, input2, output); + break; + case kTfLiteFloat32: + EvalMulFloatReference(context, node, params, data, input1, input2, + output); + break; + default: + MicroPrintf("Type %s (%d) not supported.", + TfLiteTypeGetName(input1->type), input1->type); + return kTfLiteError; + } + + return kTfLiteOk; +} + +TfLiteRegistration Register_MUL() { + return tflite::micro::RegisterOp(MulInit, MulPrepare, MulEval); +} + +} // namespace tflite + +#endif diff --git 
a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/mul.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/mul.h new file mode 100644 index 0000000..61d4605 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/mul.h @@ -0,0 +1,74 @@ +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_MICRO_KERNELS_MUL_H_ +#define TENSORFLOW_LITE_MICRO_KERNELS_MUL_H_ + +#include + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" + +namespace tflite { + +extern const int kMulInput1Tensor; +extern const int kMulInput2Tensor; +extern const int kMulOutputTensor; + +struct OpDataMul { + int32_t input1_zero_point; + int32_t input2_zero_point; + + int32_t output_activation_min; + int32_t output_activation_max; + int32_t output_zero_point; + int32_t output_multiplier; + int output_shift; + + float output_activation_min_f32; + float output_activation_max_f32; +}; + +void* MulInit(TfLiteContext* context, const char* buffer, size_t length); + +TfLiteStatus CalculateOpDataMul(TfLiteContext* context, TfLiteNode* node, + TfLiteMulParams* params, OpDataMul* data); + +TfLiteStatus MulPrepare(TfLiteContext* context, TfLiteNode* node); + +TfLiteStatus EvalMulQuantizedReference(TfLiteContext* context, TfLiteNode* node, + const OpDataMul* data, + const TfLiteEvalTensor* input1, + const TfLiteEvalTensor* input2, + TfLiteEvalTensor* output); + +void EvalMulFloatReference(TfLiteContext* context, TfLiteNode* node, + TfLiteMulParams* params, const OpDataMul* data, + const TfLiteEvalTensor* input1, + const TfLiteEvalTensor* input2, + TfLiteEvalTensor* output); + +// Generic must define registration function. +TfLiteRegistration Register_MUL(); + +#if defined(CMSIS_NN) +TfLiteRegistration Register_MUL_INT8(); +#else +// Fallback registration +inline TfLiteRegistration Register_MUL_INT8() { return Register_MUL(); } +#endif +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_KERNELS_MUL_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/mul_common.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/mul_common.cpp new file mode 100644 index 0000000..187fae2 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/mul_common.cpp @@ -0,0 +1,213 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/mul.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/mul.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/mul.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h" + +namespace tflite { + +const int kMulInput1Tensor = 0; +const int kMulInput2Tensor = 1; +const int kMulOutputTensor = 0; + +void* MulInit(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(OpDataMul)); +} + +TfLiteStatus CalculateOpDataMul(TfLiteContext* context, TfLiteNode* node, + TfLiteMulParams* params, OpDataMul* data) { + MicroContext* micro_context = GetMicroContext(context); + + TfLiteTensor* input1 = + micro_context->AllocateTempInputTensor(node, kMulInput1Tensor); + TF_LITE_ENSURE(context, input1 != nullptr); + TfLiteTensor* input2 = + micro_context->AllocateTempInputTensor(node, kMulInput2Tensor); + TF_LITE_ENSURE(context, input2 != nullptr); + TfLiteTensor* output = + micro_context->AllocateTempOutputTensor(node, kMulOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); + + TF_LITE_ENSURE_EQ(context, NumInputs(node), 2); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + + TF_LITE_ENSURE_TYPES_EQ(context, input1->type, input2->type); + + if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) { + TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized( + context, params->activation, output, &data->output_activation_min, + &data->output_activation_max)); + + double real_multiplier = static_cast(input1->params.scale) * + static_cast(input2->params.scale) / + static_cast(output->params.scale); + QuantizeMultiplier(real_multiplier, &data->output_multiplier, + &data->output_shift); + + data->input1_zero_point = input1->params.zero_point; + data->input2_zero_point = input2->params.zero_point; + data->output_zero_point = output->params.zero_point; + + if (input1->type == kTfLiteInt16) { + TF_LITE_ENSURE_EQ(context, data->input1_zero_point, 0); + TF_LITE_ENSURE_EQ(context, data->input2_zero_point, 0); + TF_LITE_ENSURE_EQ(context, data->output_zero_point, 0); + } + } else if (output->type == kTfLiteInt32) { + CalculateActivationRange(params->activation, &data->output_activation_min, + &data->output_activation_max); + } else { + CalculateActivationRange(params->activation, + &data->output_activation_min_f32, + &data->output_activation_max_f32); + } + + micro_context->DeallocateTempTfLiteTensor(input1); + micro_context->DeallocateTempTfLiteTensor(input2); + micro_context->DeallocateTempTfLiteTensor(output); + return kTfLiteOk; +} + +TfLiteStatus MulPrepare(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->builtin_data != nullptr); + auto* params = reinterpret_cast(node->builtin_data); + + 
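+  // MulPrepare pulls the TfLiteMulParams out of node->builtin_data and the
+  // OpDataMul scratch struct (allocated in MulInit) out of node->user_data,
+  // then hands both to CalculateOpDataMul, which derives
+  //   real_multiplier = input1_scale * input2_scale / output_scale,
+  // quantizes it into (output_multiplier, output_shift) with
+  // QuantizeMultiplier, and caches the activation range used at Eval time.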
TFLITE_DCHECK(node->user_data != nullptr); + OpDataMul* data = static_cast(node->user_data); + + return CalculateOpDataMul(context, node, params, data); +} + +TfLiteStatus EvalMulQuantizedReference(TfLiteContext* context, TfLiteNode* node, + const OpDataMul* data, + const TfLiteEvalTensor* input1, + const TfLiteEvalTensor* input2, + TfLiteEvalTensor* output) { + tflite::ArithmeticParams op_params = {}; + op_params.quantized_activation_min = data->output_activation_min; + op_params.quantized_activation_max = data->output_activation_max; + op_params.float_activation_max = data->output_activation_max_f32; + op_params.input1_offset = -data->input1_zero_point; + op_params.input2_offset = -data->input2_zero_point; + op_params.output_offset = data->output_zero_point; + op_params.output_multiplier = data->output_multiplier; + op_params.output_shift = data->output_shift; + + bool need_broadcast = reference_ops::ProcessBroadcastShapes( + tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorShape(input2), &op_params); + + if (input1->type == kTfLiteInt8) { + if (need_broadcast) { + reference_integer_ops::BroadcastMul4DSlow( + op_params, tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } else { + reference_integer_ops::Mul(op_params, + tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } + } else if (input1->type == kTfLiteInt32) { + if (need_broadcast) { + reference_ops::BroadcastMul4DSlow( + op_params, tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } else { + reference_ops::Mul(op_params, tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } + } else if (input1->type == kTfLiteInt16) { + TF_LITE_ENSURE_EQ(context, op_params.input1_offset, 0); + TF_LITE_ENSURE_EQ(context, op_params.input2_offset, 0); + TF_LITE_ENSURE_EQ(context, op_params.output_offset, 0); + + if (need_broadcast) { + reference_integer_ops::BroadcastMul4DSlow( + op_params, tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } else { + reference_integer_ops::Mul(op_params, + tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } + } + return kTfLiteOk; +} + +void EvalMulFloatReference(TfLiteContext* context, TfLiteNode* node, + TfLiteMulParams* params, const OpDataMul* data, + const TfLiteEvalTensor* input1, + const TfLiteEvalTensor* input2, + TfLiteEvalTensor* output) { + tflite::ArithmeticParams op_params = {}; + op_params.float_activation_min = data->output_activation_min_f32; + 
op_params.float_activation_max = data->output_activation_max_f32; + + bool need_broadcast = reference_ops::ProcessBroadcastShapes( + tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorShape(input2), &op_params); + + if (need_broadcast) { + reference_ops::BroadcastMul4DSlow( + op_params, tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } else { + reference_ops::Mul(op_params, tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/neg.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/neg.cpp new file mode 100644 index 0000000..249f7ad --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/neg.cpp @@ -0,0 +1,57 @@ +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/neg.h" + +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +namespace tflite { + +namespace { + +constexpr int kInputTensor = 0; +constexpr int kOutputTensor = 0; + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + switch (input->type) { + // TODO(wangtz): handle for kTfLiteInt8 + case kTfLiteFloat32: + reference_ops::Negate(tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + break; + default: + MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type), + input->type); + return kTfLiteError; + } + return kTfLiteOk; +} + +} // namespace + +TfLiteRegistration Register_NEG() { + return tflite::micro::RegisterOp(nullptr, nullptr, Eval); +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/pack.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/pack.cpp new file mode 100644 index 0000000..79615bd --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/pack.cpp @@ -0,0 +1,112 @@ +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +namespace tflite { + +namespace { + +constexpr int kOutputTensor = 0; + +template +TfLiteStatus PackImpl(TfLiteContext* context, TfLiteNode* node, + TfLiteEvalTensor* output, int values_count, int axis) { + const TfLiteEvalTensor* input0 = + tflite::micro::GetEvalInput(context, node, 0); + + const int dimensions = output->dims->size; + const TfLiteIntArray* input_dims = input0->dims; + const TfLiteIntArray* output_dims = output->dims; + + if (axis < 0) { + axis += dimensions; + } + + int outer_size = 1; + for (int i = 0; i < axis; ++i) { + outer_size *= output_dims->data[i]; + } + int copy_size = 1; + for (int i = axis + 1; i < dimensions; ++i) { + copy_size *= output_dims->data[i]; + } + int input_size = 1; + for (int i = 0; i < input_dims->size; ++i) { + input_size *= input_dims->data[i]; + } + TFLITE_DCHECK_EQ(input_size, copy_size * outer_size); + + T* output_data = tflite::micro::GetTensorData(output); + + for (int i = 0; i < values_count; ++i) { + const TfLiteEvalTensor* t = tflite::micro::GetEvalInput(context, node, i); + const T* input_data = tflite::micro::GetTensorData(t); + for (int k = 0; k < outer_size; ++k) { + const T* input_ptr = input_data + copy_size * k; + int loc = k * values_count * copy_size + i * copy_size; + T* output_ptr = output_data + loc; + for (int j = 0; j < copy_size; ++j) output_ptr[j] = input_ptr[j]; + } + } + + return kTfLiteOk; +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + const TfLitePackParams* data = + reinterpret_cast(node->builtin_data); + + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + + switch (output->type) { + case kTfLiteFloat32: { + return PackImpl(context, node, output, data->values_count, + data->axis); + } + case kTfLiteInt8: { + return PackImpl(context, node, output, data->values_count, + data->axis); + } + case kTfLiteInt32: { + return PackImpl(context, node, output, data->values_count, + data->axis); + } + case kTfLiteInt64: { + return PackImpl(context, node, output, data->values_count, + data->axis); + } + default: { + MicroPrintf("Type '%s' is not supported by pack.", + TfLiteTypeGetName(output->type)); + return kTfLiteError; + } + } + + return kTfLiteOk; +} + +} // namespace + +TfLiteRegistration Register_PACK() { + return tflite::micro::RegisterOp(nullptr, nullptr, Eval); +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/pad.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/pad.cpp new file mode 100644 index 0000000..a7d7edd --- /dev/null +++ 
b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/pad.cpp
@@ -0,0 +1,230 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/pad.h"
+
+#include <string.h>
+
+#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/portable_tensor.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
+
+namespace tflite {
+namespace {
+
+struct OpData {
+  PadParams params;
+  int32_t output_zero_point;
+};
+
+void* Init(TfLiteContext* context, const char* buffer, size_t length) {
+  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
+  return context->AllocatePersistentBuffer(context, sizeof(OpData));
+}
+
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+  TFLITE_DCHECK(node->user_data != nullptr);
+  const OpData* data = static_cast<const OpData*>(node->user_data);
+
+  const TfLiteEvalTensor* input =
+      tflite::micro::GetEvalInput(context, node, /*index=*/0);
+  const TfLiteEvalTensor* constant_values =
+      NumInputs(node) == 3
+          ? tflite::micro::GetEvalInput(context, node, /*index=*/2)
+          : nullptr;
+  TfLiteEvalTensor* output =
+      tflite::micro::GetEvalOutput(context, node, /*index=*/0);
+
+  switch (input->type) {
+    case kTfLiteFloat32: {
+      float pad_value =
+          constant_values == nullptr
+              ?
0.f + : *tflite::micro::GetTensorData(constant_values); + if (data->params.resizing_category == ResizingCategory::kImageStyle) { + reference_ops::PadImageStyle( + data->params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), &pad_value, + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } else { + reference_ops::Pad(data->params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + &pad_value, tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } + } break; + case kTfLiteInt8: { + int8_t pad_value; + if (constant_values == nullptr) { + pad_value = static_cast(data->output_zero_point); + } else { + pad_value = *tflite::micro::GetTensorData(constant_values); + } + if (data->params.resizing_category == ResizingCategory::kImageStyle) { + reference_ops::PadImageStyle( + data->params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), &pad_value, + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } else { + reference_ops::Pad(data->params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + &pad_value, tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } + } break; + case kTfLiteInt16: { + int16_t pad_value = + constant_values == nullptr + ? 0 + : *tflite::micro::GetTensorData(constant_values); + reference_ops::Pad(data->params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + &pad_value, tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } break; + case kTfLiteInt32: { + int32_t pad_value = + constant_values == nullptr + ? 0 + : *tflite::micro::GetTensorData(constant_values); + reference_ops::Pad(data->params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + &pad_value, tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } break; + default: + + MicroPrintf("Type %s not currently supported by Pad.", + TfLiteTypeGetName(input->type)); + return kTfLiteError; + } + return kTfLiteOk; +} + +} // namespace + +TfLiteStatus PadPrepare(TfLiteContext* context, TfLiteNode* node) { + MicroContext* micro_context = GetMicroContext(context); + + TFLITE_DCHECK(node->user_data != nullptr); + OpData* data = static_cast(node->user_data); + + TF_LITE_ENSURE(context, NumInputs(node) == 2 || NumInputs(node) == 3); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + + TfLiteTensor* input = + micro_context->AllocateTempInputTensor(node, /*index=*/0); + TF_LITE_ENSURE(context, input != nullptr); + TfLiteTensor* paddings = + micro_context->AllocateTempInputTensor(node, /*index=*/1); + TF_LITE_ENSURE(context, paddings != nullptr); + TfLiteTensor* constant_values = + NumInputs(node) == 3 + ? micro_context->AllocateTempInputTensor(node, /*index=*/2) + : nullptr; + TfLiteTensor* output = + micro_context->AllocateTempOutputTensor(node, /*index=*/0); + TF_LITE_ENSURE(context, output != nullptr); + + TF_LITE_ENSURE_EQ(context, input->type, output->type); + + // Current implementations rely on the inputs being <= 4D. + TF_LITE_ENSURE(context, NumDimensions(input) <= + reference_ops::PadKernelMaxDimensionCount()); + + if (constant_values != nullptr) { + TF_LITE_ENSURE_EQ(context, input->type, constant_values->type); + // Ensure that constant_values is a scalar. 
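+  // When present, this scalar supplies the fill value for every padded
+  // element; when absent, Eval() falls back to 0.0f for float32, to the
+  // output zero-point for int8, and to 0 for int16/int32. For example, an
+  // int8 output quantized with zero_point = -128 is padded with the raw
+  // value -128, i.e. a real-valued 0.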
+ TF_LITE_ENSURE_EQ(context, NumElements(constant_values), 1); + } + + // There must be a pair of paddings for each output dimension. + TF_LITE_ENSURE_EQ(context, GetTensorShape(paddings).FlatSize(), + output->dims->size * 2); + + // On Micro, outputs must be properly sized by the converter. + // NOTE: This data is only available because the paddings buffer is stored in + // the flatbuffer: + TF_LITE_ENSURE(context, IsConstantTensor(paddings)); + const int32_t* paddings_data = GetTensorData(paddings); + for (int i = 0; i < output->dims->size; i++) { + int output_dim = output->dims->data[i]; + int expected_dim = + input->dims->data[i] + paddings_data[i * 2] + paddings_data[i * 2 + 1]; + TF_LITE_ENSURE_EQ(context, output_dim, expected_dim); + } + + // Calculate OpData: + data->params.resizing_category = ResizingCategory::kGenericResize; + const int paddings_total = GetTensorShape(paddings).FlatSize(); + if (paddings_total == 8 && (paddings_data[0] == 0 && paddings_data[1] == 0) && + (paddings_data[6] == 0 && paddings_data[7] == 0)) { + data->params.resizing_category = ResizingCategory::kImageStyle; + } + + const int num_input_dimensions = NumDimensions(input); + data->params.left_padding_count = num_input_dimensions; + data->params.right_padding_count = num_input_dimensions; + + for (int idx = num_input_dimensions - 1; idx >= 0; --idx) { + data->params.left_padding[idx] = paddings_data[idx * 2]; + data->params.right_padding[idx] = paddings_data[idx * 2 + 1]; + } + + if (input->type == kTfLiteInt8) { + if (constant_values == nullptr) { + // Quantized Pad requires that 0 is represented in the quantized + // range. + TF_LITE_ENSURE(context, output->params.zero_point >= + std::numeric_limits::min()); + TF_LITE_ENSURE(context, output->params.zero_point <= + std::numeric_limits::max()); + } else { + // Quantized Pad requires that 'constant_values' is represented in the + // same quantized range as the input and output tensors. + TF_LITE_ENSURE_EQ(context, output->params.zero_point, + constant_values->params.zero_point); + TF_LITE_ENSURE_EQ(context, static_cast(output->params.scale), + static_cast(constant_values->params.scale)); + } + data->output_zero_point = output->params.zero_point; + } + + micro_context->DeallocateTempTfLiteTensor(input); + micro_context->DeallocateTempTfLiteTensor(paddings); + if (constant_values != nullptr) { + micro_context->DeallocateTempTfLiteTensor(constant_values); + } + micro_context->DeallocateTempTfLiteTensor(output); + + return kTfLiteOk; +} + +TfLiteRegistration Register_PAD() { + return tflite::micro::RegisterOp(Init, PadPrepare, Eval); +} + +// Also register Pad as PadV2. +TfLiteRegistration Register_PADV2() { + return tflite::micro::RegisterOp(Init, PadPrepare, Eval); +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/pad.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/pad.h new file mode 100644 index 0000000..81d1a9f --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/pad.h @@ -0,0 +1,27 @@ +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_MICRO_KERNELS_PAD_H_ +#define TENSORFLOW_LITE_MICRO_KERNELS_PAD_H_ + +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" + +namespace tflite { + +TfLiteStatus PadPrepare(TfLiteContext* context, TfLiteNode* node); + +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_KERNELS_PAD_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/pooling.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/pooling.cpp new file mode 100644 index 0000000..8b6f9e0 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/pooling.cpp @@ -0,0 +1,1567 @@ +// Patched by Edge Impulse to include reference and hardware-accelerated kernels +#include "../../../../classifier/ei_classifier_config.h" +#if 0 == 1 +/* noop */ +#elif EI_CLASSIFIER_TFLITE_ENABLE_CMSIS_NN == 1 +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/pooling.h" + +#include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnfunctions.h" +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/pooling.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +namespace tflite { + +namespace { + +struct OpData { + OpDataPooling reference_op_data; + + // Index to buffer for optimizations if applicable. 
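+  // A value of -1 means no scratch buffer was requested: MaxPrepare() always
+  // resets this to -1, while AveragePrepare() queries
+  // arm_avgpool_s8/s16_get_buffer_size() and only claims an arena scratch
+  // buffer when that size is greater than zero.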
+ int buffer_idx; +}; + +void PopulateCommonParams( + TfLiteContext* const context, cmsis_nn_dims* const input_dims, + cmsis_nn_dims* const output_dims, cmsis_nn_pool_params* const pool_params, + cmsis_nn_context* const ctx, cmsis_nn_dims* const filter_dims, + const OpData& data, const RuntimeShape& input_shape, + const RuntimeShape& output_shape, const TfLitePoolParams* params) { + const int depth = MatchingDim(input_shape, 3, output_shape, 3); + + input_dims->n = 1; + input_dims->h = input_shape.Dims(1); + input_dims->w = input_shape.Dims(2); + input_dims->c = depth; + + output_dims->n = 1; + output_dims->h = output_shape.Dims(1); + output_dims->w = output_shape.Dims(2); + output_dims->c = depth; + + pool_params->stride.h = params->stride_height; + pool_params->stride.w = params->stride_width; + pool_params->padding.h = data.reference_op_data.padding.height; + pool_params->padding.w = data.reference_op_data.padding.width; + pool_params->activation.min = data.reference_op_data.activation_min; + pool_params->activation.max = data.reference_op_data.activation_max; + + filter_dims->n = 1; + filter_dims->h = params->filter_height; + filter_dims->w = params->filter_width; + filter_dims->c = 1; + ctx->buf = nullptr; + ctx->size = 0; + if (data.buffer_idx > -1) { + ctx->buf = context->GetScratchBuffer(context, data.buffer_idx); + } +} + +void AverageEvalQuantized(TfLiteContext* context, const TfLiteNode* node, + const TfLitePoolParams* params, const OpData& data, + const TfLiteEvalTensor* input, + TfLiteEvalTensor* output) { + TFLITE_DCHECK((input->type == kTfLiteInt8) || (input->type == kTfLiteInt16)); + + RuntimeShape input_shape = micro::GetTensorShape(input); + TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); + + RuntimeShape output_shape = micro::GetTensorShape(output); + TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); + + cmsis_nn_dims input_dims; + cmsis_nn_dims output_dims; + cmsis_nn_pool_params pool_params; + cmsis_nn_dims filter_dims; + cmsis_nn_context ctx; + + PopulateCommonParams(context, &input_dims, &output_dims, &pool_params, &ctx, + &filter_dims, data, input_shape, output_shape, params); + + if (input->type == kTfLiteInt8) { + TFLITE_DCHECK_EQ( + arm_avgpool_s8(&ctx, &pool_params, &input_dims, + micro::GetTensorData(input), &filter_dims, + &output_dims, micro::GetTensorData(output)), + ARM_CMSIS_NN_SUCCESS); + } else { + TFLITE_DCHECK_EQ( + arm_avgpool_s16(&ctx, &pool_params, &input_dims, + micro::GetTensorData(input), &filter_dims, + &output_dims, micro::GetTensorData(output)), + ARM_CMSIS_NN_SUCCESS); + } +} + +TfLiteStatus MaxEvalQuantized(TfLiteContext* context, const TfLiteNode* node, + const TfLitePoolParams* params, + const OpData& data, const TfLiteEvalTensor* input, + TfLiteEvalTensor* output) { + TFLITE_DCHECK((input->type == kTfLiteInt8) || (input->type == kTfLiteInt16)); + + RuntimeShape input_shape = micro::GetTensorShape(input); + TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); + + RuntimeShape output_shape = micro::GetTensorShape(output); + TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); + + cmsis_nn_dims input_dims; + cmsis_nn_dims output_dims; + cmsis_nn_pool_params pool_params; + cmsis_nn_dims filter_dims; + cmsis_nn_context ctx; + + PopulateCommonParams(context, &input_dims, &output_dims, &pool_params, &ctx, + &filter_dims, data, input_shape, output_shape, params); + + if (input->type == kTfLiteInt8) { + TFLITE_DCHECK_EQ( + arm_max_pool_s8(&ctx, &pool_params, &input_dims, + micro::GetTensorData(input), &filter_dims, + &output_dims, 
micro::GetTensorData(output)), + ARM_CMSIS_NN_SUCCESS); + } else { + TFLITE_DCHECK_EQ( + arm_max_pool_s16(&ctx, &pool_params, &input_dims, + micro::GetTensorData(input), &filter_dims, + &output_dims, micro::GetTensorData(output)), + ARM_CMSIS_NN_SUCCESS); + } + + return kTfLiteOk; +} + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(OpData)); +} + +TfLiteStatus MaxPrepare(TfLiteContext* context, TfLiteNode* node) { + TF_LITE_ENSURE_STATUS(PoolingPrepare(context, node)); + // Set buffer index to a reset value + static_cast(node->user_data)->buffer_idx = -1; + return kTfLiteOk; +} + +TfLiteStatus AveragePrepare(TfLiteContext* context, TfLiteNode* node) { + TF_LITE_ENSURE_STATUS(PoolingPrepare(context, node)); + + MicroContext* micro_context = GetMicroContext(context); + + TfLiteTensor* input = + micro_context->AllocateTempInputTensor(node, kPoolingInputTensor); + TfLiteTensor* output = + micro_context->AllocateTempOutputTensor(node, kPoolingOutputTensor); + + if (input->type == kTfLiteInt8 || input->type == kTfLiteInt16) { + RuntimeShape input_shape = GetTensorShape(input); + TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); + + RuntimeShape output_shape = GetTensorShape(output); + TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); + + const int depth = MatchingDim(input_shape, 3, output_shape, 3); + const int output_width = output_shape.Dims(2); + + const int32_t buffer_size = + input->type == kTfLiteInt16 + ? arm_avgpool_s16_get_buffer_size(output_width, depth) + : arm_avgpool_s8_get_buffer_size(output_width, depth); + + auto* data = static_cast(node->user_data); + if (buffer_size > 0) { + TF_LITE_ENSURE_STATUS(context->RequestScratchBufferInArena( + context, buffer_size, &data->buffer_idx)); + } else { + data->buffer_idx = -1; + } + } + + micro_context->DeallocateTempTfLiteTensor(output); + micro_context->DeallocateTempTfLiteTensor(input); + return kTfLiteOk; +} + +TfLiteStatus AverageEval(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->builtin_data != nullptr); + auto* params = reinterpret_cast(node->builtin_data); + + TFLITE_DCHECK(node->user_data != nullptr); + const OpData& data = *(static_cast(node->user_data)); + + const TfLiteEvalTensor* input = + micro::GetEvalInput(context, node, kPoolingInputTensor); + TfLiteEvalTensor* output = + micro::GetEvalOutput(context, node, kPoolingOutputTensor); + + // Inputs and outputs share the same type, guaranteed by the converter. 
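+  // Dispatch: float32 runs the TFLM reference kernel, int8/int16 run the
+  // CMSIS-NN wrappers (arm_avgpool_s8/s16); the EI_TFLITE_DISABLE_* guards
+  // below let a build reject individual type paths at runtime.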
+ if (input->type == kTfLiteFloat32) { +#if EI_TFLITE_DISABLE_AVERAGE_POOL_2D_IN_F32 + MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type), + input->type); + return kTfLiteError; +#endif + AveragePoolingEvalFloat(context, node, params, &data.reference_op_data, + input, output); + } else if (input->type == kTfLiteInt8) { +#if EI_TFLITE_DISABLE_AVERAGE_POOL_2D_IN_I8 + MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type), + input->type); + return kTfLiteError; +#endif + AverageEvalQuantized(context, node, params, data, input, output); + } else if (input->type == kTfLiteInt16) { + AverageEvalQuantized(context, node, params, data, input, output); + } else { + MicroPrintf("Input type %s is not currently supported", + TfLiteTypeGetName(input->type)); + return kTfLiteError; + } + + return kTfLiteOk; +} + +TfLiteStatus AverageEvalInt8(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->builtin_data != nullptr); + auto* params = reinterpret_cast(node->builtin_data); + + TFLITE_DCHECK(node->user_data != nullptr); + const OpData& data = *(static_cast(node->user_data)); + + const TfLiteEvalTensor* input = + micro::GetEvalInput(context, node, kPoolingInputTensor); + TFLITE_DCHECK(input->type == kTfLiteInt8); + TfLiteEvalTensor* output = + micro::GetEvalOutput(context, node, kPoolingOutputTensor); + + AverageEvalQuantized(context, node, params, data, input, output); + + return kTfLiteOk; +} + +TfLiteStatus AverageEvalInt16(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->builtin_data != nullptr); + auto* params = reinterpret_cast(node->builtin_data); + + TFLITE_DCHECK(node->user_data != nullptr); + const OpData& data = *(static_cast(node->user_data)); + + const TfLiteEvalTensor* input = + micro::GetEvalInput(context, node, kPoolingInputTensor); + TFLITE_DCHECK(input->type == kTfLiteInt16); + TfLiteEvalTensor* output = + micro::GetEvalOutput(context, node, kPoolingOutputTensor); + + AverageEvalQuantized(context, node, params, data, input, output); + + return kTfLiteOk; +} +TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->builtin_data != nullptr); + auto* params = reinterpret_cast(node->builtin_data); + + TFLITE_DCHECK(node->user_data != nullptr); + const OpData& data = *(static_cast(node->user_data)); + + const TfLiteEvalTensor* input = + micro::GetEvalInput(context, node, kPoolingInputTensor); + TfLiteEvalTensor* output = + micro::GetEvalOutput(context, node, kPoolingOutputTensor); + + if (input->type == kTfLiteFloat32) { +#if EI_TFLITE_DISABLE_MAX_POOL_2D_IN_F32 + MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type), + input->type); + return kTfLiteError; +#endif + MaxPoolingEvalFloat(context, node, params, &data.reference_op_data, input, + output); + } else if (input->type == kTfLiteInt8) { +#if EI_TFLITE_DISABLE_MAX_POOL_2D_IN_I8 + MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type), + input->type); + return kTfLiteError; +#endif + MaxEvalQuantized(context, node, params, data, input, output); + } else if (input->type == kTfLiteInt16) { + MaxEvalQuantized(context, node, params, data, input, output); + } else { + MicroPrintf("Input type %s is not currently supported", + TfLiteTypeGetName(input->type)); + return kTfLiteError; + } + + return kTfLiteOk; +} + +TfLiteStatus MaxEvalInt8(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->builtin_data != nullptr); + auto* params = reinterpret_cast(node->builtin_data); + + 
TFLITE_DCHECK(node->user_data != nullptr); + const OpData& data = *(static_cast(node->user_data)); + + const TfLiteEvalTensor* input = + micro::GetEvalInput(context, node, kPoolingInputTensor); + TFLITE_DCHECK(input->type == kTfLiteInt8); + TfLiteEvalTensor* output = + micro::GetEvalOutput(context, node, kPoolingOutputTensor); + + MaxEvalQuantized(context, node, params, data, input, output); + return kTfLiteOk; +} + +TfLiteStatus MaxEvalInt16(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->builtin_data != nullptr); + auto* params = reinterpret_cast(node->builtin_data); + + TFLITE_DCHECK(node->user_data != nullptr); + const OpData& data = *(static_cast(node->user_data)); + + const TfLiteEvalTensor* input = + micro::GetEvalInput(context, node, kPoolingInputTensor); + TFLITE_DCHECK(input->type == kTfLiteInt16); + TfLiteEvalTensor* output = + micro::GetEvalOutput(context, node, kPoolingOutputTensor); + + MaxEvalQuantized(context, node, params, data, input, output); + return kTfLiteOk; +} + +} // namespace + +TfLiteRegistration Register_AVERAGE_POOL_2D_INT8() { + return tflite::micro::RegisterOp(Init, AveragePrepare, AverageEvalInt8); +} + +TfLiteRegistration Register_AVERAGE_POOL_2D_INT16() { + return tflite::micro::RegisterOp(Init, AveragePrepare, AverageEvalInt16); +} + +TfLiteRegistration Register_AVERAGE_POOL_2D() { + return tflite::micro::RegisterOp(Init, AveragePrepare, AverageEval); +} + +TfLiteRegistration Register_MAX_POOL_2D_INT8() { + return tflite::micro::RegisterOp(Init, MaxPrepare, MaxEvalInt8); +} + +TfLiteRegistration Register_MAX_POOL_2D_INT16() { + return tflite::micro::RegisterOp(Init, MaxPrepare, MaxEvalInt16); +} + +TfLiteRegistration Register_MAX_POOL_2D() { + return tflite::micro::RegisterOp(Init, MaxPrepare, MaxEval); +} + +} // namespace tflite + +#elif EI_CLASSIFIER_TFLITE_ENABLE_ARC == 1 +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/pooling.h" + +#include "mli_api.h" // NOLINT +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/padding.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/mli_function_specializations.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/mli_slicers.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/mli_tf_utils.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/scratch_buf_mgr.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/scratch_buffers.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +namespace tflite { + +namespace { + +constexpr int kInputTensor = 0; +constexpr int kOutputTensor = 0; + +struct OpData { + TfLitePaddingValues padding; + int32_t activation_min; + int32_t activation_max; + float activation_min_f32; + float activation_max_f32; + + // The result of checking if MLI optimized version of tensors can be used. + bool is_mli_applicable; + + // Tensors in MLI format. + mutable ops::micro::MliTensorInterface mli_in; + mutable ops::micro::MliTensorInterface mli_out; + mli_pool_cfg* cfg; + + // Pointer to the mli convolution function. + pooling_func_ptr p_mli_krn_avepool_hwc_sa8; + pooling_func_ptr p_mli_krn_maxpool_hwc_sa8; +}; + +enum MliPoolingType { AveragePooling = 0, MaxPooling = 1 }; + +bool IsMliApplicable(TfLiteContext* context, const TfLiteTensor* input, + const TfLitePoolParams* params) { + // MLI optimized version only supports int8_t datatype and no fused Relu + return (input->type == kTfLiteInt8 && params->activation == kTfLiteActNone); +} + +TfLiteStatus CalculateOpData(TfLiteContext* context, + const TfLitePoolParams* params, + const TfLiteTensor* input, + const TfLiteTensor* output, OpData* data) { + // input: batch, height, width, channel + int height = SizeOfDimension(input, 1); + int width = SizeOfDimension(input, 2); + + int out_height, out_width; + + data->padding = ComputePaddingHeightWidth( + params->stride_height, params->stride_width, + /*dilation_rate_height=*/1, + /*dilation_rate_width=*/1, height, width, params->filter_height, + params->filter_width, params->padding, &out_height, &out_width); + return kTfLiteOk; +} + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(OpData)); +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->builtin_data != nullptr); + auto* params = reinterpret_cast(node->builtin_data); + + TFLITE_DCHECK(node->user_data != nullptr); + OpData* data = static_cast(node->user_data); + + MicroContext* micro_context = GetMicroContext(context); + + TfLiteTensor* input = + micro_context->AllocateTempInputTensor(node, kInputTensor); + TF_LITE_ENSURE(context, input != nullptr); + TfLiteTensor* output = + micro_context->AllocateTempOutputTensor(node, kOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); + + data->is_mli_applicable = IsMliApplicable(context, input, 
params); + + TF_LITE_ENSURE_STATUS(CalculateOpData(context, params, input, output, data)); + + if (input->type == kTfLiteFloat32) { + CalculateActivationRange(params->activation, &data->activation_min_f32, + &data->activation_max_f32); + } else if (input->type == kTfLiteInt8) { + CalculateActivationRangeQuantized(context, params->activation, output, + &data->activation_min, + &data->activation_max); + } + + if (data->is_mli_applicable) { + data->mli_in = ops::micro::MliTensorInterface(static_cast( + context->AllocatePersistentBuffer(context, sizeof(mli_tensor)))); + data->mli_out = ops::micro::MliTensorInterface(static_cast( + context->AllocatePersistentBuffer(context, sizeof(mli_tensor)))); + data->cfg = static_cast( + context->AllocatePersistentBuffer(context, sizeof(mli_pool_cfg))); + + ops::micro::ConvertToMliTensor(input, &data->mli_in); + ops::micro::ConvertToMliTensor(output, &data->mli_out); + + data->cfg->kernel_width = params->filter_width; + data->cfg->kernel_height = params->filter_height; + data->cfg->stride_width = params->stride_width; + data->cfg->stride_height = params->stride_height; + + if (params->padding == kTfLitePaddingValid) { + data->cfg->padding_left = 0; + data->cfg->padding_right = 0; + data->cfg->padding_top = 0; + data->cfg->padding_bottom = 0; + } else { + data->cfg->padding_left = data->padding.width; + data->cfg->padding_right = + data->padding.width + data->padding.width_offset; + data->cfg->padding_top = data->padding.height; + data->cfg->padding_bottom = + data->padding.height + data->padding.height_offset; + } + + // Choose pooling mli specialized functions. + data->p_mli_krn_avepool_hwc_sa8 = mli_krn_avepool(data->cfg); + data->p_mli_krn_maxpool_hwc_sa8 = mli_krn_maxpool(data->cfg); + } + + micro_context->DeallocateTempTfLiteTensor(input); + micro_context->DeallocateTempTfLiteTensor(output); + return kTfLiteOk; +} + +void AverageEvalFloat(TfLiteContext* context, const TfLiteNode* node, + const TfLitePoolParams* params, const OpData& data, + const TfLiteEvalTensor* input, TfLiteEvalTensor* output) { +#if !defined(TF_LITE_STRIP_REFERENCE_IMPL) + float activation_min, activation_max; + CalculateActivationRange(params->activation, &activation_min, + &activation_max); + + PoolParams op_params; + op_params.stride_height = params->stride_height; + op_params.stride_width = params->stride_width; + op_params.filter_height = params->filter_height; + op_params.filter_width = params->filter_width; + op_params.padding_values.height = data.padding.height; + op_params.padding_values.width = data.padding.width; + op_params.float_activation_min = activation_min; + op_params.float_activation_max = activation_max; + reference_ops::AveragePool(op_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); +#else + MicroPrintf("Type %s (%d) is not supported by ARC MLI Library.", + TfLiteTypeGetName(input->type), input->type); +#endif +} + +// Prepare MLI tensors and run Average or Max Pooling +TfLiteStatus EvalMli(TfLiteContext* context, const TfLitePoolParams* params, + const OpData& data, const TfLiteEvalTensor* input, + TfLiteEvalTensor* output, + const MliPoolingType pooling_type) { + mli_pool_cfg cfg_local = *data.cfg; + + ops::micro::MliTensorAttachBuffer(input, &data.mli_in); + ops::micro::MliTensorAttachBuffer(output, &data.mli_out); + + const int height_dimension = 1; + int in_slice_height = 0; + int out_slice_height = 0; + const int overlap = 
cfg_local.kernel_height - cfg_local.stride_height; + + // Tensors for data in fast (local) memory and config to copy data from + // external to local memory + mli_tensor in_local = *data.mli_in.MliTensor(); + mli_tensor out_local = *data.mli_out.MliTensor(); + + ops::micro::MliTensorInterface in_local_interface(&in_local); + ops::micro::MliTensorInterface out_local_interface(&out_local); + + mli_mov_cfg_t copy_config; + mli_mov_cfg_for_copy(©_config); + TF_LITE_ENSURE_STATUS(get_arc_scratch_buffer_for_pooling_tensors( + context, &in_local_interface, &out_local_interface)); + + bool in_is_local = + in_local_interface.Data() == data.mli_in.Data(); + bool out_is_local = + out_local_interface.Data() == data.mli_out.Data(); + + TF_LITE_ENSURE_STATUS(arc_scratch_buffer_calc_slice_size_io( + &in_local_interface, &out_local_interface, cfg_local.kernel_height, + cfg_local.stride_height, cfg_local.padding_top, cfg_local.padding_bottom, + &in_slice_height, &out_slice_height)); + + /* mli_in tensor contains batches of HWC tensors. so it is a 4 dimensional + tensor. because the mli kernel will process one HWC tensor at a time, the 4 + dimensional tensor needs to be sliced into nBatch 3 dimensional tensors. on + top of that there could be a need to also slice in the Height dimension. + for that the sliceHeight has been calculated. The tensor slicer is + configured that it will completely slice the nBatch dimension (0) and slice + the height dimension (1) in chunks of 'sliceHeight' */ + ops::micro::TensorSlicer in_slice(data.mli_in.MliTensor(), height_dimension, + in_slice_height, cfg_local.padding_top, + cfg_local.padding_bottom, overlap); + ops::micro::TensorSlicer out_slice(data.mli_out.MliTensor(), height_dimension, + out_slice_height); + + /* is_local indicates that the tensor is already in local memory, + so in that case the original tensor can be used, + and there is no need to copy it to the local tensor*/ + mli_tensor* in_ptr = in_is_local ? in_slice.Sub() : &in_local; + mli_tensor* out_ptr = out_is_local ? 
out_slice.Sub() : &out_local; + + while (!out_slice.Done()) { + if (!out_is_local) { + ops::micro::PrepareLocalTensor(out_slice.Sub(), &out_local); + ops::micro::PrepareLocalTensor(in_slice.Sub(), &in_local); + } + cfg_local.padding_top = in_slice.GetPaddingPre(); + cfg_local.padding_bottom = in_slice.GetPaddingPost(); + + mli_mov_tensor_sync(in_slice.Sub(), ©_config, in_ptr); + if (pooling_type == AveragePooling) { + TFLITE_DCHECK(data.p_mli_krn_avepool_hwc_sa8 != nullptr); + data.p_mli_krn_avepool_hwc_sa8(in_ptr, &cfg_local, out_ptr); + } else if (pooling_type == MaxPooling) { + TFLITE_DCHECK(data.p_mli_krn_maxpool_hwc_sa8 != nullptr); + data.p_mli_krn_maxpool_hwc_sa8(in_ptr, &cfg_local, out_ptr); + } + mli_mov_tensor_sync(out_ptr, ©_config, out_slice.Sub()); + + in_slice.Next(); + out_slice.Next(); + } + return kTfLiteOk; +} + +void AverageEvalQuantized(TfLiteContext* context, const TfLiteNode* node, + const TfLitePoolParams* params, const OpData& data, + const TfLiteEvalTensor* input, + TfLiteEvalTensor* output) { +#if !defined(TF_LITE_STRIP_REFERENCE_IMPL) + TFLITE_DCHECK(input->type == kTfLiteInt8); + + PoolParams op_params; + op_params.stride_height = params->stride_height; + op_params.stride_width = params->stride_width; + op_params.filter_height = params->filter_height; + op_params.filter_width = params->filter_width; + op_params.padding_values.height = data.padding.height; + op_params.padding_values.width = data.padding.width; + op_params.quantized_activation_min = data.activation_min; + op_params.quantized_activation_max = data.activation_max; + + reference_integer_ops::AveragePool( + op_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); +#else + MicroPrintf("Type %s (%d) is not supported by ARC MLI Library.", + TfLiteTypeGetName(input->type), input->type); +#endif +} + +void MaxEvalFloat(TfLiteContext* context, TfLiteNode* node, + TfLitePoolParams* params, const OpData& data, + const TfLiteEvalTensor* input, TfLiteEvalTensor* output) { +#if !defined(TF_LITE_STRIP_REFERENCE_IMPL) + tflite::PoolParams op_params; + op_params.stride_height = params->stride_height; + op_params.stride_width = params->stride_width; + op_params.filter_height = params->filter_height; + op_params.filter_width = params->filter_width; + op_params.padding_values.height = data.padding.height; + op_params.padding_values.width = data.padding.width; + op_params.float_activation_min = data.activation_min_f32; + op_params.float_activation_max = data.activation_max_f32; + reference_ops::MaxPool(op_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); +#else + MicroPrintf( + + "Node configuration or type %s (%d) is not supported by ARC MLI Library.", + TfLiteTypeGetName(input->type), input->type); +#endif +} + +void MaxEvalQuantized(TfLiteContext* context, TfLiteNode* node, + TfLitePoolParams* params, const OpData& data, + const TfLiteEvalTensor* input, TfLiteEvalTensor* output) { +#if !defined(TF_LITE_STRIP_REFERENCE_IMPL) + TFLITE_DCHECK(input->type == kTfLiteInt8); + tflite::PoolParams op_params; + op_params.stride_height = params->stride_height; + op_params.stride_width = params->stride_width; + op_params.filter_height = params->filter_height; + op_params.filter_width = params->filter_width; + op_params.padding_values.height = data.padding.height; + op_params.padding_values.width = 
data.padding.width; + op_params.quantized_activation_min = data.activation_min; + op_params.quantized_activation_max = data.activation_max; + + reference_integer_ops::MaxPool(op_params, + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); +#else + MicroPrintf( + + "Node configuration or type %s (%d) is not supported by ARC MLI Library.", + TfLiteTypeGetName(input->type), input->type); +#endif +} + +TfLiteStatus AverageEval(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->builtin_data != nullptr); + auto* params = reinterpret_cast(node->builtin_data); + + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + + TFLITE_DCHECK(node->user_data != nullptr); + const OpData& data = *(static_cast(node->user_data)); + + // Inputs and outputs share the same type, guaranteed by the converter. + switch (input->type) { + case kTfLiteFloat32: +#if EI_TFLITE_DISABLE_AVERAGE_POOL_2D_IN_F32 + MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type), + input->type); + return kTfLiteError; +#endif + AverageEvalFloat(context, node, params, data, input, output); + break; + case kTfLiteInt8: +#if EI_TFLITE_DISABLE_AVERAGE_POOL_2D_IN_I8 + MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type), + input->type); + return kTfLiteError; +#endif + if (data.is_mli_applicable) { + EvalMli(context, params, data, input, output, AveragePooling); + } else { + AverageEvalQuantized(context, node, params, data, input, output); + } + break; + default: + MicroPrintf("Input type %s is not currently supported", + TfLiteTypeGetName(input->type)); + return kTfLiteError; + } + return kTfLiteOk; +} + +TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node) { + auto* params = reinterpret_cast(node->builtin_data); + + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + + TFLITE_DCHECK(node->user_data != nullptr); + const OpData& data = *(static_cast(node->user_data)); + + switch (input->type) { + case kTfLiteFloat32: +#if EI_TFLITE_DISABLE_MAX_POOL_2D_IN_F32 + MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type), + input->type); + return kTfLiteError; +#endif + MaxEvalFloat(context, node, params, data, input, output); + break; + case kTfLiteInt8: +#if EI_TFLITE_DISABLE_MAX_POOL_2D_IN_I8 + MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type), + input->type); + return kTfLiteError; +#endif + if (data.is_mli_applicable) { + EvalMli(context, params, data, input, output, MaxPooling); + } else { + MaxEvalQuantized(context, node, params, data, input, output); + } + break; + default: + MicroPrintf("Type %s not currently supported.", + TfLiteTypeGetName(input->type)); + return kTfLiteError; + } + return kTfLiteOk; +} + +} // namespace + +TfLiteRegistration Register_AVERAGE_POOL_2D() { + return tflite::micro::RegisterOp(Init, Prepare, AverageEval); +} + +TfLiteRegistration Register_MAX_POOL_2D() { + return tflite::micro::RegisterOp(Init, Prepare, MaxEval); +} + +} // namespace tflite + +#elif EI_CLASSIFIER_TFLITE_ENABLE_SILABS_MVP == 1 + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/pooling.h" +#include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnfunctions.h" 
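+// The Silicon Labs MVP port below picks one of three back ends per node (see
+// the op_support enum): the MVP accelerator, CMSIS-NN, or the float32 TFLM
+// reference kernel.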
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/padding.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" + +#include "sl_mvp_ml_pooling.h" + +namespace tflite { +namespace sl { +namespace pooling { + +namespace { + +constexpr int kInputTensor = 0; +constexpr int kOutputTensor = 0; + +enum op_support { kMvp, kCmsisNN, kTFLMrefF32}; + +struct OpData { + float activation_min_f32; + float activation_max_f32; + sli_mvp_ml_pooling_s8_params_t op_params; + op_support supported; + int buffer_idx; +}; + +} // namespace + + +void* Init(TfLiteContext* context, const char* buffer, size_t length) +{ + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(OpData)); +} + + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) +{ + OpData* data = static_cast(node->user_data); + auto* params = reinterpret_cast(node->builtin_data); + const TfLiteTensor* input = GetInput(context, node, kInputTensor); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + + data->op_params.padding = params->padding == kTfLitePaddingSame; + data->op_params.stride_height = params->stride_height; + data->op_params.stride_width = params->stride_width; + data->op_params.filter_height = params->filter_height; + data->op_params.filter_width = params->filter_width; + data->op_params.batches = MatchingDim(GetTensorShape(input), 0, + GetTensorShape(output), 0); + data->op_params.channels = MatchingDim(GetTensorShape(input), 3, + GetTensorShape(output), 3); + data->op_params.input_height = SizeOfDimension(input, 1); + data->op_params.input_width = SizeOfDimension(input, 2); + data->op_params.output_height = SizeOfDimension(output, 1); + data->op_params.output_width = SizeOfDimension(output, 2); + + int out_height, out_width; + auto padding = ComputePaddingHeightWidth( + params->stride_height, params->stride_width, + 1, 1, // dilation rate height/width. 
+ data->op_params.input_height, data->op_params.input_width, + params->filter_height, params->filter_width, + params->padding, + &out_height, &out_width); + TFLITE_DCHECK_EQ(out_height, data->op_params.output_height); + TFLITE_DCHECK_EQ(out_width, data->op_params.output_width); + data->op_params.pad_height = padding.height; + data->op_params.pad_width = padding.width; + + if (input->type == kTfLiteFloat32) { + data->supported = kTFLMrefF32; + CalculateActivationRange(params->activation, + &data->activation_min_f32, + &data->activation_max_f32); + } else { + CalculateActivationRangeQuantized(context, params->activation, output, + reinterpret_cast(&data->op_params.output_activation_min), + reinterpret_cast(&data->op_params.output_activation_max)); + if (input->type != kTfLiteInt8) { + TF_LITE_KERNEL_LOG(context, "Type %s not currently supported.", + TfLiteTypeGetName(input->type)); + return kTfLiteError; + } + } + + return kTfLiteOk; +} + +TfLiteStatus AveragePrepare(TfLiteContext* context, TfLiteNode* node) +{ + TFLITE_DCHECK(node->user_data != nullptr); + TFLITE_DCHECK(node->builtin_data != nullptr); + + OpData* data = static_cast(node->user_data); + const TfLiteTensor* input = GetInput(context, node, kInputTensor); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + TF_LITE_ENSURE(context, input != nullptr); + TF_LITE_ENSURE(context, output != nullptr); + + TfLiteStatus status = Prepare(context, node); + + if (status == kTfLiteOk) { + if (input->type == kTfLiteInt8) { + data->supported = sli_mvp_ml_average_pooling_s8_is_supported(&data->op_params) + ? kMvp : kCmsisNN; + if (data->supported == kCmsisNN) { + const int32_t buffer_size = arm_avgpool_s8_get_buffer_size( + data->op_params.output_width, + data->op_params.channels); + + if (buffer_size > 0) { + TF_LITE_ENSURE_STATUS(context->RequestScratchBufferInArena( + context, buffer_size, &data->buffer_idx)); + } else { + data->buffer_idx = -1; + } + } + } + } + return status; +} + +TfLiteStatus MaxPrepare(TfLiteContext* context, TfLiteNode* node) +{ + TFLITE_DCHECK(node->user_data != nullptr); + TFLITE_DCHECK(node->builtin_data != nullptr); + + OpData* data = static_cast(node->user_data); + const TfLiteTensor* input = GetInput(context, node, kInputTensor); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + TF_LITE_ENSURE(context, input != nullptr); + TF_LITE_ENSURE(context, output != nullptr); + + TfLiteStatus status = Prepare(context, node); + + if (status == kTfLiteOk) { + if (input->type == kTfLiteInt8) { + data->supported = sli_mvp_ml_max_pooling_s8_is_supported(&data->op_params) + ? kMvp : kCmsisNN; + } + } + + return status; +} + +TfLiteStatus AverageEval(TfLiteContext* context, TfLiteNode* node) +{ + TFLITE_DCHECK(node->user_data != nullptr); + TFLITE_DCHECK(node->builtin_data != nullptr); + + OpData* data = static_cast(node->user_data); + const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, kInputTensor); + TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, kOutputTensor); + TF_LITE_ENSURE(context, input != nullptr); + TF_LITE_ENSURE(context, output != nullptr); + data->op_params.input = tflite::micro::GetTensorData(input); + data->op_params.output = tflite::micro::GetTensorData(output); + + if (data->supported == kMvp) { + // Use MVP accelerated kernel. + TF_LITE_ENSURE_EQ(context, + SL_STATUS_OK, + sli_mvp_ml_average_pooling_s8(&data->op_params)); + + } else if (data->supported == kCmsisNN) { + // Use CMSIS-NN optimized kernel. 
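+    // The sli_mvp op_params captured in Prepare() are repacked into the
+    // cmsis_nn_dims / cmsis_nn_pool_params structs expected by
+    // arm_avgpool_s8(), and the scratch buffer requested in AveragePrepare()
+    // (buffer_idx) is fetched from the arena through cmsis_nn_context.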
+ cmsis_nn_dims input_dims; + input_dims.n = 1; + input_dims.h = data->op_params.input_height; + input_dims.w = data->op_params.input_width; + input_dims.c = data->op_params.channels; + + cmsis_nn_dims output_dims; + output_dims.n = 1; + output_dims.h = data->op_params.output_height; + output_dims.w = data->op_params.output_width; + output_dims.c = data->op_params.channels; + + cmsis_nn_pool_params pool_params; + pool_params.stride.h = data->op_params.stride_height; + pool_params.stride.w = data->op_params.stride_width; + pool_params.padding.h = data->op_params.pad_height; + pool_params.padding.w = data->op_params.pad_width; + pool_params.activation.min = data->op_params.output_activation_min; + pool_params.activation.max = data->op_params.output_activation_max; + + cmsis_nn_dims filter_dims; + filter_dims.n = 1; + filter_dims.h = data->op_params.filter_height; + filter_dims.w = data->op_params.filter_width; + filter_dims.c = 1; + + cmsis_nn_context ctx; + ctx.buf = nullptr; + ctx.size = 0; + if (data->buffer_idx > -1) { + ctx.buf = context->GetScratchBuffer(context, data->buffer_idx); + } + + TFLITE_DCHECK_EQ( + arm_avgpool_s8(&ctx, &pool_params, &input_dims, + data->op_params.input, &filter_dims, + &output_dims, + data->op_params.output), + ARM_MATH_SUCCESS); + } else if (data->supported == kTFLMrefF32) { + #if EI_TFLITE_DISABLE_AVERAGE_POOL_2D_IN_F32 + TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", + TfLiteTypeGetName(input->type), input->type); + return kTfLiteError; + #endif + + // Use TFLM reference kernel. + tflite::PoolParams op_params; + op_params.stride_height = data->op_params.stride_height; + op_params.stride_width = data->op_params.stride_width; + op_params.filter_height = data->op_params.filter_height; + op_params.filter_width = data->op_params.filter_width; + op_params.padding_values.height = data->op_params.pad_height; + op_params.padding_values.width = data->op_params.pad_width; + op_params.float_activation_min = data->activation_min_f32; + op_params.float_activation_max = data->activation_max_f32; + reference_ops::AveragePool(op_params, + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + + } else { + return kTfLiteError; + } + + return kTfLiteOk; +} + +TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node) +{ + TFLITE_DCHECK(node->user_data != nullptr); + TFLITE_DCHECK(node->builtin_data != nullptr); + + OpData* data = static_cast(node->user_data); + const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, kInputTensor); + TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, kOutputTensor); + TF_LITE_ENSURE(context, input != nullptr); + TF_LITE_ENSURE(context, output != nullptr); + data->op_params.input = tflite::micro::GetTensorData(input); + data->op_params.output = tflite::micro::GetTensorData(output); + + if (data->supported == kMvp) { + // Use MVP accelerated kernel. + TF_LITE_ENSURE_EQ(context, + SL_STATUS_OK, + sli_mvp_ml_max_pooling_s8(&data->op_params)); + + } else if (data->supported == kCmsisNN) { + // Use CMSIS-NN optimized kernel. 
+ cmsis_nn_dims input_dims; + input_dims.n = 1; + input_dims.h = data->op_params.input_height; + input_dims.w = data->op_params.input_width; + input_dims.c = data->op_params.channels; + + cmsis_nn_dims output_dims; + output_dims.n = 1; + output_dims.h = data->op_params.output_height; + output_dims.w = data->op_params.output_width; + output_dims.c = data->op_params.channels; + + cmsis_nn_pool_params pool_params; + pool_params.stride.h = data->op_params.stride_height; + pool_params.stride.w = data->op_params.stride_width; + pool_params.padding.h = data->op_params.pad_height; + pool_params.padding.w = data->op_params.pad_width; + pool_params.activation.min = data->op_params.output_activation_min; + pool_params.activation.max = data->op_params.output_activation_max; + + cmsis_nn_dims filter_dims; + filter_dims.n = 1; + filter_dims.h = data->op_params.filter_height; + filter_dims.w = data->op_params.filter_width; + filter_dims.c = 1; + + cmsis_nn_context ctx; + ctx.buf = nullptr; + ctx.size = 0; + + TFLITE_DCHECK_EQ( + arm_max_pool_s8(&ctx, &pool_params, &input_dims, + data->op_params.input, &filter_dims, + &output_dims, + data->op_params.output), + ARM_MATH_SUCCESS); + } else if (data->supported == kTFLMrefF32) { + #if EI_TFLITE_DISABLE_MAX_POOL_2D_IN_F32 + TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", + TfLiteTypeGetName(input->type), input->type); + return kTfLiteError; + #endif + + // Use TFLM reference kernel. + tflite::PoolParams op_params; + op_params.stride_height = data->op_params.stride_height; + op_params.stride_width = data->op_params.stride_width; + op_params.filter_height = data->op_params.filter_height; + op_params.filter_width = data->op_params.filter_width; + op_params.padding_values.height = data->op_params.pad_height; + op_params.padding_values.width = data->op_params.pad_width; + op_params.float_activation_min = data->activation_min_f32; + op_params.float_activation_max = data->activation_max_f32; + reference_ops::MaxPool(op_params, + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + + } else { + return kTfLiteError; + } + + return kTfLiteOk; +} + +} // namespace pooling +} // namespace sl + +TfLiteRegistration Register_MAX_POOL_2D() { + static TfLiteRegistration max_pool_registration = { + /*init=*/sl::pooling::Init, + /*free=*/nullptr, + /*prepare=*/sl::pooling::MaxPrepare, + /*invoke=*/sl::pooling::MaxEval, + /*profiling_string=*/nullptr, + /*builtin_code=*/0, + /*custom_name=*/nullptr, + /*version=*/0 + }; + + return max_pool_registration; +} + +// Just to keep all_ops_resolver() happy during development ... +TfLiteRegistration Register_AVERAGE_POOL_2D() { + static TfLiteRegistration avg_pool_registration = { + /*init=*/sl::pooling::Init, + /*free=*/nullptr, + /*prepare=*/sl::pooling::AveragePrepare, + /*invoke=*/sl::pooling::AverageEval, + /*profiling_string=*/nullptr, + /*builtin_code=*/0, + /*custom_name=*/nullptr, + /*version=*/0 + }; + + return avg_pool_registration; +} + +} // namespace tflite + +#elif EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN == 1 +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/pooling.h" + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/pooling.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +#if ESP_NN +#include "edge-impulse-sdk/porting/espressif/ESP-NN/include/esp_nn.h" +#endif + +#include + +long long pooling_total_time = 0; + +namespace tflite { + +namespace { +#if ESP_NN +void AverageEvalQuantized(TfLiteContext* context, const TfLiteNode* node, + const TfLitePoolParams* params, const OpDataPooling* data, + const TfLiteEvalTensor* input, + TfLiteEvalTensor* output) { + + const int stride_height = params->stride_height; + const int stride_width = params->stride_width; + const int filter_height = params->filter_height; + const int filter_width = params->filter_width; + const int activation_min = data->activation_min; + const int activation_max = data->activation_max; + const int pad_height = data->padding.height; + const int pad_width = data->padding.width; + + const RuntimeShape& input_shape = tflite::micro::GetTensorShape(input); + const RuntimeShape& output_shape = tflite::micro::GetTensorShape(output); + TFLITE_DCHECK_LE(activation_min, activation_max); + TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); + const int batches = MatchingDim(input_shape, 0, output_shape, 0); + const int depth = MatchingDim(input_shape, 3, output_shape, 3); + const int input_height = input_shape.Dims(1); + const int input_width = input_shape.Dims(2); + const int output_height = output_shape.Dims(1); + const int output_width = output_shape.Dims(2); + + const int8_t *input_data = tflite::micro::GetTensorData(input); + int8_t *output_data = tflite::micro::GetTensorData(output); + + const int input_size = input_width * input_height * depth; + const int output_size = output_width * output_height * depth; + + if (depth % 4 == 0) { // S3 version only supports channels multiple of 4 + for (int batch = 0; batch < batches; ++batch) { + esp_nn_avg_pool_s8(input_data, input_width, input_height, + output_data, output_width, output_height, + stride_width, stride_height, + filter_width, filter_height, + pad_width, pad_height, + activation_min, activation_max, depth); + input_data += input_size; + output_data += output_size; + } + } else { + for (int batch = 0; batch < batches; ++batch) { + esp_nn_avg_pool_s8_ansi(input_data, input_width, input_height, + output_data, output_width, output_height, + stride_width, stride_height, + filter_width, filter_height, + pad_width, pad_height, + activation_min, activation_max, depth); + input_data += input_size; + output_data += output_size; + } + } +} + +void MaxEvalQuantized(TfLiteContext* context, TfLiteNode* node, + TfLitePoolParams* params, const OpDataPooling* data, + const TfLiteEvalTensor* input, TfLiteEvalTensor* 
output) { + + const int stride_height = params->stride_height; + const int stride_width = params->stride_width; + const int filter_height = params->filter_height; + const int filter_width = params->filter_width; + const int activation_min = data->activation_min; + const int activation_max = data->activation_max; + const int pad_height = data->padding.height; + const int pad_width = data->padding.width; + + const RuntimeShape& input_shape = tflite::micro::GetTensorShape(input); + const RuntimeShape& output_shape = tflite::micro::GetTensorShape(output); + TFLITE_DCHECK_LE(activation_min, activation_max); + TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); + const int batches = MatchingDim(input_shape, 0, output_shape, 0); + const int depth = MatchingDim(input_shape, 3, output_shape, 3); + const int input_height = input_shape.Dims(1); + const int input_width = input_shape.Dims(2); + const int output_height = output_shape.Dims(1); + const int output_width = output_shape.Dims(2); + + const int8_t *input_data = tflite::micro::GetTensorData(input); + int8_t *output_data = tflite::micro::GetTensorData(output); + + const int input_size = input_width * input_height * depth; + const int output_size = output_width * output_height * depth; + if (depth % 4 == 0) { // S3 version only supports channels multiple of 4 + for (int batch = 0; batch < batches; ++batch) { + esp_nn_max_pool_s8(input_data, input_width, input_height, + output_data, output_width, output_height, + stride_width, stride_height, + filter_width, filter_height, + pad_width, pad_height, + activation_min, activation_max, depth); + input_data += input_size; + output_data += output_size; + } + } else { + for (int batch = 0; batch < batches; ++batch) { + esp_nn_max_pool_s8_ansi(input_data, input_width, input_height, + output_data, output_width, output_height, + stride_width, stride_height, + filter_width, filter_height, + pad_width, pad_height, + activation_min, activation_max, depth); + input_data += input_size; + output_data += output_size; + } + } +} +#endif + +TfLiteStatus AverageEval(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->builtin_data != nullptr); + auto* params = reinterpret_cast(node->builtin_data); + + TFLITE_DCHECK(node->user_data != nullptr); + const OpDataPooling* data = + static_cast(node->user_data); + + const TfLiteEvalTensor* input = + micro::GetEvalInput(context, node, kPoolingInputTensor); + TfLiteEvalTensor* output = + micro::GetEvalOutput(context, node, kPoolingOutputTensor); + + long long start_time = esp_timer_get_time(); + // Inputs and outputs share the same type, guaranteed by the converter. 
+ switch (input->type) { + case kTfLiteFloat32: +#if EI_TFLITE_DISABLE_AVERAGE_POOL_2D_IN_F32 + TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", + TfLiteTypeGetName(input->type), input->type); + return kTfLiteError; +#endif + AveragePoolingEvalFloat(context, node, params, data, input, output); + break; + case kTfLiteInt8: +#if EI_TFLITE_DISABLE_AVERAGE_POOL_2D_IN_I8 + TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", + TfLiteTypeGetName(input->type), input->type); + return kTfLiteError; +#endif +#if ESP_NN + AverageEvalQuantized(context, node, params, data, input, output); +#else + AveragePoolingEvalQuantized(context, node, params, data, input, output); +#endif + break; + default: + TF_LITE_KERNEL_LOG(context, "Input type %s is not currently supported", + TfLiteTypeGetName(input->type)); + return kTfLiteError; + } + pooling_total_time += esp_timer_get_time() - start_time; + return kTfLiteOk; +} + +TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->builtin_data != nullptr); + auto* params = reinterpret_cast(node->builtin_data); + + TFLITE_DCHECK(node->user_data != nullptr); + const OpDataPooling* data = + static_cast(node->user_data); + + const TfLiteEvalTensor* input = + micro::GetEvalInput(context, node, kPoolingInputTensor); + TfLiteEvalTensor* output = + micro::GetEvalOutput(context, node, kPoolingOutputTensor); + + long long start_time = esp_timer_get_time(); + switch (input->type) { + case kTfLiteFloat32: +#if EI_TFLITE_DISABLE_MAX_POOL_2D_IN_F32 + TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", + TfLiteTypeGetName(input->type), input->type); + return kTfLiteError; +#endif + MaxPoolingEvalFloat(context, node, params, data, input, output); + break; + case kTfLiteInt8: +#if EI_TFLITE_DISABLE_MAX_POOL_2D_IN_I8 + TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", + TfLiteTypeGetName(input->type), input->type); + return kTfLiteError; +#endif +#if ESP_NN + MaxEvalQuantized(context, node, params, data, input, output); +#else + MaxPoolingEvalQuantized(context, node, params, data, input, output); +#endif + break; + default: + TF_LITE_KERNEL_LOG(context, "Type %s not currently supported.", + TfLiteTypeGetName(input->type)); + return kTfLiteError; + } + pooling_total_time += esp_timer_get_time() - start_time; + return kTfLiteOk; +} + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(OpDataPooling)); +} + +} // namespace + +TfLiteRegistration Register_AVERAGE_POOL_2D() { + return tflite::micro::RegisterOp(Init, PoolingPrepare, AverageEval); +} + +TfLiteRegistration Register_MAX_POOL_2D() { + return tflite::micro::RegisterOp(Init, PoolingPrepare, MaxEval); +} + +} // namespace tflite + +#else +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/pooling.h" + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/pooling.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +namespace tflite { + +namespace { + +TfLiteStatus AverageEval(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->builtin_data != nullptr); + auto* params = reinterpret_cast(node->builtin_data); + + TFLITE_DCHECK(node->user_data != nullptr); + const OpDataPooling* data = + static_cast(node->user_data); + + const TfLiteEvalTensor* input = + micro::GetEvalInput(context, node, kPoolingInputTensor); + TfLiteEvalTensor* output = + micro::GetEvalOutput(context, node, kPoolingOutputTensor); + + // Inputs and outputs share the same type, guaranteed by the converter. + switch (input->type) { + case kTfLiteFloat32: +#if EI_TFLITE_DISABLE_AVERAGE_POOL_2D_IN_F32 + MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type), + input->type); + return kTfLiteError; +#endif + AveragePoolingEvalFloat(context, node, params, data, input, output); + break; + case kTfLiteInt8: +#if EI_TFLITE_DISABLE_AVERAGE_POOL_2D_IN_I8 + MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type), + input->type); + return kTfLiteError; +#endif + AveragePoolingEvalQuantized(context, node, params, data, input, + output); + break; + case kTfLiteInt16: + AveragePoolingEvalQuantized(context, node, params, data, input, + output); + break; + default: + MicroPrintf("Input type %s is not currently supported", + TfLiteTypeGetName(input->type)); + return kTfLiteError; + } + return kTfLiteOk; +} + +TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->builtin_data != nullptr); + auto* params = reinterpret_cast(node->builtin_data); + + TFLITE_DCHECK(node->user_data != nullptr); + const OpDataPooling* data = + static_cast(node->user_data); + + const TfLiteEvalTensor* input = + micro::GetEvalInput(context, node, kPoolingInputTensor); + TfLiteEvalTensor* output = + micro::GetEvalOutput(context, node, kPoolingOutputTensor); + + switch (input->type) { + case kTfLiteFloat32: +#if EI_TFLITE_DISABLE_MAX_POOL_2D_IN_F32 + MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type), + input->type); + return kTfLiteError; +#endif + MaxPoolingEvalFloat(context, node, params, data, input, output); + break; + case kTfLiteInt8: +#if EI_TFLITE_DISABLE_MAX_POOL_2D_IN_I8 + MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type), + input->type); + return kTfLiteError; +#endif + MaxPoolingEvalQuantized(context, node, params, data, input, + output); + break; + case kTfLiteInt16: + MaxPoolingEvalQuantized(context, node, params, data, input, + output); + break; + default: + MicroPrintf("Type %s not currently supported.", + TfLiteTypeGetName(input->type)); + return kTfLiteError; + } + return kTfLiteOk; +} + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(OpDataPooling)); +} + +} // namespace + +TfLiteRegistration Register_AVERAGE_POOL_2D() { + return tflite::micro::RegisterOp(Init, PoolingPrepare, AverageEval); +} + 
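+// Usage sketch (illustrative only, assuming the standard TFLM
+// MicroMutableOpResolver / MicroInterpreter API is available to the caller):
+//
+//   static tflite::MicroMutableOpResolver<2> resolver;
+//   resolver.AddAveragePool2D();  // resolves to Register_AVERAGE_POOL_2D()
+//   resolver.AddMaxPool2D();      // resolves to Register_MAX_POOL_2D()
+//
+// The resolver is then handed to a tflite::MicroInterpreter together with the
+// model and a tensor arena. Which branch of this file actually gets compiled
+// (the MVP-accelerated kernels, the ESP-NN kernels, or this reference
+// fallback) is decided by the EI_CLASSIFIER_TFLITE_ENABLE_* guards, e.g.
+// EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN above.
+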
+TfLiteRegistration Register_MAX_POOL_2D() { + return tflite::micro::RegisterOp(Init, PoolingPrepare, MaxEval); +} + +} // namespace tflite + +#endif diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/pooling.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/pooling.h new file mode 100644 index 0000000..d33aa23 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/pooling.h @@ -0,0 +1,142 @@ +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_MICRO_KERNELS_POOLING_H_ +#define TENSORFLOW_LITE_MICRO_KERNELS_POOLING_H_ + +#include + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/pooling.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/padding.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/micro_ops.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +namespace tflite { + +extern const int kPoolingInputTensor; +extern const int kPoolingOutputTensor; + +struct OpDataPooling { + TfLitePaddingValues padding; + int32_t activation_min; + int32_t activation_max; + float activation_min_f32; + float activation_max_f32; +}; + +TfLiteStatus CalculateOpDataPooling(const TfLiteContext* context, + const TfLitePoolParams* params, + const TfLiteTensor* input, + const TfLiteTensor* output, + OpDataPooling* data); + +TfLiteStatus PoolingPrepare(TfLiteContext* context, TfLiteNode* node); + +void AveragePoolingEvalFloat(const TfLiteContext* context, + const TfLiteNode* node, + const TfLitePoolParams* params, + const OpDataPooling* data, + const TfLiteEvalTensor* input, + TfLiteEvalTensor* output); + +template +void AveragePoolingEvalQuantized(TfLiteContext* context, const TfLiteNode* node, + const TfLitePoolParams* params, + const OpDataPooling* data, + const TfLiteEvalTensor* input, + TfLiteEvalTensor* output) { + TFLITE_DCHECK(input->type == kTfLiteInt8 || input->type == kTfLiteInt16); + + PoolParams op_params; + op_params.stride_height = params->stride_height; + op_params.stride_width = params->stride_width; + op_params.filter_height = params->filter_height; + op_params.filter_width = params->filter_width; + op_params.padding_values.height = data->padding.height; + op_params.padding_values.width = data->padding.width; + op_params.quantized_activation_min = data->activation_min; + op_params.quantized_activation_max = data->activation_max; + + reference_integer_ops::AveragePool(op_params, + tflite::micro::GetTensorShape(input), 
+ tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); +} + +void MaxPoolingEvalFloat(TfLiteContext* context, TfLiteNode* node, + TfLitePoolParams* params, const OpDataPooling* data, + const TfLiteEvalTensor* input, + TfLiteEvalTensor* output); + +template +void MaxPoolingEvalQuantized(TfLiteContext* context, TfLiteNode* node, + TfLitePoolParams* params, + const OpDataPooling* data, + const TfLiteEvalTensor* input, + TfLiteEvalTensor* output) { + TFLITE_DCHECK(input->type == kTfLiteInt8 || input->type == kTfLiteInt16); + + tflite::PoolParams op_params; + op_params.stride_height = params->stride_height; + op_params.stride_width = params->stride_width; + op_params.filter_height = params->filter_height; + op_params.filter_width = params->filter_width; + op_params.padding_values.height = data->padding.height; + op_params.padding_values.width = data->padding.width; + op_params.quantized_activation_min = data->activation_min; + op_params.quantized_activation_max = data->activation_max; + + reference_integer_ops::MaxPool(op_params, + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); +} + +#if defined(CMSIS_NN) +TfLiteRegistration Register_AVERAGE_POOL_2D_INT8(); + +TfLiteRegistration Register_MAX_POOL_2D_INT8(); + +TfLiteRegistration Register_AVERAGE_POOL_2D_INT16(); + +TfLiteRegistration Register_MAX_POOL_2D_INT16(); +#else +inline TfLiteRegistration Register_AVERAGE_POOL_2D_INT8() { + return tflite::Register_AVERAGE_POOL_2D(); +} + +inline TfLiteRegistration Register_MAX_POOL_2D_INT8() { + return tflite::Register_MAX_POOL_2D(); +} + +inline TfLiteRegistration Register_AVERAGE_POOL_2D_INT16() { + return tflite::Register_AVERAGE_POOL_2D(); +} + +inline TfLiteRegistration Register_MAX_POOL_2D_INT16() { + return tflite::Register_MAX_POOL_2D(); +} +#endif +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_KERNELS_POOLING_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/pooling_common.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/pooling_common.cpp new file mode 100644 index 0000000..8eb66e7 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/pooling_common.cpp @@ -0,0 +1,128 @@ +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/
+
+#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/pooling.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/padding.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/pooling.h"
+
+namespace tflite {
+
+const int kPoolingInputTensor = 0;
+const int kPoolingOutputTensor = 0;
+
+TfLiteStatus CalculateOpDataPooling(const TfLiteContext* context,
+                                    const TfLitePoolParams* params,
+                                    const TfLiteTensor* input,
+                                    const TfLiteTensor* output,
+                                    OpDataPooling* data) {
+  // input: batch, height, width, channel
+  int height = SizeOfDimension(input, 1);
+  int width = SizeOfDimension(input, 2);
+
+  int out_height, out_width;
+
+  data->padding = ComputePaddingHeightWidth(
+      params->stride_height, params->stride_width,
+      /*dilation_rate_height=*/1,
+      /*dilation_rate_width=*/1, height, width, params->filter_height,
+      params->filter_width, params->padding, &out_height, &out_width);
+
+  return kTfLiteOk;
+}
+
+TfLiteStatus PoolingPrepare(TfLiteContext* context, TfLiteNode* node) {
+  TFLITE_DCHECK(node->builtin_data != nullptr);
+  auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
+
+  TFLITE_DCHECK(node->user_data != nullptr);
+  OpDataPooling* data = static_cast<OpDataPooling*>(node->user_data);
+
+  MicroContext* micro_context = GetMicroContext(context);
+
+  TfLiteTensor* input =
+      micro_context->AllocateTempInputTensor(node, kPoolingInputTensor);
+  TF_LITE_ENSURE(context, input != nullptr);
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, kPoolingOutputTensor);
+  TF_LITE_ENSURE(context, output != nullptr);
+
+  TF_LITE_ENSURE_STATUS(
+      CalculateOpDataPooling(context, params, input, output, data));
+
+  if (input->type == kTfLiteFloat32) {
+    CalculateActivationRange(params->activation, &data->activation_min_f32,
+                             &data->activation_max_f32);
+  } else if (input->type == kTfLiteInt8 || input->type == kTfLiteInt16) {
+    CalculateActivationRangeQuantized(context, params->activation, output,
+                                      &data->activation_min,
+                                      &data->activation_max);
+  } else {
+    MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type),
+                input->type);
+    return kTfLiteError;
+  }
+
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(output);
+
+  return kTfLiteOk;
+}
+
+void AveragePoolingEvalFloat(const TfLiteContext* context,
+                             const TfLiteNode* node,
+                             const TfLitePoolParams* params,
+                             const OpDataPooling* data,
+                             const TfLiteEvalTensor* input,
+                             TfLiteEvalTensor* output) {
+  PoolParams op_params;
+  op_params.stride_height = params->stride_height;
+  op_params.stride_width = params->stride_width;
+  op_params.filter_height = params->filter_height;
+  op_params.filter_width = params->filter_width;
+  op_params.padding_values.height = data->padding.height;
+  op_params.padding_values.width = data->padding.width;
+  op_params.float_activation_min = data->activation_min_f32;
+  op_params.float_activation_max = data->activation_max_f32;
+  reference_ops::AveragePool(op_params, tflite::micro::GetTensorShape(input),
+                             tflite::micro::GetTensorData<float>(input),
+                             tflite::micro::GetTensorShape(output),
+                             tflite::micro::GetTensorData<float>(output));
+}
+
+void MaxPoolingEvalFloat(TfLiteContext* context, TfLiteNode* node,
+                         TfLitePoolParams* params, const OpDataPooling* data,
+                         const TfLiteEvalTensor* input,
+                         TfLiteEvalTensor* output) {
+  tflite::PoolParams op_params;
+  op_params.stride_height = params->stride_height;
+  op_params.stride_width = params->stride_width;
+  op_params.filter_height = params->filter_height;
+  op_params.filter_width = params->filter_width;
+  op_params.padding_values.height = data->padding.height;
+  op_params.padding_values.width = data->padding.width;
+  op_params.float_activation_min = data->activation_min_f32;
+  op_params.float_activation_max = data->activation_max_f32;
+  reference_ops::MaxPool(op_params, tflite::micro::GetTensorShape(input),
+                         tflite::micro::GetTensorData<float>(input),
+                         tflite::micro::GetTensorShape(output),
+                         tflite::micro::GetTensorData<float>(output));
+}
+
+}  // namespace tflite
diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/prelu.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/prelu.cpp
new file mode 100644
index 0000000..bceb7ff
--- /dev/null
+++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/prelu.cpp
@@ -0,0 +1,75 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/prelu.h"
+
+#include <cstdint>
+
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/prelu.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
+
+namespace tflite {
+
+void* PreluInit(TfLiteContext* context, const char* buffer, size_t length) {
+  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
+  return context->AllocatePersistentBuffer(context, sizeof(PreluParams));
+}
+
+TfLiteStatus PreluEval(TfLiteContext* context, TfLiteNode* node) {
+  TFLITE_DCHECK(node->user_data != nullptr);
+  const PreluParams& params =
+      *(static_cast<const PreluParams*>(node->user_data));
+
+  const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
+  const TfLiteEvalTensor* alpha = tflite::micro::GetEvalInput(context, node, 1);
+  TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
+
+  switch (input->type) {
+    case kTfLiteFloat32: {
+      BroadcastPrelu4DSlowFloat(tflite::micro::GetTensorShape(input),
+                                tflite::micro::GetTensorData<float>(input),
+                                tflite::micro::GetTensorShape(alpha),
+                                tflite::micro::GetTensorData<float>(alpha),
+                                tflite::micro::GetTensorShape(output),
+                                tflite::micro::GetTensorData<float>(output));
+      return kTfLiteOk;
+    } break;
+    case kTfLiteInt8: {
+      reference_ops::BroadcastPrelu4DSlow(
+          params, tflite::micro::GetTensorShape(input),
+          tflite::micro::GetTensorData<int8_t>(input),
+          tflite::micro::GetTensorShape(alpha),
+          tflite::micro::GetTensorData<int8_t>(alpha),
+          tflite::micro::GetTensorShape(output),
+          tflite::micro::GetTensorData<int8_t>(output));
+      return kTfLiteOk;
+    } break;
+    default:
+      MicroPrintf("Only float32 and int8_t are supported currently, got %s.",
+                  TfLiteTypeGetName(input->type));
+      return kTfLiteError;
+  }
+}
+
+TfLiteRegistration Register_PRELU() {
+  return tflite::micro::RegisterOp(PreluInit, PreluPrepare, PreluEval);
+}
+
+}  // namespace tflite
diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/prelu.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/prelu.h
new file mode 100644
index 0000000..d5b780a
--- /dev/null
+++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/prelu.h
@@ -0,0 +1,39 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/ + +#ifndef TENSORFLOW_LITE_MICRO_KERNELS_PRELU_H_ +#define TENSORFLOW_LITE_MICRO_KERNELS_PRELU_H_ + +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" + +namespace tflite { + +TfLiteStatus CalculatePreluParams(const TfLiteTensor* input, + const TfLiteTensor* alpha, + TfLiteTensor* output, PreluParams* params); + +void BroadcastPrelu4DSlowFloat(const RuntimeShape& unextended_input1_shape, + const float* input1_data, + const RuntimeShape& unextended_input2_shape, + const float* input2_data, + const RuntimeShape& unextended_output_shape, + float* output_data); + +TfLiteStatus PreluPrepare(TfLiteContext* context, TfLiteNode* node); + +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_KERNELS_PRELU_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/prelu_common.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/prelu_common.cpp new file mode 100644 index 0000000..8c1f2ef --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/prelu_common.cpp @@ -0,0 +1,105 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/
+
+#include <cstdint>
+
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/prelu.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/prelu.h"
+
+namespace tflite {
+
+TfLiteStatus CalculatePreluParams(const TfLiteTensor* input,
+                                  const TfLiteTensor* alpha,
+                                  TfLiteTensor* output, PreluParams* params) {
+  if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) {
+    double real_multiplier_1 = static_cast<double>(input->params.scale) /
+                               static_cast<double>(output->params.scale);
+    double real_multiplier_2 = static_cast<double>(input->params.scale) *
+                               static_cast<double>(alpha->params.scale) /
+                               static_cast<double>(output->params.scale);
+    QuantizeMultiplier(real_multiplier_1, &params->output_multiplier_1,
+                       &params->output_shift_1);
+    QuantizeMultiplier(real_multiplier_2, &params->output_multiplier_2,
+                       &params->output_shift_2);
+
+    params->input_offset = -input->params.zero_point;
+    params->alpha_offset = -alpha->params.zero_point;
+    params->output_offset = output->params.zero_point;
+  }
+
+  return kTfLiteOk;
+}
+
+void BroadcastPrelu4DSlowFloat(const RuntimeShape& unextended_input1_shape,
+                               const float* input1_data,
+                               const RuntimeShape& unextended_input2_shape,
+                               const float* input2_data,
+                               const RuntimeShape& unextended_output_shape,
+                               float* output_data) {
+  TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
+  const RuntimeShape output_shape =
+      RuntimeShape::ExtendedShape(4, unextended_output_shape);
+
+  NdArrayDesc<4> desc1;
+  NdArrayDesc<4> desc2;
+  NdArrayDescsForElementwiseBroadcast(unextended_input1_shape,
+                                      unextended_input2_shape, &desc1, &desc2);
+
+  for (int b = 0; b < output_shape.Dims(0); ++b) {
+    for (int y = 0; y < output_shape.Dims(1); ++y) {
+      for (int x = 0; x < output_shape.Dims(2); ++x) {
+        for (int c = 0; c < output_shape.Dims(3); ++c) {
+          auto out_idx = Offset(output_shape, b, y, x, c);
+          auto in1_idx = SubscriptToIndex(desc1, b, y, x, c);
+          auto in2_idx = SubscriptToIndex(desc2, b, y, x, c);
+          auto in1_val = input1_data[in1_idx];
+          auto in2_val = input2_data[in2_idx];
+          output_data[out_idx] =
+              in1_val >= 0.0f ? in1_val : in1_val * in2_val;
+        }
+      }
+    }
+  }
+}
+
+TfLiteStatus PreluPrepare(TfLiteContext* context, TfLiteNode* node) {
+  TFLITE_DCHECK(node->user_data != nullptr);
+  PreluParams* params = static_cast<PreluParams*>(node->user_data);
+
+  MicroContext* micro_context = GetMicroContext(context);
+
+  TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, 0);
+  TF_LITE_ENSURE(context, input != nullptr);
+  TfLiteTensor* alpha = micro_context->AllocateTempInputTensor(node, 1);
+  TF_LITE_ENSURE(context, alpha != nullptr);
+  TfLiteTensor* output = micro_context->AllocateTempOutputTensor(node, 0);
+  TF_LITE_ENSURE(context, output != nullptr);
+
+  TF_LITE_ENSURE_OK(context,
+                    CalculatePreluParams(input, alpha, output, params));
+
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(alpha);
+  micro_context->DeallocateTempTfLiteTensor(output);
+  return kTfLiteOk;
+}
+
+}  // namespace tflite
diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/quantize.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/quantize.cpp
new file mode 100644
index 0000000..487f502
--- /dev/null
+++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/quantize.cpp
@@ -0,0 +1,41 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_MICRO_KERNELS_QUANTIZE_H_ +#define TENSORFLOW_LITE_MICRO_KERNELS_QUANTIZE_H_ + +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" + +namespace tflite { + +struct OpDataQuantizeReference { + tflite::QuantizationParams quantization_params; + // The scaling factor from input to output (aka the 'real multiplier') can + // be represented as a fixed point multiplier plus a left shift. + int32_t requantize_output_multiplier; + int requantize_output_shift; + + int32_t input_zero_point; +}; + +TfLiteStatus EvalQuantizeReference(TfLiteContext* context, TfLiteNode* node); +TfLiteStatus PrepareQuantizeReference(TfLiteContext* context, TfLiteNode* node); +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_KERNELS_QUANTIZE_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/quantize_common.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/quantize_common.cpp new file mode 100644 index 0000000..5ba29f4 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/quantize_common.cpp @@ -0,0 +1,239 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/
+
+#include
+
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/quantize.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/requantize.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/quantize.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h"
+
+namespace tflite {
+
+TfLiteStatus PrepareQuantizeReference(TfLiteContext* context,
+                                      TfLiteNode* node) {
+  TFLITE_DCHECK(node->user_data != nullptr);
+  auto* data = static_cast<OpDataQuantizeReference*>(node->user_data);
+
+  TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
+  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
+
+  MicroContext* micro_context = GetMicroContext(context);
+
+  TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, 0);
+  TF_LITE_ENSURE(context, input != nullptr);
+  TfLiteTensor* output = micro_context->AllocateTempOutputTensor(node, 0);
+  TF_LITE_ENSURE(context, output != nullptr);
+
+  // TODO(b/128934713): Add support for fixed-point per-channel quantization.
+  // Currently this only supports affine per-layer quantization.
+  TF_LITE_ENSURE_EQ(context, output->quantization.type,
+                    kTfLiteAffineQuantization);
+  const auto* affine_quantization =
+      reinterpret_cast<TfLiteAffineQuantization*>(output->quantization.params);
+  TF_LITE_ENSURE(context, affine_quantization);
+  TF_LITE_ENSURE(context, affine_quantization->scale);
+  TF_LITE_ENSURE(context, affine_quantization->scale->size == 1);
+
+  TF_LITE_ENSURE(
+      context, input->type == kTfLiteFloat32 || input->type == kTfLiteInt32 ||
+                   input->type == kTfLiteInt16 || input->type == kTfLiteInt8 ||
+                   input->type == kTfLiteUInt8);
+  TF_LITE_ENSURE(context, output->type == kTfLiteInt8 ||
+                              output->type == kTfLiteInt16 ||
+                              output->type == kTfLiteInt32 ||
+                              output->type == kTfLiteUInt8);
+
+  if ((input->type == kTfLiteInt16 && output->type == kTfLiteInt8) ||
+      (input->type == kTfLiteInt8 && output->type == kTfLiteInt8) ||
+      (input->type == kTfLiteInt8 && output->type == kTfLiteUInt8) ||
+      (input->type == kTfLiteUInt8 && output->type == kTfLiteInt8) ||
+      (input->type == kTfLiteInt8 && output->type == kTfLiteInt16) ||
+      (input->type == kTfLiteInt8 && output->type == kTfLiteInt32) ||
+      (input->type == kTfLiteInt16 && output->type == kTfLiteInt16) ||
+      (input->type == kTfLiteInt16 && output->type == kTfLiteInt32) ||
+      (input->type == kTfLiteInt32 && output->type == kTfLiteInt8) ||
+      (input->type == kTfLiteInt32 && output->type == kTfLiteInt16)) {
+    double effective_scale = static_cast<double>(input->params.scale) /
+                             static_cast<double>(output->params.scale);
+
+    QuantizeMultiplier(effective_scale, &data->requantize_output_multiplier,
+                       &data->requantize_output_shift);
+  }
+
+  data->quantization_params.zero_point = output->params.zero_point;
+  data->quantization_params.scale = static_cast<float>(output->params.scale);
+
+  data->input_zero_point = input->params.zero_point;
+
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(output);
+  return kTfLiteOk;
+}
+
+TfLiteStatus EvalQuantizeReference(TfLiteContext* context, TfLiteNode* node) {
+  TFLITE_DCHECK(node->user_data != nullptr);
+  auto* data = static_cast<OpDataQuantizeReference*>(node->user_data);
+
+  const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
+  TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
+
+  if (input->type == kTfLiteFloat32) {
+    switch (output->type) {
+      case kTfLiteInt8:
+        reference_ops::AffineQuantize(
+            data->quantization_params, tflite::micro::GetTensorShape(input),
+            tflite::micro::GetTensorData<float>(input),
+            tflite::micro::GetTensorShape(output),
+            tflite::micro::GetTensorData<int8_t>(output));
+        break;
+      case kTfLiteInt16:
+        reference_ops::AffineQuantize(
+            data->quantization_params, tflite::micro::GetTensorShape(input),
+            tflite::micro::GetTensorData<float>(input),
+            tflite::micro::GetTensorShape(output),
+            tflite::micro::GetTensorData<int16_t>(output));
+        return kTfLiteOk;
+      default:
+        MicroPrintf("Input %s, output %s not supported.",
+                    TfLiteTypeGetName(input->type),
+                    TfLiteTypeGetName(output->type));
+        return kTfLiteError;
+    }
+  } else if (input->type == kTfLiteInt32) {
+    size_t size = ElementCount(*input->dims);
+    switch (output->type) {
+      case kTfLiteInt8:
+        reference_ops::Requantize(
+            tflite::micro::GetTensorData<int32_t>(input), size,
+            data->requantize_output_multiplier, data->requantize_output_shift,
+            data->input_zero_point, data->quantization_params.zero_point,
+            tflite::micro::GetTensorData<int8_t>(output));
+        break;
+      case kTfLiteInt16:
+        reference_ops::Requantize(
+            tflite::micro::GetTensorData<int32_t>(input), size,
+            data->requantize_output_multiplier, data->requantize_output_shift,
+            data->input_zero_point, data->quantization_params.zero_point,
+            tflite::micro::GetTensorData<int16_t>(output));
+        break;
+      default:
+        MicroPrintf("Input %s, output %s not supported.",
+                    TfLiteTypeGetName(input->type),
+                    TfLiteTypeGetName(output->type));
+        return kTfLiteError;
+    }
+  } else if (input->type == kTfLiteInt16) {
+    size_t size = ElementCount(*input->dims);
+    switch (output->type) {
+      case kTfLiteInt8:
+        reference_ops::Requantize(
+            tflite::micro::GetTensorData<int16_t>(input), size,
+            data->requantize_output_multiplier, data->requantize_output_shift,
+            data->input_zero_point, data->quantization_params.zero_point,
+            tflite::micro::GetTensorData<int8_t>(output));
+        break;
+      case kTfLiteInt16:
+        reference_ops::Requantize(
+            tflite::micro::GetTensorData<int16_t>(input), size,
+            data->requantize_output_multiplier, data->requantize_output_shift,
+            data->input_zero_point, data->quantization_params.zero_point,
+            tflite::micro::GetTensorData<int16_t>(output));
+        return kTfLiteOk;
+      case kTfLiteInt32:
+        reference_ops::Requantize(
+            tflite::micro::GetTensorData<int16_t>(input), size,
+            data->requantize_output_multiplier, data->requantize_output_shift,
+            data->input_zero_point, data->quantization_params.zero_point,
+            tflite::micro::GetTensorData<int32_t>(output));
+        return kTfLiteOk;
+      default:
+        MicroPrintf("Input %s, output %s not supported.",
+                    TfLiteTypeGetName(input->type),
+                    TfLiteTypeGetName(output->type));
+        return kTfLiteError;
+    }
+  } else if (input->type == kTfLiteInt8) {
+    // Int8 to Int8 requantization, required if the input and output tensors
+    // have different scales and/or zero points.
+ size_t size = ElementCount(*input->dims); + switch (output->type) { + case kTfLiteInt8: + reference_ops::Requantize( + tflite::micro::GetTensorData(input), size, + data->requantize_output_multiplier, data->requantize_output_shift, + data->input_zero_point, data->quantization_params.zero_point, + tflite::micro::GetTensorData(output)); + break; + case kTfLiteUInt8: + reference_ops::Requantize( + tflite::micro::GetTensorData(input), size, + data->requantize_output_multiplier, data->requantize_output_shift, + data->input_zero_point, data->quantization_params.zero_point, + tflite::micro::GetTensorData(output)); + break; + case kTfLiteInt16: + reference_ops::Requantize( + tflite::micro::GetTensorData(input), size, + data->requantize_output_multiplier, data->requantize_output_shift, + data->input_zero_point, data->quantization_params.zero_point, + tflite::micro::GetTensorData(output)); + break; + case kTfLiteInt32: + reference_ops::Requantize( + tflite::micro::GetTensorData(input), size, + data->requantize_output_multiplier, data->requantize_output_shift, + data->input_zero_point, data->quantization_params.zero_point, + tflite::micro::GetTensorData(output)); + break; + default: + MicroPrintf("Input %s, output %s not supported.", + TfLiteTypeGetName(input->type), + TfLiteTypeGetName(output->type)); + return kTfLiteError; + } + } else if (input->type == kTfLiteUInt8) { + size_t size = ElementCount(*input->dims); + switch (output->type) { + case kTfLiteInt8: + reference_ops::Requantize( + tflite::micro::GetTensorData(input), size, + data->requantize_output_multiplier, data->requantize_output_shift, + data->input_zero_point, data->quantization_params.zero_point, + tflite::micro::GetTensorData(output)); + break; + default: + MicroPrintf("Input %s, output %s not supported.", + TfLiteTypeGetName(input->type), + TfLiteTypeGetName(output->type)); + return kTfLiteError; + } + } else { + MicroPrintf("Input %s, output %s not supported.", + TfLiteTypeGetName(input->type), + TfLiteTypeGetName(output->type)); + return kTfLiteError; + } + + return kTfLiteOk; +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/read_variable.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/read_variable.cpp new file mode 100644 index 0000000..ba1fe4a --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/read_variable.cpp @@ -0,0 +1,87 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include + +#include + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_graph.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_resource_variable.h" +#include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h" + +namespace tflite { + +namespace { + +constexpr int kInputVariableId = 0; +constexpr int kOutputValue = 0; + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(NumInputs(node) == 1); + TFLITE_DCHECK(NumOutputs(node) == 1); + + MicroContext* micro_context = GetMicroContext(context); + + TfLiteTensor* input_resource_id_tensor = + micro_context->AllocateTempInputTensor(node, kInputVariableId); + + TFLITE_DCHECK(input_resource_id_tensor != nullptr); + TFLITE_DCHECK(input_resource_id_tensor->type == kTfLiteResource); + TFLITE_DCHECK(NumElements(input_resource_id_tensor) == 1); + + micro_context->DeallocateTempTfLiteTensor(input_resource_id_tensor); + + return kTfLiteOk; +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + const TfLiteEvalTensor* input_resource_id_tensor = + tflite::micro::GetEvalInput(context, node, kInputVariableId); + TFLITE_DCHECK(input_resource_id_tensor != nullptr); + + TfLiteEvalTensor* output_value = + tflite::micro::GetEvalOutput(context, node, kOutputValue); + TFLITE_DCHECK(output_value != nullptr); + + tflite::MicroContext* micro_context = tflite::GetMicroContext(context); + MicroGraph& graph_info = micro_context->graph(); + + MicroResourceVariables* resources = graph_info.GetResourceVariables(); + if (resources == nullptr) { + MicroPrintf( + "READ_VARIABLE requires resource variables. Please create " + "ResourceVariables and pass it to the interpreter."); + return kTfLiteError; + } + TF_LITE_ENSURE_OK( + context, + resources->Read(input_resource_id_tensor->data.i32[0], output_value)); + return kTfLiteOk; +} + +} // namespace. + +TfLiteRegistration Register_READ_VARIABLE() { + return tflite::micro::RegisterOp(nullptr, Prepare, Eval); +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/real.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/real.cpp new file mode 100644 index 0000000..6ec5aad --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/real.cpp @@ -0,0 +1,134 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include +#include +#include +#include + +#include +#include + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h" + +namespace tflite { +namespace ops { +namespace micro { +namespace real { + +using std::complex; + +constexpr int kInputTensor = 0; +constexpr int kOutputTensor = 0; + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + + const TfLiteTensor* input = GetInput(context, node, kInputTensor); + TF_LITE_ENSURE(context, input != nullptr); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); + + if (input->type != kTfLiteComplex64 || output->type != kTfLiteFloat32) { + TF_LITE_KERNEL_LOG(context, "Types input %s (%d), output %s (%d) not supported.", + TfLiteTypeGetName(input->type), input->type, + TfLiteTypeGetName(output->type), output->type); + return kTfLiteError; + } + + size_t total_input_els = 1; + for (size_t dim_ix = 0; dim_ix < input->dims->size; dim_ix++) { + total_input_els *= input->dims->data[dim_ix]; + } + + size_t total_output_els = 1; + for (size_t dim_ix = 0; dim_ix < output->dims->size; dim_ix++) { + total_output_els *= output->dims->data[dim_ix]; + } + + TFLITE_DCHECK(total_input_els == total_output_els); + + return kTfLiteOk; +} + +TfLiteStatus RealEval(TfLiteContext* context, TfLiteNode* node) { + const TfLiteTensor* input = GetInput(context, node, kInputTensor); + TF_LITE_ENSURE(context, input != nullptr); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); + + size_t total_input_els = 1; + for (size_t dim_ix = 0; dim_ix < input->dims->size; dim_ix++) { + total_input_els *= input->dims->data[dim_ix]; + } + + for (size_t ix = 0; ix < total_input_els; ix++) { + output->data.f[ix] = input->data.c64[ix].re; + } + + return kTfLiteOk; +} + +TfLiteStatus ImagEval(TfLiteContext* context, TfLiteNode* node) { + const TfLiteTensor* input = GetInput(context, node, kInputTensor); + TF_LITE_ENSURE(context, input != nullptr); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); + + size_t total_input_els = 1; + for (size_t dim_ix = 0; dim_ix < input->dims->size; dim_ix++) { + total_input_els *= input->dims->data[dim_ix]; + } + + for (size_t ix = 0; ix < total_input_els; ix++) { + output->data.f[ix] = input->data.c64[ix].im; + } + + return kTfLiteOk; +} + +} // namespace real +} // namespace micro +} // namespace ops + +TfLiteRegistration Register_REAL() { + return {/*init=*/nullptr, + /*free=*/nullptr, + /*prepare=*/ops::micro::real::Prepare, + /*invoke=*/ops::micro::real::RealEval, + /*profiling_string=*/nullptr, + /*builtin_code=*/0, + /*custom_name=*/nullptr, + /*version=*/0}; +} + +TfLiteRegistration Register_IMAG() { + return {/*init=*/nullptr, + /*free=*/nullptr, + /*prepare=*/ops::micro::real::Prepare, + /*invoke=*/ops::micro::real::ImagEval, + /*profiling_string=*/nullptr, + /*builtin_code=*/0, + /*custom_name=*/nullptr, + /*version=*/0}; +} + +} // 
namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/reduce.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/reduce.cpp new file mode 100644 index 0000000..b346282 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/reduce.cpp @@ -0,0 +1,86 @@ +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/reduce.h" + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/mean.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/reduce.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h" + +namespace tflite { + +void* InitReduce(TfLiteContext* context, const char* buffer, size_t length) { + return context->AllocatePersistentBuffer(context, sizeof(OpDataReduce)); +} + +TfLiteStatus PrepareMax(TfLiteContext* context, TfLiteNode* node) { + return PrepareMinMaxHelper(context, node, + static_cast(node->user_data)); +} + +TfLiteStatus PrepareMin(TfLiteContext* context, TfLiteNode* node) { + return PrepareMinMaxHelper(context, node, + static_cast(node->user_data)); +} + +TfLiteStatus PrepareMeanOrSum(TfLiteContext* context, TfLiteNode* node) { + return PrepareMeanOrSumHelper(context, node, + static_cast(node->user_data)); +} + +TfLiteStatus EvalMean(TfLiteContext* context, TfLiteNode* node) { + return EvalMeanHelper(context, node, + static_cast(node->user_data)); +} + +TfLiteStatus EvalMax(TfLiteContext* context, TfLiteNode* node) { + OpDataReduce* op_data = static_cast(node->user_data); + return EvalMaxHelper(context, node, op_data); +} + +TfLiteStatus EvalMin(TfLiteContext* context, TfLiteNode* node) { + OpDataReduce* op_data = static_cast(node->user_data); + return EvalMinHelper(context, node, op_data); +} + +TfLiteStatus EvalSum(TfLiteContext* context, TfLiteNode* node) { + return EvalSumHelper(context, node, + static_cast(node->user_data)); +} + +TfLiteRegistration Register_MEAN() { + return tflite::micro::RegisterOp(InitReduce, PrepareMeanOrSum, EvalMean); +} + +TfLiteRegistration Register_REDUCE_MAX() { + return tflite::micro::RegisterOp(InitReduce, PrepareMax, EvalMax); +} + +TfLiteRegistration Register_REDUCE_MIN() { + return tflite::micro::RegisterOp(InitReduce, PrepareMin, EvalMin); +} + +TfLiteRegistration Register_SUM() { + return tflite::micro::RegisterOp(InitReduce, PrepareMeanOrSum, EvalSum); +} 
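Each registration above is the same init/prepare/eval triad around a persistently allocated OpDataReduce (the static_cast calls have lost their OpDataReduce* template arguments in this rendering), so adding another reduction flavour is mostly boilerplate. A hypothetical example: the EvalProdHelper named here is not part of this SDK, it only marks where the variant-specific work would go.

// Hypothetical PROD reduction following the same pattern as MEAN/SUM above.
TfLiteStatus PrepareProd(TfLiteContext* context, TfLiteNode* node) {
  return PrepareMeanOrSumHelper(
      context, node, static_cast<OpDataReduce*>(node->user_data));
}

TfLiteStatus EvalProd(TfLiteContext* context, TfLiteNode* node) {
  // EvalProdHelper is assumed, not provided by the SDK; it would mirror
  // EvalSumHelper with a multiplicative accumulator.
  return EvalProdHelper(context, node,
                        static_cast<OpDataReduce*>(node->user_data));
}

TfLiteRegistration Register_REDUCE_PROD() {
  return tflite::micro::RegisterOp(InitReduce, PrepareProd, EvalProd);
}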
+ +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/reduce.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/reduce.h new file mode 100644 index 0000000..6780df4 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/reduce.h @@ -0,0 +1,71 @@ +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_MICRO_KERNELS_REDUCE_H_ +#define TENSORFLOW_LITE_MICRO_KERNELS_REDUCE_H_ + +#include + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" + +namespace tflite { + +extern const int kMaxNumberOfAxis; +extern const int kMaxNumberOfReducedAxis; + +struct OpDataReduce { + int32_t multiplier; + int shift; + int temp_buffer_idx; + int resolved_axis_idx; + int input_zp; + float input_scale; + int output_zp; + float output_scale; + int num_output_elements; + int num_axis; +}; + +TfLiteStatus PrepareMinMaxHelper(TfLiteContext* context, TfLiteNode* node, + OpDataReduce* op_data); + +TfLiteStatus PrepareMeanOrSumHelper(TfLiteContext* context, TfLiteNode* node, + OpDataReduce* op_data); + +TfLiteStatus EvalMaxHelper(TfLiteContext* context, TfLiteNode* node, + OpDataReduce* op_data); + +TfLiteStatus EvalMinHelper(TfLiteContext* context, TfLiteNode* node, + OpDataReduce* op_data); + +TfLiteStatus EvalMeanHelper(TfLiteContext* context, TfLiteNode* node, + OpDataReduce* op_data); + +TfLiteStatus EvalSumHelper(TfLiteContext* context, TfLiteNode* node, + OpDataReduce* op_data); + +void ReduceResolveAxis(const int* axis_data, int axis_count, + MeanParams* op_params); + +TfLiteRegistration Register_MEAN(); +TfLiteRegistration Register_REDUCE_MAX(); +TfLiteRegistration Register_REDUCE_MIN(); +TfLiteRegistration Register_SUM(); + +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_KERNELS_REDUCE_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/reduce_common.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/reduce_common.cpp new file mode 100644 index 0000000..a2c5c38 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/reduce_common.cpp @@ -0,0 +1,417 @@ +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
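On OpDataReduce's multiplier/shift pair declared above: the quantized reduce paths fold input_scale / output_scale into a Q31 multiplier plus a power-of-two shift via QuantizeMultiplier. A simplified sketch of that conversion (not the SDK's exact rounding), with a worked example in the trailing comment:

#include <cmath>
#include <cstdint>

// Express real_multiplier = input_scale / output_scale as a Q31 fixed-point
// significand plus a power-of-two exponent.
void QuantizeMultiplierSketch(double real_multiplier, int32_t* quantized,
                              int* shift) {
  if (real_multiplier == 0.0) {
    *quantized = 0;
    *shift = 0;
    return;
  }
  const double significand = std::frexp(real_multiplier, shift);  // in [0.5, 1)
  int64_t q = static_cast<int64_t>(std::round(significand * (1ll << 31)));
  if (q == (1ll << 31)) {  // rounding pushed the significand to 1.0
    q /= 2;
    ++(*shift);
  }
  *quantized = static_cast<int32_t>(q);
}
// Example: input_scale 0.5, output_scale 0.25 -> real_multiplier 2.0
//          -> quantized == 1 << 30, shift == 2.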
+==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/mean.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/reduce.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/reduce.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h" + +namespace tflite { + +const int kMaxNumberOfAxis = 5; +const int kMaxNumberOfReducedAxis = 2; + +TfLiteStatus PrepareSimple(TfLiteContext* context, TfLiteNode* node, + int32_t* multiplier, int* shift) { + MicroContext* micro_context = GetMicroContext(context); + + // Inputs Tensor (dtype depends on quantization): + // [0] = Input + // [1] = Axis + TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, 0); + + // Outputs Tensor (dtype depends on quantization): + // [0] = Output + + // Validate number of inputs and outputs + TF_LITE_ENSURE_EQ(context, node->inputs->size, 2); + TF_LITE_ENSURE_EQ(context, node->outputs->size, 1); + + // Validate axis type + TfLiteTensor* axis = micro_context->AllocateTempInputTensor(node, 1); + TF_LITE_ENSURE(context, axis != nullptr); + TF_LITE_ENSURE_TYPES_EQ(context, axis->type, kTfLiteInt32); + + if (input->type == kTfLiteInt8) { + TfLiteTensor* output = micro_context->AllocateTempOutputTensor(node, 0); + const double real_multiplier = static_cast(input->params.scale) / + static_cast(output->params.scale); + QuantizeMultiplier(real_multiplier, multiplier, shift); + micro_context->DeallocateTempTfLiteTensor(output); + } + micro_context->DeallocateTempTfLiteTensor(axis); + micro_context->DeallocateTempTfLiteTensor(input); + return kTfLiteOk; +} + +TfLiteStatus PrepareMinMaxHelper(TfLiteContext* context, TfLiteNode* node, + OpDataReduce* op_data) { + TF_LITE_ENSURE_OK(context, PrepareSimple(context, node, &op_data->multiplier, + &op_data->shift)); + + MicroContext* micro_context = GetMicroContext(context); + TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, 0); + TfLiteTensor* output = micro_context->AllocateTempOutputTensor(node, 0); + TfLiteTensor* axis = micro_context->AllocateTempInputTensor(node, 1); + + op_data->input_scale = input->params.scale; + op_data->output_scale = output->params.scale; + op_data->num_output_elements = NumElements(output); + + context->RequestScratchBufferInArena(context, sizeof(int) * input->dims->size, + &op_data->temp_buffer_idx); + context->RequestScratchBufferInArena( + context, sizeof(int) * static_cast(ElementCount(*axis->dims)), + &op_data->resolved_axis_idx); + + micro_context->DeallocateTempTfLiteTensor(input); + micro_context->DeallocateTempTfLiteTensor(output); + micro_context->DeallocateTempTfLiteTensor(axis); + return kTfLiteOk; +} + +TfLiteStatus PrepareMeanOrSumHelper(TfLiteContext* context, TfLiteNode* node, + OpDataReduce* op_data) { + MicroContext* micro_context = GetMicroContext(context); + TfLiteTensor* input = 
micro_context->AllocateTempInputTensor(node, 0); + TfLiteTensor* output = micro_context->AllocateTempOutputTensor(node, 0); + TfLiteTensor* axis = micro_context->AllocateTempInputTensor(node, 1); + if (input->type == kTfLiteInt8 || input->type == kTfLiteInt16) { + const double real_multiplier = static_cast(input->params.scale) / + static_cast(output->params.scale); + QuantizeMultiplier(real_multiplier, &op_data->multiplier, &op_data->shift); + } + + int output_size = NumElements(output); + op_data->num_axis = NumElements(axis); + + if (input->type == kTfLiteInt8 || input->type == kTfLiteInt16) { + context->RequestScratchBufferInArena(context, output_size * sizeof(int32_t), + &op_data->temp_buffer_idx); + op_data->input_zp = input->params.zero_point; + op_data->input_scale = input->params.scale; + op_data->output_zp = output->params.zero_point; + op_data->output_scale = output->params.scale; + } + + TF_LITE_ENSURE_OK( + context, + PrepareSimple(context, node, &(op_data->multiplier), &(op_data->shift))); + // TODO(b/144955155): Support uint8_t(b/144955155) and int8_t(b/144955018) + micro_context->DeallocateTempTfLiteTensor(input); + micro_context->DeallocateTempTfLiteTensor(output); + micro_context->DeallocateTempTfLiteTensor(axis); + return kTfLiteOk; +} + +void ResolveAxis(const int* axis_data, int axis_count, + tflite::MeanParams* op_params) { + int i = 0; + for (; i < axis_count; ++i) { + op_params->axis[i] = static_cast(axis_data[i]); + } + for (; i < 4; ++i) { + op_params->axis[i] = 1; + } + op_params->axis_count = axis_count; +} + +template +TfLiteStatus QuantizedMeanOrSum(TfLiteContext* context, TfLiteNode* node, + int* temp_index, int* resolved_axis, + int32_t* temp_sum, OpDataReduce* op_data, + bool compute_sum) { + const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0); + const TfLiteEvalTensor* axis = tflite::micro::GetEvalInput(context, node, 1); + TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0); + TfLiteReducerParams* params = + static_cast(node->builtin_data); + + bool result = reference_ops::QuantizedMeanOrSumExtraArgs( + tflite::micro::GetTensorData(input), op_data->input_zp, + op_data->input_scale, &input->dims->data[0], input->dims->size, + tflite::micro::GetTensorData(output), op_data->output_scale, + op_data->multiplier, op_data->shift, op_data->output_zp, + &output->dims->data[0], output->dims->size, + tflite::micro::GetTensorData(axis), op_data->num_axis, + params->keep_dims, temp_index, resolved_axis, temp_sum, compute_sum); + TF_LITE_ENSURE(context, result); + + return kTfLiteOk; +} + +template +TfLiteStatus Mean(TfLiteContext* context, TfLiteNode* node, + OpDataReduce* op_data, int* temp_index, int* resolved_axis, + U* temp_sum) { + const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0); + const TfLiteEvalTensor* axis = tflite::micro::GetEvalInput(context, node, 1); + TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0); + TfLiteReducerParams* params = + static_cast(node->builtin_data); + + reference_ops::Mean( + tflite::micro::GetTensorData(input), &input->dims->data[0], + input->dims->size, tflite::micro::GetTensorData(output), + &output->dims->data[0], output->dims->size, + tflite::micro::GetTensorData(axis), op_data->num_axis, + params->keep_dims, temp_index, resolved_axis, temp_sum); + + return kTfLiteOk; +} + +template +TfLiteStatus EvalIntegerMean(TfLiteContext* context, TfLiteNode* node, + int num_axis, OpDataReduce* op_data, + int* temp_index, int* resolved_axis) { 
+ int32_t* temp_sum = static_cast( + context->GetScratchBuffer(context, op_data->temp_buffer_idx)); + + if (op_data->input_zp == op_data->output_zp && + op_data->input_scale == op_data->output_scale) { + Mean(context, node, op_data, temp_index, + resolved_axis, temp_sum); + } else { + QuantizedMeanOrSum(context, node, temp_index, resolved_axis, + temp_sum, op_data, /*compute_sum=*/false); + } + return kTfLiteOk; +} + +TfLiteStatus EvalMeanHelper(TfLiteContext* context, TfLiteNode* node, + OpDataReduce* op_data) { + const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0); + const TfLiteEvalTensor* axis = tflite::micro::GetEvalInput(context, node, 1); + TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0); + TfLiteReducerParams* params = + reinterpret_cast(node->builtin_data); + + int num_axis = static_cast(ElementCount(*axis->dims)); + int temp_index[kMaxNumberOfAxis]; + int resolved_axis[kMaxNumberOfReducedAxis]; + + switch (input->type) { + case kTfLiteFloat32: { + tflite::MeanParams op_params; + ResolveAxis(tflite::micro::GetTensorData(axis), num_axis, + &op_params); + + // Special case mean implementation exists for 4D mean across axes 1 + // and 2. + bool special_case_4d_axes_1_and_2 = + input->dims->size == 4 && op_params.axis_count == 2 && + ((op_params.axis[0] == 1 && op_params.axis[1] == 2) || + (op_params.axis[0] == 2 && op_params.axis[1] == 1)); + + // Defer to specialized implementation for 4D Mean across axes 1 & 2. + if (params->keep_dims && special_case_4d_axes_1_and_2) { + reference_ops::Mean(op_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } else { + TF_LITE_ENSURE( + context, + reference_ops::Mean( + tflite::micro::GetTensorData(input), input->dims->data, + input->dims->size, tflite::micro::GetTensorData(output), + output->dims->data, output->dims->size, + tflite::micro::GetTensorData(axis), num_axis, + params->keep_dims, temp_index, resolved_axis, + tflite::micro::GetTensorData(output))); + } + } break; + case kTfLiteInt8: { + TF_LITE_ENSURE_OK( + context, EvalIntegerMean(context, node, num_axis, op_data, + temp_index, resolved_axis)); + } break; + case kTfLiteInt16: { + TF_LITE_ENSURE_OK( + context, EvalIntegerMean(context, node, num_axis, op_data, + temp_index, resolved_axis)); + } break; + default: + TF_LITE_ENSURE_MSG(context, false, + "Currently, only float32, int8 or int16 input type " + "is supported."); + } + return kTfLiteOk; +} + +TfLiteStatus EvalMaxHelper(TfLiteContext* context, TfLiteNode* node, + OpDataReduce* op_data) { + const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0); + const TfLiteEvalTensor* axis = tflite::micro::GetEvalInput(context, node, 1); + TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0); + TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type); + TfLiteReducerParams* params = + static_cast(node->builtin_data); + + // Interpret an axis tensor with null dimensions as a scalar + int num_axis = static_cast(ElementCount(*axis->dims)); + int* temp_buffer = static_cast( + context->GetScratchBuffer(context, op_data->temp_buffer_idx)); + int* resolved_axis = static_cast( + context->GetScratchBuffer(context, op_data->resolved_axis_idx)); + switch (input->type) { + case kTfLiteFloat32: + TF_LITE_ENSURE( + context, + reference_ops::ReduceGeneric( + tflite::micro::GetTensorData(input), input->dims->data, + input->dims->size, 
tflite::micro::GetTensorData(output), + output->dims->data, output->dims->size, + tflite::micro::GetTensorData(axis), num_axis, + params->keep_dims, temp_buffer, resolved_axis, + std::numeric_limits::lowest(), + [](const float current, const float in) -> float { + return (in > current) ? in : current; + })); + break; + case kTfLiteInt8: + TF_LITE_ENSURE_EQ(context, static_cast(op_data->input_scale), + static_cast(op_data->output_scale)); + TF_LITE_ENSURE_EQ(context, op_data->input_zp, op_data->output_zp); + TF_LITE_ENSURE( + context, + reference_ops::ReduceGeneric( + tflite::micro::GetTensorData(input), input->dims->data, + input->dims->size, tflite::micro::GetTensorData(output), + output->dims->data, output->dims->size, + tflite::micro::GetTensorData(axis), num_axis, + params->keep_dims, temp_buffer, resolved_axis, + std::numeric_limits::lowest(), + [](const int8_t current, const int8_t in) -> int8_t { + return (in > current) ? in : current; + })); + break; + default: + MicroPrintf("Only float32 and int8 types are supported."); + return kTfLiteError; + } + return kTfLiteOk; +} + +TfLiteStatus EvalMinHelper(TfLiteContext* context, TfLiteNode* node, + OpDataReduce* op_data) { + const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0); + const TfLiteEvalTensor* axis = tflite::micro::GetEvalInput(context, node, 1); + TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0); + TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type); + TfLiteReducerParams* params = + static_cast(node->builtin_data); + + // Interpret an axis tensor with null dimensions as a scalar + int num_axis = static_cast(ElementCount(*axis->dims)); + int* temp_buffer = static_cast( + context->GetScratchBuffer(context, op_data->temp_buffer_idx)); + int* resolved_axis = static_cast( + context->GetScratchBuffer(context, op_data->resolved_axis_idx)); + switch (input->type) { + case kTfLiteFloat32: + TF_LITE_ENSURE( + context, + reference_ops::ReduceGeneric( + tflite::micro::GetTensorData(input), input->dims->data, + input->dims->size, tflite::micro::GetTensorData(output), + output->dims->data, output->dims->size, + tflite::micro::GetTensorData(axis), num_axis, + params->keep_dims, temp_buffer, resolved_axis, + std::numeric_limits::max(), + [](const float current, const float in) -> float { + return (in < current) ? in : current; + })); + break; + case kTfLiteInt8: + TF_LITE_ENSURE_EQ(context, static_cast(op_data->input_scale), + static_cast(op_data->output_scale)); + TF_LITE_ENSURE_EQ(context, op_data->input_zp, op_data->output_zp); + TF_LITE_ENSURE( + context, + reference_ops::ReduceGeneric( + tflite::micro::GetTensorData(input), input->dims->data, + input->dims->size, tflite::micro::GetTensorData(output), + output->dims->data, output->dims->size, + tflite::micro::GetTensorData(axis), num_axis, + params->keep_dims, temp_buffer, resolved_axis, + std::numeric_limits::max(), + [](const int8_t current, const int8_t in) -> int8_t { + return (in < current) ? 
in : current; + })); + break; + default: + MicroPrintf("Only float32 and int8 types are supported."); + return kTfLiteError; + } + return kTfLiteOk; +} + +TfLiteStatus EvalSumHelper(TfLiteContext* context, TfLiteNode* node, + OpDataReduce* op_data) { + const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0); + const TfLiteEvalTensor* axis = tflite::micro::GetEvalInput(context, node, 1); + TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0); + TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type); + TfLiteReducerParams* params = + static_cast(node->builtin_data); + + // Interpret an axis tensor with null dimensions as a scalar. + int num_axis = static_cast(ElementCount(*axis->dims)); + int temp_index[kMaxNumberOfAxis]; + int resolved_axis[kMaxNumberOfReducedAxis]; + + switch (input->type) { + case kTfLiteFloat32: { + TF_LITE_ENSURE( + context, + reference_ops::ReduceGeneric( + tflite::micro::GetTensorData(input), input->dims->data, + input->dims->size, tflite::micro::GetTensorData(output), + output->dims->data, output->dims->size, + tflite::micro::GetTensorData(axis), num_axis, + params->keep_dims, temp_index, resolved_axis, /*init_value=*/0.f, + [](const float current, const float in) -> float { + return in + current; + })); + } break; + case kTfLiteInt8: { + int32_t* temp_sum = static_cast( + context->GetScratchBuffer(context, op_data->temp_buffer_idx)); + QuantizedMeanOrSum(context, node, temp_index, resolved_axis, + temp_sum, op_data, /*compute_sum=*/true); + } break; + case kTfLiteInt16: { + int32_t* temp_sum = static_cast( + context->GetScratchBuffer(context, op_data->temp_buffer_idx)); + QuantizedMeanOrSum(context, node, temp_index, resolved_axis, + temp_sum, op_data, /*compute_sum=*/true); + } break; + default: + MicroPrintf("Only float32, int8, and int16 types are supported."); + return kTfLiteError; + } + return kTfLiteOk; +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/reshape.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/reshape.cpp new file mode 100644 index 0000000..f71298c --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/reshape.cpp @@ -0,0 +1,118 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
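The ReduceGeneric calls in the max/min/sum helpers above all share one idea: fold the reduced elements into an accumulator seeded with an identity value (lowest() for max, max() for min, 0 for sum) using the supplied lambda. A stripped-down sketch of the full-reduction case; the real helper additionally resolves axes, honours keep_dims, and computes per-output indices.

#include <cstddef>
#include <limits>

// Full reduction over a flat buffer; the SDK helper generalises this to
// arbitrary axes.
template <typename T, typename Op>
T ReduceAllSketch(const T* data, size_t count, T init_value, Op reducer) {
  T acc = init_value;
  for (size_t i = 0; i < count; ++i) {
    acc = reducer(acc, data[i]);
  }
  return acc;
}

// Usage, mirroring the REDUCE_MAX lambda above:
// float m = ReduceAllSketch(values, n, std::numeric_limits<float>::lowest(),
//                           [](float cur, float in) { return in > cur ? in : cur; });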
+==============================================================================*/ + +#include + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h" + +namespace tflite { +namespace ops { +namespace micro { +namespace reshape { + +constexpr int kInputTensor = 0; +constexpr int kOutputTensor = 0; + +TfLiteStatus ReshapeOutput(TfLiteContext* context, TfLiteNode* node) { + MicroContext* micro_context = GetMicroContext(context); + + TfLiteTensor* input = + micro_context->AllocateTempInputTensor(node, kInputTensor); + TF_LITE_ENSURE(context, input != nullptr); + TfLiteTensor* output = + micro_context->AllocateTempOutputTensor(node, kOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); + // Tensorflow's Reshape allows one of the shape components to have the + // special -1 value, meaning it will be calculated automatically based on the + // input. Here we calculate what that dimension should be so that the number + // of output elements in the same as the number of input elements. + int num_input_elements = NumElements(input); + TfLiteIntArray* output_shape = output->dims; + + if (NumInputs(node) == 1 && // Legacy scalar supported with params. + output_shape->size == 1 && output_shape->data[0] == 0) { + // Legacy tflite models use a shape parameter of [0] to indicate scalars, + // so adjust accordingly. TODO(b/111614235): Allow zero-sized buffers during + // toco conversion. + output_shape->size = 0; + } + + int num_output_elements = 1; + int stretch_dim = -1; + for (int i = 0; i < output_shape->size; ++i) { + int value = output_shape->data[i]; + if (value == -1) { + TF_LITE_ENSURE_EQ(context, stretch_dim, -1); + stretch_dim = i; + } else { + num_output_elements *= value; + } + } + if (stretch_dim != -1) { + output_shape->data[stretch_dim] = num_input_elements / num_output_elements; + num_output_elements *= output_shape->data[stretch_dim]; + } + + TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type); + TF_LITE_ENSURE_EQ(context, num_input_elements, num_output_elements); + + micro_context->DeallocateTempTfLiteTensor(input); + micro_context->DeallocateTempTfLiteTensor(output); + return kTfLiteOk; +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TF_LITE_ENSURE(context, NumInputs(node) == 1 || NumInputs(node) == 2); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + TF_LITE_ENSURE_EQ(context, ReshapeOutput(context, node), kTfLiteOk); + return kTfLiteOk; +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + + // TODO(b/162522304): storing input bytes in OpData increases some models + // significantly, possibly due to alignment issues. + size_t input_bytes; + TF_LITE_ENSURE_STATUS(TfLiteTypeSizeOf(input->type, &input_bytes)); + input_bytes *= ElementCount(*input->dims); + + // Do nothing for in-place reshape. 
+ if (input->data.raw != output->data.raw) { + // Otherwise perform reshape with copy. + memcpy(output->data.raw, input->data.raw, input_bytes); + } + return kTfLiteOk; +} + +} // namespace reshape + +TfLiteRegistration Register_RESHAPE() { + return tflite::micro::RegisterOp(nullptr, reshape::Prepare, reshape::Eval); +} + +} // namespace micro +} // namespace ops +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/resize_bilinear.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/resize_bilinear.cpp new file mode 100644 index 0000000..01399ee --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/resize_bilinear.cpp @@ -0,0 +1,116 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/resize_bilinear.h" + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h" + +namespace tflite { +namespace { + +constexpr int kInputTensor = 0; +constexpr int kSizeTensor = 1; +constexpr int kOutputTensor = 0; + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + MicroContext* micro_context = GetMicroContext(context); + + TF_LITE_ENSURE_EQ(context, NumInputs(node), 2); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + + TfLiteTensor* input = + micro_context->AllocateTempInputTensor(node, kInputTensor); + TfLiteTensor* size = + micro_context->AllocateTempInputTensor(node, kSizeTensor); + TfLiteTensor* output = + micro_context->AllocateTempOutputTensor(node, kOutputTensor); + + TF_LITE_ENSURE_EQ(context, NumDimensions(input), 4); + TF_LITE_ENSURE_EQ(context, NumDimensions(size), 1); + + TF_LITE_ENSURE_EQ(context, size->type, kTfLiteInt32); + output->type = input->type; + + TF_LITE_ENSURE_MSG(context, IsConstantTensor(size), + "Non constant size tensor not supported"); + + // Ensure params are valid. 
+ auto* params = + reinterpret_cast(node->builtin_data); + if (params->half_pixel_centers && params->align_corners) { + MicroPrintf("If half_pixel_centers is True, align_corners must be False."); + return kTfLiteError; + } + + micro_context->DeallocateTempTfLiteTensor(input); + micro_context->DeallocateTempTfLiteTensor(size); + micro_context->DeallocateTempTfLiteTensor(output); + return kTfLiteOk; +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + auto* params = + reinterpret_cast(node->builtin_data); + + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + const TfLiteEvalTensor* size = + tflite::micro::GetEvalInput(context, node, kSizeTensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + + if (output->type == kTfLiteFloat32) { + tflite::ResizeBilinearParams op_params; + op_params.align_corners = params->align_corners; + op_params.half_pixel_centers = params->half_pixel_centers; + reference_ops::ResizeBilinear(op_params, + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(size), + tflite::micro::GetTensorData(size), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } else if (output->type == kTfLiteInt8) { + tflite::ResizeBilinearParams op_params; + op_params.align_corners = params->align_corners; + op_params.half_pixel_centers = params->half_pixel_centers; + reference_ops::ResizeBilinearInteger( + op_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(size), + tflite::micro::GetTensorData(size), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } else { + MicroPrintf("Output type is %d, requires float or int8.", output->type); + return kTfLiteError; + } + + return kTfLiteOk; +} + +} // namespace + +TfLiteRegistration Register_RESIZE_BILINEAR() { + return tflite::micro::RegisterOp(nullptr, Prepare, Eval); +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/resize_nearest_neighbor.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/resize_nearest_neighbor.cpp new file mode 100644 index 0000000..d6f3df3 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/resize_nearest_neighbor.cpp @@ -0,0 +1,126 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
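The resize kernels differ mainly in how they map each output pixel back to a source coordinate; the half_pixel_centers and align_corners flags validated above change that mapping, which is why the bilinear kernel rejects the combination of both. A small sketch of the coordinate math, under the usual TFLite conventions (treat it as illustrative, not as the SDK's exact helper):

// Map an output index back to a (fractional) input coordinate.
// scale is input_size / output_size; with align_corners the caller would
// instead use (input_size - 1) / (output_size - 1).
float SourceCoordinateSketch(int out_index, float scale,
                             bool half_pixel_centers) {
  return half_pixel_centers
             ? (static_cast<float>(out_index) + 0.5f) * scale - 0.5f
             : static_cast<float>(out_index) * scale;
}
// Bilinear then interpolates between floor(coord) and floor(coord) + 1;
// nearest-neighbor rounds coord to the closest valid input index.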
+==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h" + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +namespace tflite { +namespace ops { +namespace micro { +namespace resize_nearest_neighbor { + +constexpr int kInputTensor = 0; +constexpr int kSizeTensor = 1; +constexpr int kOutputTensor = 0; + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + MicroContext* micro_context = GetMicroContext(context); + + TF_LITE_ENSURE_EQ(context, NumInputs(node), 2); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + + TfLiteTensor* input = + micro_context->AllocateTempInputTensor(node, kInputTensor); + TfLiteTensor* size = + micro_context->AllocateTempInputTensor(node, kSizeTensor); + TfLiteTensor* output = + micro_context->AllocateTempOutputTensor(node, kOutputTensor); + + // Our current implementations rely on the input being 4D, + // and the size being 1D tensor with exactly 2 elements. + TF_LITE_ENSURE_EQ(context, NumDimensions(input), 4); + TF_LITE_ENSURE_EQ(context, NumDimensions(size), 1); + TF_LITE_ENSURE_EQ(context, size->type, kTfLiteInt32); + TF_LITE_ENSURE_EQ(context, size->dims->data[0], 2); + + output->type = input->type; + + if (!IsConstantTensor(size)) { + MicroPrintf("Dynamic tensors are unsupported in tfmicro."); + return kTfLiteError; + } + + micro_context->DeallocateTempTfLiteTensor(input); + micro_context->DeallocateTempTfLiteTensor(size); + micro_context->DeallocateTempTfLiteTensor(output); + + return kTfLiteOk; +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + auto* params = + reinterpret_cast(node->builtin_data); + + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + const TfLiteEvalTensor* size = + tflite::micro::GetEvalInput(context, node, kSizeTensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + + tflite::ResizeNearestNeighborParams op_params; + op_params.align_corners = params->align_corners; + op_params.half_pixel_centers = false; + + if (output->type == kTfLiteFloat32) { + reference_ops::ResizeNearestNeighbor( + op_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(size), + tflite::micro::GetTensorData(size), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } else if (output->type == kTfLiteInt8) { + reference_ops::ResizeNearestNeighbor( + op_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(size), + tflite::micro::GetTensorData(size), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } else if (output->type == kTfLiteInt16) { + reference_ops::ResizeNearestNeighbor( + op_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(size), + tflite::micro::GetTensorData(size), + tflite::micro::GetTensorShape(output), + 
tflite::micro::GetTensorData(output)); + } else { + MicroPrintf("Output tensor type %s (%d) not supported.", + TfLiteTypeGetName(output->type), output->type); + + return kTfLiteError; + } + + return kTfLiteOk; +} +} // namespace resize_nearest_neighbor + +TfLiteRegistration Register_RESIZE_NEAREST_NEIGHBOR() { + return tflite::micro::RegisterOp(nullptr, resize_nearest_neighbor::Prepare, + resize_nearest_neighbor::Eval); +} + +} // namespace micro +} // namespace ops +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/rfft2d.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/rfft2d.cpp new file mode 100644 index 0000000..fe4a16c --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/rfft2d.cpp @@ -0,0 +1,207 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include +#include +#include +#include + +#include +#include + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h" +#include "edge-impulse-sdk/dsp/kissfft/kiss_fftr.h" + +namespace tflite { +namespace ops { +namespace micro { +namespace rfft2d { + +using std::complex; + +constexpr int kInputTensor = 0; +constexpr int kFftLengthTensor = 1; +constexpr int kOutputTensor = 0; + +struct OpData { + int kiss_fft_output_buffer_index; +}; + +bool IsPowerOfTwo(uint32_t v) { return v && !(v & (v - 1)); } + +static int software_rfft(float *fft_input, TfLiteComplex64 *output, size_t n_fft, size_t n_fft_out_features, kiss_fft_cpx *fft_output) { + size_t kiss_fftr_mem_length; + + // create fftr context (this should move to a scratch buffer...) 
+ kiss_fftr_cfg cfg = kiss_fftr_alloc(n_fft, 0, NULL, NULL, &kiss_fftr_mem_length); + if (!cfg) { + ei_free(fft_output); + return -1; + } + + // execute the rfft operation + kiss_fftr(cfg, fft_input, fft_output); + + // and write back to the output + for (size_t ix = 0; ix < n_fft_out_features; ix++) { + output[ix].re = fft_output[ix].r; + output[ix].im = fft_output[ix].i; + } + + ei_free(cfg); + + return 0; +} + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + (void)buffer; + (void)length; + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(OpData)); +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TF_LITE_ENSURE_EQ(context, NumInputs(node), 2); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + + // Check type and shape of the input tensor + MicroContext* micro_context = GetMicroContext(context); + + TfLiteTensor* input = + micro_context->AllocateTempInputTensor(node, kInputTensor); + + TfLiteTensor* fft_length = + micro_context->AllocateTempInputTensor(node, kFftLengthTensor); + const int32_t* fft_length_data = GetTensorData(fft_length); + + TfLiteTensor* output = + micro_context->AllocateTempOutputTensor(node, kOutputTensor); + + TF_LITE_ENSURE(context, NumDimensions(input) >= 2); + if (input->type != kTfLiteFloat32) { + context->ReportError(context, + "Type '%s' for input is not supported by rfft2d.", + TfLiteTypeGetName(input->type)); + return kTfLiteError; + } + + // Check type and shape of the fft_length tensor + const RuntimeShape fft_length_shape = GetTensorShape(fft_length); + TF_LITE_ENSURE_EQ(context, NumDimensions(fft_length), 1); + TF_LITE_ENSURE_EQ(context, fft_length_shape.Dims(0), 2); + if (fft_length->type != kTfLiteInt32) { + context->ReportError(context, + "Type '%s' for fft_length is not supported by rfft2d.", + TfLiteTypeGetName(fft_length->type)); + return kTfLiteError; + } + + OpData* data = static_cast(node->user_data); + + size_t output_els = output->bytes / sizeof(TfLiteComplex64); + + TF_LITE_ENSURE_STATUS( + context->RequestScratchBufferInArena( + context, output_els * sizeof(kiss_fft_cpx), &data->kiss_fft_output_buffer_index)); + + micro_context->DeallocateTempTfLiteTensor(input); + micro_context->DeallocateTempTfLiteTensor(fft_length); + micro_context->DeallocateTempTfLiteTensor(output); + + return kTfLiteOk; +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + const TfLiteTensor* input; + TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input)); + const TfLiteTensor* fft_length; + TF_LITE_ENSURE_OK(context, + GetInputSafe(context, node, kFftLengthTensor, &fft_length)); + const int32_t* fft_length_data = GetTensorData(fft_length); + TfLiteTensor* output; + TF_LITE_ENSURE_OK(context, + GetOutputSafe(context, node, kOutputTensor, &output)); + + if (output->type != kTfLiteComplex64) { + context->ReportError(context, + "Type '%s' for output is not supported by rfft2d.", + TfLiteTypeGetName(output->type)); + return kTfLiteError; + } + + TF_LITE_ENSURE(context, IsPowerOfTwo(fft_length_data[0])); + TF_LITE_ENSURE(context, IsPowerOfTwo(fft_length_data[1])); + + int fft_height, fft_width; + fft_height = fft_length_data[0]; + fft_width = fft_length_data[1]; + + OpData* data = static_cast(node->user_data); + + if (fft_height != 1) { + context->ReportError(context, + "Only supports fft_height 1", + TfLiteTypeGetName(output->type)); + return kTfLiteError; + } + + kiss_fft_cpx* shift_buffer = 
(kiss_fft_cpx*)context->GetScratchBuffer(context, data->kiss_fft_output_buffer_index); + + size_t in_row_els = 1; + for (size_t ix = 1; ix < input->dims->size; ix++) { + in_row_els *= input->dims->data[ix]; + } + size_t out_row_els = 1; + for (size_t ix = 1; ix < output->dims->size; ix++) { + out_row_els *= output->dims->data[ix]; + } + + for (size_t row = 0; row < input->dims->data[0]; row++) { + float *in_ptr = &input->data.f[row * in_row_els]; + auto out_ptr = &output->data.c64[row * out_row_els]; + + int x = software_rfft(in_ptr, out_ptr, fft_width, in_row_els, shift_buffer); + if (x != 0) { + context->ReportError(context, + "software_rfft failed (%d)", + x); + return kTfLiteError; + } + } + + return kTfLiteOk; +} + +} // namespace rfft2d +} // namespace micro +} // namespace ops + +TfLiteRegistration Register_RFFT2D() { + return {/*init=*/ops::micro::rfft2d::Init, + /*free=*/nullptr, + /*prepare=*/ops::micro::rfft2d::Prepare, + /*invoke=*/ops::micro::rfft2d::Eval, + /*profiling_string=*/nullptr, + /*builtin_code=*/0, + /*custom_name=*/nullptr, + /*version=*/0}; +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/round.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/round.cpp new file mode 100644 index 0000000..56e30d3 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/round.cpp @@ -0,0 +1,76 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
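The RFFT2D custom op above is effectively a batched 1-D real FFT: it rejects fft_height != 1 and runs kissfft's real transform once per input row, yielding fft_width/2 + 1 complex bins per row. For reference, this naive O(n^2) DFT computes the same bins; kissfft is simply the fast version.

#include <cmath>
#include <complex>
#include <cstddef>
#include <vector>

// Reference for one row of the op above: n real samples in,
// n/2 + 1 complex bins out.
std::vector<std::complex<float>> NaiveRfftRow(const std::vector<float>& x) {
  constexpr float kPi = 3.14159265358979f;
  const size_t n = x.size();
  std::vector<std::complex<float>> bins(n / 2 + 1);
  for (size_t k = 0; k < bins.size(); ++k) {
    std::complex<float> acc(0.0f, 0.0f);
    for (size_t t = 0; t < n; ++t) {
      const float angle = -2.0f * kPi * static_cast<float>(k * t) / n;
      acc += x[t] * std::complex<float>(std::cos(angle), std::sin(angle));
    }
    bins[k] = acc;
  }
  return bins;
}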
+==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/round.h" + +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" + +namespace tflite { +namespace ops { +namespace micro { +namespace round { + +constexpr int kInputTensor = 0; +constexpr int kOutputTensor = 0; + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + MicroContext* micro_context = GetMicroContext(context); + + TfLiteTensor* input = + micro_context->AllocateTempInputTensor(node, kInputTensor); + TF_LITE_ENSURE(context, input != nullptr); + TfLiteTensor* output = + micro_context->AllocateTempOutputTensor(node, kOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); + TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32); + TF_LITE_ENSURE_TYPES_EQ(context, output->type, input->type); + TF_LITE_ENSURE_EQ(context, output->bytes, input->bytes); + TF_LITE_ENSURE_EQ(context, output->dims->size, input->dims->size); + for (int i = 0; i < output->dims->size; ++i) { + TF_LITE_ENSURE_EQ(context, output->dims->data[i], input->dims->data[i]); + } + + micro_context->DeallocateTempTfLiteTensor(input); + micro_context->DeallocateTempTfLiteTensor(output); + return kTfLiteOk; +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + + reference_ops::Round(tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + + return kTfLiteOk; +} +} // namespace round + +TfLiteRegistration Register_ROUND() { + return tflite::micro::RegisterOp(nullptr, round::Prepare, round::Eval); +} + +} // namespace micro +} // namespace ops +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/scratch_buf_mgr.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/scratch_buf_mgr.cpp new file mode 100644 index 0000000..b119d67 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/scratch_buf_mgr.cpp @@ -0,0 +1,397 @@ +// Patched by Edge Impulse to include reference and hardware-accelerated kernels +#include "../../../../classifier/ei_classifier_config.h" +#if EI_CLASSIFIER_TFLITE_ENABLE_ARC == 1 +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
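ROUND, shown above, forwards to the reference rounding routine, which (following TensorFlow's convention) resolves ties to the nearest even integer rather than away from zero like std::round. A small sketch of that tie-breaking, assuming the upstream half-to-even behaviour:

#include <cmath>

// Round-half-to-even, e.g. 2.5f -> 2.0f, 3.5f -> 4.0f, -2.5f -> -2.0f.
float RoundHalfToEvenSketch(float value) {
  const float floor_val = std::floor(value);
  const float diff = value - floor_val;
  if (diff < 0.5f) return floor_val;
  if (diff > 0.5f) return floor_val + 1.0f;
  // Exactly halfway: choose the even neighbour.
  return (std::fmod(floor_val, 2.0f) == 0.0f) ? floor_val : floor_val + 1.0f;
}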
+==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/scratch_buf_mgr.h" + +#include + +#include + +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/scratch_buffers.h" + +namespace tflite { +namespace ops { +namespace micro { + +#if (defined(__Xxy)) || (defined(__Xvdsp)) +static void get_arc_two_buffer_sizes(int request_size_1, int request_size_2, + int* grant_size_1, int* grant_size_2) { + int maxrequest = 0; + int secondrequest = 0; + int maxavailable = 0; + int secondavail = 0; + + // determine the largest requested buffer. + if (request_size_1 > request_size_2) { + maxrequest = request_size_1; + secondrequest = request_size_2; + } else { + maxrequest = request_size_2; + secondrequest = request_size_1; + } + + // find the two largest available buffers. + get_arc_scratch_buffer_two_max_sizes(&maxavailable, &secondavail); + + // in case two buffers are available, the largest buffer can go to the largest + // request. + if (secondavail > 0) { // this condition can be enhanced to prevent cases + // where the second buffer is so small that it is + // better to use one buffer and split it. + if (request_size_1 > request_size_2) { + *grant_size_1 = maxavailable; + *grant_size_2 = secondavail; + } else { + *grant_size_1 = secondavail; + *grant_size_2 = maxavailable; + } + } else { + // In case only one buffer is available, + // use only the max buffer, and split it. + *grant_size_1 = maxavailable / 2; + *grant_size_2 = maxavailable / 2; + } +} + +static TfLiteStatus get_arc_scratch_buffer_for_io_tensors( + TfLiteContext* context, MliTensorInterface* in, MliTensorInterface* out) { + int request_size_in = 0; + int request_size_out = 0; + int grant_size_in = 0; + int grant_size_out = 0; + if (!inside_arc_ccm(in->Data())) { + // In case the input tensor contains multiple batches, it has rank 4 + // because the mli kernel cannot operate on batches, we need to have the + // size of a single HWC tensor. that is why the start_rank is 1 in case of + // input rank 4 + int start_rank = *in->Rank() - 3; + request_size_in = mli_hlp_count_elem_num(in->MliTensor(), start_rank) * + mli_hlp_tensor_element_size(in->MliTensor()); + } + if (!inside_arc_ccm(out->Data())) { + // In case the input tensor contains multiple batches, it has rank 4 + // because the mli kernel cannot operate on batches, we need to have the + // size of a single batch. 
that is why the start_rank is 1 in case of input + // rank 4 + int start_rank = *out->Rank() - 3; + request_size_out = mli_hlp_count_elem_num(out->MliTensor(), start_rank) * + mli_hlp_tensor_element_size(out->MliTensor()); + } + + get_arc_two_buffer_sizes(request_size_in, request_size_out, &grant_size_in, + &grant_size_out); + if (!inside_arc_ccm(in->Data())) { + in->SetData( + static_cast(get_arc_scratch_buffer(grant_size_in)), + grant_size_in); + if (in->Data() == NULL) return kTfLiteError; + } + + if (!inside_arc_ccm(out->Data())) { + out->SetData( + static_cast(get_arc_scratch_buffer(grant_size_out)), + grant_size_out); + if (out->Data() == NULL) return kTfLiteError; + } + + return kTfLiteOk; +} +#endif + +TfLiteStatus get_arc_scratch_buffer_for_conv_tensors( + TfLiteContext* context, MliTensorInterface* in, MliTensorInterface* weights, + MliTensorInterface* bias, MliTensorInterface* out) { + TfLiteStatus ret_val = kTfLiteOk; +#if (defined(__Xxy)) || (defined(__Xvdsp)) + init_arc_scratch_buffers(); + + if (!inside_arc_ccm(bias->Data())) { + uint32_t bias_mem_requirements = + mli_hlp_count_elem_num(bias->MliTensor(), 0) * + mli_hlp_tensor_element_size(bias->MliTensor()); + bias->SetData( + static_cast(get_arc_scratch_buffer(bias_mem_requirements)), + bias_mem_requirements); + } + + if (bias->Data() == NULL) { + int max_bias_size = 0; + get_arc_scratch_buffer_max_size(&max_bias_size); + bias->SetData( + static_cast(get_arc_scratch_buffer(max_bias_size)), + max_bias_size); + if (max_bias_size == 0) ret_val = kTfLiteError; + } + if (bias->Data() == NULL) ret_val = kTfLiteError; + + if (!inside_arc_ccm(weights->Data())) { + int weights_size = mli_hlp_count_elem_num(weights->MliTensor(), 0) * + mli_hlp_tensor_element_size(weights->MliTensor()); + int max_weights_size = 0; + weights->SetData( + static_cast(get_arc_scratch_buffer(weights_size)), + weights_size); + if (weights->Data() == NULL) { + get_arc_scratch_buffer_max_size(&max_weights_size); + weights->SetData( + static_cast(get_arc_scratch_buffer(max_weights_size)), + max_weights_size); + if (max_weights_size == 0) ret_val = kTfLiteError; + } + if (weights->Data() == NULL) ret_val = kTfLiteError; + } + + if (ret_val == kTfLiteOk) { + ret_val = get_arc_scratch_buffer_for_io_tensors(context, in, out); + } +#endif + return ret_val; +} + +TfLiteStatus get_arc_scratch_buffer_for_fully_connect_tensors( + TfLiteContext* context, MliTensorInterface* in, MliTensorInterface* weights, + MliTensorInterface* bias, MliTensorInterface* out) { + TfLiteStatus ret_val = kTfLiteOk; + +#if (defined(__Xxy)) || (defined(__Xvdsp)) + init_arc_scratch_buffers(); + + if (!inside_arc_ccm(bias->Data())) { + int bias_mem_requirements = mli_hlp_count_elem_num(bias->MliTensor(), 0) * + mli_hlp_tensor_element_size(bias->MliTensor()); + bias->SetData( + static_cast(get_arc_scratch_buffer(bias_mem_requirements)), + bias_mem_requirements); + } + + if (bias->Data() == NULL) { + int max_bias_size = 0; + get_arc_scratch_buffer_max_size(&max_bias_size); + bias->SetData( + static_cast(get_arc_scratch_buffer(max_bias_size)), + max_bias_size); + if (max_bias_size == 0) ret_val = kTfLiteError; + } + if (bias->Data() == NULL) ret_val = kTfLiteError; + + if (!inside_arc_ccm(weights->Data())) { + int weights_size = mli_hlp_count_elem_num(weights->MliTensor(), 0) * + mli_hlp_tensor_element_size(weights->MliTensor()); + int max_weights_size = 0; + weights->SetData( + static_cast(get_arc_scratch_buffer(weights_size)), + weights_size); + if (weights->Data() == NULL) { + 
get_arc_scratch_buffer_max_size(&max_weights_size); + weights->SetData( + static_cast(get_arc_scratch_buffer(max_weights_size)), + max_weights_size); + if (max_weights_size == 0) ret_val = kTfLiteError; + } + if (weights->Data() == NULL) ret_val = kTfLiteError; + } + + /* strategy for FC kernels: + first allocate input, because this cannot be sliced. (in case of batch + processing, only a single input needs to be allocated) then weights & + bias because if fully loaded, they can be reused over batches. then + output. The number of output channels (for weights slicing) depends on + size of output and size of weights&bias */ + + if (!inside_arc_ccm(in->Data())) { + /* In case the input tensor contains multiple batches, + only count the size if the inner most dimension */ + int size_in = mli_hlp_count_elem_num(in->MliTensor(), *in->Rank() - 1) * + mli_hlp_tensor_element_size(in->MliTensor()); + in->SetData(static_cast(get_arc_scratch_buffer(size_in)), + size_in); + if (in->Data() == NULL) { + in->SetData(nullptr, 0); + ret_val = kTfLiteError; + } + } + if (!inside_arc_ccm(out->Data())) { + /* In case the input tensor contains multiple batches, + only count the size if the inner most dimension */ + int out_size = mli_hlp_count_elem_num(out->MliTensor(), *out->Rank() - 1) * + mli_hlp_tensor_element_size(out->MliTensor()); + int max_out_size = 0; + out->SetData(static_cast(get_arc_scratch_buffer(out_size)), + out_size); + if (out->Data() == NULL) { + get_arc_scratch_buffer_max_size(&max_out_size); + out->SetData( + static_cast(get_arc_scratch_buffer(max_out_size)), + max_out_size); + if (max_out_size == 0) ret_val = kTfLiteError; + } + if (out->Data() == NULL) ret_val = kTfLiteError; + } +#endif + return ret_val; +} + +TfLiteStatus get_arc_scratch_buffer_for_eltwise_tensors( + TfLiteContext* context, MliTensorInterface* in1, MliTensorInterface* in2, + MliTensorInterface* out) { + TfLiteStatus ret_val = kTfLiteOk; +#if (defined(__Xxy)) || (defined(__Xvdsp)) + init_arc_scratch_buffers(); + constexpr int tsr_num = 3; + int in1_size = mli_hlp_count_elem_num(in1->MliTensor(), 0) * + mli_hlp_tensor_element_size(in1->MliTensor()); + int in2_size = mli_hlp_count_elem_num(in2->MliTensor(), 0) * + mli_hlp_tensor_element_size(in2->MliTensor()); + int out_size = mli_hlp_count_elem_num(out->MliTensor(), 0) * + mli_hlp_tensor_element_size(out->MliTensor()); + int sizes[tsr_num] = {in1_size, in2_size, out_size}; + MliTensorInterface* in_tensors[tsr_num] = {in1, in2, out}; + for (int i = 0; i < tsr_num; ++i) { + if (!inside_arc_ccm(in_tensors[i]->Data())) { + auto* data_ptr = get_arc_scratch_buffer(sizes[i]); + if (data_ptr == nullptr) { + get_arc_scratch_buffer_max_size(&sizes[i]); + data_ptr = get_arc_scratch_buffer(sizes[i]); + } + if (data_ptr == nullptr || sizes[i] == 0) { + in_tensors[i]->SetData(nullptr, 0); + ret_val = kTfLiteError; + } else { + in_tensors[i]->SetData(static_cast(data_ptr), + sizes[i]); + } + } + } +#endif + return ret_val; +} + +TfLiteStatus arc_scratch_buffer_calc_slice_size_io( + const MliTensorInterface* in, const MliTensorInterface* out, + const int kernel_height, const int stride_height, const int padding_top, + const int padding_bot, int* in_slice_height, int* out_slice_height) { + const int height_dimension = 1; + const int in_height = in->Shape()[height_dimension]; + const int out_height = out->Shape()[height_dimension]; + const int line_size_in = + mli_hlp_count_elem_num(in->MliTensor(), height_dimension + 1) * + mli_hlp_tensor_element_size(in->MliTensor()); + const int 
line_size_out = + mli_hlp_count_elem_num(out->MliTensor(), height_dimension + 1) * + mli_hlp_tensor_element_size(out->MliTensor()); + int max_lines_in = 0; + int max_lines_out = 0; + int max_out_lines_for_input = 0; + bool fit = + (static_cast(*in->DataCapacity()) >= in_height * line_size_in) && + (static_cast(*out->DataCapacity()) >= out_height * line_size_out); + if (fit) { + // in case both tensors completely fit in the capacity, there is no need + // for slicing. As padding can affect effective input region, we also + // derive it from output height, and rely on a clipping logic which intend + // to reduce last smaller slice. I.e the only slice is a kind of "smaller + // last slice that need to be corrected" + *in_slice_height = std::max(in_height, out_height * stride_height); + *out_slice_height = out_height; + } else { + // First compute how many lines fit into the input tensor, and compute how + // many output lines can be computed with that. + max_lines_in = std::min( + in_height, static_cast(*in->DataCapacity()) / line_size_in); + if (max_lines_in >= in_height) { + max_out_lines_for_input = out_height; + } else if (2 * max_lines_in >= in_height) { + // in this case only two slices are needed, so both could benefit from + // padding. take the MIN to get the worst case. + max_out_lines_for_input = + (max_lines_in + std::min(padding_top, padding_bot) - kernel_height + + 1) / + stride_height; + } else { + max_out_lines_for_input = + (max_lines_in - kernel_height + 1) / stride_height; + } + // Then compute how many output lines fit into the output tensor. + max_lines_out = std::min( + out_height, static_cast(*out->DataCapacity()) / line_size_out); + // the smallest of the two determines the slice height for the output, and + // the derived sliceheight for the input. + *out_slice_height = std::min(max_out_lines_for_input, max_lines_out); + *in_slice_height = *out_slice_height * stride_height; + } + + if ((*in_slice_height > 0) && (*out_slice_height > 0)) { + return kTfLiteOk; + } else { + return kTfLiteError; + } +} + +TfLiteStatus arc_scratch_buffer_calc_slice_size_weights( + const MliTensorInterface* weights, const MliTensorInterface* bias, + const int weight_out_ch_dimension, int* slice_channels) { + const int channels = weights->Shape()[weight_out_ch_dimension]; + const int ch_size_w = + (mli_hlp_count_elem_num(weights->MliTensor(), 0) / channels) * + mli_hlp_tensor_element_size(weights->MliTensor()); + const int ch_size_b = + (mli_hlp_count_elem_num(bias->MliTensor(), 0) / channels) * + mli_hlp_tensor_element_size(bias->MliTensor()); + int max_ch_weigths = 0; + int max_ch_bias = 0; + + bool fit = + (static_cast(*weights->DataCapacity()) >= channels * ch_size_w) && + (static_cast(*bias->DataCapacity()) >= channels * ch_size_b); + if (fit) { + // in case both tensors completely fit in the capacity, there is no need + // for slicing + *slice_channels = channels; + } else { + // First compute how many channels fit into the weights tensor + max_ch_weigths = std::min( + channels, static_cast(*weights->DataCapacity()) / ch_size_w); + // Ten compute how many channels fit into the bias tensor. 
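As an aside on the channel-slicing rule computed in this function: the slice is simply the per-channel byte footprint of the weights and bias clamped to whatever both capacities can hold at once. A standalone sketch with made-up sizes (not taken from any real model) illustrates the arithmetic:

#include <algorithm>
#include <cstdio>

int main() {
  // Hypothetical layer: 64 output channels, 3x3x16 int8 weights per channel,
  // 4-byte bias per channel, 8 KB of weight capacity, 512 B of bias capacity.
  const int channels = 64;
  const int ch_size_w = 3 * 3 * 16;  // weight bytes per output channel (int8)
  const int ch_size_b = 4;           // bias bytes per output channel
  const int cap_w = 8 * 1024;
  const int cap_b = 512;

  int slice_channels;
  if (cap_w >= channels * ch_size_w && cap_b >= channels * ch_size_b) {
    slice_channels = channels;  // everything fits: no weight slicing needed
  } else {
    // The smaller of the two capacity-limited counts determines the slice.
    slice_channels = std::min({channels, cap_w / ch_size_w, cap_b / ch_size_b});
  }
  printf("slice_channels = %d\n", slice_channels);  // prints 56: weights are the bottleneck
  return 0;
}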
+ max_ch_bias = + std::min(channels, static_cast(*bias->DataCapacity()) / ch_size_b); + // the smallest of the two determines the slice size + *slice_channels = std::min(max_ch_weigths, max_ch_bias); + } + + if (*slice_channels > 0) { + return kTfLiteOk; + } else { + return kTfLiteError; + } +} + +TfLiteStatus get_arc_scratch_buffer_for_pooling_tensors( + TfLiteContext* context, MliTensorInterface* in, MliTensorInterface* out) { +#if (defined(__Xxy)) || (defined(__Xvdsp)) + init_arc_scratch_buffers(); + return get_arc_scratch_buffer_for_io_tensors(context, in, out); +#else + return kTfLiteOk; +#endif +} + +} // namespace micro +} // namespace ops +} // namespace tflite + +#endif // EI_CLASSIFIER_TFLITE_ENABLE_ARC == 1 diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/scratch_buf_mgr.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/scratch_buf_mgr.h new file mode 100644 index 0000000..2f60948 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/scratch_buf_mgr.h @@ -0,0 +1,150 @@ +// Patched by Edge Impulse to include reference and hardware-accelerated kernels +#include "../../../../classifier/ei_classifier_config.h" +#if EI_CLASSIFIER_TFLITE_ENABLE_ARC == 1 +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_MICRO_ARC_SCRATCH_BUF_MGR_H_ +#define TENSORFLOW_LITE_MICRO_ARC_SCRATCH_BUF_MGR_H_ + +#include "mli_api.h" // NOLINT +#include "mli_interface.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" + +namespace tflite { +namespace ops { +namespace micro { + +/** + * @brief Function to allocate scratch buffers for the convolution tensors + * + * @detail This function will update the data pointers in the 4 tensors with + * pointers to scratch buffers in fast local memory. + * + * @param context [I] pointer to TfLite context (needed for error handling) + * @param in [IO] pointer to the input tensor + * @param weights [IO] pointer to the weights tensor + * @param bias [IO] pointer to the bias tensor + * @param output [IO] pointer to the output tensor + * + * @return Tf Lite status code + */ +TfLiteStatus get_arc_scratch_buffer_for_conv_tensors( + TfLiteContext* context, MliTensorInterface* in, MliTensorInterface* weights, + MliTensorInterface* bias, MliTensorInterface* out); + +/** + * @brief Function to allocate scratch buffers for pooling kernels with only + * input and output buffers + * + * @detail This function will update the data pointers in the 2 tensors with + * pointers to scratch buffers in fast local memory. 
+ * + * @param context [I] pointer to TfLite context (needed for error handling) + * @param in [IO] pointer to the input tensor + * @param output [IO] pointer to the output tensor + * + * @return Tf Lite status code + */ +TfLiteStatus get_arc_scratch_buffer_for_pooling_tensors( + TfLiteContext* context, MliTensorInterface* in, MliTensorInterface* out); + +/** + * @brief Function to allocate scratch buffers for the fully connect tensors + * + * @detail This function will update the data pointers in the 4 tensors with + * pointers to scratch buffers in fast local memory. + * + * @param context [I] pointer to TfLite context (needed for error handling) + * @param in [IO] pointer to the input tensor + * @param weights [IO] pointer to the weights tensor + * @param bias [IO] pointer to the bias tensor + * @param output [IO] pointer to the output tensor + * + * @return Tf Lite status code + */ +TfLiteStatus get_arc_scratch_buffer_for_fully_connect_tensors( + TfLiteContext* context, MliTensorInterface* in, MliTensorInterface* weights, + MliTensorInterface* bias, MliTensorInterface* out); + +/** + * @brief Function to allocate scratch buffers for the eltwise function tensors + * + * @detail This function will update the data pointers in the 3 tensors with + * pointers to scratch buffers in fast local memory. + * + * @param context [I] pointer to TfLite context (needed for error handling) + * @param in1 [IO] pointer to the first input tensor + * @param in2 [IO] pointer to the second input tensor + * @param output [IO] pointer to the output tensor + * + * @return Tf Lite status code + */ +TfLiteStatus get_arc_scratch_buffer_for_eltwise_tensors( + TfLiteContext* context, MliTensorInterface* in1, MliTensorInterface* in2, + MliTensorInterface* out); + +/** + * @brief Function to calculate slice size for io tensors + * + * @detail This function will calculate the slice size in the height dimension + * for input and output tensors. it takes into account the kernel size and the + * padding. the function will look at the capacity filed in the in and out + * tensor to determine the available buffersize. + * + * @param in [I] pointer to the input tensor + * @param out [I] pointer to the output tensor + * @param kernelHeight [I] size of the kernel in height dimension + * @param strideHeight [I] input stride in height dimension + * @param padding_top [I] number of lines with zeros at the top + * @param padding_bot [I] number of lines with zeros at the bottom + * @param inSliceHeight [O] slice size in height dimension for the input + * tensor + * @param outSliceHeight [O] slice size in height dimension for the output + * tensor + * + * @return Tf Lite status code + */ +TfLiteStatus arc_scratch_buffer_calc_slice_size_io( + const MliTensorInterface* in, const MliTensorInterface* out, + const int kernelHeight, const int strideHeight, const int padding_top, + const int padding_bot, int* in_slice_height, int* out_slice_height); + +/** + * @brief Function to calculate slice size for weight slicing + * + * @detail This function will calculate the slice size in the output channel + * dimension for weight and bias tensors. the function will look at the capacity + * filed in the weights and bias tensor to determine the available buffersize. 
+ * + * @param weights [I] pointer to the input tensor + * @param bias [I] pointer to the output tensor + * @param weightOutChDimension [I] dimension of the output channels in the + * weights tensor + * @param sliceChannels [O] slice size in output channel dimension + * + * @return Tf Lite status code + */ +TfLiteStatus arc_scratch_buffer_calc_slice_size_weights( + const MliTensorInterface* weights, const MliTensorInterface* bias, + const int weight_out_ch_dimension, int* slice_channels); + +} // namespace micro +} // namespace ops +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_ARC_SCRATCH_BUF_MGR_H_ + +#endif // EI_CLASSIFIER_TFLITE_ENABLE_ARC == 1 diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/scratch_buffers.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/scratch_buffers.cpp new file mode 100644 index 0000000..924cc41 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/scratch_buffers.cpp @@ -0,0 +1,209 @@ +// Patched by Edge Impulse to include reference and hardware-accelerated kernels +#include "../../../../classifier/ei_classifier_config.h" +#if EI_CLASSIFIER_TFLITE_ENABLE_ARC == 1 +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/scratch_buffers.h" + +#include + +namespace tflite { +namespace ops { +namespace micro { + +/* by default use all the XY memory, and half of the DCCM because DCCM is also + * used for the data section and the stack. 
the values can be overruled by + * adding a -D option to the makefile of the application + */ + +#ifdef __Xxy + +#ifndef SCRATCH_MEM_X_SIZE +#ifdef core_config_xy_size +#define SCRATCH_MEM_X_SIZE (core_config_xy_size) +#endif +#endif + +#ifndef SCRATCH_MEM_Y_SIZE +#ifdef core_config_xy_size +#define SCRATCH_MEM_Y_SIZE (core_config_xy_size) +#endif +#endif + +#ifndef SCRATCH_MEM_Z_SIZE +#ifdef core_config_dccm_size +#define SCRATCH_MEM_Z_SIZE ((core_config_dccm_size) / 2) +#endif +#endif + +#elif defined(__Xvdsp) + +#ifndef SCRATCH_MEM_VEC_SIZE +#ifdef core_config_vec_mem_size +#define SCRATCH_MEM_VEC_SIZE ((core_config_vec_mem_size * 3) / 4) +#endif +#endif + +#else + +#define SCRATCH_MEM_SIZE (65536) + +#endif + +#ifdef __Xxy + +// Patched by Edge Impulse, ARC GCC fixes +namespace { +#if defined (__GNUC__) +static int8_t scratch_mem_x[SCRATCH_MEM_X_SIZE] __attribute__((section(".Xdata"))); +#else +#pragma Bss(".Xdata") +static int8_t scratch_mem_x[SCRATCH_MEM_X_SIZE]; +#pragma Bss() +#endif + +#if defined (__GNUC__) +static int8_t scratch_mem_y[SCRATCH_MEM_Y_SIZE] __attribute__((section(".Ydata"))); +#else +#pragma Bss(".Ydata") +static int8_t scratch_mem_y[SCRATCH_MEM_Y_SIZE]; +#pragma Bss() +#endif + +#if defined (__GNUC__) +static int8_t scratch_mem_z[SCRATCH_MEM_Z_SIZE] __attribute__((section(".Zdata"))); +#else +#pragma Bss(".Zdata") +static int8_t scratch_mem_z[SCRATCH_MEM_Z_SIZE]; +#pragma Bss() +#endif + +#elif defined(__Xvdsp) + +#pragma Bss(".vecmem_data") +static int8_t scratch_mem_vec_1[SCRATCH_MEM_VEC_SIZE / 4]; +static int8_t scratch_mem_vec_2[SCRATCH_MEM_VEC_SIZE / 4]; +static int8_t scratch_mem_vec_3[SCRATCH_MEM_VEC_SIZE / 2]; +#pragma Bss() + +#else + +static int8_t scratch_mem_stack[SCRATCH_MEM_SIZE]; + +#endif +} // namespace + +#ifdef __Xxy + +static int8_t* scratch_mem[] = {scratch_mem_x, scratch_mem_y, scratch_mem_z}; +static uint32_t scratch_sizes[] = {SCRATCH_MEM_X_SIZE, SCRATCH_MEM_Y_SIZE, + SCRATCH_MEM_Z_SIZE}; + +#elif defined(__Xvdsp) + +static int8_t* scratch_mem[] = {scratch_mem_vec_1, scratch_mem_vec_2, + scratch_mem_vec_3}; +static uint32_t scratch_sizes[] = {SCRATCH_MEM_VEC_SIZE / 4, + SCRATCH_MEM_VEC_SIZE / 4, + SCRATCH_MEM_VEC_SIZE / 2}; + +#else + +static int8_t* scratch_mem[] = {scratch_mem_stack}; +static uint32_t scratch_sizes[] = {SCRATCH_MEM_SIZE}; + +#endif + +void* get_arc_scratch_buffer(int size) { + // Function to asign fast memory from one of 3 scratch buffers. + // Best Fit strategy - memory is allocated from that memory bank that leaves + // the least unused memory. + void* buf = NULL; + int best_mem_idx = -1; + int best_mem_delta = INT_MAX; + const int num_mem = sizeof(scratch_mem) / sizeof(scratch_mem[0]); + // find a local memory that fits the data size. + for (int mem_idx = 0; mem_idx < num_mem; ++mem_idx) { + // Best Fit + if ((size <= static_cast(scratch_sizes[mem_idx])) && + (static_cast(scratch_sizes[mem_idx]) - size < best_mem_delta)) { + best_mem_idx = mem_idx; + best_mem_delta = scratch_sizes[mem_idx] - size; + } + } + if (best_mem_idx >= 0) { + buf = scratch_mem[best_mem_idx]; + scratch_mem[best_mem_idx] += size; + scratch_sizes[best_mem_idx] -= size; + } + return buf; +} + +void get_arc_scratch_buffer_max_size(int* size) { + int maxavailable = 0; + const int num_mem = sizeof(scratch_mem) / sizeof(scratch_mem[0]); + // find the largest available buffer. 
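get_arc_scratch_buffer() above is a best-fit, bump-pointer allocator over the memory banks: it picks the bank that still fits the request with the least slack, hands out that bank's current pointer, and shrinks what remains. A self-contained sketch of that strategy with hypothetical bank sizes (not the real ARC XY/DCCM memory map):

#include <climits>
#include <cstdint>
#include <cstdio>

// Hypothetical banks standing in for scratch_mem_x/y/z.
static int8_t bank_a[4096], bank_b[2048], bank_c[8192];
static int8_t* banks[] = {bank_a, bank_b, bank_c};
static uint32_t remaining[] = {4096, 2048, 8192};

static void* best_fit_alloc(int size) {
  int best = -1;
  int best_delta = INT_MAX;
  for (int i = 0; i < 3; ++i) {
    const int delta = static_cast<int>(remaining[i]) - size;
    if (delta >= 0 && delta < best_delta) {  // fits and leaves the least slack
      best = i;
      best_delta = delta;
    }
  }
  if (best < 0) return nullptr;
  void* p = banks[best];
  banks[best] += size;      // bump the pointer past the granted region
  remaining[best] -= size;  // shrink what is left in that bank
  return p;
}

int main() {
  printf("%p %p\n", best_fit_alloc(1500), best_fit_alloc(6000));
  return 0;
}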
+ for (int i = 0; i < num_mem; i++) { + if (static_cast(scratch_sizes[i]) > maxavailable) { + maxavailable = scratch_sizes[i]; + } + } + *size = maxavailable; +} + +void get_arc_scratch_buffer_two_max_sizes(int* size1, int* size2) { + int maxavailable = 0; + int secondavail = 0; + const int num_mem = sizeof(scratch_mem) / sizeof(scratch_mem[0]); + // find the two largest available buffers. + for (int i = 0; i < num_mem; i++) { + if (static_cast(scratch_sizes[i]) > maxavailable) { + secondavail = maxavailable; + maxavailable = scratch_sizes[i]; + } else if (static_cast(scratch_sizes[i]) > secondavail) { + secondavail = scratch_sizes[i]; + } + } + *size1 = maxavailable; + *size2 = secondavail; +} + +void init_arc_scratch_buffers(void) { +#ifdef __Xxy + scratch_mem[0] = scratch_mem_x; + scratch_mem[1] = scratch_mem_y; + scratch_mem[2] = scratch_mem_z; + scratch_sizes[0] = SCRATCH_MEM_X_SIZE; + scratch_sizes[1] = SCRATCH_MEM_Y_SIZE; + scratch_sizes[2] = SCRATCH_MEM_Z_SIZE; +#elif defined(__Xvdsp) + scratch_mem[0] = scratch_mem_vec_1; + scratch_mem[1] = scratch_mem_vec_2; + scratch_mem[2] = scratch_mem_vec_3; + scratch_sizes[0] = SCRATCH_MEM_VEC_SIZE / 4; + scratch_sizes[1] = SCRATCH_MEM_VEC_SIZE / 4; + scratch_sizes[2] = SCRATCH_MEM_VEC_SIZE / 2; +#else + scratch_mem[0] = scratch_mem_stack; + scratch_sizes[0] = SCRATCH_MEM_SIZE; +#endif +} + +} // namespace micro +} // namespace ops +} // namespace tflite + +#endif // EI_CLASSIFIER_TFLITE_ENABLE_ARC == 1 diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/scratch_buffers.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/scratch_buffers.h new file mode 100644 index 0000000..dc704aa --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/scratch_buffers.h @@ -0,0 +1,83 @@ +// Patched by Edge Impulse to include reference and hardware-accelerated kernels +#include "../../../../classifier/ei_classifier_config.h" +#if EI_CLASSIFIER_TFLITE_ENABLE_ARC == 1 +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_MICRO_ARC_SCRATCH_BUFFERS_H_ +#define TENSORFLOW_LITE_MICRO_ARC_SCRATCH_BUFFERS_H_ + +#include "mli_api.h" // NOLINT +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" + +namespace tflite { +namespace ops { +namespace micro { + +void init_arc_scratch_buffers(void); +void* get_arc_scratch_buffer(int size); // Function to assign fast memory + // from one of 3 scratch buffers. 
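For reference, the scratch_buf_mgr helpers earlier in this patch drive this API in a fixed pattern: reset the banks once per prepare step, request the exact sizes first, and query the largest remaining bank as a fallback when an exact request cannot be met (the kernel then slices the tensor to fit). A hedged sketch of that pattern using the functions declared above; allocate_in_out is a hypothetical helper and error handling is trimmed:

#include <cstdint>

static bool allocate_in_out(int size_in, int size_out, int8_t** in, int8_t** out) {
  init_arc_scratch_buffers();  // reset all banks before a fresh allocation pass
  *in = static_cast<int8_t*>(get_arc_scratch_buffer(size_in));
  *out = static_cast<int8_t*>(get_arc_scratch_buffer(size_out));
  if (*in != nullptr && *out != nullptr) return true;
  // Fallback: report the largest bank still free; the caller would re-request
  // that size and process the tensor in slices (see scratch_buf_mgr.cpp).
  int max_size = 0;
  get_arc_scratch_buffer_max_size(&max_size);
  return max_size > 0;
}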
+ +void get_arc_scratch_buffer_max_size(int* size); +void get_arc_scratch_buffer_two_max_sizes(int* size1, int* size2); + +static inline bool inside_arc_dccm(void* p) { +#if core_config_dccm_present + return ((unsigned)p >= core_config_dccm_base) && + ((unsigned)p < core_config_dccm_base + core_config_dccm_size); +#else + return false; +#endif +} + +static inline bool inside_arc_xccm(void* p) { +#if core_config_xy + return ((unsigned)p >= core_config_xy_x_base) && + ((unsigned)p < core_config_xy_x_base + core_config_xy_size); +#else + return false; +#endif +} + +static inline bool inside_arc_yccm(void* p) { +#if core_config_xy_size + return ((unsigned)p >= core_config_xy_y_base) && + ((unsigned)p < core_config_xy_y_base + core_config_xy_size); +#else + return false; +#endif +} + +static inline bool inside_arc_vccm(void* p) { +#if core_config_vec_mem_size + return ((unsigned)p >= core_config_vec_mem_base) && + ((unsigned)p < core_config_vec_mem_base + core_config_vec_mem_size); +#else + return false; +#endif +} + +static inline bool inside_arc_ccm(void* p) { + return inside_arc_dccm(p) || inside_arc_xccm(p) || inside_arc_yccm(p) || + inside_arc_vccm(p); +} + +} // namespace micro +} // namespace ops +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_ARC_SCRATCH_BUFFERS_H_ + +#endif // EI_CLASSIFIER_TFLITE_ENABLE_ARC == 1 diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/select.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/select.cpp new file mode 100644 index 0000000..dcfed8c --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/select.cpp @@ -0,0 +1,238 @@ +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TF_LITE_STATIC_MEMORY +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/select.h" + +#include +#include + +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +namespace tflite { + +constexpr int kInputTensorCondition = 0; +constexpr int kInputTensorX = 1; +constexpr int kInputTensorY = 2; +constexpr int kOutputTensor = 0; + +enum KernelType { + kVersionOne, + kVersionTwo, +}; + +struct OpData { + bool requires_broadcast; + // True if input condition is scalar or input condition has rank one and + // matches the first dimension of other inputs. 
+ bool has_low_rank_input_condition; +}; + +void* SelectInit(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + auto* data = static_cast( + context->AllocatePersistentBuffer(context, sizeof(OpData))); + data->requires_broadcast = false; + data->has_low_rank_input_condition = false; + return data; +} + +TfLiteStatus CheckBroadcastShape(TfLiteContext* context, + const TfLiteTensor* input1, + const TfLiteTensor* input2, + const TfLiteTensor* input3, + const TfLiteIntArray* output_shape) { + const int dims1 = NumDimensions(input1); + const int dims2 = NumDimensions(input2); + const int dims3 = NumDimensions(input3); + const int out_dims = std::max(std::max(dims1, dims2), dims3); + TF_LITE_ENSURE_EQ(context, out_dims, output_shape->size); + + for (int i = 0; i < out_dims; ++i) { + const int d1 = i >= dims1 ? 1 : SizeOfDimension(input1, dims1 - i - 1); + const int d2 = i >= dims2 ? 1 : SizeOfDimension(input2, dims2 - i - 1); + const int d3 = i >= dims3 ? 1 : SizeOfDimension(input3, dims3 - i - 1); + const int min_value = std::min(std::min(d1, d2), d3); + int max_value = std::max(std::max(d1, d2), d3); + // If one dimention is 0, others must be 0 or 1. + if (min_value == 0) max_value = 0; + if (!(d1 == 1 || d1 == max_value) || !(d2 == 1 || d2 == max_value) || + !(d3 == 1 || d3 == max_value)) { + MicroPrintf("Given shapes are not broadcastable."); + return kTfLiteError; + } + TF_LITE_ENSURE_EQ(context, output_shape->data[out_dims - i - 1], max_value); + } + return kTfLiteOk; +} + +template +TfLiteStatus SelectPrepare(TfLiteContext* context, TfLiteNode* node) { + OpData* data = reinterpret_cast(node->user_data); + + TF_LITE_ENSURE_EQ(context, NumInputs(node), 3); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + + MicroContext* micro_context = GetMicroContext(context); + TfLiteTensor* input_condition = + micro_context->AllocateTempInputTensor(node, kInputTensorCondition); + + TfLiteTensor* input_x = + micro_context->AllocateTempInputTensor(node, kInputTensorX); + + TfLiteTensor* input_y = + micro_context->AllocateTempInputTensor(node, kInputTensorY); + + TfLiteTensor* output = + micro_context->AllocateTempOutputTensor(node, kOutputTensor); + + // Input must be bool. + TF_LITE_ENSURE_TYPES_EQ(context, input_condition->type, kTfLiteBool); + TF_LITE_ENSURE_TYPES_EQ(context, input_x->type, input_y->type); + output->type = input_x->type; + + // Respect the original output shape when there are mixed shapes to represent + // a scalar data. 
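CheckBroadcastShape above enforces the usual right-aligned broadcasting rule across all three inputs: per dimension, every size must be either 1 or equal to the largest size, and that largest size becomes the output dimension. A tiny standalone illustration with made-up shapes:

#include <algorithm>
#include <cstdio>

int main() {
  // Hypothetical shapes, right-aligned to 3 dims: condition, x, y.
  const int dims = 3;
  const int cond[dims] = {1, 1, 3};
  const int x[dims]    = {2, 1, 3};
  const int y[dims]    = {2, 4, 1};
  int out[dims];
  bool ok = true;
  for (int i = 0; i < dims; ++i) {
    const int m = std::max({cond[i], x[i], y[i]});
    // Each dimension must be 1 or equal to the largest size in that position.
    ok = ok && (cond[i] == 1 || cond[i] == m) &&
         (x[i] == 1 || x[i] == m) && (y[i] == 1 || y[i] == m);
    out[i] = m;
  }
  printf("%s -> [%d, %d, %d]\n", ok ? "broadcastable" : "not broadcastable",
         out[0], out[1], out[2]);  // broadcastable -> [2, 4, 3]
  return 0;
}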
+ if (GetTensorShape(input_condition).FlatSize() == 1 && + GetTensorShape(input_x).FlatSize() == 1 && + GetTensorShape(input_y).FlatSize() == 1 && + GetTensorShape(output).FlatSize() == 1) { + return kTfLiteOk; + } + + bool same_shape = HaveSameShapes(input_condition, input_x) && + HaveSameShapes(input_x, input_y); + TfLiteIntArray* output_size; + if (!same_shape) { + switch (kernel_type) { + case kVersionOne: { + bool is_input_condition_scalar = NumDimensions(input_condition) == 0; + bool has_rank_one_input_condition = + NumDimensions(input_condition) == 1 && + SizeOfDimension(input_condition, 0) == SizeOfDimension(input_x, 0); + data->has_low_rank_input_condition = + is_input_condition_scalar || has_rank_one_input_condition; + TF_LITE_ENSURE(context, data->has_low_rank_input_condition); + + output_size = TfLiteIntArrayCopy(input_x->dims); + + // Input tensors must have the same type and size + TF_LITE_ENSURE(context, HaveSameShapes(input_x, input_y)); + break; + } + case kVersionTwo: { + TF_LITE_ENSURE_OK( + context, CheckBroadcastShape(context, input_condition, input_x, input_y, + output->dims)); + data->requires_broadcast = true; + break; + } + default: + return kTfLiteError; + } + } else { + output_size = TfLiteIntArrayCopy(input_x->dims); + } + + micro_context->DeallocateTempTfLiteTensor(input_condition); + micro_context->DeallocateTempTfLiteTensor(input_x); + micro_context->DeallocateTempTfLiteTensor(input_y); + micro_context->DeallocateTempTfLiteTensor(output); + + TfLiteIntArrayFree(output_size); + + return kTfLiteOk; +} + +template +void CallSelect(const TfLiteEvalTensor* input_condition, + const TfLiteEvalTensor* input_x, + const TfLiteEvalTensor* input_y, TfLiteEvalTensor* output, + bool need_broadcast) { + using Func = decltype(reference_ops::Select)*; + Func select_func; + if (need_broadcast) { + select_func = reference_ops::BroadcastSelect5DSlow; + } else { + select_func = reference_ops::Select; + } + + select_func(tflite::micro::GetTensorShape(input_condition), + tflite::micro::GetTensorData(input_condition), + tflite::micro::GetTensorShape(input_x), + tflite::micro::GetTensorData(input_x), + tflite::micro::GetTensorShape(input_y), + tflite::micro::GetTensorData(input_y), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); +} + +TfLiteStatus SelectEval(TfLiteContext* context, TfLiteNode* node) { + OpData* data = static_cast(node->user_data); + + const TfLiteEvalTensor* input_condition = + tflite::micro::GetEvalInput(context, node, kInputTensorX); + + const TfLiteEvalTensor* input_x = + tflite::micro::GetEvalInput(context, node, kInputTensorY); + + const TfLiteEvalTensor* input_y = + tflite::micro::GetEvalInput(context, node, kInputTensorCondition); + + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + + switch (input_x->type) { + case kTfLiteFloat32: + CallSelect(input_condition, input_x, input_y, output, + data->requires_broadcast); + break; + case kTfLiteInt8: + CallSelect(input_condition, input_x, input_y, output, + data->requires_broadcast); + break; + case kTfLiteInt16: + CallSelect(input_condition, input_x, input_y, output, + data->requires_broadcast); + break; + default: + MicroPrintf("Does not support type other than %s, but got %s", + "int8|int16|float32", TfLiteTypeGetName(input_x->type)); + return kTfLiteError; + } + + return kTfLiteOk; +} + +TfLiteRegistration Register_SELECT() { + return tflite::micro::RegisterOp(tflite::SelectInit, tflite::SelectPrepare, + tflite::SelectEval); +} + +// 
SelectV2 op selects values of 'x' if the corresponding value of 'condition' +// is true or the value of 'y' if false. There are valid condition input sizes: +// +// 1. Either the same shape (in which case the select is elementwise), or +// 2. Broadcastable shapes between 'condition', 'x' and 'y'. +TfLiteRegistration Register_SELECT_V2() { + return tflite::micro::RegisterOp(tflite::SelectInit, tflite::SelectPrepare, + tflite::SelectEval); +} + +} // namespace tflite +#endif // TF_LITE_STATIC_MEMORY diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/shape.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/shape.cpp new file mode 100644 index 0000000..21af290 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/shape.cpp @@ -0,0 +1,67 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h" + +namespace tflite { + +namespace { +constexpr int kInputTensor = 0; +constexpr int kOutputTensor = 0; + +void ExtractShape(const TfLiteEvalTensor* input, int32_t* output_data) { + for (int i = 0; i < input->dims->size; ++i) { + output_data[i] = input->dims->data[i]; + } +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + + return kTfLiteOk; +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + if (output->type != kTfLiteInt32) { + MicroPrintf("Output type %s (%d) not supported.", + TfLiteTypeGetName(output->type), output->type); + return kTfLiteError; + } else { + ExtractShape(input, tflite::micro::GetTensorData(output)); + } + + return kTfLiteOk; +} + +} // namespace + +TfLiteRegistration Register_SHAPE() { + return tflite::micro::RegisterOp(nullptr, Prepare, Eval); +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/slice.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/slice.cpp new file mode 100644 index 0000000..16ce966 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/slice.cpp @@ -0,0 +1,157 @@ +/* Copyright 2021 The 
TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/slice.h" + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +namespace tflite { + +namespace { + +constexpr int kInputTensor = 0; +constexpr int kBeginTensor = 1; +constexpr int kSizeTensor = 2; +constexpr int kOutputTensor = 0; + +const int kMaxDim = 5; + +template +void GetBeginAndSizeVectors(int dimensions, const TfLiteEvalTensor* begin, + const TfLiteEvalTensor* size, int32_t* begins, + int32_t* sizes) { + int offset = kMaxDim - dimensions; + for (int idx = 0; idx < dimensions; ++idx) { + begins[offset + idx] = tflite::micro::GetTensorData(begin)[idx]; + sizes[offset + idx] = tflite::micro::GetTensorData(size)[idx]; + } +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + MicroContext* micro_context = GetMicroContext(context); + + TF_LITE_ENSURE_EQ(context, NumInputs(node), 3); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + + TfLiteTensor* input = + micro_context->AllocateTempInputTensor(node, kInputTensor); + TFLITE_DCHECK(input != nullptr); + TfLiteTensor* begin = + micro_context->AllocateTempInputTensor(node, kBeginTensor); + TFLITE_DCHECK(begin != nullptr); + TfLiteTensor* size = + micro_context->AllocateTempInputTensor(node, kSizeTensor); + TFLITE_DCHECK(size != nullptr); + TfLiteTensor* output = + micro_context->AllocateTempOutputTensor(node, kOutputTensor); + TFLITE_DCHECK(output != nullptr); + + // Ensure validity of input tensor and its dimension. 
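GetBeginAndSizeVectors above copies the user-supplied begin/size values into fixed kMaxDim-element (5-D) arrays, right-aligned, so lower-rank inputs behave as if padded with leading unit dimensions; the defaults of begin 0 and size 1 are filled in by Eval before the copy. A small standalone sketch with an assumed rank-3 slice request:

#include <cstdio>

int main() {
  const int kMaxDim = 5;
  // Assumed rank-3 slice request: begin = {0, 1, 2}, size = {2, 2, 3}.
  const int begin_in[3] = {0, 1, 2};
  const int size_in[3] = {2, 2, 3};
  int begins[kMaxDim] = {0, 0, 0, 0, 0};  // default begin, as set in Eval()
  int sizes[kMaxDim] = {1, 1, 1, 1, 1};   // default size, as set in Eval()
  const int offset = kMaxDim - 3;          // right-align into the 5-D params
  for (int i = 0; i < 3; ++i) {
    begins[offset + i] = begin_in[i];
    sizes[offset + i] = size_in[i];
  }
  for (int i = 0; i < kMaxDim; ++i) {
    printf("dim %d: begin=%d size=%d\n", i, begins[i], sizes[i]);
  }
  return 0;
}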
+ TFLITE_DCHECK(input->type == output->type); + TFLITE_DCHECK(begin->type == size->type); + TFLITE_DCHECK(begin->type == kTfLiteInt32 || begin->type == kTfLiteInt64); + TFLITE_DCHECK(size->type == kTfLiteInt32 || size->type == kTfLiteInt64); + TFLITE_DCHECK(NumDimensions(begin) == 1); + TFLITE_DCHECK(NumDimensions(size) == 1); + TFLITE_DCHECK(NumElements(begin) == NumElements(size)); + TFLITE_DCHECK(NumDimensions(input) <= kMaxDim); + + micro_context->DeallocateTempTfLiteTensor(input); + micro_context->DeallocateTempTfLiteTensor(begin); + micro_context->DeallocateTempTfLiteTensor(size); + micro_context->DeallocateTempTfLiteTensor(output); + + return kTfLiteOk; +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + const TfLiteEvalTensor* begin = + tflite::micro::GetEvalInput(context, node, kBeginTensor); + const TfLiteEvalTensor* size = + tflite::micro::GetEvalInput(context, node, kSizeTensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + + tflite::SliceParams op_params; + op_params.begin_count = kMaxDim; + op_params.size_count = kMaxDim; + for (int i = 0; i < kMaxDim; ++i) { + op_params.begin[i] = 0; + op_params.size[i] = 1; + } + + if (begin->type == kTfLiteInt32) { + GetBeginAndSizeVectors(input->dims->size, begin, size, + op_params.begin, op_params.size); + } else if (begin->type == kTfLiteInt64) { + GetBeginAndSizeVectors(input->dims->size, begin, size, + op_params.begin, op_params.size); + } else { + MicroPrintf("Begin tensor type %s (%d) not supported.", + TfLiteTypeGetName(input->type), input->type); + return kTfLiteError; + } + + switch (input->type) { + case kTfLiteFloat32: + reference_ops::Slice(op_params, + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + break; + case kTfLiteInt32: + reference_ops::Slice( + op_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + break; + case kTfLiteInt8: + reference_ops::Slice( + op_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + break; + case kTfLiteInt16: + reference_ops::Slice( + op_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + break; + default: + MicroPrintf("Input tensor type %s (%d) not supported.", + TfLiteTypeGetName(input->type), input->type); + return kTfLiteError; + } + return kTfLiteOk; +} + +} // namespace + +TfLiteRegistration Register_SLICE() { + return tflite::micro::RegisterOp(nullptr, Prepare, Eval); +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/softmax.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/softmax.cpp new file mode 100644 index 0000000..0f0d7af --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/softmax.cpp @@ -0,0 +1,565 @@ +// Patched by Edge Impulse to include reference and hardware-accelerated kernels +#include "../../../../classifier/ei_classifier_config.h" +#if 0 == 1 +/* noop */ +#elif EI_CLASSIFIER_TFLITE_ENABLE_CMSIS_NN == 1 +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/softmax.h" + +#include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnfunctions.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/softmax.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +namespace tflite { +namespace { + +struct CMSISNNSoftmaxParams { + SoftmaxParams softmax_params; + int32_t num_rows; + int32_t row_size; +}; + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, + sizeof(CMSISNNSoftmaxParams)); +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + MicroContext* micro_context = GetMicroContext(context); + + TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, 0); + TF_LITE_ENSURE(context, input != nullptr); + TF_LITE_ENSURE(context, NumDimensions(input) >= 1); + TfLiteTensor* output = micro_context->AllocateTempOutputTensor(node, 0); + TF_LITE_ENSURE(context, output != nullptr); + + TF_LITE_ENSURE(context, node->user_data != nullptr); + CMSISNNSoftmaxParams* op_data = + static_cast(node->user_data); + + auto* params = static_cast(node->builtin_data); + auto ret_val = CalculateSoftmaxParams(context, input, output, params, + &op_data->softmax_params); + + const auto input_shape = GetTensorShape(input); + const auto output_shape = GetTensorShape(output); + const int trailing_dim = input_shape.DimensionsCount() - 1; + const int outer_size = + MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape); + const int depth = + MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim); + op_data->num_rows = outer_size; + op_data->row_size = depth; + + micro_context->DeallocateTempTfLiteTensor(input); + micro_context->DeallocateTempTfLiteTensor(output); + return ret_val; +} + +TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) { + const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0); + TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0); + + TFLITE_DCHECK(node->user_data != nullptr); + const CMSISNNSoftmaxParams op_data = + *static_cast(node->user_data); + + switch (input->type) { + case kTfLiteFloat32: { +#if EI_TFLITE_DISABLE_SOFTMAX_IN_F32 + 
MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type), + input->type); + return kTfLiteError; +#endif + tflite::reference_ops::Softmax( + op_data.softmax_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + return kTfLiteOk; + } + case kTfLiteInt8: { +#if EI_TFLITE_DISABLE_SOFTMAX_IN_I8 + MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type), + input->type); + return kTfLiteError; +#endif + if (output->type == kTfLiteInt8) { +#if EI_TFLITE_DISABLE_SOFTMAX_OUT_I8 + MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type), + output->type); + return kTfLiteError; +#endif + arm_softmax_s8(tflite::micro::GetTensorData(input), + op_data.num_rows, op_data.row_size, + op_data.softmax_params.input_multiplier, + op_data.softmax_params.input_left_shift, + op_data.softmax_params.diff_min, + tflite::micro::GetTensorData(output)); + } else { +#if EI_TFLITE_DISABLE_SOFTMAX_OUT_I16 + MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type), + output->type); + return kTfLiteError; +#endif + arm_softmax_s8_s16(tflite::micro::GetTensorData(input), + op_data.num_rows, op_data.row_size, + op_data.softmax_params.input_multiplier, + op_data.softmax_params.input_left_shift, + op_data.softmax_params.diff_min, + tflite::micro::GetTensorData(output)); + } + return kTfLiteOk; + } + case kTfLiteInt16: { +#if EI_TFLITE_DISABLE_SOFTMAX_IN_I16 + MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type), + input->type); + return kTfLiteError; +#endif + const cmsis_nn_softmax_lut_s16 softmax_params = { + .exp_lut = op_data.softmax_params.exp_lut, + .one_by_one_lut = op_data.softmax_params.one_over_one_plus_x_lut}; + + TFLITE_DCHECK_EQ( + arm_softmax_s16( + tflite::micro::GetTensorData(input), op_data.num_rows, + op_data.row_size, op_data.softmax_params.input_multiplier, + op_data.softmax_params.input_left_shift, &softmax_params, + tflite::micro::GetTensorData(output)), + ARM_CMSIS_NN_SUCCESS); + return kTfLiteOk; + } + default: + MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type), + input->type); + return kTfLiteError; + } +} + +TfLiteStatus SoftmaxEvalInt8(TfLiteContext* context, TfLiteNode* node) { + const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0); + TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0); + + TFLITE_DCHECK(node->user_data != nullptr); + const CMSISNNSoftmaxParams op_data = + *static_cast(node->user_data); + + arm_softmax_s8(tflite::micro::GetTensorData(input), op_data.num_rows, + op_data.row_size, op_data.softmax_params.input_multiplier, + op_data.softmax_params.input_left_shift, + op_data.softmax_params.diff_min, + tflite::micro::GetTensorData(output)); + + return kTfLiteOk; +} + +TfLiteStatus SoftmaxEvalInt8_Int16(TfLiteContext* context, TfLiteNode* node) { + const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0); + TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0); + + TFLITE_DCHECK(node->user_data != nullptr); + const CMSISNNSoftmaxParams op_data = + *static_cast(node->user_data); + + arm_softmax_s8_s16( + tflite::micro::GetTensorData(input), op_data.num_rows, + op_data.row_size, op_data.softmax_params.input_multiplier, + op_data.softmax_params.input_left_shift, op_data.softmax_params.diff_min, + tflite::micro::GetTensorData(output)); + + return kTfLiteOk; +} + +TfLiteStatus 
SoftmaxEvalInt16(TfLiteContext* context, TfLiteNode* node) { + const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0); + TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0); + + TFLITE_DCHECK(node->user_data != nullptr); + const CMSISNNSoftmaxParams op_data = + *static_cast(node->user_data); + + const cmsis_nn_softmax_lut_s16 softmax_params = { + .exp_lut = op_data.softmax_params.exp_lut, + .one_by_one_lut = op_data.softmax_params.one_over_one_plus_x_lut}; + + TFLITE_DCHECK_EQ( + arm_softmax_s16(tflite::micro::GetTensorData(input), + op_data.num_rows, op_data.row_size, + op_data.softmax_params.input_multiplier, + op_data.softmax_params.input_left_shift, &softmax_params, + tflite::micro::GetTensorData(output)), + ARM_CMSIS_NN_SUCCESS); + + return kTfLiteOk; +} + +} // namespace + +TfLiteRegistration Register_SOFTMAX() { + return tflite::micro::RegisterOp(Init, Prepare, SoftmaxEval); +} + +TfLiteRegistration Register_SOFTMAX_INT8() { + return tflite::micro::RegisterOp(Init, Prepare, SoftmaxEvalInt8); +} + +TfLiteRegistration Register_SOFTMAX_INT8_INT16() { + return tflite::micro::RegisterOp(Init, Prepare, SoftmaxEvalInt8_Int16); +} + +TfLiteRegistration Register_SOFTMAX_INT16() { + return tflite::micro::RegisterOp(Init, Prepare, SoftmaxEvalInt16); +} + +} // namespace tflite + +#elif EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN == 1 +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/softmax.h" + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/softmax.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +#include + +#if ESP_NN +#include "edge-impulse-sdk/porting/espressif/ESP-NN/include/esp_nn.h" +#endif + +long long softmax_total_time = 0; + +namespace tflite { +namespace { +// Softmax parameter data that persists in user_data +const int kInt16LUTArraySize = 513; + +struct NodeData { + SoftmaxParams op_data; +#if ESP_NN + int buffer_idx; +#endif +}; + +static void* Init(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(NodeData)); +} + +void SoftmaxQuantized(TfLiteContext* context, const TfLiteEvalTensor* input, + TfLiteEvalTensor* output, const NodeData* data) { + if (input->type == kTfLiteInt8) { +#if EI_TFLITE_DISABLE_SOFTMAX_IN_I8 + TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", + TfLiteTypeGetName(input->type), input->type); + return kTfLiteError; +#endif + if (output->type == kTfLiteInt16) { +#if EI_TFLITE_DISABLE_SOFTMAX_OUT_I16 + TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", + TfLiteTypeGetName(output->type), output->type); + return; +#endif + tflite::reference_ops::Softmax( + data->op_data, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } else { +#if EI_TFLITE_DISABLE_SOFTMAX_OUT_I8 + TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", + TfLiteTypeGetName(output->type), output->type); + return; +#endif +#if ESP_NN + const int32_t input_beta_multiplier = data->op_data.input_multiplier; + const int32_t input_beta_left_shift = data->op_data.input_left_shift; + const int diff_min = data->op_data.diff_min; + const RuntimeShape input_shape = tflite::micro::GetTensorShape(input); + const RuntimeShape output_shape = tflite::micro::GetTensorShape(output); + const int trailing_dim = input_shape.DimensionsCount() - 1; + const int outer_size = + MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape); + const int depth = + MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim); + const int8_t *in_ptr = tflite::micro::GetTensorData(input); + int8_t *out_ptr = tflite::micro::GetTensorData(output); + void *scratch_buf = NULL; + if (data->buffer_idx > -1) { + scratch_buf = context->GetScratchBuffer(context, data->buffer_idx); + } + esp_nn_set_softmax_scratch_buf(scratch_buf); + esp_nn_softmax_s8(in_ptr, outer_size, depth, input_beta_multiplier, + input_beta_left_shift, diff_min, out_ptr); +#else + tflite::reference_ops::Softmax( + data->op_data, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + 
tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); +#endif + } + } else { +#if EI_TFLITE_DISABLE_SOFTMAX_IN_I16 + TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", + TfLiteTypeGetName(input->type), input->type); + return; +#endif + tflite::reference_ops::SoftmaxInt16( + data->op_data, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } +} + +static TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0); + TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0); + + TFLITE_DCHECK(node->user_data != nullptr); + NodeData data = *static_cast(node->user_data); + + long long start_time = esp_timer_get_time(); + switch (input->type) { + case kTfLiteFloat32: { +#if EI_TFLITE_DISABLE_SOFTMAX_IN_F32 + TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", + TfLiteTypeGetName(input->type), input->type); + return kTfLiteError; +#endif + tflite::reference_ops::Softmax( + data.op_data, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } + break; + case kTfLiteInt8: +#if EI_TFLITE_DISABLE_SOFTMAX_IN_I8 + TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", + TfLiteTypeGetName(input->type), input->type); + return kTfLiteError; +#endif + SoftmaxQuantized(context, input, output, &data); + break; + case kTfLiteInt16: { +#if EI_TFLITE_DISABLE_SOFTMAX_IN_I16 + TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", + TfLiteTypeGetName(input->type), input->type); + return kTfLiteError; +#endif + SoftmaxQuantized(context, input, output, &data); + } + break; + default: + TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", + TfLiteTypeGetName(input->type), input->type); + return kTfLiteError; + } + softmax_total_time += esp_timer_get_time() - start_time; + return kTfLiteOk; +} + +static TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + MicroContext* micro_context = GetMicroContext(context); + + TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, 0); + TF_LITE_ENSURE(context, input != nullptr); + TF_LITE_ENSURE(context, NumDimensions(input) >= 1); + TfLiteTensor* output = micro_context->AllocateTempOutputTensor(node, 0); + TF_LITE_ENSURE(context, output != nullptr); + + TF_LITE_ENSURE(context, node->user_data != nullptr); + NodeData* data = static_cast(node->user_data); + SoftmaxParams* op_data = static_cast(&data->op_data); + + auto* params = static_cast(node->builtin_data); + auto ret_val = + CalculateSoftmaxParams(context, input, output, params, op_data); + +#if ESP_NN + if (output->type == kTfLiteInt8 && input->type == kTfLiteInt8) { + const int32_t input_width = input->dims->data[1]; + const int32_t input_height = input->dims->data[2]; + int scratch_buf_size = esp_nn_get_softmax_scratch_size(input_width, + input_height); + if (scratch_buf_size > 0) { + TF_LITE_ENSURE_STATUS(context->RequestScratchBufferInArena( + context, scratch_buf_size, &data->buffer_idx)); + } + } +#endif + + micro_context->DeallocateTempTfLiteTensor(input); + micro_context->DeallocateTempTfLiteTensor(output); + return ret_val; +} + +} // namespace + +TfLiteRegistration Register_SOFTMAX() { + return tflite::micro::RegisterOp(Init, 
Prepare, Eval); +} + +} // namespace tflite + +#else +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/softmax.h" + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/softmax.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +namespace tflite { +namespace { + +void SoftmaxQuantized(const TfLiteEvalTensor* input, TfLiteEvalTensor* output, + const SoftmaxParams& op_data) { + if (input->type == kTfLiteInt8) { + if (output->type == kTfLiteInt16) { + tflite::reference_ops::Softmax( + op_data, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } else { + tflite::reference_ops::Softmax( + op_data, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } + } else { + tflite::reference_ops::SoftmaxInt16( + op_data, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } +} + +TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) { + const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0); + TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0); + + TFLITE_DCHECK(node->user_data != nullptr); + SoftmaxParams op_data = *static_cast(node->user_data); + + switch (input->type) { + case kTfLiteFloat32: { +#if EI_TFLITE_DISABLE_SOFTMAX_IN_F32 + MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type), + input->type); + return kTfLiteError; +#endif + tflite::reference_ops::Softmax( + op_data, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + return kTfLiteOk; + } + case kTfLiteInt8: { +#if EI_TFLITE_DISABLE_SOFTMAX_IN_I8 + MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type), + input->type); + return kTfLiteError; +#endif + SoftmaxQuantized(input, output, op_data); + return kTfLiteOk; + } + case kTfLiteInt16: { +#if EI_TFLITE_DISABLE_SOFTMAX_IN_I16 + MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type), + 
input->type); + return kTfLiteError; +#endif + SoftmaxQuantized(input, output, op_data); + return kTfLiteOk; + } + default: + MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type), + input->type); + return kTfLiteError; + } +} +} // namespace + +TfLiteRegistration Register_SOFTMAX() { + return tflite::micro::RegisterOp(SoftmaxInit, SoftmaxPrepare, SoftmaxEval); +} + +} // namespace tflite + +#endif diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/softmax.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/softmax.h new file mode 100644 index 0000000..fb15d38 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/softmax.h @@ -0,0 +1,70 @@ +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_MICRO_KERNELS_SOFTMAX_H_ +#define TENSORFLOW_LITE_MICRO_KERNELS_SOFTMAX_H_ + +#include "edge-impulse-sdk/classifier/ei_classifier_config.h" +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" + +namespace tflite { + +void* SoftmaxInit(TfLiteContext* context, const char* buffer, size_t length); + +// Common helper function to SoftmaxPrepare. +TfLiteStatus CalculateSoftmaxParams(TfLiteContext* context, + const TfLiteTensor* input, + TfLiteTensor* output, + const TfLiteSoftmaxParams* params, + SoftmaxParams* op_data); + +TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node); + +// This is the most generic TfLiteRegistration. The actual supported types may +// still be target dependent. The only requirement is that every implementation +// (reference or optimized) must define this function. +TfLiteRegistration Register_SOFTMAX(); + +#if defined(XTENSA) || defined(CMSIS_NN) +// Returns a TfLiteRegistration struct for kernel variant that only supports +// int8 input and int16 output. +TfLiteRegistration Register_SOFTMAX_INT8_INT16(); +#else +inline TfLiteRegistration Register_SOFTMAX_INT8_INT16() { + return Register_SOFTMAX(); +} +#endif + +#if defined(CMSIS_NN) +// Returns a TfLiteRegistration struct for kernel variant that only supports +// int8 input/output and uses the latency optimized implementations. +TfLiteRegistration Register_SOFTMAX_INT8(); + +// Returns a TfLiteRegistration struct for kernel variant that only supports +// int16 input/output and uses the latency optimized implementations. 
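+// (When CMSIS_NN is not defined, the inline fall-backs below simply return
+// Register_SOFTMAX().)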
+TfLiteRegistration Register_SOFTMAX_INT16(); + +#else +inline TfLiteRegistration Register_SOFTMAX_INT8() { return Register_SOFTMAX(); } + +inline TfLiteRegistration Register_SOFTMAX_INT16() { + return Register_SOFTMAX(); +} +#endif + +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_KERNELS_SOFTMAX_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/softmax_common.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/softmax_common.cpp new file mode 100644 index 0000000..82ec071 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/softmax_common.cpp @@ -0,0 +1,195 @@ +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/softmax.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_context.h" + +namespace tflite { + +namespace { +// Softmax parameter data that persists in user_data +const int kInt16LUTArraySize = LUTSize(); + +TfLiteStatus InitializeLutForInt16(TfLiteContext* context, + const TfLiteTensor* input, + TfLiteTensor* output, + SoftmaxParams* op_data) { + // Only allocate LUTs for KTfLiteInt16 data type + if (input->type == kTfLiteInt16) { + void* raw_exp_lut = context->AllocatePersistentBuffer( + context, sizeof(int16_t) * kInt16LUTArraySize); + TF_LITE_ENSURE(context, raw_exp_lut != nullptr); + op_data->exp_lut = reinterpret_cast(raw_exp_lut); + void* one_over_one_plus_x_lut = context->AllocatePersistentBuffer( + context, sizeof(int16_t) * kInt16LUTArraySize); + TF_LITE_ENSURE(context, one_over_one_plus_x_lut != nullptr); + op_data->one_over_one_plus_x_lut = + reinterpret_cast(one_over_one_plus_x_lut); + } + + if (output->type == kTfLiteInt16) { + TF_LITE_ENSURE(context, + input->type == kTfLiteInt8 || input->type == kTfLiteInt16); + } else { + TF_LITE_ENSURE_EQ(context, input->type, output->type); + } + + // Populate LUT if required + if (input->type == kTfLiteInt16) { + TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0); + // exp LUT only used on negative values + // we consider exp(-10.0) is insignificant to accumulation + const int32_t range = std::numeric_limits::max() - + std::numeric_limits::min(); + LUTPopulate( + 10.0f / range, std::numeric_limits::max(), 2.0f / range, 0, + [](float value) { return std::exp(value); }, op_data->exp_lut); + + LUTPopulate( + 1.0f / range, std::numeric_limits::min(), 2.0f / range, 0, + [](float value) { return 1.0f / (1.0f + value); }, + op_data->one_over_one_plus_x_lut); + + 
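+    // With the scales chosen above, the first LUT tabulates exp() over
+    // [-10, 0] (exp(-10) ~= 4.5e-5 is treated as negligible) and the second
+    // tabulates 1/(1+x) over [0, 1]; the 2.0f / range output scale keeps both
+    // results inside the representable [-1, 1] range.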
op_data->zero_point = output->params.zero_point; + op_data->scale = output->params.scale; + } + + return kTfLiteOk; +} + +} // namespace + +TfLiteStatus CalculateSoftmaxParams(TfLiteContext* context, + const TfLiteTensor* input, + TfLiteTensor* output, + const TfLiteSoftmaxParams* params, + SoftmaxParams* op_data) { +#ifndef EI_TFLITE_DISABLE_SOFTMAX_IN_I16 + if (InitializeLutForInt16(context, input, output, op_data) != kTfLiteOk) { + return kTfLiteError; + } +#endif + + if (input->type == kTfLiteInt8 || input->type == kTfLiteInt16) { + if (input->type == kTfLiteInt16) { +#if EI_TFLITE_DISABLE_SOFTMAX_IN_I16 + TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", + TfLiteTypeGetName(input->type), input->type); + return kTfLiteError; +#endif + TF_LITE_ENSURE_EQ(context, input->params.zero_point, 0); + TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0); + TF_LITE_ENSURE_NEAR(context, output->params.scale, 1.f / 32768, + (0.001f * 1.f / 32768)); + } else { // input->type == kTfLiteInt8 +#if EI_TFLITE_DISABLE_SOFTMAX_IN_I8 + TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", + TfLiteTypeGetName(input->type), input->type); + return kTfLiteError; +#endif + TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteInt8); + if (output->type == kTfLiteInt16) { + TF_LITE_ENSURE_EQ(context, output->params.zero_point, -32768); + TF_LITE_ENSURE_NEAR(context, output->params.scale, 1.f / 65536, + (0.001f * 1.f / 65536)); + } else { // output->type == kTfLiteint8 + TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteInt8); + TF_LITE_ENSURE_EQ(context, output->params.zero_point, -128); + TF_LITE_ENSURE(context, output->params.scale == 1.f / 256); + } + } + + static const int kScaledDiffIntegerBits = 5; + + // Calculate input_multiplier and input_left_shift + if (input->type == kTfLiteInt16) { +#if EI_TFLITE_DISABLE_SOFTMAX_IN_I16 + TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", + TfLiteTypeGetName(input->type), input->type); + return kTfLiteError; +#endif + int input_left_shift; + double input_scale_beta_rescale = + static_cast(input->params.scale) * + static_cast(params->beta) / + (10.0 / 65535.0); // scale the input_diff such that [-65535, 0] + // correspond to [-10.0, 0.0] + QuantizeMultiplier(input_scale_beta_rescale, &op_data->input_multiplier, + &input_left_shift); + op_data->input_left_shift = input_left_shift; + } else { +#if EI_TFLITE_DISABLE_SOFTMAX_IN_I8 + TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", + TfLiteTypeGetName(input->type), input->type); + return kTfLiteError; +#endif + int input_left_shift; + tflite::PreprocessSoftmaxScaling( + static_cast(params->beta), + static_cast(input->params.scale), kScaledDiffIntegerBits, + &op_data->input_multiplier, &input_left_shift); + op_data->input_left_shift = input_left_shift; + op_data->diff_min = + -1.0 * tflite::CalculateInputRadius(kScaledDiffIntegerBits, + op_data->input_left_shift); + } + } else { +#if EI_TFLITE_DISABLE_SOFTMAX_IN_F32 + TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", + TfLiteTypeGetName(input->type), input->type); + return kTfLiteError; +#endif + TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32); + TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteFloat32); + op_data->beta = static_cast(params->beta); + } + return kTfLiteOk; +} + +void* SoftmaxInit(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(SoftmaxParams)); +} + 
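+// SoftmaxPrepare() below allocates temporary input/output tensors, derives
+// the quantization parameters via CalculateSoftmaxParams(), and releases the
+// temporaries before returning. Note that for the int8-in / int8-out case the
+// checks above pin the output to scale 1/256 and zero point -128, i.e. real
+// outputs in [0, 255/256].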
+TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) { + MicroContext* micro_context = GetMicroContext(context); + + TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, 0); + TF_LITE_ENSURE(context, input != nullptr); + TF_LITE_ENSURE(context, NumDimensions(input) >= 1); + TfLiteTensor* output = micro_context->AllocateTempOutputTensor(node, 0); + TF_LITE_ENSURE(context, output != nullptr); + + TF_LITE_ENSURE(context, node->user_data != nullptr); + SoftmaxParams* op_data = static_cast(node->user_data); + + auto* params = static_cast(node->builtin_data); + auto ret_val = + CalculateSoftmaxParams(context, input, output, params, op_data); + + micro_context->DeallocateTempTfLiteTensor(input); + micro_context->DeallocateTempTfLiteTensor(output); + return ret_val; +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/space_to_batch_nd.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/space_to_batch_nd.cpp new file mode 100644 index 0000000..5a7f414 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/space_to_batch_nd.cpp @@ -0,0 +1,121 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/space_to_batch_nd.h" + +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h" + +namespace tflite { + +namespace { + +constexpr int kInputTensor = 0; +constexpr int kBlockShapeTensor = 1; +constexpr int kCropsTensor = 2; +constexpr int kOutputTensor = 0; + +// Currently, only 3D NHC and 4D NHWC input/output op_context are supported. +// In case of 3D input, it will be extended to 3D NHWC by adding W=1. +// The 4D array need to have exactly 2 spatial dimensions. +// TODO(b/149952582): Support arbitrary dimension in SpaceToBatchND. 
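+// The two constants below encode the supported input/output ranks (3 or 4).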
+const int kInputOutputMinDimensionNum = 3; +const int kInputOutputMaxDimensionNum = 4; + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(SpaceToBatchParams)); +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + MicroContext* micro_context = GetMicroContext(context); + + TF_LITE_ENSURE_EQ(context, NumInputs(node), 3); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + + TfLiteTensor* input = + micro_context->AllocateTempInputTensor(node, kInputTensor); + TfLiteTensor* output = + micro_context->AllocateTempOutputTensor(node, kOutputTensor); + TF_LITE_ENSURE(context, input != nullptr && output != nullptr); + + TF_LITE_ENSURE(context, NumDimensions(input) >= kInputOutputMinDimensionNum); + TF_LITE_ENSURE(context, NumDimensions(output) >= kInputOutputMinDimensionNum); + TF_LITE_ENSURE(context, NumDimensions(input) <= kInputOutputMaxDimensionNum); + TF_LITE_ENSURE(context, NumDimensions(output) <= kInputOutputMaxDimensionNum); + TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type); + + micro_context->DeallocateTempTfLiteTensor(input); + micro_context->DeallocateTempTfLiteTensor(output); + return kTfLiteOk; +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + const SpaceToBatchParams& params = + *(static_cast(node->user_data)); + + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + const TfLiteEvalTensor* block_shape = + tflite::micro::GetEvalInput(context, node, kBlockShapeTensor); + const TfLiteEvalTensor* crops = + tflite::micro::GetEvalInput(context, node, kCropsTensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + + switch (input->type) { // Already know in/out types are same. + case kTfLiteFloat32: + reference_ops::SpaceToBatchND( + params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(block_shape), + tflite::micro::GetTensorData(block_shape), + tflite::micro::GetTensorShape(crops), + tflite::micro::GetTensorData(crops), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + break; + case kTfLiteInt8: + reference_ops::SpaceToBatchND( + params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(block_shape), + tflite::micro::GetTensorData(block_shape), + tflite::micro::GetTensorShape(crops), + tflite::micro::GetTensorData(crops), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + break; + default: + MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type), + input->type); + return kTfLiteError; + } + return kTfLiteOk; +} + +} // namespace. + +TfLiteRegistration Register_SPACE_TO_BATCH_ND() { + return tflite::micro::RegisterOp(Init, Prepare, Eval); +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/space_to_depth.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/space_to_depth.cpp new file mode 100644 index 0000000..2ab0faa --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/space_to_depth.cpp @@ -0,0 +1,127 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/space_to_depth.h" + +#include + +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +namespace tflite { + +namespace { + +constexpr int kInputTensor = 0; +constexpr int kOutputTensor = 0; +constexpr int kBatchRank = 0; +constexpr int kHeightRank = 1; +constexpr int kWidthRank = 2; +constexpr int kDepthRank = 3; + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + auto* params = + reinterpret_cast(node->builtin_data); + + TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + + MicroContext* micro_context = GetMicroContext(context); + + TfLiteTensor* input = + micro_context->AllocateTempInputTensor(node, kInputTensor); + TF_LITE_ENSURE(context, input != nullptr); + TfLiteTensor* output = + micro_context->AllocateTempOutputTensor(node, kOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); + + TF_LITE_ENSURE_EQ(context, NumDimensions(input), 4); + + auto data_type = output->type; + TF_LITE_ENSURE(context, + data_type == kTfLiteFloat32 || data_type == kTfLiteInt8); + TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type); + + const int block_size = params->block_size; + const int input_height = input->dims->data[kHeightRank]; + const int input_width = input->dims->data[kWidthRank]; + int output_height = input_height / block_size; + int output_width = input_width / block_size; + + TF_LITE_ENSURE_EQ(context, input_height, output_height * block_size); + TF_LITE_ENSURE_EQ(context, input_width, output_width * block_size); + + // Relocate dims to the persistent storage arena before changing them, + // otherwise we'd be modifying temporary copies made by the interpreters each + // time they process the layer. 
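+  // CreateWritableTensorDimsWithCopy() copies the output dims into the
+  // persistent arena so the shape adjustments below survive across
+  // invocations.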
+ TfLiteEvalTensor* output_eval = + micro::GetEvalOutput(context, node, kOutputTensor); + TF_LITE_ENSURE_OK(context, micro::CreateWritableTensorDimsWithCopy( + context, output, output_eval)); + + output->dims->data[kBatchRank] = input->dims->data[kBatchRank]; + output->dims->data[kHeightRank] = output_height; + output->dims->data[kWidthRank] = output_width; + output->dims->data[kDepthRank] = + input->dims->data[kDepthRank] * block_size * block_size; + + micro_context->DeallocateTempTfLiteTensor(input); + micro_context->DeallocateTempTfLiteTensor(output); + + return kTfLiteOk; +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + auto* params = + reinterpret_cast(node->builtin_data); + + const TfLiteEvalTensor* input = + micro::GetEvalInput(context, node, kInputTensor); + TfLiteEvalTensor* output = micro::GetEvalOutput(context, node, kOutputTensor); + + SpaceToDepthParams op_params; + op_params.block_size = params->block_size; + + switch (input->type) { // Already know in/out types are same. + case kTfLiteFloat32: + reference_ops::SpaceToDepth(op_params, micro::GetTensorShape(input), + micro::GetTensorData(input), + micro::GetTensorShape(output), + micro::GetTensorData(output)); + break; + case kTfLiteInt8: + reference_ops::SpaceToDepth(op_params, micro::GetTensorShape(input), + micro::GetTensorData(input), + micro::GetTensorShape(output), + micro::GetTensorData(output)); + break; + default: + MicroPrintf("SPACE_TO_DEPTH only supports FLOAT32 and INT8, got %s.", + TfLiteTypeGetName(input->type)); + return kTfLiteError; + } + + return kTfLiteOk; +} + +} // namespace + +TfLiteRegistration Register_SPACE_TO_DEPTH() { + return tflite::micro::RegisterOp(nullptr, Prepare, Eval); +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/split.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/split.cpp new file mode 100644 index 0000000..d4d5280 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/split.cpp @@ -0,0 +1,128 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +namespace tflite { +namespace ops { +namespace micro { +namespace split { + +template +TfLiteStatus SplitImpl(TfLiteContext* context, TfLiteNode* node, + const TfLiteEvalTensor* input, int axis_value) { + const int output_count = NumOutputs(node); + const TfLiteIntArray* input_dims = input->dims; + const TfLiteEvalTensor* output0 = + tflite::micro::GetEvalOutput(context, node, 0); + const TfLiteIntArray* output_dims = output0->dims; + + const int split_dimensions = input_dims->size; + int axis = axis_value < 0 ? axis_value + split_dimensions : axis_value; + + TFLITE_DCHECK_LT(axis, split_dimensions); + TFLITE_DCHECK_EQ(output_dims->size, split_dimensions); + + int64_t split_size = output_dims->data[axis] * output_count; + + TFLITE_DCHECK_EQ(split_size, input_dims->data[axis]); + int64_t outer_size = 1; + for (int i = 0; i < axis; ++i) { + outer_size *= input_dims->data[i]; + } + + int64_t base_inner_size = 1; + for (int i = axis + 1; i < split_dimensions; ++i) { + base_inner_size *= input_dims->data[i]; + } + + const T* input_ptr = tflite::micro::GetTensorData(input); + for (int k = 0; k < outer_size; ++k) { + for (int i = 0; i < output_count; ++i) { + TfLiteEvalTensor* t = tflite::micro::GetEvalOutput(context, node, i); + T* output_data = tflite::micro::GetTensorData(t); + const int copy_size = output_dims->data[axis] * base_inner_size; + T* output_ptr = output_data + k * copy_size; + for (int j = 0; j < copy_size; ++j) output_ptr[j] = input_ptr[j]; + input_ptr += copy_size; + } + } + + return kTfLiteOk; +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + MicroContext* micro_context = GetMicroContext(context); + TfLiteTensor* axis = micro_context->AllocateTempInputTensor(node, 0); + TF_LITE_ENSURE(context, axis != nullptr); + + // Dynamic output tensors are needed if axis tensor is not constant. + // But Micro doesn't support dynamic memory allocation, so we only support + // constant axis tensor for now. 
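+  // The check below therefore rejects models whose SPLIT axis (input 0) is
+  // produced at runtime instead of being a constant tensor in the flatbuffer.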
+ TF_LITE_ENSURE_MSG(context, IsConstantTensor(axis), + "Non constant axis tensor not supported"); + + micro_context->DeallocateTempTfLiteTensor(axis); + return kTfLiteOk; +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + const TfLiteEvalTensor* axis = tflite::micro::GetEvalInput(context, node, 0); + const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 1); + + int axis_value = tflite::micro::GetTensorData(axis)[0]; + if (axis_value < 0) { + axis_value += input->dims->size; + } + + TF_LITE_ENSURE(context, axis_value >= 0); + TF_LITE_ENSURE(context, axis_value < input->dims->size); + + switch (input->type) { + case kTfLiteFloat32: { + return SplitImpl(context, node, input, axis_value); + } + case kTfLiteInt8: { + return SplitImpl(context, node, input, axis_value); + } + case kTfLiteInt16: { + return SplitImpl(context, node, input, axis_value); + } + case kTfLiteInt32: { + return SplitImpl(context, node, input, axis_value); + } + default: + MicroPrintf("Type %s currently not supported.", + TfLiteTypeGetName(input->type)); + return kTfLiteError; + } + + return kTfLiteOk; +} + +} // namespace split + +TfLiteRegistration Register_SPLIT() { + return tflite::micro::RegisterOp(nullptr, split::Prepare, split::Eval); +} + +} // namespace micro +} // namespace ops +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/split_v.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/split_v.cpp new file mode 100644 index 0000000..caf6083 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/split_v.cpp @@ -0,0 +1,127 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +namespace tflite { + +namespace { + +template +TfLiteStatus SplitImpl(TfLiteContext* context, TfLiteNode* node, + const TfLiteEvalTensor* input, int axis_value) { + const TfLiteIntArray* input_dims = input->dims; + const TfLiteEvalTensor* output0 = + tflite::micro::GetEvalOutput(context, node, 0); + + const int split_dimensions = input_dims->size; + + TFLITE_DCHECK_LT(axis_value, split_dimensions); + TFLITE_DCHECK_EQ(output0->dims->size, split_dimensions); + + int64_t split_size = 0; + const int output_count = NumOutputs(node); + for (int i = 0; i < output_count; i++) { + split_size += + tflite::micro::GetEvalOutput(context, node, i)->dims->data[axis_value]; + } + TFLITE_DCHECK_EQ(split_size, input_dims->data[axis_value]); + int64_t outer_size = 1; + for (int i = 0; i < axis_value; ++i) { + outer_size *= input_dims->data[i]; + } + + int64_t base_inner_size = 1; + for (int i = axis_value + 1; i < split_dimensions; ++i) { + base_inner_size *= input_dims->data[i]; + } + + const T* input_ptr = tflite::micro::GetTensorData(input); + for (int k = 0; k < outer_size; ++k) { + for (int i = 0; i < output_count; ++i) { + TfLiteEvalTensor* output_tensor = + tflite::micro::GetEvalOutput(context, node, i); + T* output_data = tflite::micro::GetTensorData(output_tensor); + const int copy_size = + output_tensor->dims->data[axis_value] * base_inner_size; + T* output_ptr = output_data + k * copy_size; + for (int j = 0; j < copy_size; ++j) output_ptr[j] = input_ptr[j]; + input_ptr += copy_size; + } + } + + return kTfLiteOk; +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TF_LITE_ENSURE_EQ(context, NumInputs(node), 3); + + MicroContext* micro_context = GetMicroContext(context); + // Dynamic output tensors are needed if axis tensor is not constant. + // But Micro doesn't support dynamic memory allocation, so we only support + // constant axis tensor for now. 
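+  // For SPLIT_V the axis is the third input (index 2); as with SPLIT it must
+  // be a constant tensor, which the check below enforces.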
+ TfLiteTensor* axis = micro_context->AllocateTempInputTensor(node, 2); + TF_LITE_ENSURE_MSG(context, IsConstantTensor(axis), + "Non constant axis tensor not supported"); + micro_context->DeallocateTempTfLiteTensor(axis); + return kTfLiteOk; +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0); + const TfLiteEvalTensor* axis = tflite::micro::GetEvalInput(context, node, 2); + + int axis_value = tflite::micro::GetTensorData(axis)[0]; + if (axis_value < 0) { + axis_value += input->dims->size; + } + + TF_LITE_ENSURE(context, axis_value >= 0); + TF_LITE_ENSURE(context, axis_value < input->dims->size); + + switch (input->type) { + case kTfLiteFloat32: { + return SplitImpl(context, node, input, axis_value); + } + case kTfLiteInt8: { + return SplitImpl(context, node, input, axis_value); + } + case kTfLiteInt16: { + return SplitImpl(context, node, input, axis_value); + } + case kTfLiteInt32: { + return SplitImpl(context, node, input, axis_value); + } + default: + MicroPrintf("Type %s currently not supported.", + TfLiteTypeGetName(input->type)); + return kTfLiteError; + } + return kTfLiteOk; +} + +} // namespace + +TfLiteRegistration Register_SPLIT_V() { + return tflite::micro::RegisterOp(nullptr, Prepare, Eval); +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/squared_difference.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/squared_difference.cpp new file mode 100644 index 0000000..e45cbbe --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/squared_difference.cpp @@ -0,0 +1,247 @@ +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/binary_function.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/add.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_context.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +namespace tflite { +namespace { +constexpr int kInputTensor1 = 0; +constexpr int kInputTensor2 = 1; +constexpr int kOutputTensor = 0; + +struct OpData { + bool requires_broadcast; + ArithmeticParams arithmetic_params; +}; + +template +T SquaredDifference(T input1, T input2) { + const T difference = input1 - input2; + return difference * difference; +} + +void* SquaredDifferenceInit(TfLiteContext* context, const char* buffer, + size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(OpData)); +} + +TfLiteStatus SquaredDifferencePrepare(TfLiteContext* context, + TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + OpData* data = reinterpret_cast(node->user_data); + data->requires_broadcast = false; + + TF_LITE_ENSURE_EQ(context, NumInputs(node), 2); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + + MicroContext* micro_context = GetMicroContext(context); + + TfLiteTensor* input1 = + micro_context->AllocateTempInputTensor(node, kInputTensor1); + TF_LITE_ENSURE(context, input1 != nullptr); + TfLiteTensor* input2 = + micro_context->AllocateTempInputTensor(node, kInputTensor2); + TF_LITE_ENSURE(context, input2 != nullptr); + TfLiteTensor* output = + micro_context->AllocateTempOutputTensor(node, kOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); + + TF_LITE_ENSURE_TYPES_EQ(context, input1->type, input2->type); + output->type = input2->type; + + // Ensure the quantization parameters are equivalent. + if (input1->type == kTfLiteInt8) { + const auto& input1_quantization_params = input1->params; + const auto& input2_quantization_params = input2->params; + const auto& output_quantization_params = output->params; + const int32_t integer_type_min = std::numeric_limits::min(); + const int32_t integer_type_max = std::numeric_limits::max(); + TF_LITE_ENSURE(context, + input1_quantization_params.zero_point >= integer_type_min); + TF_LITE_ENSURE(context, + input1_quantization_params.zero_point <= integer_type_max); + TF_LITE_ENSURE(context, + input2_quantization_params.zero_point >= integer_type_min); + TF_LITE_ENSURE(context, + input2_quantization_params.zero_point <= integer_type_max); + TF_LITE_ENSURE(context, + output_quantization_params.zero_point >= integer_type_min); + TF_LITE_ENSURE(context, + output_quantization_params.zero_point <= integer_type_max); + data->arithmetic_params.input1_offset = + -input1_quantization_params.zero_point; + data->arithmetic_params.input2_offset = + -input2_quantization_params.zero_point; + data->arithmetic_params.output_offset = + output_quantization_params.zero_point; + + // shift to make integer for scales. 
+ // 7 is selected so that maximum shifted result 255^2 * (1 << (7 * 2 )) + // does not overflow signed 32-bit integer + data->arithmetic_params.left_shift = 7; + const double twice_max_input_scale = + 2.0 * static_cast(std::max(input1_quantization_params.scale, + input2_quantization_params.scale)); + const double real_input1_multiplier = + static_cast(input1_quantization_params.scale) / + twice_max_input_scale; + double real_input2_multiplier = + static_cast(input2_quantization_params.scale) / + twice_max_input_scale; + const double real_output_multiplier = + (twice_max_input_scale * twice_max_input_scale) / + static_cast((1 << data->arithmetic_params.left_shift * 2) * + output_quantization_params.scale); + QuantizeMultiplierSmallerThanOneExp( + real_input1_multiplier, &data->arithmetic_params.input1_multiplier, + &data->arithmetic_params.input1_shift); + QuantizeMultiplierSmallerThanOneExp( + real_input2_multiplier, &data->arithmetic_params.input2_multiplier, + &data->arithmetic_params.input2_shift); + QuantizeMultiplierSmallerThanOneExp( + real_output_multiplier, &data->arithmetic_params.output_multiplier, + &data->arithmetic_params.output_shift); + data->arithmetic_params.quantized_activation_min = + std::numeric_limits::min(); + data->arithmetic_params.quantized_activation_max = + std::numeric_limits::max(); + } + + data->requires_broadcast = !HaveSameShapes(input1, input2); + + micro_context->DeallocateTempTfLiteTensor(input1); + micro_context->DeallocateTempTfLiteTensor(input2); + micro_context->DeallocateTempTfLiteTensor(output); + return kTfLiteOk; +} + +inline int8_t SquaredDifference(int8_t x, int8_t y, + const ArithmeticParams& params) { + const int32_t input1_val = params.input1_offset + x; + const int32_t input2_val = params.input2_offset + y; + const int32_t shifted_input1_val = input1_val * (1 << params.left_shift); + const int32_t shifted_input2_val = input2_val * (1 << params.left_shift); + const int32_t scaled_input1_val = + MultiplyByQuantizedMultiplierSmallerThanOneExp( + shifted_input1_val, params.input1_multiplier, params.input1_shift); + const int32_t scaled_input2_val = + MultiplyByQuantizedMultiplierSmallerThanOneExp( + shifted_input2_val, params.input2_multiplier, params.input2_shift); + const int32_t raw_diff = scaled_input1_val - scaled_input2_val; + + // Max of this is 255^2 * (1 << 14), so won't overflow 32 bits. 
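+  // (255^2 * (1 << 14) is roughly 1.07e9, comfortably below the int32 limit
+  // of about 2.15e9.)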
+ const int32_t squared_raw_diff = raw_diff * raw_diff; + const int32_t raw_output = + MultiplyByQuantizedMultiplierSmallerThanOneExp( + squared_raw_diff, params.output_multiplier, params.output_shift) + + params.output_offset; + const int32_t clamped_output = + std::min(params.quantized_activation_max, + std::max(params.quantized_activation_min, raw_output)); + return static_cast(clamped_output); +} + +template +void EvalQuantizedSquaredDifference(TfLiteContext* context, TfLiteNode* node, + const OpData* data, + const TfLiteEvalTensor* input1, + const TfLiteEvalTensor* input2, + TfLiteEvalTensor* output) { + const auto* op_data = static_cast(node->user_data); + if (data->requires_broadcast) { + reference_integer_ops::BroadcastBinaryFunction4DSlow( + op_data->arithmetic_params, tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output), + reference_integer_ops::CheckArithmeticParams, SquaredDifference); + } else { + const int flat_size = tflite::micro::GetTensorShape(input1).FlatSize(); + reference_integer_ops::ElementWise( + flat_size, op_data->arithmetic_params, + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorData(output), + reference_integer_ops::CheckArithmeticParams, SquaredDifference); + } +} + +template +void EvalSquaredDifference(TfLiteContext* context, TfLiteNode* node, + const OpData* data, const TfLiteEvalTensor* input1, + const TfLiteEvalTensor* input2, + TfLiteEvalTensor* output) { + if (data->requires_broadcast) { + reference_ops::BroadcastBinaryFunction4DSlow( + tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output), SquaredDifference); + } else { + reference_ops::BinaryFunction( + tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output), SquaredDifference); + } +} + +TfLiteStatus SquaredDifferenceEval(TfLiteContext* context, TfLiteNode* node) { + OpData* data = reinterpret_cast(node->user_data); + + const TfLiteEvalTensor* input1 = + tflite::micro::GetEvalInput(context, node, kInputTensor1); + const TfLiteEvalTensor* input2 = + tflite::micro::GetEvalInput(context, node, kInputTensor2); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + + if (output->type == kTfLiteFloat32) { + EvalSquaredDifference(context, node, data, input1, input2, output); + } else if (output->type == kTfLiteInt32) { + EvalSquaredDifference(context, node, data, input1, input2, output); + } else if (output->type == kTfLiteInt8) { + EvalQuantizedSquaredDifference(context, node, data, input1, input2, + output); + } else { + MicroPrintf( + "SquaredDifference only supports FLOAT32, INT32 and INT8 now, got %d.", + output->type); + return kTfLiteError; + } + + return kTfLiteOk; +} +} // namespace + +TfLiteRegistration Register_SQUARED_DIFFERENCE() { + return tflite::micro::RegisterOp( + SquaredDifferenceInit, SquaredDifferencePrepare, SquaredDifferenceEval); +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/squeeze.cpp 
b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/squeeze.cpp new file mode 100644 index 0000000..8a42410 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/squeeze.cpp @@ -0,0 +1,118 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +namespace tflite { +namespace { + +struct SqueezeContext { + SqueezeContext(TfLiteContext* context, TfLiteNode* node) { + params = reinterpret_cast(node->builtin_data); + micro_context = GetMicroContext(context); + input = micro_context->AllocateTempInputTensor(node, 0); + output = micro_context->AllocateTempOutputTensor(node, 0); + } + ~SqueezeContext() { + micro_context->DeallocateTempTfLiteTensor(input); + micro_context->DeallocateTempTfLiteTensor(output); + } + MicroContext* micro_context; + TfLiteSqueezeParams* params; + TfLiteTensor* input; + TfLiteTensor* output; +}; + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + + SqueezeContext op_context(context, node); + const int input_num_dims = NumDimensions(op_context.input); + const int num_squeeze_dims = op_context.params->num_squeeze_dims; + + // Determines number of dimensions of output tensor after squeeze. + const TfLiteIntArray* input_dims = op_context.input->dims; + const TfLiteIntArray* output_dims = op_context.output->dims; + const int* squeeze_dims = op_context.params->squeeze_dims; + + constexpr int max_squeeze_dims = 8; + TF_LITE_ENSURE(context, input_num_dims <= max_squeeze_dims); + bool should_squeeze[max_squeeze_dims] = {}; + + if (num_squeeze_dims == 0) { + for (int idx = 0; idx < input_num_dims; ++idx) { + if (input_dims->data[idx] == 1) { + should_squeeze[idx] = true; + } + } + } else { + for (int idx = 0; idx < num_squeeze_dims; ++idx) { + int current = squeeze_dims[idx] < 0 ? squeeze_dims[idx] + input_num_dims + : squeeze_dims[idx]; + TF_LITE_ENSURE(context, current >= 0 && current < input_num_dims && + input_dims->data[current] == 1); + should_squeeze[current] = true; + } + } + + // Ensure output dimensions are big enough. 
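+  // TFLM cannot resize tensors at run time, so the loop below only verifies
+  // that every retained input dimension fits into the pre-allocated output.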
+ for (int in_idx = 0, out_idx = 0; in_idx < input_num_dims; ++in_idx) { + if (!should_squeeze[in_idx]) { + TFLITE_CHECK_GE(output_dims->data[out_idx++], input_dims->data[in_idx]); + } + } + + return kTfLiteOk; +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0); + + if (input->type == kTfLiteString) { + MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type), + input->type); + return kTfLiteError; + } + + TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0); + size_t input_byte_size; + size_t output_byte_size; + TF_LITE_ENSURE_OK(context, + TfLiteEvalTensorByteLength(input, &input_byte_size)); + TF_LITE_ENSURE_OK(context, + TfLiteEvalTensorByteLength(output, &output_byte_size)); + + TF_LITE_ENSURE_EQ(context, input_byte_size, output_byte_size); + memcpy(output->data.raw, input->data.raw, input_byte_size); + return kTfLiteOk; +} + +} // namespace + +TfLiteRegistration Register_SQUEEZE() { + return tflite::micro::RegisterOp(nullptr, Prepare, Eval); +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/strided_slice.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/strided_slice.cpp new file mode 100644 index 0000000..b8c5d71 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/strided_slice.cpp @@ -0,0 +1,250 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/strided_slice.h" + +#include +#include + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +namespace tflite { + +namespace { + +constexpr int kInputTensor = 0; +constexpr int kBeginTensor = 1; +constexpr int kEndTensor = 2; +constexpr int kStridesTensor = 3; +constexpr int kOutputTensor = 0; + +struct StridedSliceContext { + StridedSliceContext(TfLiteContext* context, TfLiteNode* node) { + params = reinterpret_cast(node->builtin_data); + micro_context = GetMicroContext(context); + input = micro_context->AllocateTempInputTensor(node, kInputTensor); + begin = micro_context->AllocateTempInputTensor(node, kBeginTensor); + end = micro_context->AllocateTempInputTensor(node, kEndTensor); + strides = micro_context->AllocateTempInputTensor(node, kStridesTensor); + output = micro_context->AllocateTempOutputTensor(node, kOutputTensor); + dims = NumDimensions(input); + } + ~StridedSliceContext() { + micro_context->DeallocateTempTfLiteTensor(input); + micro_context->DeallocateTempTfLiteTensor(begin); + micro_context->DeallocateTempTfLiteTensor(end); + micro_context->DeallocateTempTfLiteTensor(strides); + micro_context->DeallocateTempTfLiteTensor(output); + } + const TfLiteStridedSliceParams* params; + MicroContext* micro_context; + TfLiteTensor* input; + TfLiteTensor* begin; + TfLiteTensor* end; + TfLiteTensor* strides; + TfLiteTensor* output; + int dims; +}; + +// This Op only supports 1-4D cases and since we use the reference 4D +// implementation, the 1-3D tensors are mapped to 4D. +const int kMaxDim = 4; + +tflite::StridedSliceParams BuildStridedSliceParams( + StridedSliceContext* op_context) { + tflite::StridedSliceParams op_params; + op_params.start_indices_count = op_context->dims; + op_params.stop_indices_count = op_context->dims; + op_params.strides_count = op_context->dims; + + for (int i = 0; i < op_context->dims; ++i) { + op_params.start_indices[i] = GetTensorData(op_context->begin)[i]; + op_params.stop_indices[i] = GetTensorData(op_context->end)[i]; + op_params.strides[i] = GetTensorData(op_context->strides)[i]; + } + + op_params.begin_mask = op_context->params->begin_mask; + op_params.ellipsis_mask = 0; + op_params.end_mask = op_context->params->end_mask; + op_params.new_axis_mask = 0; + op_params.shrink_axis_mask = op_context->params->shrink_axis_mask; + return op_params; +} + +// Processes the indexing tensors (begin, end and strides) to resize the +// output tensor. This function is callable from both Prepare() and Eval() as +// long as the caller ensures the indexing tensors are present. 
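+// In this micro port the output shape is fixed ahead of time, so instead of
+// resizing, CheckOutputSize() only verifies that the precomputed output shape
+// matches what the begin/end/stride parameters produce.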
+TfLiteStatus CheckOutputSize(TfLiteContext* context, + StridedSliceContext* op_context) { + using ::tflite::strided_slice::StartForAxis; + using ::tflite::strided_slice::StopForAxis; + TfLiteIntArray* output_shape = op_context->output->dims; + int shape_size = 0; + auto op_params = BuildStridedSliceParams(op_context); + auto input_shape = GetTensorShape(op_context->input); + for (int idx = 0; idx < op_context->dims; ++idx) { + int32_t stride = GetTensorData(op_context->strides)[idx]; + TF_LITE_ENSURE_MSG(context, stride != 0, "stride value has to be non-zero"); + int32_t begin = StartForAxis(op_params, input_shape, idx); + int32_t end = StopForAxis(op_params, input_shape, idx, begin); + + // When shrinking an axis, the end position does not matter (and can be + // incorrect when negative indexing is used, see Issue #19260). Always use + // begin + 1 to generate a length 1 slice, since begin has + // already been adjusted for negative indices by StartForAxis. + const bool shrink_axis = op_context->params->shrink_axis_mask & (1 << idx); + if (shrink_axis) { + end = begin + 1; + } + + // This is valid for both positive and negative strides + int32_t dim_shape = std::ceil((end - begin) / static_cast(stride)); + dim_shape = dim_shape < 0 ? 0 : dim_shape; + if (!shrink_axis) { + TF_LITE_ENSURE_EQ(context, output_shape->data[shape_size], dim_shape); + shape_size++; + } + } + TF_LITE_ENSURE_EQ(context, output_shape->size, shape_size); + return kTfLiteOk; +} + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(StridedSliceParams)); +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + StridedSliceParams* op_params = + static_cast(node->user_data); + TF_LITE_ENSURE_EQ(context, NumInputs(node), 4); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + StridedSliceContext op_context(context, node); + TF_LITE_ENSURE_MSG(context, op_context.dims <= kMaxDim, + "input dim should not exceed 4"); + auto params = BuildStridedSliceParams(&op_context); + memcpy(op_params, ¶ms, sizeof(StridedSliceParams)); + return CheckOutputSize(context, &op_context); +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + const StridedSliceParams& op_params = + *(static_cast(node->user_data)); + + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + switch (output->type) { + case kTfLiteFloat32: + #if EI_TFLITE_DISABLE_STRIDED_SLICE_OUT_F32 + TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", + TfLiteTypeGetName(output->type), output->type); + return kTfLiteError; + #endif + + reference_ops::StridedSlice(op_params, + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + break; + case kTfLiteUInt8: + #if EI_TFLITE_DISABLE_STRIDED_SLICE_OUT_U8 + TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", + TfLiteTypeGetName(output->type), output->type); + return kTfLiteError; + #endif + + reference_ops::StridedSlice( + op_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + break; + case kTfLiteInt8: + #if 
EI_TFLITE_DISABLE_STRIDED_SLICE_OUT_I8 + TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", + TfLiteTypeGetName(output->type), output->type); + return kTfLiteError; + #endif + + reference_ops::StridedSlice(op_params, + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + break; + case kTfLiteInt16: + #if EI_TFLITE_DISABLE_STRIDED_SLICE_OUT_I16 + TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", + TfLiteTypeGetName(output->type), output->type); + return kTfLiteError; + #endif + + reference_ops::StridedSlice( + op_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + break; + case kTfLiteInt32: + #if EI_TFLITE_DISABLE_STRIDED_SLICE_OUT_I32 + TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", + TfLiteTypeGetName(output->type), output->type); + return kTfLiteError; + #endif + + reference_ops::StridedSlice( + op_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + break; + case kTfLiteBool: + #if EI_TFLITE_DISABLE_STRIDED_SLICE_OUT_BOOL + TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", + TfLiteTypeGetName(output->type), output->type); + return kTfLiteError; + #endif + + reference_ops::StridedSlice(op_params, + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + break; + default: + MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type), + input->type); + return kTfLiteError; + } + return kTfLiteOk; +} + +} // namespace + +TfLiteRegistration Register_STRIDED_SLICE() { + return tflite::micro::RegisterOp(Init, Prepare, Eval); +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/sub.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/sub.cpp new file mode 100644 index 0000000..266d6b5 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/sub.cpp @@ -0,0 +1,168 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/sub.h" + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/add.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/sub.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +namespace tflite { + +void* SubInit(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(OpDataSub)); +} + +void EvalSub(TfLiteContext* context, TfLiteNode* node, TfLiteSubParams* params, + const OpDataSub* data, const TfLiteEvalTensor* input1, + const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) { + float output_activation_min, output_activation_max; + CalculateActivationRange(params->activation, &output_activation_min, + &output_activation_max); + tflite::ArithmeticParams op_params; + SetActivationParams(output_activation_min, output_activation_max, &op_params); + if (data->requires_broadcast) { + tflite::reference_ops::BroadcastSubSlow( + op_params, tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } else { + tflite::reference_ops::SubWithActivation( + op_params, tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } +} + +TfLiteStatus EvalSubQuantized(TfLiteContext* context, TfLiteNode* node, + TfLiteSubParams* params, const OpDataSub* data, + const TfLiteEvalTensor* input1, + const TfLiteEvalTensor* input2, + TfLiteEvalTensor* output) { + tflite::ArithmeticParams op_params; + op_params.left_shift = data->left_shift; + op_params.input1_offset = data->input1_offset; + op_params.input1_multiplier = data->input1_multiplier; + op_params.input1_shift = data->input1_shift; + op_params.input2_offset = data->input2_offset; + op_params.input2_multiplier = data->input2_multiplier; + op_params.input2_shift = data->input2_shift; + op_params.output_offset = data->output_offset; + op_params.output_multiplier = data->output_multiplier; + op_params.output_shift = data->output_shift; + SetActivationParams(data->output_activation_min, data->output_activation_max, + &op_params); + bool need_broadcast = reference_ops::ProcessBroadcastShapes( + tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorShape(input2), &op_params); + + switch (output->type) { + case kTfLiteInt8: { + if (need_broadcast) { + 
tflite::reference_ops::BroadcastQuantSubSlow( + op_params, tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } else { + tflite::reference_ops::Sub( + op_params, tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } + break; + } + case kTfLiteInt16: { + if (need_broadcast) { + tflite::reference_ops::BroadcastQuantSubSlow( + op_params, tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } else { + tflite::reference_ops::Sub( + op_params, tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } + break; + } + default: + MicroPrintf("Quantized type %s not currently supported.", + TfLiteTypeGetName(output->type)); + return kTfLiteError; + } + return kTfLiteOk; +} + +TfLiteStatus SubEval(TfLiteContext* context, TfLiteNode* node) { + auto* params = reinterpret_cast(node->builtin_data); + + const TfLiteEvalTensor* input1 = + tflite::micro::GetEvalInput(context, node, kSubInputTensor1); + const TfLiteEvalTensor* input2 = + tflite::micro::GetEvalInput(context, node, kSubInputTensor2); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kSubOutputTensor); + + TFLITE_DCHECK(node->user_data != nullptr); + const OpDataSub& data = *(static_cast(node->user_data)); + + if (output->type == kTfLiteFloat32) { + EvalSub(context, node, params, &data, input1, input2, output); + } else if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) { + TF_LITE_ENSURE_OK(context, EvalSubQuantized(context, node, params, &data, + input1, input2, output)); + } else { + MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(output->type), + output->type); + return kTfLiteError; + } + + return kTfLiteOk; +} + +TfLiteRegistration Register_SUB() { + return tflite::micro::RegisterOp(SubInit, SubPrepare, SubEval); +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/sub.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/sub.h new file mode 100644 index 0000000..36608d5 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/sub.h @@ -0,0 +1,60 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_LITE_MICRO_KERNELS_SUB_H_ +#define TENSORFLOW_LITE_MICRO_KERNELS_SUB_H_ + +#include + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" + +namespace tflite { + +extern const int kSubInputTensor1; +extern const int kSubInputTensor2; +extern const int kSubOutputTensor; + +struct OpDataSub { + bool requires_broadcast; + + // These fields are used in both the general 8-bit -> 8bit quantized path, + // and the special 16-bit -> 16bit quantized path + int input1_shift; + int input2_shift; + int32_t output_activation_min; + int32_t output_activation_max; + + // These fields are used only in the general 8-bit -> 8bit quantized path + int32_t input1_multiplier; + int32_t input2_multiplier; + int32_t output_multiplier; + int output_shift; + int left_shift; + int32_t input1_offset; + int32_t input2_offset; + int32_t output_offset; +}; + +TfLiteStatus CalculateOpDataSub(TfLiteContext* context, TfLiteSubParams* params, + const TfLiteTensor* input1, + const TfLiteTensor* input2, + TfLiteTensor* output, OpDataSub* data); + +TfLiteStatus SubPrepare(TfLiteContext* context, TfLiteNode* node); + +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_KERNELS_SUB_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/sub_common.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/sub_common.cpp new file mode 100644 index 0000000..fcb8d4b --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/sub_common.cpp @@ -0,0 +1,109 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/add.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/sub.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/sub.h" + +namespace tflite { + +const int kSubInputTensor1 = 0; +const int kSubInputTensor2 = 1; +const int kSubOutputTensor = 0; + +TfLiteStatus CalculateOpDataSub(TfLiteContext* context, TfLiteSubParams* params, + const TfLiteTensor* input1, + const TfLiteTensor* input2, + TfLiteTensor* output, OpDataSub* data) { + data->requires_broadcast = !HaveSameShapes(input1, input2); + + if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) { + // 8bit -> 8bit general quantized path, with general rescalings + data->input1_offset = -input1->params.zero_point; + data->input2_offset = -input2->params.zero_point; + data->output_offset = output->params.zero_point; + + // The shift is set to 15 in case of 16-bit and 20 in case of 8-bit, + // accordingly. In case of 16-bit we have 65535 << 15 which is less than 1 + // << 31, therefore the addition will still fit in a 32 bit accumulator. + data->left_shift = output->type == kTfLiteInt16 ? 
15 : 20; + const float twice_max_input_scale = + 2 * std::max(input1->params.scale, input2->params.scale); + const double real_input1_multiplier = + static_cast(input1->params.scale) / + static_cast(twice_max_input_scale); + const double real_input2_multiplier = + static_cast(input2->params.scale) / + static_cast(twice_max_input_scale); + const double real_output_multiplier = + static_cast(twice_max_input_scale) / + ((1 << data->left_shift) * static_cast(output->params.scale)); + + QuantizeMultiplierSmallerThanOneExp( + real_input1_multiplier, &data->input1_multiplier, &data->input1_shift); + + QuantizeMultiplierSmallerThanOneExp( + real_input2_multiplier, &data->input2_multiplier, &data->input2_shift); + + QuantizeMultiplierSmallerThanOneExp( + real_output_multiplier, &data->output_multiplier, &data->output_shift); + + TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized( + context, params->activation, output, &data->output_activation_min, + &data->output_activation_max)); + } + + return kTfLiteOk; +} + +TfLiteStatus SubPrepare(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + TFLITE_DCHECK(node->builtin_data != nullptr); + + OpDataSub* data = static_cast(node->user_data); + auto* params = reinterpret_cast(node->builtin_data); + + MicroContext* micro_context = GetMicroContext(context); + + TfLiteTensor* input1 = + micro_context->AllocateTempInputTensor(node, kSubInputTensor1); + TF_LITE_ENSURE(context, input1 != nullptr); + TfLiteTensor* input2 = + micro_context->AllocateTempInputTensor(node, kSubInputTensor2); + TF_LITE_ENSURE(context, input2 != nullptr); + TfLiteTensor* output = + micro_context->AllocateTempOutputTensor(node, kSubOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); + + TF_LITE_ENSURE_STATUS( + CalculateOpDataSub(context, params, input1, input2, output, data)); + + micro_context->DeallocateTempTfLiteTensor(input1); + micro_context->DeallocateTempTfLiteTensor(input2); + micro_context->DeallocateTempTfLiteTensor(output); + return kTfLiteOk; +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/svdf.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/svdf.cpp new file mode 100644 index 0000000..e9b50e5 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/svdf.cpp @@ -0,0 +1,339 @@ +// Patched by Edge Impulse to include reference and hardware-accelerated kernels +#include "../../../../classifier/ei_classifier_config.h" +#if 0 == 1 +/* noop */ +#elif EI_CLASSIFIER_TFLITE_ENABLE_CMSIS_NN == 1 +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/svdf.h" + +#include "edge-impulse-sdk/CMSIS/NN/Include/arm_nn_types.h" +#include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnfunctions.h" +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/activation_utils.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h" + +namespace tflite { +namespace { + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(OpDataSvdf)); +} + +TfLiteStatus EvalIntegerSVDF(TfLiteContext* context, TfLiteNode* node, + const TfLiteEvalTensor* input_tensor, + const TfLiteEvalTensor* weights_feature_tensor, + const TfLiteEvalTensor* weights_time_tensor, + const TfLiteEvalTensor* bias_tensor, + const TfLiteSVDFParams* params, + TfLiteEvalTensor* activation_state_tensor, + TfLiteEvalTensor* output_tensor, + const OpDataSvdf& data) { + cmsis_nn_dims input_dims; + input_dims.n = input_tensor->dims->data[0]; + input_dims.h = input_tensor->dims->data[1]; + + cmsis_nn_dims weights_feature_dims; + weights_feature_dims.n = weights_feature_tensor->dims->data[0]; + weights_feature_dims.h = weights_feature_tensor->dims->data[1]; + + cmsis_nn_dims weights_time_dims; + weights_time_dims.n = weights_time_tensor->dims->data[0]; + weights_time_dims.h = weights_time_tensor->dims->data[1]; + + cmsis_nn_dims bias_dims; + bias_dims.n = bias_tensor->dims->data[0]; + + cmsis_nn_dims state_dims; + state_dims.n = bias_tensor->dims->data[0]; + state_dims.h = bias_tensor->dims->data[1]; + + cmsis_nn_dims output_dims; + output_dims.n = output_tensor->dims->data[0]; + output_dims.h = output_tensor->dims->data[1]; + + cmsis_nn_svdf_params svdf_params; + svdf_params.rank = params->rank; + svdf_params.input_offset = data.input_zero_point; + svdf_params.output_offset = data.output_zero_point; + + svdf_params.input_activation.min = INT16_MIN; + svdf_params.input_activation.max = INT16_MAX; + + svdf_params.output_activation.min = INT8_MIN; + svdf_params.output_activation.max = INT8_MAX; + + cmsis_nn_per_tensor_quant_params in_quant_params; + in_quant_params.multiplier = data.effective_scale_1_a; + in_quant_params.shift = data.effective_scale_1_b; + + cmsis_nn_per_tensor_quant_params out_quant_params; + out_quant_params.multiplier = data.effective_scale_2_a; + out_quant_params.shift = data.effective_scale_2_b; + + TFLITE_DCHECK(context != nullptr); + TFLITE_DCHECK(context->GetScratchBuffer != nullptr); + + cmsis_nn_context scratch_ctx; + scratch_ctx.buf = static_cast( + context->GetScratchBuffer(context, data.scratch_tensor_index)); + + cmsis_nn_context scratch_output_ctx; + scratch_output_ctx.buf = static_cast( + context->GetScratchBuffer(context, data.scratch_output_tensor_index)); + + int8_t* 
output_data = tflite::micro::GetTensorData(output_tensor); + + switch (weights_time_tensor->type) { + case kTfLiteInt8: { + arm_svdf_s8( + &scratch_ctx, &scratch_output_ctx, &svdf_params, &in_quant_params, + &out_quant_params, &input_dims, + tflite::micro::GetTensorData(input_tensor), &state_dims, + tflite::micro::GetTensorData(activation_state_tensor), + &weights_feature_dims, + tflite::micro::GetTensorData(weights_feature_tensor), + &weights_time_dims, + tflite::micro::GetTensorData(weights_time_tensor), &bias_dims, + tflite::micro::GetTensorData(bias_tensor), &output_dims, + output_data); + return kTfLiteOk; + } + + case kTfLiteInt16: { + arm_svdf_state_s16_s8( + &scratch_ctx, &scratch_output_ctx, &svdf_params, &in_quant_params, + &out_quant_params, &input_dims, + tflite::micro::GetTensorData(input_tensor), &state_dims, + tflite::micro::GetTensorData(activation_state_tensor), + &weights_feature_dims, + tflite::micro::GetTensorData(weights_feature_tensor), + &weights_time_dims, + tflite::micro::GetTensorData(weights_time_tensor), + &bias_dims, tflite::micro::GetTensorData(bias_tensor), + &output_dims, output_data); + return kTfLiteOk; + } + + default: + MicroPrintf("Could not find matching function for type %s.", + TfLiteTypeGetName(weights_time_tensor->type)); + return kTfLiteError; + } +} + +TfLiteStatus EvalSvdf(TfLiteContext* context, TfLiteNode* node) { + auto* params = reinterpret_cast(node->builtin_data); + TFLITE_DCHECK(node->user_data != nullptr); + const OpDataSvdf& data = *(static_cast(node->user_data)); + + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kSvdfInputTensor); + const TfLiteEvalTensor* weights_feature = + tflite::micro::GetEvalInput(context, node, kSvdfWeightsFeatureTensor); + const TfLiteEvalTensor* weights_time = + tflite::micro::GetEvalInput(context, node, kSvdfWeightsTimeTensor); + const TfLiteEvalTensor* bias = + (NumInputs(node) == 5) + ? tflite::micro::GetEvalInput(context, node, kSvdfBiasTensor) + : nullptr; + TfLiteEvalTensor* activation_state = tflite::micro::GetMutableEvalInput( + context, node, kSvdfInputActivationStateTensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kSvdfOutputTensor); + + switch (weights_time->type) { + case kTfLiteFloat32: { + EvalFloatSvdfReference( + context, node, input, weights_feature, weights_time, bias, params, + data.scratch_tensor_index, activation_state, output); + return kTfLiteOk; + } + + case kTfLiteInt8: + case kTfLiteInt16: { + return EvalIntegerSVDF(context, node, input, weights_feature, + weights_time, bias, params, activation_state, + output, data); + } + + default: + MicroPrintf("Type %s not currently supported.", + TfLiteTypeGetName(weights_feature->type)); + return kTfLiteError; + } + return kTfLiteOk; +} + +TfLiteStatus EvalSvdfInt8(TfLiteContext* context, TfLiteNode* node) { + auto* params = reinterpret_cast(node->builtin_data); + TFLITE_DCHECK(node->user_data != nullptr); + const OpDataSvdf& data = *(static_cast(node->user_data)); + + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kSvdfInputTensor); + const TfLiteEvalTensor* weights_feature = + tflite::micro::GetEvalInput(context, node, kSvdfWeightsFeatureTensor); + const TfLiteEvalTensor* weights_time = + tflite::micro::GetEvalInput(context, node, kSvdfWeightsTimeTensor); + const TfLiteEvalTensor* bias = + (NumInputs(node) == 5) + ? 
tflite::micro::GetEvalInput(context, node, kSvdfBiasTensor) + : nullptr; + TfLiteEvalTensor* activation_state = tflite::micro::GetMutableEvalInput( + context, node, kSvdfInputActivationStateTensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kSvdfOutputTensor); + + TFLITE_DCHECK((weights_time->type == kTfLiteInt8) || + (weights_time->type == kTfLiteInt16)); + // Because of the TODO mentioned below, the int16 weight data type is not + // split into a seperate registration. + // TODO(#523): remove 16-bit code when no longer needed. + return EvalIntegerSVDF(context, node, input, weights_feature, weights_time, + bias, params, activation_state, output, data); +} + +} // namespace + +TfLiteRegistration Register_SVDF() { + return tflite::micro::RegisterOp(Init, PrepareSvdf, EvalSvdf); +} + +TfLiteRegistration Register_SVDF_INT8() { + return tflite::micro::RegisterOp(Init, PrepareSvdf, EvalSvdfInt8); +} + +} // namespace tflite + +#else +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/svdf.h" + +#include + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/activation_utils.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h" + +namespace tflite { +namespace { + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(OpDataSvdf)); +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + auto* params = reinterpret_cast(node->builtin_data); + TFLITE_DCHECK(node->user_data != nullptr); + const OpDataSvdf& data = *(static_cast(node->user_data)); + + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kSvdfInputTensor); + const TfLiteEvalTensor* weights_feature = + tflite::micro::GetEvalInput(context, node, kSvdfWeightsFeatureTensor); + const TfLiteEvalTensor* weights_time = + tflite::micro::GetEvalInput(context, node, kSvdfWeightsTimeTensor); + const TfLiteEvalTensor* bias = + (NumInputs(node) == 5) + ? 
tflite::micro::GetEvalInput(context, node, kSvdfBiasTensor) + : nullptr; + TfLiteEvalTensor* activation_state = tflite::micro::GetMutableEvalInput( + context, node, kSvdfInputActivationStateTensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kSvdfOutputTensor); + + switch (weights_feature->type) { + case kTfLiteFloat32: { + EvalFloatSvdfReference( + context, node, input, weights_feature, weights_time, bias, params, + data.scratch_tensor_index, activation_state, output); + return kTfLiteOk; + break; + } + + case kTfLiteInt8: { + switch (weights_time->type) { + case kTfLiteInt16: { + EvalInt16SvdfReference(context, node, input, weights_feature, + weights_time, bias, params, activation_state, + output, data); + return kTfLiteOk; + break; + } + case kTfLiteInt8: { + EvalInt8SvdfReference(context, node, input, weights_feature, + weights_time, bias, params, activation_state, + output, data); + return kTfLiteOk; + break; + } + default: + MicroPrintf("Type %s not currently supported.", + TfLiteTypeGetName(weights_time->type)); + return kTfLiteError; + } + } + + default: + MicroPrintf("Type %s not currently supported.", + TfLiteTypeGetName(weights_feature->type)); + return kTfLiteError; + } + return kTfLiteOk; +} + +} // namespace + +TfLiteRegistration Register_SVDF() { + return tflite::micro::RegisterOp(Init, PrepareSvdf, Eval); +} + +} // namespace tflite + +#endif diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/svdf.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/svdf.h new file mode 100644 index 0000000..8bc068e --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/svdf.h @@ -0,0 +1,99 @@ +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_MICRO_KERNELS_SVDF_H_ +#define TENSORFLOW_LITE_MICRO_KERNELS_SVDF_H_ + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" + +namespace tflite { + +struct OpDataSvdf { + int32_t effective_scale_1_a; + int32_t effective_scale_2_a; + // b versions of each scale are kept at int since the numbers are just the + // shift value - typically between [-32, 32]. + int effective_scale_1_b; + int effective_scale_2_b; + int scratch_tensor_index; + int scratch_output_tensor_index; + + // Cached tensor zero point values for quantized operations. + int input_zero_point; + int output_zero_point; + int activation_state_zero_point; +}; + +// Input tensors. +extern const int kSvdfInputTensor; +extern const int kSvdfWeightsFeatureTensor; +extern const int kSvdfWeightsTimeTensor; +extern const int kSvdfBiasTensor; +// This is a variable tensor, and will be modified by this op. +extern const int kSvdfInputActivationStateTensor; + +// Output tensor. 
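+// (The concrete index values, input = 0, weights_feature = 1, weights_time = 2,
+// bias = 3, activation_state = 4 and output = 0, are defined in
+// svdf_common.cpp.)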
+extern const int kSvdfOutputTensor; + +void EvalInt8SvdfReference(TfLiteContext* context, TfLiteNode* node, + const TfLiteEvalTensor* input_tensor, + const TfLiteEvalTensor* weights_feature_tensor, + const TfLiteEvalTensor* weights_time_tensor, + const TfLiteEvalTensor* bias_tensor, + const TfLiteSVDFParams* params, + TfLiteEvalTensor* activation_state_tensor, + TfLiteEvalTensor* output_tensor, + const OpDataSvdf& data); + +// TODO(#523): remove 16-bit code when no longer needed. +void EvalInt16SvdfReference(TfLiteContext* context, TfLiteNode* node, + const TfLiteEvalTensor* input_tensor, + const TfLiteEvalTensor* weights_feature_tensor, + const TfLiteEvalTensor* weights_time_tensor, + const TfLiteEvalTensor* bias_tensor, + const TfLiteSVDFParams* params, + TfLiteEvalTensor* activation_state_tensor, + TfLiteEvalTensor* output_tensor, + const OpDataSvdf& data); + +void EvalFloatSvdfReference( + TfLiteContext* context, TfLiteNode* node, const TfLiteEvalTensor* input, + const TfLiteEvalTensor* weights_feature, + const TfLiteEvalTensor* weights_time, const TfLiteEvalTensor* bias, + const TfLiteSVDFParams* params, int scratch_tensor_index, + TfLiteEvalTensor* activation_state, TfLiteEvalTensor* output); + +TfLiteStatus PrepareSvdf(TfLiteContext* context, TfLiteNode* node); + +// This is the most generic TfLiteRegistration. The actual supported types may +// still be target dependent. The only requirement is that every implementation +// (reference or optimized) must define this function. +TfLiteRegistration Register_SVDF(); + +#if defined(HEXAGON) || defined(CMSIS_NN) +TfLiteRegistration Register_SVDF_INT8(); + +#else +// Note that while this block gets used for both reference and optimized kernels +// that do not have any specialized implementations, the only goal here is to +// define fallback implementation that allow reference kernels to still be used +// from applications that call a more specific kernel variant. + +inline TfLiteRegistration Register_SVDF_INT8() { return Register_SVDF(); } + +#endif +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_KERNELS_SVDF_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/svdf_common.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/svdf_common.cpp new file mode 100644 index 0000000..bdc36b8 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/svdf_common.cpp @@ -0,0 +1,516 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/activation_utils.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/svdf.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h" + +namespace tflite { + +/** + * This version of SVDF is specific to TFLite Micro. It contains the following + * differences between the TFLite version: + * + * 1.) Scratch tensor allocation - scratch tensors must be known ahead of time + * for the Micro interpreter. + * 2.) Output dimensions - the TFLite version determines output size and runtime + * and resizes the output tensor. Micro runtime does not support tensor + * resizing. + */ + +const int kSvdfInputTensor = 0; +const int kSvdfWeightsFeatureTensor = 1; +const int kSvdfWeightsTimeTensor = 2; +const int kSvdfBiasTensor = 3; +const int kSvdfInputActivationStateTensor = + 4; // This is a variable tensor, and will be modified by this op. +const int kSvdfOutputTensor = 0; + +template +void EvalIntegerSvdfReference(TfLiteContext* context, TfLiteNode* node, + const TfLiteEvalTensor* input_tensor, + const TfLiteEvalTensor* weights_feature_tensor, + const TfLiteEvalTensor* weights_time_tensor, + const TfLiteEvalTensor* bias_tensor, + const TfLiteSVDFParams* params, + TfLiteEvalTensor* activation_state_tensor, + TfLiteEvalTensor* output_tensor, + const OpDataSvdf& data) { + const int n_rank = params->rank; + const int n_batch = input_tensor->dims->data[0]; + const int n_input = input_tensor->dims->data[1]; + const int n_filter = weights_feature_tensor->dims->data[0]; + const int n_unit = n_filter / n_rank; + const int n_memory = weights_time_tensor->dims->data[1]; + + TFLITE_DCHECK(context != nullptr); + TFLITE_DCHECK(context->GetScratchBuffer != nullptr); + + int32_t* scratch_tensor = static_cast( + context->GetScratchBuffer(context, data.scratch_tensor_index)); + int32_t* scratch_output_tensor = static_cast( + context->GetScratchBuffer(context, data.scratch_output_tensor_index)); + + // Shift states. + T* const state_ptr = tflite::micro::GetTensorData(activation_state_tensor); + + // Left shift the activation_state. + { + T* new_state_start = state_ptr; + const T* old_state_start = state_ptr + 1; + const T* old_state_end = state_ptr + n_batch * n_filter * n_memory; + while (old_state_start != old_state_end) { + *new_state_start++ = *old_state_start++; + } + } + + // Note: no need to clear the latest activation, matmul is not accumulative. + + // Feature matmul. 
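+  // Roughly, the block below computes matmul(input, weights_feature) in
+  // integer arithmetic and writes the result into the rightmost column of the
+  // activation state (the slot freed by the left shift above). For each batch
+  // b and filter row r:
+  //
+  //   acc = sum_c weights_feature[r][c] * (input[b][c] - input_zero_point)
+  //   acc = MultiplyByQuantizedMultiplier(acc, effective_scale_1_a,
+  //                                       effective_scale_1_b)
+  //   state[b][r][n_memory - 1] = clamp(acc) + activation_state_zero_point
+  //
+  // (Illustrative pseudocode only; the exact loops follow.)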
+ { + T* state = tflite::micro::GetTensorData(activation_state_tensor); + const int8_t* input = tflite::micro::GetTensorData(input_tensor); + const int8_t* weight_feature = + tflite::micro::GetTensorData(weights_feature_tensor); + const int32_t output_max = std::numeric_limits::max(); + const int32_t output_min = std::numeric_limits::min(); + T* result_in_batch = state + (n_memory - 1); + for (int b = 0; b < n_batch; b++) { + const int8_t* matrix_ptr = weight_feature; + for (int r = 0; r < n_filter; r++) { + int32_t dot_prod = 0; + const int8_t* vector_in_batch = input + b * n_input; + for (int c = 0; c < n_input; c++) { + dot_prod += + *matrix_ptr++ * (*vector_in_batch++ - data.input_zero_point); + } + dot_prod = MultiplyByQuantizedMultiplier( + dot_prod, data.effective_scale_1_a, data.effective_scale_1_b); + dot_prod = std::min(std::max(output_min, dot_prod), output_max); + // The int16 version of the op assumes a zero_point of 0. This + // code accounts for the potentially non-zero zero_point for the int8 + // version of the op. + *result_in_batch = data.activation_state_zero_point + dot_prod; + result_in_batch += n_memory; + } + } + } + + // Time. + { + for (int b = 0; b < n_batch; ++b) { + int32_t* scratch_ptr_batch = scratch_tensor + b * n_filter; + + // Perform batched vector dot product: + const T* vector1_ptr = + tflite::micro::GetTensorData(weights_time_tensor); + const T* vector2_ptr = + tflite::micro::GetTensorData(activation_state_tensor) + + b * n_memory * n_filter; + + for (int i = 0; i < n_filter; i++) { + *scratch_ptr_batch = 0; + for (int j = 0; j < n_memory; j++) { + *scratch_ptr_batch += + *vector1_ptr++ * + (*vector2_ptr++ - data.activation_state_zero_point); + } + scratch_ptr_batch++; + } + } + } + + // Reduce, add bias, rescale, activation. + { + // Add bias. + if (bias_tensor) { + // Vector batch assign: + const int32_t* bias_data = + tflite::micro::GetTensorData(bias_tensor); + for (int i = 0; i < n_batch; ++i) { + int32_t* output_ptr = scratch_output_tensor + i * n_unit; + const int32_t* bias_ptr = bias_data; + for (int j = 0; j < n_unit; ++j) { + *output_ptr++ = *bias_ptr++; + } + } + } else { + int32_t* output_ptr = scratch_output_tensor; + for (int i = 0; i < n_batch * n_unit; ++i) { + *output_ptr++ = 0; + } + } + + // Reduce. + for (int b = 0; b < n_batch; ++b) { + int32_t* output_temp_ptr = scratch_output_tensor + b * n_unit; + int32_t* scratch_ptr_batch = scratch_tensor + b * n_filter; + + // Reduction sum vector + for (int i = 0; i < n_unit; ++i) { + for (int j = 0; j < n_rank; ++j) { + output_temp_ptr[i] += *scratch_ptr_batch++; + } + } + } + + // Rescale. + const int32_t output_max = std::numeric_limits::max(); + const int32_t output_min = std::numeric_limits::min(); + for (int i = 0; i < n_batch * n_unit; ++i) { + int32_t x1 = scratch_output_tensor[i]; + int32_t x2 = MultiplyByQuantizedMultiplier(x1, data.effective_scale_2_a, + data.effective_scale_2_b); + int32_t x3 = x2 + data.output_zero_point; + int32_t x4 = std::min(std::max(output_min, x3), output_max); + tflite::micro::GetTensorData(output_tensor)[i] = + static_cast(x4); + } + } +} + +/** + * Generate two versions of the integer code. One with int16_t type for the + * time weights and the activation state, and another one with int8_t for the + * same. 
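+ *
+ * EvalInt16SvdfReference and EvalInt8SvdfReference below are thin wrappers
+ * that instantiate the EvalIntegerSvdfReference template with int16_t and
+ * int8_t respectively for the time-weights / activation-state type.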
+ */ + +void EvalInt16SvdfReference(TfLiteContext* context, TfLiteNode* node, + const TfLiteEvalTensor* input_tensor, + const TfLiteEvalTensor* weights_feature_tensor, + const TfLiteEvalTensor* weights_time_tensor, + const TfLiteEvalTensor* bias_tensor, + const TfLiteSVDFParams* params, + TfLiteEvalTensor* activation_state_tensor, + TfLiteEvalTensor* output_tensor, + const OpDataSvdf& data) { + EvalIntegerSvdfReference( + context, node, input_tensor, weights_feature_tensor, weights_time_tensor, + bias_tensor, params, activation_state_tensor, output_tensor, data); +} + +void EvalInt8SvdfReference(TfLiteContext* context, TfLiteNode* node, + const TfLiteEvalTensor* input_tensor, + const TfLiteEvalTensor* weights_feature_tensor, + const TfLiteEvalTensor* weights_time_tensor, + const TfLiteEvalTensor* bias_tensor, + const TfLiteSVDFParams* params, + TfLiteEvalTensor* activation_state_tensor, + TfLiteEvalTensor* output_tensor, + const OpDataSvdf& data) { + EvalIntegerSvdfReference( + context, node, input_tensor, weights_feature_tensor, weights_time_tensor, + bias_tensor, params, activation_state_tensor, output_tensor, data); +} + +static inline void ApplyTimeWeightsBiasAndActivation( + int batch_size, int memory_size, int num_filters, int num_units, int rank, + const float* const weights_time_ptr, const float* const bias_ptr, + TfLiteFusedActivation activation, float* const state_ptr, + float* const scratch_ptr, float* const output_ptr) { + // Compute matmul(activation_state, weights_time). + for (int b = 0; b < batch_size; ++b) { + // Perform batched vector dot product: + float* scratch_ptr_batch = scratch_ptr + b * num_filters; + const float* vector1_ptr = weights_time_ptr; + const float* vector2_ptr = state_ptr + b * memory_size * num_filters; + for (int i = 0; i < num_filters; ++i) { + *scratch_ptr_batch = 0.f; + for (int j = 0; j < memory_size; ++j) { + *scratch_ptr_batch += *vector1_ptr++ * *vector2_ptr++; + } + scratch_ptr_batch++; + } + } + + // Initialize output with bias if provided. + if (bias_ptr) { + // VectorBatchVectorAssign + for (int i = 0; i < batch_size; ++i) { + float* output_data = output_ptr + i * num_units; + const float* bias_data = bias_ptr; + for (int j = 0; j < num_units; ++j) { + *output_data++ = *bias_data++; + } + } + } else { + float* output_data = output_ptr; + for (int i = 0; i < batch_size * num_units; ++i) { + *output_data++ = 0.0f; + } + } + + // Reduction sum. + for (int b = 0; b < batch_size; ++b) { + float* output_ptr_batch = output_ptr + b * num_units; + float* scratch_ptr_batch = scratch_ptr + b * num_filters; + + // Reduction sum vector + for (int i = 0; i < num_units; ++i) { + for (int j = 0; j < rank; j++) { + output_ptr_batch[i] += *scratch_ptr_batch++; + } + } + } + + // Apply activation. 
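+  // The loop below applies the fused activation element-wise via
+  // ActivationValFloat, e.g. kTfLiteActRelu maps each value x to
+  // std::max(0.f, x), while kTfLiteActNone leaves it unchanged.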
+ for (int b = 0; b < batch_size; ++b) { + float* output_ptr_batch = output_ptr + b * num_units; + for (int i = 0; i < num_units; ++i) { + *output_ptr_batch = + tflite::ops::micro::ActivationValFloat(activation, *output_ptr_batch); + ++output_ptr_batch; + } + } +} + +void EvalFloatSvdfReference( + TfLiteContext* context, TfLiteNode* node, const TfLiteEvalTensor* input, + const TfLiteEvalTensor* weights_feature, + const TfLiteEvalTensor* weights_time, const TfLiteEvalTensor* bias, + const TfLiteSVDFParams* params, int scratch_tensor_index, + TfLiteEvalTensor* activation_state, TfLiteEvalTensor* output) { + const int rank = params->rank; + const int batch_size = input->dims->data[0]; + const int input_size = input->dims->data[1]; + const int num_filters = weights_feature->dims->data[0]; + const int num_units = num_filters / rank; + const int memory_size = weights_time->dims->data[1]; + + const float* weights_feature_ptr = + tflite::micro::GetTensorData(weights_feature); + const float* weights_time_ptr = + tflite::micro::GetTensorData(weights_time); + const float* bias_ptr = tflite::micro::GetTensorData(bias); + const float* input_ptr = tflite::micro::GetTensorData(input); + + float* state_ptr = tflite::micro::GetTensorData(activation_state); + + TFLITE_DCHECK(context != nullptr); + TFLITE_DCHECK(context->GetScratchBuffer != nullptr); + + float* scratch_ptr = static_cast( + context->GetScratchBuffer(context, scratch_tensor_index)); + + float* output_ptr = tflite::micro::GetTensorData(output); + + // Left shift the activation_state. + { + float* new_state_start = state_ptr; + const float* old_state_start = state_ptr + 1; + const float* old_state_end = + state_ptr + batch_size * num_filters * memory_size; + while (old_state_start != old_state_end) { + *new_state_start++ = *old_state_start++; + } + } + + // Note: no need to clear the latest activation, matmul is not accumulative. + + // Compute conv1d(inputs, weights_feature). + // The activation_state's rightmost column is used to save current cycle + // activation. This is achieved by starting at state_ptr[memory_size - 1] and + // having the stride equal to memory_size. 
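+  // For example, with batch_size = 1, num_filters = 4 and memory_size = 3 the
+  // activation state is laid out as
+  //
+  //   [f0:m0 f0:m1 f0:m2 | f1:m0 f1:m1 f1:m2 | f2:m0 ... | f3:m0 ...]
+  //
+  // so writing at offset (memory_size - 1) with stride memory_size fills the
+  // newest slot of every filter. (Numbers chosen purely for illustration.)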
+ + // Perform batched matrix vector multiply operation: + { + const float* matrix = weights_feature_ptr; + const float* vector = input_ptr; + float* result = &state_ptr[memory_size - 1]; + float* result_in_batch = result; + for (int i = 0; i < batch_size; ++i) { + const float* matrix_ptr = matrix; + for (int j = 0; j < num_filters; ++j) { + float dot_prod = 0.0f; + const float* vector_in_batch = vector + i * input_size; + for (int k = 0; k < input_size; ++k) { + dot_prod += *matrix_ptr++ * *vector_in_batch++; + } + *result_in_batch = dot_prod; + result_in_batch += memory_size; + } + } + } + + ApplyTimeWeightsBiasAndActivation( + batch_size, memory_size, num_filters, num_units, rank, weights_time_ptr, + bias_ptr, params->activation, state_ptr, scratch_ptr, output_ptr); +} + +TfLiteStatus PrepareSvdf(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->builtin_data != nullptr); + + const auto* params = static_cast(node->builtin_data); + + MicroContext* micro_context = GetMicroContext(context); + + // Validate Tensor Inputs (dtype depends on quantization): + // [0] = Input, {2, batch_size, input_size} + // [1] = Weights Feature, {2, num_filters, input_size} + // [2] = Weights Time, {2, num_filters, memory_size} + // [3] = Bias (optional), {1, num_units} + // [4] = Activation State (variable), + // {2, batch_size, memory_size * num_filters} + TfLiteTensor* input = + micro_context->AllocateTempInputTensor(node, kSvdfInputTensor); + TF_LITE_ENSURE(context, input != nullptr); + TfLiteTensor* weights_feature = + micro_context->AllocateTempInputTensor(node, kSvdfWeightsFeatureTensor); + TF_LITE_ENSURE(context, weights_feature != nullptr); + TfLiteTensor* weights_time = + micro_context->AllocateTempInputTensor(node, kSvdfWeightsTimeTensor); + TF_LITE_ENSURE(context, weights_time != nullptr); + TfLiteTensor* bias = + micro_context->AllocateTempInputTensor(node, kSvdfBiasTensor); + TfLiteTensor* activation_state = micro_context->AllocateTempInputTensor( + node, kSvdfInputActivationStateTensor); + TF_LITE_ENSURE(context, activation_state != nullptr); + + // Define input constants based on input tensor definition above: + const int rank = params->rank; + const int input_size = input->dims->data[1]; + const int batch_size = input->dims->data[0]; + const int num_filters = weights_feature->dims->data[0]; + TF_LITE_ENSURE_EQ(context, num_filters % rank, 0); + const int num_units = num_filters / rank; + const int memory_size = weights_time->dims->data[1]; + + // Validate Input Tensor: + TF_LITE_ENSURE(context, + input->type == kTfLiteFloat32 || input->type == kTfLiteInt8); + TF_LITE_ENSURE_EQ(context, NumDimensions(input), 2); + + // Validate Tensor Output: + // [0] = float/int8_t, {2, batch_size, num_units} + TF_LITE_ENSURE_EQ(context, node->outputs->size, 1); + TfLiteTensor* output = + micro_context->AllocateTempOutputTensor(node, kSvdfOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); + TF_LITE_ENSURE_EQ(context, NumDimensions(output), 2); + TF_LITE_ENSURE_EQ(context, output->dims->data[0], batch_size); + TF_LITE_ENSURE_EQ(context, output->dims->data[1], num_units); + + // Validate Weights Feature Input Tensor: + TF_LITE_ENSURE_EQ(context, NumDimensions(weights_feature), 2); + TF_LITE_ENSURE_EQ(context, weights_feature->dims->data[1], input_size); + + // Validate Weights Time Input Tensor: + TF_LITE_ENSURE_EQ(context, NumDimensions(weights_time), 2); + TF_LITE_ENSURE_EQ(context, weights_time->dims->data[0], num_filters); + TF_LITE_ENSURE_EQ(context, weights_time->dims->data[1], 
memory_size); + + // Validate Optional Bias Input Tensor: + if (bias != nullptr) { + TF_LITE_ENSURE_EQ(context, bias->dims->data[0], num_units); + } + + // Validate Activation State Input Tensor: + TF_LITE_ENSURE_EQ(context, NumDimensions(activation_state), 2); + TF_LITE_ENSURE_EQ(context, activation_state->dims->data[0], batch_size); + TF_LITE_ENSURE_EQ(context, activation_state->dims->data[1], + memory_size * num_filters); + // Since is_variable is not part of TFLiteEvalTensor, check is_variable here. + TF_LITE_ENSURE_EQ(context, activation_state->is_variable, true); + + TF_LITE_ENSURE_EQ(context, node->inputs->size, 5); + + TFLITE_DCHECK(node->user_data != nullptr); + OpDataSvdf* data = static_cast(node->user_data); + + if (input->type == kTfLiteInt8) { + TF_LITE_ENSURE_EQ(context, weights_feature->type, kTfLiteInt8); + TF_LITE_ENSURE(context, (weights_time->type == kTfLiteInt16) || + (weights_time->type == kTfLiteInt8)); + TF_LITE_ENSURE(context, (activation_state->type == kTfLiteInt16) || + (activation_state->type == kTfLiteInt8)); + if (bias != nullptr) { + TF_LITE_ENSURE_EQ(context, bias->type, kTfLiteInt32); + } + + TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteInt8); + + const double effective_scale_1 = + static_cast(input->params.scale) * + static_cast(weights_feature->params.scale) / + static_cast(activation_state->params.scale); + const double effective_scale_2 = + static_cast(activation_state->params.scale) * + static_cast(weights_time->params.scale) / + static_cast(output->params.scale); + + // TODO(b/162018098): Use TF_LITE_ENSURE_NEAR when it is ready. + TF_LITE_ENSURE( + context, + std::abs(static_cast(bias->params.scale) - + (static_cast(activation_state->params.scale) * + static_cast(weights_time->params.scale))) < 1e-5); + + QuantizeMultiplier(effective_scale_1, &(data->effective_scale_1_a), + &(data->effective_scale_1_b)); + QuantizeMultiplier(effective_scale_2, &(data->effective_scale_2_a), + &(data->effective_scale_2_b)); + + data->input_zero_point = input->params.zero_point; + data->output_zero_point = output->params.zero_point; + data->activation_state_zero_point = activation_state->params.zero_point; + + TFLITE_DCHECK(context->RequestScratchBufferInArena != nullptr); + + const TfLiteStatus scratch_status = context->RequestScratchBufferInArena( + context, batch_size * num_filters * sizeof(int32_t), + &(data->scratch_tensor_index)); + TF_LITE_ENSURE_OK(context, scratch_status); + + const TfLiteStatus scratch_output_status = + context->RequestScratchBufferInArena( + context, batch_size * num_units * sizeof(int32_t), + &(data->scratch_output_tensor_index)); + TF_LITE_ENSURE_OK(context, scratch_output_status); + } else { + TF_LITE_ENSURE_EQ(context, weights_feature->type, kTfLiteFloat32); + TF_LITE_ENSURE_EQ(context, weights_time->type, kTfLiteFloat32); + TF_LITE_ENSURE_EQ(context, activation_state->type, kTfLiteFloat32); + if (bias != nullptr) { + TF_LITE_ENSURE_EQ(context, bias->type, kTfLiteFloat32); + } + TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteFloat32); + + TFLITE_DCHECK(context->RequestScratchBufferInArena != nullptr); + const TfLiteStatus scratch_status = context->RequestScratchBufferInArena( + context, batch_size * num_filters * sizeof(float), + &(data->scratch_tensor_index)); + TF_LITE_ENSURE_OK(context, scratch_status); + } + + micro_context->DeallocateTempTfLiteTensor(input); + micro_context->DeallocateTempTfLiteTensor(weights_feature); + micro_context->DeallocateTempTfLiteTensor(weights_time); + 
micro_context->DeallocateTempTfLiteTensor(activation_state); + micro_context->DeallocateTempTfLiteTensor(output); + micro_context->DeallocateTempTfLiteTensor(bias); + return kTfLiteOk; +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/tanh.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/tanh.cpp new file mode 100644 index 0000000..2ae32b6 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/tanh.cpp @@ -0,0 +1,204 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h" + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/tanh.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h" + +namespace tflite { +namespace ops { +namespace micro { +namespace activations { +namespace { +constexpr int kInputTensor = 0; +constexpr int kOutputTensor = 0; + +struct OpData { + int32_t input_zero_point; + int32_t input_range_radius; + int32_t input_multiplier; + int input_left_shift; +}; + +void* TanhInit(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(OpData)); +} + +TfLiteStatus CalculateArithmeticOpData(TfLiteContext* context, TfLiteNode* node, + OpData* data) { + MicroContext* micro_context = GetMicroContext(context); + TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + TfLiteTensor* input = + micro_context->AllocateTempInputTensor(node, kInputTensor); + TF_LITE_ENSURE(context, input != nullptr); + TfLiteTensor* output = + micro_context->AllocateTempOutputTensor(node, kOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); + + TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type); + + if (input->type == kTfLiteInt8) { + static constexpr int kInputIntegerBits = 4; + const double input_real_multiplier = + static_cast(input->params.scale) * + static_cast(1 << (31 - kInputIntegerBits)); + + const double q = std::frexp(input_real_multiplier, &data->input_left_shift); + data->input_multiplier = static_cast(TfLiteRound(q * (1ll << 31))); + + data->input_range_radius = + 
CalculateInputRadius(kInputIntegerBits, data->input_left_shift, 31); + } + + if (input->type == kTfLiteInt16) { + static constexpr int kInputIntegerBits = 3; + static constexpr int kOutputFractionalBits = 15; + + // These operators are implemented in fixed-point arithmetic, + // which intrinsically wants symmetric ranges (zero_point==0) + // and power-of-two scales (power-of-two is abbreviated below as POT). + // While more general support would be possible by means of rescaling, + // that would add some overhead and some loss of accuracy and wouldn't + // be used at the moment as current quantized LSTM applications are + // happy with symmetric, power-of-two-scales quantization. So we just + // implement that narrow case only for now. + + TF_LITE_ENSURE_EQ(context, input->params.zero_point, 0); + TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0); + + int input_scale_log2_rounded; + bool param_scale_pot = + CheckedLog2(input->params.scale, &input_scale_log2_rounded); + + data->input_left_shift = + (15 - kInputIntegerBits) + input_scale_log2_rounded; + param_scale_pot &= + (data->input_left_shift == 0 || data->input_left_shift == 1); + + if (param_scale_pot) { + data->input_multiplier = 0; + } else { + // Calculate multiplier to change input scale to 1/(3*4096) + // as required by the table lookup. + // The number 3.0 in the multiplier comes from here, + // because the interval is [-10.7, 10.7] instead of [-8, 8]. + // So, in this scaling +/-2^17 represents +/-10.7. + + double multiplier = + static_cast(input->params.scale) * 4096.0 * 3.0; + data->input_left_shift = 0; + + while (multiplier <= 32767.0 / 2.0 && data->input_left_shift <= 30) { + data->input_left_shift++; + multiplier = multiplier * 2.0; + } + + data->input_multiplier = static_cast(multiplier); + } + + int output_scale_log2_rounded; + TF_LITE_ENSURE( + context, CheckedLog2(output->params.scale, &output_scale_log2_rounded)); + TF_LITE_ENSURE_EQ(context, output_scale_log2_rounded, + -kOutputFractionalBits); + } + + micro_context->DeallocateTempTfLiteTensor(input); + micro_context->DeallocateTempTfLiteTensor(output); + return kTfLiteOk; +} + +TfLiteStatus TanhPrepare(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + + OpData* data = static_cast(node->user_data); + + MicroContext* micro_context = GetMicroContext(context); + TfLiteTensor* input = + micro_context->AllocateTempInputTensor(node, kInputTensor); + TF_LITE_ENSURE(context, input != nullptr); + data->input_zero_point = input->params.zero_point; + TF_LITE_ENSURE_OK(context, CalculateArithmeticOpData(context, node, data)); + + micro_context->DeallocateTempTfLiteTensor(input); + return kTfLiteOk; +} + +} // namespace + +TfLiteStatus TanhEval(TfLiteContext* context, TfLiteNode* node) { + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + + TFLITE_DCHECK(node->user_data != nullptr); + const OpData& data = *(static_cast(node->user_data)); + + switch (input->type) { + case kTfLiteFloat32: { + reference_ops::Tanh(tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + return kTfLiteOk; + } break; + case kTfLiteInt16: { + reference_integer_ops::Tanh( + data.input_multiplier, data.input_left_shift, + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + 
tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + return kTfLiteOk; + } break; + case kTfLiteInt8: { + reference_integer_ops::Tanh( + data.input_zero_point, data.input_range_radius, data.input_multiplier, + data.input_left_shift, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + return kTfLiteOk; + } break; + default: + MicroPrintf("Input %s, output %s not supported.", + TfLiteTypeGetName(input->type), + TfLiteTypeGetName(output->type), context); + return kTfLiteError; + } +} + +} // namespace activations + +TfLiteRegistration Register_TANH() { + return tflite::micro::RegisterOp( + activations::TanhInit, activations::TanhPrepare, activations::TanhEval); +} +} // namespace micro +} // namespace ops +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/transpose.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/transpose.cpp new file mode 100644 index 0000000..c0bd6e4 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/transpose.cpp @@ -0,0 +1,122 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/transpose.h" + +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +namespace tflite { +namespace { + +constexpr int kInputTensor = 0; +constexpr int kPermTensor = 1; +constexpr int kOutputTensor = 0; + +struct TransposeContext { + TransposeContext(TfLiteContext* context, TfLiteNode* node) { + micro_context = GetMicroContext(context); + input = micro_context->AllocateTempInputTensor(node, kInputTensor); + perm = micro_context->AllocateTempInputTensor(node, kPermTensor); + output = micro_context->AllocateTempOutputTensor(node, kOutputTensor); + } + ~TransposeContext() { + micro_context->DeallocateTempTfLiteTensor(input); + micro_context->DeallocateTempTfLiteTensor(perm); + micro_context->DeallocateTempTfLiteTensor(output); + } + MicroContext* micro_context; + TfLiteTensor* input; + TfLiteTensor* perm; + TfLiteTensor* output; +}; + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TF_LITE_ENSURE_EQ(context, NumInputs(node), 2); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + + TransposeContext op_context(context, node); + + // Ensure validity of input tensor. 
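+  // The checks below require an input of rank 1-5 whose element type matches
+  // the output; the permutation tensor is then validated as a 1-D tensor of
+  // length equal to the input rank with every entry in [0, rank).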
+ TF_LITE_ENSURE_MSG(context, NumDimensions(op_context.input) <= 5, + "Transpose op only supports 1D-5D input arrays."); + TF_LITE_ENSURE_TYPES_EQ(context, op_context.input->type, + op_context.output->type); + + int dims = NumDimensions(op_context.input); + const int32_t* perm_data = GetTensorData(op_context.perm); + + // Ensure validity of the permutations tensor as a 1D tensor. + TF_LITE_ENSURE_EQ(context, NumDimensions(op_context.perm), 1); + TF_LITE_ENSURE_EQ(context, op_context.perm->dims->data[0], dims); + for (int idx = 0; idx < dims; ++idx) { + TF_LITE_ENSURE_MSG(context, (perm_data[idx] >= 0 && perm_data[idx] < dims), + "Transpose op permutations array is out of bounds."); + } + + return kTfLiteOk; +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + const TfLiteEvalTensor* perm_tensor = + tflite::micro::GetEvalInput(context, node, kPermTensor); + const int32_t* perm_data = perm_tensor->data.i32; + const int size = perm_tensor->dims->data[0]; + TransposeParams params; + params.perm_count = size; + for (int i = 0; i < size; ++i) { + params.perm[i] = perm_data[i]; + } + + // Transpose kernel only does rearranging values not numeric evaluations + // on each cell. It's safe to implement per size of scalar type and this + // trick keeps the total code size in a reasonable range. + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + switch (input->type) { + case kTfLiteFloat32: + reference_ops::Transpose(params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + break; + case kTfLiteInt8: + reference_ops::Transpose(params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + break; + default: + MicroPrintf( + "Type %s is currently not supported by Transpose. 
" + "Only float32 and int8 is supported", + TfLiteTypeGetName(input->type)); + return kTfLiteError; + } + + return kTfLiteOk; +} + +} // namespace + +TfLiteRegistration Register_TRANSPOSE() { + return tflite::micro::RegisterOp(nullptr, Prepare, Eval); +} +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/transpose_conv.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/transpose_conv.cpp new file mode 100644 index 0000000..411d4e0 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/transpose_conv.cpp @@ -0,0 +1,708 @@ +// Patched by Edge Impulse to include reference and hardware-accelerated kernels +#include "../../../../classifier/ei_classifier_config.h" +#if 0 == 1 +/* noop */ +#elif EI_CLASSIFIER_TFLITE_ENABLE_SILABS_MVP == 1 + +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +using int8 = int8_t; +using int16 = int16_t; +using int32 = int32_t; +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/transpose_conv.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/transpose_conv.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/padding.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" + +#include "sl_mvp_ml_transpose_conv2d.h" + +namespace tflite { +namespace sl { +namespace transpose_conv2d { + +constexpr int kFilterTensor = 1; +constexpr int kInputTensor = 2; +constexpr int kBiasTensor = 3; +constexpr int kOutputTensor = 0; + +// TransposeConv is quantized along dimension 0 of filter tensor. +// https://www.tensorflow.org/lite/performance/quantization_spec +constexpr int kConvQuantizedDimension = 0; + +enum op_support { kMvp, kTFLMrefF32, kTFLMrefI8 }; + +struct OpData { + op_support supported; + int scratch_buffer_index; + sli_mvp_ml_transpose_conv2d_s8_params_t op_params; + + // Per channel output multiplier and shift. + int32_t *per_channel_output_multiplier; + int32_t *per_channel_output_shift; +}; + +inline float16_t normalize_fp16(float f) +{ + return (float16_t)std::min(std::max(f, SLI_MVP_FP16_MIN), SLI_MVP_FP16_MAX); +} + +inline PaddingType RuntimePaddingType(TfLitePadding padding) +{ + switch (padding) { + case TfLitePadding::kTfLitePaddingSame: + return PaddingType::kSame; + case TfLitePadding::kTfLitePaddingValid: + return PaddingType::kValid; + case TfLitePadding::kTfLitePaddingUnknown: + default: + return PaddingType::kNone; + } +} + +TfLiteStatus PopulateConvolutionQuantizationParams( + TfLiteContext* context, + const TfLiteTensor* input, + const TfLiteTensor* filter, + TfLiteTensor* output, + const TfLiteFusedActivation& activation, + int32_t* output_activation_min, int32_t* output_activation_max, + float16_t* per_channel_scalers, int num_channels, float accumulator_multipler) +{ + auto affine_quantization = + reinterpret_cast(filter->quantization.params); + + // Populate multiplier and shift using affine quantization. + const float input_scale = input->params.scale; + const float output_scale = output->params.scale; + const float* filter_scales = affine_quantization->scale->data; + + for (int i = 0; i < num_channels; ++i) { + // If per-tensor quantization parameter is specified, broadcast it along the + // quantization dimension. 
+ const float filter_scale = filter_scales[i]; + const float effective_output_scale = (input_scale * filter_scale) / output_scale; + const float acc_output_scale = effective_output_scale * accumulator_multipler; + per_channel_scalers[i] = normalize_fp16(acc_output_scale); + } + + TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized( + context, activation, output, output_activation_min, + output_activation_max)); + + return kTfLiteOk; +} + +void *Init(TfLiteContext* context, const char* buffer, size_t length) +{ + (void)buffer; + (void)length; + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(OpData)); +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) +{ + int scratch_buffer_size = 0; + + TFLITE_DCHECK(node->user_data != nullptr); + TFLITE_DCHECK(node->builtin_data != nullptr); + + OpData* data = static_cast(node->user_data); + const auto params = static_cast(node->builtin_data); + + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor); + const TfLiteTensor* input = GetInput(context, node, kInputTensor); + const TfLiteTensor* filter = GetInput(context, node, kFilterTensor); + TF_LITE_ENSURE(context, input != nullptr); + TF_LITE_ENSURE(context, output != nullptr); + TF_LITE_ENSURE(context, filter != nullptr); + + data->op_params.batches = input->dims->data[0]; + data->op_params.in_channels = input->dims->data[3]; + data->op_params.input_height = input->dims->data[1]; + data->op_params.input_width = input->dims->data[2]; + data->op_params.out_channels = filter->dims->data[kConvQuantizedDimension]; + data->op_params.output_height = output->dims->data[1]; + data->op_params.output_width = output->dims->data[2]; + data->op_params.filter_height = filter->dims->data[1]; + data->op_params.filter_width = filter->dims->data[2]; + data->op_params.input_offset = -input->params.zero_point; + data->op_params.output_offset = output->params.zero_point; + data->op_params.stride_height = params->stride_height; + data->op_params.stride_width = params->stride_width; + data->op_params.padding = params->padding == kTfLitePaddingSame; + + int dummy_height, dummy_width; + const auto padding = ComputePaddingHeightWidth( + params->stride_height, params->stride_width, + 1, 1, //dilation_rate_height and dilation_rate_width + data->op_params.input_height, data->op_params.input_width, + data->op_params.filter_height, data->op_params.filter_width, + params->padding, + &dummy_height, &dummy_width); + + data->op_params.pad_height = padding.height; + data->op_params.pad_width = padding.width; + + const int num_channels = data->op_params.out_channels; + + if (input->type == kTfLiteInt8) { + if (sli_mvp_ml_transpose_conv2d_s8_is_supported(&data->op_params)) { + data->supported = kMvp; + scratch_buffer_size = GetTensorShape(output).FlatSize() * sizeof(float16_t); + + float16_t *bias_data = static_cast(context->AllocatePersistentBuffer( + context, num_channels * sizeof(float16_t))); + if(bias != nullptr) { + data->op_params.bias = bias_data; + int32_t i32_bias; + for(int i = 0; i < num_channels; i++) { + i32_bias = bias->data.i32[i]; + bias_data[i] = float16_t(i32_bias * SLI_MVP_ACCUMULATOR_SCALER); + } + } else { + data->op_params.bias = nullptr; + } + + float16_t *scaler_data = static_cast(context->AllocatePersistentBuffer( + context, num_channels * sizeof(float16_t))); + data->op_params.output_scaler = scaler_data; + 
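// Added note (not in the original source): the MVP path precomputes
+      // per-channel fp16 output scalers for the accelerator here, whereas the
+      // kTFLMrefI8 fallback in the else branch below derives int32 per-channel
+      // multiplier/shift pairs for the reference kernel instead.
+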
TF_LITE_ENSURE_STATUS(PopulateConvolutionQuantizationParams( + context, input, filter, output, kTfLiteActNone, + reinterpret_cast(&data->op_params.output_activation_min), + reinterpret_cast(&data->op_params.output_activation_max), + scaler_data, num_channels, SLI_MVP_ACCUMULATOR_MULTIPLIER)); + + } else { + data->supported = kTFLMrefI8; + scratch_buffer_size = GetTensorShape(output).FlatSize() * sizeof(int32_t); + data->per_channel_output_multiplier = static_cast(context->AllocatePersistentBuffer( + context, num_channels * sizeof(int32_t))); + data->per_channel_output_shift = static_cast(context->AllocatePersistentBuffer( + context, num_channels * sizeof(int32_t))); + + int32_t dummy_output_multiplier; + int dummy_output_shift; + TF_LITE_ENSURE_STATUS(tflite::PopulateConvolutionQuantizationParams( + context, input, filter, bias, output, kTfLiteActNone, + &dummy_output_multiplier, &dummy_output_shift, + reinterpret_cast(&data->op_params.output_activation_min), + reinterpret_cast(&data->op_params.output_activation_max), + data->per_channel_output_multiplier, + reinterpret_cast(data->per_channel_output_shift), + num_channels)); + } + + } else if (input->type == kTfLiteFloat32) { + data->supported = kTFLMrefF32; + } else { + TF_LITE_KERNEL_LOG(context, "Type %s not currently supported.", + TfLiteTypeGetName(input->type)); + return kTfLiteError; + } + + if(scratch_buffer_size > 0) { + TF_LITE_ENSURE_STATUS( + context->RequestScratchBufferInArena( + context, scratch_buffer_size, &data->scratch_buffer_index)); + } else { + data->scratch_buffer_index = -1; + } + + return kTfLiteOk; +} + +TfLiteStatus eval_mvp_int8(TfLiteContext* context, + OpData* data, + const TfLiteEvalTensor* input, + const TfLiteEvalTensor* filter, + TfLiteEvalTensor* output) +{ + float16_t *scratch; + if (data->scratch_buffer_index > -1) { + scratch = reinterpret_cast(context->GetScratchBuffer(context, data->scratch_buffer_index)); + } else { + return kTfLiteError; + } + + data->op_params.scratch_buffer = scratch; + data->op_params.input = tflite::micro::GetTensorData(input); + data->op_params.output = tflite::micro::GetTensorData(output); + data->op_params.filter = tflite::micro::GetTensorData(filter); + + TF_LITE_ENSURE_EQ(context, SL_STATUS_OK, sli_mvp_ml_transpose_conv2d_s8(&data->op_params)); + + return kTfLiteOk; +} + +TfLiteStatus eval_tflm_int8(TfLiteContext* context, + OpData* data, + const TfLiteEvalTensor* input, + const TfLiteEvalTensor* filter, + const TfLiteEvalTensor* bias, + TfLiteEvalTensor* output) +{ + int32_t *scratch; + ConvParams op_params; + + if (data->scratch_buffer_index > -1) { + scratch = reinterpret_cast(context->GetScratchBuffer(context, data->scratch_buffer_index)); + } else { + return kTfLiteError; + } + + op_params.input_offset = data->op_params.input_offset; + op_params.output_offset = data->op_params.output_offset; + op_params.stride_height = data->op_params.stride_height; + op_params.stride_width = data->op_params.stride_width; + op_params.padding_values.height = data->op_params.pad_height; + op_params.padding_values.width = data->op_params.pad_width; + + reference_integer_ops::TransposeConv(op_params, + data->per_channel_output_multiplier, + data->per_channel_output_shift, + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorData(const_cast(bias)), + tflite::micro::GetTensorShape(output), + 
tflite::micro::GetTensorData(output), + RuntimeShape(), + nullptr, + scratch); + return kTfLiteOk; +} + +TfLiteStatus eval_float(TfLiteConvParams* params, + const OpData* data, + const TfLiteEvalTensor* input, + const TfLiteEvalTensor* filter, + const TfLiteEvalTensor* bias, + TfLiteEvalTensor* output) +{ + ConvParams op_params; + + op_params.padding_type = RuntimePaddingType(params->padding); + op_params.padding_values.width = data->op_params.pad_width; + op_params.padding_values.height = data->op_params.pad_height; + op_params.stride_width = data->op_params.stride_width; + op_params.stride_height = data->op_params.stride_height; + + reference_ops::TransposeConv(op_params, + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorData(const_cast(bias)), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output), + RuntimeShape(), + nullptr); + return kTfLiteOk; +} + +TfLiteStatus Invoke(TfLiteContext* context, TfLiteNode* node) +{ + TfLiteStatus status = kTfLiteError; + + TFLITE_DCHECK(node->user_data != nullptr); + TFLITE_DCHECK(node->builtin_data != nullptr); + + auto* params = reinterpret_cast(node->builtin_data); + OpData* data = static_cast(node->user_data); + + const auto input = tflite::micro::GetEvalInput(context, node, kInputTensor); + const auto filter = tflite::micro::GetEvalInput(context, node, kFilterTensor); + const auto bias = NumInputs(node) == 4 + ? tflite::micro::GetEvalInput(context, node, kBiasTensor) + : nullptr; + auto output = tflite::micro::GetEvalOutput(context, node, kOutputTensor); + + if (data->supported == kMvp) { + status = eval_mvp_int8(context, data, input, filter, output); + + } else if (data->supported == kTFLMrefI8) { + status = eval_tflm_int8(context, data, input, filter, bias, output); + + } else if (data->supported == kTFLMrefF32) { + status = eval_float(params, data, input, filter, bias, output); + } + + return status; +} + +} // namespace transpose_conv2d +} // namespace sl + +TfLiteRegistration Register_TRANSPOSE_CONV() { + return {/*init=*/sl::transpose_conv2d::Init, + /*free=*/nullptr, + /*prepare=*/sl::transpose_conv2d::Prepare, + /*invoke=*/sl::transpose_conv2d::Invoke, + /*profiling_string=*/nullptr, + /*builtin_code=*/0, + /*custom_name=*/nullptr, + /*version=*/0}; +} + +} // namespace tflite + +#else +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/transpose_conv.h" + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/transpose_conv.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/padding.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +namespace tflite { +namespace { + +// For the TfLite transpose_conv implementation, input tensor 0 corresponds to +// the OutputShapeTensor. However, since TFLM does not support dynamic tensors, +// the TFLM implementation ignores input tensor 0 and the only inputs we care +// about are kFilterTensor, kInputTensor and kBiasTensor. +constexpr int kFilterTensor = 1; +constexpr int kInputTensor = 2; +constexpr int kBiasTensor = 3; +constexpr int kOutputTensor = 0; + +// Conv is quantized along dimension 0: +// https://www.tensorflow.org/lite/performance/quantization_spec +constexpr int kConvQuantizedDimension = 0; + +struct OpData { + ConvParams params; + + // A scratch buffer is required for quantized implementations. + int scratch_buffer_index; + + // TODO(b/192090531): Remove this once all 8x16 transpose conv models use + // 64-bit biases. + int bias_converted_buffer_index; + + // Multiplier and shift arrays are required for the int8 implementation. + int32_t* per_channel_output_multiplier; + int32_t* per_channel_output_shift; +}; + +inline PaddingType RuntimePaddingType(TfLitePadding padding) { + switch (padding) { + case TfLitePadding::kTfLitePaddingSame: + return PaddingType::kSame; + case TfLitePadding::kTfLitePaddingValid: + return PaddingType::kValid; + case TfLitePadding::kTfLitePaddingUnknown: + default: + return PaddingType::kNone; + } +} + +TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node, + const TfLiteTransposeConvParams* params, int width, + int height, int filter_width, int filter_height, + const TfLiteType data_type, OpData* data) { + bool has_bias = node->inputs->size == 4; + // Check number of inputs/outputs + TF_LITE_ENSURE(context, has_bias || node->inputs->size == 3); + TF_LITE_ENSURE_EQ(context, node->outputs->size, 1); + + // Matching GetWindowedOutputSize in TensorFlow. + auto padding = params->padding; + int unused_output_width; + int unused_output_height; + TfLitePaddingValues padding_values = ComputePaddingHeightWidth( + params->stride_height, params->stride_width, 1, + 1, // Dilation height and width are always 1 for transpose_conv. + height, width, filter_height, filter_width, padding, + &unused_output_height, &unused_output_width); + + data->params.padding_type = RuntimePaddingType(padding); + data->params.padding_values.width = padding_values.width; + data->params.padding_values.height = padding_values.height; + + // Note that quantized inference requires that all tensors have their + // parameters set. This is usually done during quantized training. 
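+  // Worked example (illustrative values, not from the original source): for a
+  // channel with input_scale = 0.02, filter_scale = 0.005 and
+  // output_scale = 0.01, the real multiplier is 0.02 * 0.005 / 0.01 = 0.01.
+  // The helper below stores it in fixed point as roughly 0.64 * 2^-6, i.e. a
+  // Q31 multiplier of roughly 0.64 * 2^31 with a shift of -6.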
+ if (data_type != kTfLiteFloat32) { + MicroContext* micro_context = GetMicroContext(context); + + TfLiteTensor* input = + micro_context->AllocateTempInputTensor(node, kInputTensor); + TF_LITE_ENSURE(context, input != nullptr); + TfLiteTensor* filter = + micro_context->AllocateTempInputTensor(node, kFilterTensor); + TF_LITE_ENSURE(context, filter != nullptr); + TfLiteTensor* bias = + micro_context->AllocateTempInputTensor(node, kBiasTensor); + TfLiteTensor* output = + micro_context->AllocateTempOutputTensor(node, kOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); + int output_channels = filter->dims->data[kConvQuantizedDimension]; + + TF_LITE_ENSURE_STATUS(tflite::PopulateConvolutionQuantizationParams( + context, input, filter, bias, output, kTfLiteActNone, + &data->params.output_multiplier, &data->params.output_shift, + &data->params.quantized_activation_min, + &data->params.quantized_activation_max, + data->per_channel_output_multiplier, data->per_channel_output_shift, + output_channels)); + + // TODO(b/192090531): Remove this once all 8x16 transpose conv models use + // 64-bit biases. + if (input->type == kTfLiteInt16) { + TFLITE_DCHECK(filter->type == kTfLiteInt8); + TFLITE_DCHECK(output->type == kTfLiteInt16); + if (bias->type == kTfLiteInt16) { + TFLITE_DCHECK( + context->RequestScratchBufferInArena( + context, GetTensorShape(bias).FlatSize() * sizeof(std::int64_t), + &(data->bias_converted_buffer_index)) == kTfLiteOk); + } + } + + micro_context->DeallocateTempTfLiteTensor(input); + micro_context->DeallocateTempTfLiteTensor(filter); + micro_context->DeallocateTempTfLiteTensor(output); + if (bias != nullptr) { + micro_context->DeallocateTempTfLiteTensor(bias); + } + } + return kTfLiteOk; +} + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(OpData)); +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + TFLITE_DCHECK(node->builtin_data != nullptr); + + OpData* data = static_cast(node->user_data); + const auto params = + static_cast(node->builtin_data); + + MicroContext* micro_context = GetMicroContext(context); + + TfLiteTensor* output = + micro_context->AllocateTempOutputTensor(node, kOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); + TfLiteTensor* input = + micro_context->AllocateTempInputTensor(node, kInputTensor); + TF_LITE_ENSURE(context, input != nullptr); + TfLiteTensor* filter = + micro_context->AllocateTempInputTensor(node, kFilterTensor); + TF_LITE_ENSURE(context, filter != nullptr); + + // Get height and width of the output. + const int width = SizeOfDimension(output, 2); + const int height = SizeOfDimension(output, 1); + const int filter_width = SizeOfDimension(filter, 2); + const int filter_height = SizeOfDimension(filter, 1); + + // Dynamically allocate per-channel quantization parameters. + const int num_channels = filter->dims->data[kConvQuantizedDimension]; + data->per_channel_output_multiplier = + static_cast(context->AllocatePersistentBuffer( + context, num_channels * sizeof(int32_t))); + data->per_channel_output_shift = + static_cast(context->AllocatePersistentBuffer( + context, num_channels * sizeof(int32_t))); + + // Quantized kernels use an int32 scratch buffer. 
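+  // Sizing note (added annotation): the scratch buffer holds one accumulator
+  // per output element, e.g. a 1x14x14x8 int8 output needs
+  // 1 * 14 * 14 * 8 = 1568 int32 accumulators = 6272 bytes of arena scratch.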
+ if (input->type == kTfLiteInt8) { + TFLITE_DCHECK(context->RequestScratchBufferInArena != nullptr); + TFLITE_DCHECK(context->RequestScratchBufferInArena( + context, + GetTensorShape(output).FlatSize() * sizeof(int32_t), + &(data->scratch_buffer_index)) == kTfLiteOk); + } + + // Quantized 16x8 kernels use an int64 scratch buffer. + if (input->type == kTfLiteInt16) { + TFLITE_DCHECK(context->RequestScratchBufferInArena != nullptr); + TFLITE_DCHECK(context->RequestScratchBufferInArena( + context, + GetTensorShape(output).FlatSize() * sizeof(std::int64_t), + &(data->scratch_buffer_index)) == kTfLiteOk); + } + + // All per-channel quantized tensors need valid zero point and scale arrays. + if (input->type == kTfLiteInt8 || input->type == kTfLiteInt16) { + TF_LITE_ENSURE_EQ(context, filter->quantization.type, + kTfLiteAffineQuantization); + + const auto* affine_quantization = + static_cast(filter->quantization.params); + TF_LITE_ENSURE(context, affine_quantization); + TF_LITE_ENSURE(context, affine_quantization->scale); + TF_LITE_ENSURE(context, affine_quantization->zero_point); + + TF_LITE_ENSURE(context, + affine_quantization->scale->size == 1 || + affine_quantization->scale->size == + filter->dims->data[kConvQuantizedDimension]); + TF_LITE_ENSURE_EQ(context, affine_quantization->scale->size, + affine_quantization->zero_point->size); + } + + TF_LITE_ENSURE_STATUS(CalculateOpData(context, node, params, width, height, + filter_width, filter_height, + input->type, data)); + + // Offsets (zero points) + data->params.input_offset = -input->params.zero_point; + data->params.weights_offset = -filter->params.zero_point; + data->params.output_offset = output->params.zero_point; + + // Stride + data->params.stride_width = params->stride_width; + data->params.stride_height = params->stride_height; + + micro_context->DeallocateTempTfLiteTensor(output); + micro_context->DeallocateTempTfLiteTensor(input); + micro_context->DeallocateTempTfLiteTensor(filter); + return kTfLiteOk; +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + const TfLiteEvalTensor* filter = + tflite::micro::GetEvalInput(context, node, kFilterTensor); + const TfLiteEvalTensor* bias = + (NumInputs(node) == 4) + ? tflite::micro::GetEvalInput(context, node, kBiasTensor) + : nullptr; + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + + TFLITE_DCHECK(node->user_data != nullptr); + const OpData& data = *(static_cast(node->user_data)); + + TF_LITE_ENSURE_EQ(context, input->type, output->type); + TF_LITE_ENSURE_MSG( + context, + input->type == filter->type || + (input->type == kTfLiteInt16 && filter->type == kTfLiteInt8), + "Hybrid models are not supported on TFLite Micro."); + + switch (input->type) { // Already know in/out types are same. 
+ case kTfLiteFloat32: { + const auto& params = + *(reinterpret_cast(node->builtin_data)); + ConvParams op_params = data.params; + CalculateActivationRange(params.activation, + &op_params.float_activation_min, + &op_params.float_activation_max); + + reference_ops::TransposeConv( + op_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetOptionalTensorData(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output), + tflite::micro::GetTensorShape(nullptr), nullptr); + break; + } + case kTfLiteInt8: { + int32_t* scratch_buffer = static_cast( + context->GetScratchBuffer(context, data.scratch_buffer_index)); + reference_integer_ops::TransposeConv( + data.params, data.per_channel_output_multiplier, + data.per_channel_output_shift, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetOptionalTensorData(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output), + tflite::micro::GetTensorShape(nullptr), nullptr, scratch_buffer); + break; + } + case kTfLiteInt16: { + std::int64_t* scratch_buffer = static_cast( + context->GetScratchBuffer(context, data.scratch_buffer_index)); + // TODO(b/192090531): Remove this once all 8x16 transpose conv models use + // 64-bit biases. + if (bias != nullptr && bias->type == kTfLiteInt16) { + std::int64_t* bias_converted_buffer = + static_cast(context->GetScratchBuffer( + context, data.bias_converted_buffer_index)); + for (int i = 0; i < tflite::micro::GetTensorShape(bias).FlatSize(); + i++) { + bias_converted_buffer[i] = bias->data.i16[i]; + } + reference_integer_ops::TransposeConv( + data.params, data.per_channel_output_multiplier, + data.per_channel_output_shift, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(bias), bias_converted_buffer, + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output), + tflite::micro::GetTensorShape(nullptr), nullptr, scratch_buffer); + } else { + reference_integer_ops::TransposeConv( + data.params, data.per_channel_output_multiplier, + data.per_channel_output_shift, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetOptionalTensorData(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output), + tflite::micro::GetTensorShape(nullptr), nullptr, scratch_buffer); + } + break; + } + default: + MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type), + input->type); + return kTfLiteError; + } + return kTfLiteOk; +} + +} // namespace + +TfLiteRegistration Register_TRANSPOSE_CONV() { + return tflite::micro::RegisterOp(Init, Prepare, Eval); +} + +} // namespace tflite + +#endif diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/tree_ensemble_classifier.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/tree_ensemble_classifier.cpp new file mode 100644 index 0000000..c0f4317 --- /dev/null +++ 
b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/tree_ensemble_classifier.cpp @@ -0,0 +1,194 @@ +/* Copyright 2023 Edge Impulse Inc. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#define FLATBUFFERS_LOCALE_INDEPENDENT 0 +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/flexbuffers.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include + +#define FEATURE_TYPE float + +namespace tflite { +namespace { + +struct OpDataTree { + uint32_t num_leaf_nodes; + uint32_t num_internal_nodes; + uint32_t num_trees; + const uint16_t* nodes_modes; + const uint16_t* nodes_featureids; + const float* nodes_values; + const uint16_t* nodes_truenodeids; + const uint16_t* nodes_falsenodeids; + const float* nodes_weights; + const uint8_t* nodes_classids; + const uint16_t* tree_root_ids; + const uint8_t* buffer_t; + size_t buffer_length; +}; + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + const uint8_t* buffer_t = reinterpret_cast(buffer); + const flexbuffers::Map& m = flexbuffers::GetRoot(buffer_t, length).AsMap(); + + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + OpDataTree* data = static_cast(context->AllocatePersistentBuffer(context, sizeof(OpDataTree))); + + data->buffer_t = buffer_t; + data->buffer_length = length; + + data->num_leaf_nodes = m["num_leaf_nodes"].AsUInt32(); + data->num_internal_nodes = m["num_internal_nodes"].AsUInt32(); + data->num_trees = m["num_trees"].AsUInt32(); + + data->nodes_modes = (uint16_t*)(m["nodes_modes"].AsBlob().data()); + data->nodes_featureids = (uint16_t*)(m["nodes_featureids"].AsBlob().data()); + data->nodes_values = (float*)(m["nodes_values"].AsBlob().data()); + data->nodes_truenodeids = (uint16_t*)(m["nodes_truenodeids"].AsBlob().data()); + data->nodes_falsenodeids = (uint16_t*)(m["nodes_falsenodeids"].AsBlob().data()); + data->nodes_weights = (float*)(m["nodes_weights"].AsBlob().data()); + data->nodes_classids = (uint8_t*)(m["nodes_classids"].AsBlob().data()); + data->tree_root_ids = (uint16_t*)(m["tree_root_ids"].AsBlob().data()); + + return data; +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + + MicroContext* micro_context = GetMicroContext(context); + const OpDataTree* data = static_cast(node->user_data); + const flexbuffers::Map& m = flexbuffers::GetRoot(data->buffer_t, data->buffer_length).AsMap(); + + // The OOB checks below are very important to prevent vulnerabilities where an adversary sends + // us a malicious TFLite model, similar to: https://nvd.nist.gov/vuln/detail/CVE-2022-23560 + + int num_nodes = data->num_leaf_nodes + data->num_internal_nodes; + + // Check that the tree root ids are valid. 
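+  // Added annotation (layout inferred from the checks and Eval below): nodes
+  // form one flat index space in which ids [0, num_internal_nodes) are
+  // decision nodes and ids [num_internal_nodes, num_nodes) are leaves. For
+  // example, with 3 internal nodes and 4 leaves, a child id of 5 refers to
+  // leaf 5 - 3 = 2 in nodes_weights / nodes_classids. The checks below keep
+  // every stored id inside this range.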
+ for (uint32_t i = 0; i < data->num_trees; i++) { + TF_LITE_ENSURE_EQ(context, data->tree_root_ids[i] < num_nodes, true); + TF_LITE_ENSURE_EQ(context, data->tree_root_ids[i] >= 0, true); + } + + // Check that all node indices are valid + for (uint32_t i = 0; i < data->num_internal_nodes; i++) { + TF_LITE_ENSURE_EQ(context, data->nodes_truenodeids[i] < num_nodes, true); + TF_LITE_ENSURE_EQ(context, data->nodes_truenodeids[i] >= 0, true); + TF_LITE_ENSURE_EQ(context, data->nodes_falsenodeids[i] < num_nodes, true); + TF_LITE_ENSURE_EQ(context, data->nodes_falsenodeids[i] >= 0, true); + } + + // Check all node arrays have the same length + TF_LITE_ENSURE_EQ(context, data->num_internal_nodes, m["nodes_featureids"].AsBlob().size()); + TF_LITE_ENSURE_EQ(context, data->num_internal_nodes, m["nodes_values"].AsBlob().size()); + TF_LITE_ENSURE_EQ(context, data->num_internal_nodes, m["nodes_truenodeids"].AsBlob().size()); + TF_LITE_ENSURE_EQ(context, data->num_internal_nodes, m["nodes_falsenodeids"].AsBlob().size()); + TF_LITE_ENSURE_EQ(context, data->num_leaf_nodes, m["nodes_weights"].AsBlob().size()); + TF_LITE_ENSURE_EQ(context, data->num_leaf_nodes, m["nodes_classids"].AsBlob().size()); + + // Check data types are supported. Currently we only support one combination. + TF_LITE_ENSURE_EQ(context, strncmp(m["tree_index_type"].AsString().c_str(), "uint16", 6), 0); + TF_LITE_ENSURE_EQ(context, strncmp(m["node_value_type"].AsString().c_str(), "float32", 7), 0); + TF_LITE_ENSURE_EQ(context, strncmp(m["class_index_type"].AsString().c_str(), "uint8", 5), 0); + TF_LITE_ENSURE_EQ(context, strncmp(m["class_weight_type"].AsString().c_str(), "float32", 7), 0); + TF_LITE_ENSURE_EQ(context, strncmp(m["equality_operator"].AsString().c_str(), "leq", 3), 0); + + TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, 0); + TF_LITE_ENSURE(context, input != nullptr); + TF_LITE_ENSURE(context, NumDimensions(input) == 2); + TfLiteTensor* output = micro_context->AllocateTempOutputTensor(node, 0); + TF_LITE_ENSURE(context, output != nullptr); + + int input_width = SizeOfDimension(input, 1); + int output_width = SizeOfDimension(output, 1); + + // Check that all indices into the input/output tensor are valid + for (uint32_t i = 0; i < data->num_internal_nodes; i++) { + TF_LITE_ENSURE(context, data->nodes_featureids[i] < input_width); + TF_LITE_ENSURE(context, data->nodes_featureids[i] >= 0); + if (!m["nodes_modes"].AsBlob().IsTheEmptyBlob()) { + if (data->nodes_modes[i] == 0) { + TF_LITE_ENSURE(context, data->nodes_classids[i] < output_width); + TF_LITE_ENSURE(context, data->nodes_classids[i] >= 0); + } + } + } + + micro_context->DeallocateTempTfLiteTensor(input); + micro_context->DeallocateTempTfLiteTensor(output); + + return kTfLiteOk; +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + + const OpDataTree* data = static_cast(node->user_data); + + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, 0); + const float *in_data = tflite::micro::GetTensorData(input); + + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, 0); + float *out_data = tflite::micro::GetTensorData(output); + + const tflite::RuntimeShape output_shape = tflite::micro::GetTensorShape(output); + memset(out_data, 0, output_shape.FlatSize() * sizeof(float)); + + for (uint32_t i = 0; i < data->num_trees; i++) { + uint16_t ix = data->tree_root_ids[i]; + + while (ix < 
data->num_internal_nodes) { + float node_val = 0; + memcpy(&node_val, (data->nodes_values + ix), sizeof(float)); + + if (in_data[data->nodes_featureids[ix]] <= node_val) { + ix = data->nodes_truenodeids[ix]; + } else { + ix = data->nodes_falsenodeids[ix]; + } + } + ix -= data->num_internal_nodes; + + float weight = 0; + memcpy(&weight, (data->nodes_weights + ix), sizeof(float)); + out_data[data->nodes_classids[ix]] += weight; + } + + return kTfLiteOk; +} + + +} // namespace + +TfLiteRegistration* Register_TreeEnsembleClassifier() { + static TfLiteRegistration r = {Init, + nullptr, + Prepare, + Eval, + /*profiling_string=*/nullptr, + /*builtin_code=*/0, + /*custom_name=*/nullptr, + /*version=*/0}; + return &r; +} + +const char* GetString_TreeEnsembleClassifier() { return "TreeEnsembleClassifier"; } + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/tree_ensemble_classifier.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/tree_ensemble_classifier.h new file mode 100644 index 0000000..335c312 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/tree_ensemble_classifier.h @@ -0,0 +1,29 @@ +/* Copyright 2023 Edge Impulse Inc. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_MICRO_KERNELS_TREE_ENSEMBLE_CLASSIFIER_H_ +#define TENSORFLOW_LITE_MICRO_KERNELS_TREE_ENSEMBLE_CLASSIFIER_H_ + +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" + +namespace tflite { + +TfLiteRegistration* Register_TreeEnsembleClassifier(); + +const char* GetString_TreeEnsembleClassifier(); + +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_KERNELS_TREE_ENSEMBLE_CLASSIFIER_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/unidirectional_sequence_lstm.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/unidirectional_sequence_lstm.cpp new file mode 100644 index 0000000..7ff9a2f --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/unidirectional_sequence_lstm.cpp @@ -0,0 +1,589 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Integer version of unidirectional sequence lstm. Only the standard LSTM +// (defined in the keras LSTM layer, e.g., no peephole etc.) is supported here. 
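+// For reference (added annotation, not part of the upstream comment), the
+// standard LSTM cell evaluated here computes, per time step t:
+//   i_t = sigmoid(W_i x_t + U_i h_{t-1} + b_i)   (input gate)
+//   f_t = sigmoid(W_f x_t + U_f h_{t-1} + b_f)   (forget gate)
+//   g_t = act(W_g x_t + U_g h_{t-1} + b_g)       (cell gate, act is the
+//                                                 configured activation,
+//                                                 typically tanh)
+//   o_t = sigmoid(W_o x_t + U_o h_{t-1} + b_o)   (output gate)
+//   c_t = f_t . c_{t-1} + i_t . g_t              (cell state update)
+//   h_t = o_t . tanh(c_t)                        (hidden state update)
+// where "." denotes element-wise multiplication.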
+// Currently used by the 16 bits activation case only + +#include +#include + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/fully_connected.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/lstm_eval.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/lstm_shared.h" + +namespace tflite { + +namespace { +/*Helper Functions*/ + +// Interface to access all the TempTfLiteTensors of the LSTM kernel during the +// preparation phase. Can only be constructed through the constructor to avoid +// memory leakage. All TempTfLiteTensors will be deallocated through the +// destructor. +class LstmTensors { + public: + LstmTensors(const LstmTensors& other) = delete; + LstmTensors& operator=(const LstmTensors& other) = delete; + + LstmTensors(TfLiteContext* context, TfLiteNode* node) { + micro_context_ = GetMicroContext(context); + // 24 internal tensors. see lstm_shared.h for tensor names + for (size_t i = 0; i < 24; i++) { + internal_tensors_[i] = micro_context_->AllocateTempInputTensor(node, i); + } + output_tensor_ = + micro_context_->AllocateTempOutputTensor(node, kLstmOutputTensor); + } + + ~LstmTensors() { + for (size_t i = 0; i < 24; i++) { + if (internal_tensors_[i] != nullptr) { + micro_context_->DeallocateTempTfLiteTensor(internal_tensors_[i]); + } + } + micro_context_->DeallocateTempTfLiteTensor(output_tensor_); + } + + // Verify the LSTM internal tensor properties (e.g., type checks) + // Input/output/states/fc weights tensors are required for kernel evaulation. + // The state tensors should be variables. 
Variants of the standard LSTM + // are not supported here, therefore their corresponding tensors should be + // invalid + TfLiteStatus ValidateTensorStatus(TfLiteContext* context) const { + // Verify certain tensor properties + // input tensor + TF_LITE_ENSURE(context, internal_tensors_[kLstmInputTensor] != nullptr); + // hidden state + TF_LITE_ENSURE(context, + internal_tensors_[kLstmOutputStateTensor] != nullptr); + TF_LITE_ENSURE(context, + internal_tensors_[kLstmOutputStateTensor]->is_variable); + // hidden state becomes input so they must have the same type + TF_LITE_ENSURE_EQ(context, internal_tensors_[kLstmOutputStateTensor]->type, + internal_tensors_[kLstmInputTensor]->type); + // cell state + TF_LITE_ENSURE(context, internal_tensors_[kLstmCellStateTensor] != nullptr); + TF_LITE_ENSURE(context, + internal_tensors_[kLstmCellStateTensor]->is_variable); + // output + TF_LITE_ENSURE(context, output_tensor_ != nullptr); + // output type is the same as the input type (activations) + TF_LITE_ENSURE_EQ(context, output_tensor_->type, + internal_tensors_[kLstmInputTensor]->type); + + // weight tensors (1-9, see lstm_shared for index definition) + const auto weight_type = + internal_tensors_[kLstmInputToForgetWeightsTensor]->type; + for (size_t i = 1; i < 9; i++) { + TF_LITE_ENSURE(context, internal_tensors_[i] != nullptr); + TF_LITE_ENSURE_EQ(context, internal_tensors_[i]->type, weight_type); + } + + // bias tensors (12-15, see lstm_shared for index definition) + const auto bias_type = internal_tensors_[kLstmForgetGateBiasTensor]->type; + for (size_t i = 12; i < 16; i++) { + TF_LITE_ENSURE(context, internal_tensors_[i] != nullptr); + TF_LITE_ENSURE_EQ(context, internal_tensors_[i]->type, bias_type); + } + // Tensors from LSTM variants are invalid + // No peephole + for (size_t i = 9; i < 12; i++) { + TF_LITE_ENSURE(context, internal_tensors_[i] == nullptr); + } + // No projection + for (size_t i = 16; i < 18; i++) { + TF_LITE_ENSURE(context, internal_tensors_[i] == nullptr); + } + // No internal layer norm + for (size_t i = 20; i < 24; i++) { + TF_LITE_ENSURE(context, internal_tensors_[i] == nullptr); + } + return kTfLiteOk; + } + + // Internal tensors. see lstm_shared.h for tensor names + const TfLiteTensor* GetInternalTensor(const int tensor_index) const { + return internal_tensors_[tensor_index]; + } + + const TfLiteTensor* HiddenStateTensor() const { + return internal_tensors_[kLstmOutputStateTensor]; + } + const TfLiteTensor* CellStateTensor() const { + return internal_tensors_[kLstmCellStateTensor]; + } + const TfLiteTensor* OutputTensor() const { return output_tensor_; } + + private: + // see lstm_shared.h for tensor names + MicroContext* micro_context_; + TfLiteTensor* internal_tensors_[24]; + TfLiteTensor* output_tensor_; +}; + +// Deduce the size information (Batch (B), Time Steps (T), Input dimension (I), +// State dimension (S)) that defines the LSTM using the input and hidden state +// tensor +LstmSizeInfo CreateLstmSizeInfo( + const bool time_major, const TfLiteIntArray* input_tensor_shape, + const TfLiteIntArray* hidden_state_tensor_shape) { + LstmSizeInfo size_info; + size_info.time_major = time_major; + size_info.batch_size = + time_major ? input_tensor_shape->data[1] : input_tensor_shape->data[0]; + size_info.time_steps = + time_major ? 
input_tensor_shape->data[0] : input_tensor_shape->data[1]; + size_info.input_dimension = input_tensor_shape->data[2]; + size_info.state_dimension = hidden_state_tensor_shape->data[1]; + return size_info; +} + +TfLiteStatus ValidateWeightTensorSize(TfLiteContext* context, + const TfLiteTensor* tensor, int dim1_size, + int dim2_size) { + TF_LITE_ENSURE_EQ(context, tensor->dims->size, 2); + TF_LITE_ENSURE_EQ(context, tensor->dims->data[0], dim1_size); + TF_LITE_ENSURE_EQ(context, tensor->dims->data[1], dim2_size); + return kTfLiteOk; +} + +TfLiteStatus ValidateBiasTensorSize(TfLiteContext* context, + const TfLiteTensor* tensor, int size) { + TF_LITE_ENSURE_EQ(context, tensor->dims->size, 1); + TF_LITE_ENSURE_EQ(context, tensor->dims->data[0], size); + return kTfLiteOk; +} + +// Go through every tensors and make sure their shape match the kernel +// configuration +TfLiteStatus ValidateTensorSize(TfLiteContext* context, + const LstmTensors& tensors, + const LstmSizeInfo& size_info) { + // Input FC weights + for (size_t i = 1; i < 5; i++) { + TF_LITE_ENSURE_OK( + context, ValidateWeightTensorSize(context, tensors.GetInternalTensor(i), + size_info.state_dimension, + size_info.input_dimension)); + } + // Recurrent FC weights + for (size_t i = 5; i < 9; i++) { + TF_LITE_ENSURE_OK( + context, ValidateWeightTensorSize(context, tensors.GetInternalTensor(i), + size_info.state_dimension, + size_info.state_dimension)); + } + // Biases + for (size_t i = 12; i < 16; i++) { + TF_LITE_ENSURE_OK( + context, ValidateBiasTensorSize(context, tensors.GetInternalTensor(i), + size_info.state_dimension)); + } + + // Check the shape of input state tensors. + // These tensor may be 1D or 2D. It's fine as long as the total size is + // correct. + TF_LITE_ENSURE_EQ(context, NumElements(tensors.HiddenStateTensor()), + size_info.batch_size * size_info.state_dimension); + TF_LITE_ENSURE_EQ(context, NumElements(tensors.CellStateTensor()), + size_info.batch_size * size_info.state_dimension); + + // Check the shape of output tensor against that of input tensor + TF_LITE_ENSURE_EQ(context, tensors.OutputTensor()->dims->size, 3); + TF_LITE_ENSURE_EQ(context, + tensors.GetInternalTensor(kLstmInputTensor)->dims->data[0], + tensors.OutputTensor()->dims->data[0]); + TF_LITE_ENSURE_EQ(context, + tensors.GetInternalTensor(kLstmInputTensor)->dims->data[1], + tensors.OutputTensor()->dims->data[1]); + TF_LITE_ENSURE_EQ(context, tensors.OutputTensor()->dims->data[2], + size_info.state_dimension); + return kTfLiteOk; +} + +// Wrapper function to create gate parameters for the four internal LSTM gates +TfLiteStatus CreateGateParams( + TfLiteContext* context, + /*Input tensors*/ + const TfLiteTensor* input, const TfLiteTensor* input_weight, + const TfLiteTensor* input_bias, + /*Hidden state tensors*/ + const TfLiteTensor* hidden_state, const TfLiteTensor* hidden_state_weight, + const TfLiteTensor* hidden_state_bias, + /*Scale of the fc output (input to non-linear activation)*/ + const float nonlinear_activation_input_scale, const TfLiteType cell_type, + tflite::GateParameters& gate_params) { + // A temp tflite tensor to represent the output of fc operation. 
Only the data + // type and quantization parameters are set since it is only used for + // parameter calculations + TfLiteTensor fc_output_temp; + fc_output_temp.type = cell_type; + fc_output_temp.params.scale = nonlinear_activation_input_scale; + fc_output_temp.params.zero_point = 0; // symmetrical quantized + + // A temp fc opdata to reuse the helper function on creating fc parameters + tflite::OpDataFullyConnected fc_data_temp; + // TODO(b/265853320): due to the lack of precision for the float scale, + // scale_diff / output_scale <= 0.02 (potentially requires 1e-8 precision) can + // not be satisified for the bias. Here we rely on the correctiveness of the + // conversion process (set input_bias=nullptr to avoid checking) for + // tensor scales + TF_LITE_ENSURE_STATUS(CalculateOpDataFullyConnected( + context, kTfLiteActNone, input->type, input, input_weight, + /*input_bias=*/nullptr, &fc_output_temp, &fc_data_temp)); + gate_params.input_fc_params = FullyConnectedParamsQuantized(fc_data_temp); + double real_multiplier = 0.0; + GetQuantizedConvolutionMultipler(context, input, input_weight, nullptr, + &fc_output_temp, &real_multiplier); + + TF_LITE_ENSURE_STATUS(CalculateOpDataFullyConnected( + context, kTfLiteActNone, hidden_state->type, hidden_state, + hidden_state_weight, hidden_state_bias, &fc_output_temp, &fc_data_temp)); + gate_params.recurrent_fc_params = FullyConnectedParamsQuantized(fc_data_temp); + return kTfLiteOk; +} + +// Create parameters for element wise multiplication that happens in a) cell +// state update ; b) hidden state update +// Note that all the output of gates are symmetrically quantized so only scales +// are required for input. However, during the hidden state update phase, the +// output is the updated hidden state, which is asymmetrically quantized. 
Thus +// output may require zero point +tflite::ArithmeticParams CreateInterGateMulParams(const float input1_scale, + const float input2_scale, + const float output_scale, + const TfLiteType output_type, + const int output_zp = 0) { + tflite::ArithmeticParams op_params = {}; + if (output_type == kTfLiteInt16) { + op_params.quantized_activation_min = std::numeric_limits::min(); + op_params.quantized_activation_max = std::numeric_limits::max(); + } else if (output_type == kTfLiteInt8) { + op_params.quantized_activation_min = std::numeric_limits::min(); + op_params.quantized_activation_max = std::numeric_limits::max(); + } + + op_params.input1_offset = 0; // symmetric + op_params.input2_offset = 0; // symmetric + op_params.output_offset = output_zp; + + const double input_product_scale = + static_cast(input1_scale) * static_cast(input2_scale); + double effective_scale = + input_product_scale / static_cast(output_scale); + + QuantizeMultiplier(effective_scale, &op_params.output_multiplier, + &op_params.output_shift); + return op_params; +} + +// Create the additional information about the cell state, which include: +// cell_state_scale_power: used in integer nonlinear function (e.g., tanh) +// quantized_cell_clip: quantized cell clip range +CellStateInfo CreateLstmCellStateInfo(const float cell_state_scale, + const float cell_clip) { + CellStateInfo cell_state_info; + // cell_state_scale_power: 2^-cell_state_scale_power = cell state scale + int buffer; + tflite::CheckedLog2(cell_state_scale, &buffer); + cell_state_info.cell_state_scale_power = buffer; + // Cell state specifics + cell_state_info.cell_clip = cell_clip; + cell_state_info.quantized_cell_clip = static_cast( + std::min(std::max(static_cast(cell_clip) / + static_cast(cell_state_scale), + static_cast(-32768.0)), + static_cast(32767.0))); + return cell_state_info; +} + +CellStateInfo CreateLstmCellStateInfoFloat(const float cell_clip) { + CellStateInfo cell_state_info; + cell_state_info.cell_clip = cell_clip; + cell_state_info.cell_state_scale_power = 0; // no quantization + cell_state_info.quantized_cell_clip = 0; // no quantization + return cell_state_info; +} + +tflite::FullyConnectedParams CreateFCParamsFloat() { + FullyConnectedParams op_params; + CalculateActivationRange(kTfLiteActNone, &op_params.float_activation_min, + &op_params.float_activation_max); + return op_params; +} + +tflite::GateParameters CreateGateParamsFloat() { + tflite::GateParameters gate_params = {}; + gate_params.input_fc_params = CreateFCParamsFloat(); + gate_params.recurrent_fc_params = CreateFCParamsFloat(); + return gate_params; +} + +tflite::ArithmeticParams CreateInterGateMulParamsFloat() { + tflite::ArithmeticParams op_params = {}; + CalculateActivationRange(kTfLiteActNone, &op_params.float_activation_min, + &op_params.float_activation_max); + return op_params; +} + +TfLiteStatus PrepareGateParametersFloat(TfLiteContext* context, + const LstmTensors& lstm_tensors, + OpDataLSTM* op_data) { + // Gate Parameters + op_data->forget_gate_parameters = CreateGateParamsFloat(); + op_data->input_gate_parameters = CreateGateParamsFloat(); + op_data->cell_gate_parameters = CreateGateParamsFloat(); + op_data->output_gate_parameters = CreateGateParamsFloat(); + // Inter gate multiplication parameters + op_data->inter_gate_parameters.forget_cell_mul_params = + CreateInterGateMulParamsFloat(); + op_data->inter_gate_parameters.input_mul_params = + CreateInterGateMulParamsFloat(); + op_data->inter_gate_parameters.output_mul_params = + CreateInterGateMulParamsFloat(); + 
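// Added note (not in the original source): the float path above needs no
+  // requantization, so these parameters only carry activation ranges. The
+  // integer path below instead folds the tensor scales into fixed-point
+  // multipliers: CreateInterGateMulParams computes
+  //   effective_scale = input1_scale * input2_scale / output_scale
+  // and lets QuantizeMultiplier() split it into a Q31 multiplier and a shift.
+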
return kTfLiteOk; +} + +TfLiteStatus PrepareGateParametersInteger(TfLiteContext* context, + const LstmTensors& lstm_tensors, + OpDataLSTM* op_data) { + float nonlinear_input_scale = 0.00024414062; // 2^-12 Q3.12 -> Q0.15 + TF_LITE_ENSURE_OK( + context, + CreateGateParams( + context, lstm_tensors.GetInternalTensor(kLstmInputTensor), + lstm_tensors.GetInternalTensor(kLstmInputToForgetWeightsTensor), + lstm_tensors.GetInternalTensor(kLstmForgetGateBiasTensor), + lstm_tensors.GetInternalTensor(kLstmOutputStateTensor), + lstm_tensors.GetInternalTensor(kLstmRecurrentToForgetWeightsTensor), + /*hidden_state_bias=*/nullptr, nonlinear_input_scale, kTfLiteInt16, + op_data->forget_gate_parameters)); + TF_LITE_ENSURE_OK( + context, + CreateGateParams( + context, lstm_tensors.GetInternalTensor(kLstmInputTensor), + lstm_tensors.GetInternalTensor(kLstmInputToInputWeightsTensor), + lstm_tensors.GetInternalTensor(kLstmInputGateBiasTensor), + lstm_tensors.GetInternalTensor(kLstmOutputStateTensor), + lstm_tensors.GetInternalTensor(kLstmRecurrentToInputWeightsTensor), + /*hidden_state_bias=*/nullptr, nonlinear_input_scale, kTfLiteInt16, + op_data->input_gate_parameters)); + TF_LITE_ENSURE_OK( + context, + CreateGateParams( + context, lstm_tensors.GetInternalTensor(kLstmInputTensor), + lstm_tensors.GetInternalTensor(kLstmInputToCellWeightsTensor), + lstm_tensors.GetInternalTensor(kLstmCellGateBiasTensor), + lstm_tensors.GetInternalTensor(kLstmOutputStateTensor), + lstm_tensors.GetInternalTensor(kLstmRecurrentToCellWeightsTensor), + /*hidden_state_bias=*/nullptr, nonlinear_input_scale, kTfLiteInt16, + op_data->cell_gate_parameters)); + TF_LITE_ENSURE_OK( + context, + CreateGateParams( + context, lstm_tensors.GetInternalTensor(kLstmInputTensor), + lstm_tensors.GetInternalTensor(kLstmInputToOutputWeightsTensor), + lstm_tensors.GetInternalTensor(kLstmOutputGateBiasTensor), + lstm_tensors.GetInternalTensor(kLstmOutputStateTensor), + lstm_tensors.GetInternalTensor(kLstmRecurrentToOutputWeightsTensor), + /*hidden_state_bias=*/nullptr, nonlinear_input_scale, kTfLiteInt16, + op_data->output_gate_parameters)); + + // Inter gate multiplication parameters + float nonlinear_output_scale = 0.00003051757; // 2^-15 Q3.12 -> Q0.15 + float cell_state_scale = lstm_tensors.CellStateTensor()->params.scale; + // forget gate output (nonlinear output) x cell state -> cell state + op_data->inter_gate_parameters.forget_cell_mul_params = + CreateInterGateMulParams(nonlinear_output_scale, cell_state_scale, + cell_state_scale, kTfLiteInt16); + // input gate output x cell gate output -> cell state + op_data->inter_gate_parameters.input_mul_params = + CreateInterGateMulParams(nonlinear_output_scale, nonlinear_output_scale, + cell_state_scale, kTfLiteInt16); + // tanh output x output gate output -> hidden state (potentially asymmetric) + op_data->inter_gate_parameters.output_mul_params = CreateInterGateMulParams( + nonlinear_output_scale, nonlinear_output_scale, + lstm_tensors.HiddenStateTensor()->params.scale, + lstm_tensors.HiddenStateTensor()->type, + lstm_tensors.HiddenStateTensor()->params.zero_point); + return kTfLiteOk; +} + +LSTMKernelContents CreateLSTMKernelContent(TfLiteContext* context, + TfLiteNode* node) { + LSTMKernelContents kernel_content; + // Point to correct tensors + for (size_t i = 0; i < 24; i++) { + kernel_content.internal_tensors[i] = + tflite::micro::GetMutableEvalInput(context, node, i); + } + // Output tensor + kernel_content.output_tensor = tflite::micro::GetEvalOutput(context, node, 0); + return kernel_content; 
+} + +template +LSTMBuffers CreateLSTMBuffers(TfLiteContext* context, + const int* buffer_indices) { + LSTMBuffers buffers; + buffers.buffer0 = reinterpret_cast( + context->GetScratchBuffer(context, buffer_indices[0])); + buffers.buffer1 = reinterpret_cast( + context->GetScratchBuffer(context, buffer_indices[1])); + buffers.buffer2 = reinterpret_cast( + context->GetScratchBuffer(context, buffer_indices[2])); + buffers.buffer3 = reinterpret_cast( + context->GetScratchBuffer(context, buffer_indices[3])); + return buffers; +} + +/*Kernel functions*/ + +void* UnidirectionalSequenceLstmInit(TfLiteContext* context, const char* buffer, + size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(OpDataLSTM)); +} + +TfLiteStatus UnidirectionalSequenceLstmPrepare(TfLiteContext* context, + TfLiteNode* node) { + TF_LITE_ENSURE_EQ(context, node->outputs->size, 1); + TF_LITE_ENSURE_EQ(context, node->inputs->size, 24); + + TFLITE_DCHECK(node->builtin_data != nullptr); + TFLITE_DCHECK(node->user_data != nullptr); + + OpDataLSTM* op_data = reinterpret_cast(node->user_data); + const auto* builtin_data = + static_cast(node->builtin_data); + // All TempTfLiteTensors will be deallocated through the destructor. + LstmTensors lstm_tensors(context, node); + TF_LITE_ENSURE_OK(context, lstm_tensors.ValidateTensorStatus(context)); + + op_data->cell_gate_nonlinear_type = builtin_data->activation; + op_data->size_info = + CreateLstmSizeInfo(builtin_data->time_major, + lstm_tensors.GetInternalTensor(kLstmInputTensor)->dims, + lstm_tensors.HiddenStateTensor()->dims); + TF_LITE_ENSURE_OK( + context, ValidateTensorSize(context, lstm_tensors, op_data->size_info)); + + // Create cell state information and gate parameters (Fully Connected and Mul) + auto cell_state_type = + lstm_tensors.GetInternalTensor(kLstmCellStateTensor)->type; + if (cell_state_type == kTfLiteFloat32) { + op_data->cell_state_info = + CreateLstmCellStateInfoFloat(builtin_data->cell_clip); + TF_LITE_ENSURE_OK( + context, PrepareGateParametersFloat(context, lstm_tensors, op_data)); + } else if (cell_state_type == kTfLiteInt16) { + op_data->cell_state_info = CreateLstmCellStateInfo( + lstm_tensors.CellStateTensor()->params.scale, builtin_data->cell_clip); + TF_LITE_ENSURE_OK( + context, PrepareGateParametersInteger(context, lstm_tensors, op_data)); + } else { + MicroPrintf( + "Cell state type %s (%d) not supported. 
The quantized Unidirectional " + "Sequence LSTM Op only support int16 cell state", + TfLiteTypeGetName(cell_state_type), cell_state_type); + return kTfLiteError; + } + // request buffers (four buffers) + for (size_t i = 0; i < 4; i++) { + TF_LITE_ENSURE_OK(context, context->RequestScratchBufferInArena( + context, + op_data->size_info.batch_size * + op_data->size_info.state_dimension * + TfLiteTypeGetSize(cell_state_type), + &(op_data->buffer_indices[i]))); + } + return kTfLiteOk; +} + +TfLiteStatus UnidirectionalSequenceLstmEval(TfLiteContext* context, + TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + const OpDataLSTM& op_data = *reinterpret_cast(node->user_data); + auto kernel_content = CreateLSTMKernelContent(context, node); + + const auto activation_type = + kernel_content.internal_tensors[kLstmInputTensor]->type; + const auto weight_type = + kernel_content.internal_tensors[kLstmInputToInputWeightsTensor]->type; + + switch (activation_type) { + case kTfLiteFloat32: { + LSTMBuffers buffers = + CreateLSTMBuffers(context, op_data.buffer_indices); + EvalLstm(op_data, kernel_content, buffers); + break; + } + case kTfLiteInt8: { + switch (weight_type) { + case kTfLiteInt8: { + // 8(activation)x8(weight)->16(cell) LSTM with 32 bits bias + LSTMBuffers buffers = + CreateLSTMBuffers(context, op_data.buffer_indices); + EvalLstm(op_data, kernel_content, + buffers); + break; + } + default: { + MicroPrintf("Filter type %s (%d) not supported.", + TfLiteTypeGetName(weight_type), activation_type); + return kTfLiteError; + } + } + break; + } + case kTfLiteInt16: { + switch (weight_type) { + case kTfLiteInt8: { + // 16(activation)x8(weight)->16(cell) LSTM with 64 bits bias + LSTMBuffers buffers = + CreateLSTMBuffers(context, op_data.buffer_indices); + EvalLstm(op_data, kernel_content, + buffers); + break; + } + default: { + MicroPrintf("Filter type %s (%d) not supported.", + TfLiteTypeGetName(weight_type), weight_type); + return kTfLiteError; + } + } + break; + } + default: { + MicroPrintf("Input type %s (%d) not supported.", + TfLiteTypeGetName(activation_type), activation_type); + return kTfLiteError; + } + } + return kTfLiteOk; +} + +} // namespace + +TfLiteRegistration Register_UNIDIRECTIONAL_SEQUENCE_LSTM() { + return tflite::micro::RegisterOp(UnidirectionalSequenceLstmInit, + UnidirectionalSequenceLstmPrepare, + UnidirectionalSequenceLstmEval); +} +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/unpack.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/unpack.cpp new file mode 100644 index 0000000..c0d3d8b --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/unpack.cpp @@ -0,0 +1,112 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +namespace tflite { +namespace ops { +namespace micro { +namespace unpack { +namespace { + +constexpr int kInputTensor = 0; + +template +TfLiteStatus UnpackImpl(TfLiteContext* context, TfLiteNode* node, + const TfLiteEvalTensor* input, int output_count, + int axis) { + const TfLiteEvalTensor* output0 = + tflite::micro::GetEvalOutput(context, node, 0); + const TfLiteIntArray* input_dims = input->dims; + const TfLiteIntArray* output_dims = output0->dims; + const int dimensions = input_dims->size; + + if (axis < 0) { + axis += input->dims->size; + } + + TFLITE_DCHECK_LT(axis, dimensions); + + int outer_size = 1; + for (int i = 0; i < axis; ++i) { + outer_size *= input_dims->data[i]; + } + int copy_size = 1; + for (int i = axis + 1; i < dimensions; ++i) { + copy_size *= input_dims->data[i]; + } + int output_size = 1; + for (int i = 0; i < output_dims->size; ++i) { + output_size *= output_dims->data[i]; + } + TFLITE_DCHECK_EQ(output_size, copy_size * outer_size); + + const T* input_data = tflite::micro::GetTensorData(input); + + for (int i = 0; i < output_count; ++i) { + TfLiteEvalTensor* t = tflite::micro::GetEvalOutput(context, node, i); + T* output_data = tflite::micro::GetTensorData(t); + for (int k = 0; k < outer_size; ++k) { + T* output_ptr = output_data + copy_size * k; + int loc = k * output_count * copy_size + i * copy_size; + const T* input_ptr = input_data + loc; + for (int j = 0; j < copy_size; ++j) output_ptr[j] = input_ptr[j]; + } + } + + return kTfLiteOk; +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + TfLiteUnpackParams* data = + reinterpret_cast(node->builtin_data); + + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + + switch (input->type) { + case kTfLiteFloat32: { + return UnpackImpl(context, node, input, data->num, data->axis); + } + case kTfLiteInt32: { + return UnpackImpl(context, node, input, data->num, data->axis); + } + case kTfLiteInt8: { + return UnpackImpl(context, node, input, data->num, data->axis); + } + default: { + MicroPrintf("Type '%s' is not supported by unpack.", + TfLiteTypeGetName(input->type)); + return kTfLiteError; + } + } + + return kTfLiteOk; +} +} // namespace +} // namespace unpack + +TfLiteRegistration Register_UNPACK() { + return tflite::micro::RegisterOp(nullptr, nullptr, unpack::Eval); +} + +} // namespace micro +} // namespace ops +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/var_handle.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/var_handle.cpp new file mode 100644 index 0000000..2329f2c --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/var_handle.cpp @@ -0,0 +1,93 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include + +#include + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_graph.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_resource_variable.h" +#include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h" + +namespace tflite { + +namespace { + +struct OpData { + int32_t resource_id; +}; + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(OpData)); +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + OpData* op_data = reinterpret_cast(node->user_data); + const auto* params = + reinterpret_cast(node->builtin_data); + + tflite::MicroContext* micro_context = tflite::GetMicroContext(context); + MicroGraph& graph_info = micro_context->graph(); + + MicroResourceVariables* resources = graph_info.GetResourceVariables(); + if (resources == nullptr) { + MicroPrintf( + "VAR_HANDLE requires resource variables. Please create " + "ResourceVariables and pass it to the interpreter."); + return kTfLiteError; + } + op_data->resource_id = + resources->CreateIdIfNoneFound(params->container, params->shared_name); + if (op_data->resource_id < 0) { + return kTfLiteError; + } + + TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0); + TFLITE_DCHECK(output != nullptr); + + // Assign saved resource_id so this output tensor will always return the + // correct resource id. + output->data.i32 = &op_data->resource_id; + + return kTfLiteOk; +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + OpData* op_data = reinterpret_cast(node->user_data); + + TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0); + TFLITE_DCHECK(output != nullptr); + + // Assign saved resource_id so this output tensor will always return the + // correct resource id. + output->data.i32 = &op_data->resource_id; + return kTfLiteOk; +} + +} // namespace. + +TfLiteRegistration Register_VAR_HANDLE() { + return tflite::micro::RegisterOp(Init, Prepare, Eval); +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/while.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/while.cpp new file mode 100644 index 0000000..ba18ba6 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/while.cpp @@ -0,0 +1,133 @@ +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include + +#include + +#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_context.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_graph.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h" + +namespace tflite { + +namespace { + +struct OpData { + int cond_subgraph_index; + int body_subgraph_index; +}; + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(OpData)); +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + OpData* op_data = reinterpret_cast(node->user_data); + const auto* params = + reinterpret_cast(node->builtin_data); + + op_data->cond_subgraph_index = params->cond_subgraph_index; + op_data->body_subgraph_index = params->body_subgraph_index; + + // The first input is the condition. 
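+ // Both subgraphs must be valid, and each must take the same number of
+ // inputs as this op; the body subgraph must also produce as many outputs as
+ // the op has inputs, because its outputs are fed back as inputs on the next
+ // iteration. The checks below enforce that contract.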
+ tflite::MicroContext* micro_context = tflite::GetMicroContext(context);
+
+ size_t num_inputs = node->inputs->size;
+ size_t num_outputs = node->outputs->size;
+
+ MicroGraph& graph_info = micro_context->graph();
+
+ TF_LITE_ENSURE(context,
+ op_data->cond_subgraph_index < graph_info.NumSubgraphs());
+ TF_LITE_ENSURE(context,
+ op_data->body_subgraph_index < graph_info.NumSubgraphs());
+
+ TF_LITE_ENSURE_EQ(context, num_inputs,
+ graph_info.NumSubgraphInputs(op_data->cond_subgraph_index));
+ TF_LITE_ENSURE_EQ(context, num_inputs,
+ graph_info.NumSubgraphInputs(op_data->body_subgraph_index));
+ TF_LITE_ENSURE_EQ(context, num_inputs, num_outputs);
+ TF_LITE_ENSURE_EQ(
+ context, num_outputs,
+ graph_info.NumSubgraphOutputs(op_data->body_subgraph_index));
+
+ return kTfLiteOk;
+}
+
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+ const OpData* op_data = reinterpret_cast<const OpData*>(node->user_data);
+
+ tflite::MicroContext* micro_context = tflite::GetMicroContext(context);
+ MicroGraph* graph_info = &micro_context->graph();
+
+ TF_LITE_ENSURE_OK(context,
+ tflite::micro::CopyOpInputsToSubgraphInputs(
+ context, node, graph_info, op_data->cond_subgraph_index,
+ /*first_tensor_idx=*/0));
+
+ TF_LITE_ENSURE_OK(context,
+ graph_info->InvokeSubgraph(op_data->cond_subgraph_index));
+
+ TfLiteEvalTensor* cond_subgraph_output = graph_info->GetSubgraphOutput(
+ op_data->cond_subgraph_index, /*tensor_idx=*/0);
+ bool cond_value = cond_subgraph_output->data.b[0];
+
+ TF_LITE_ENSURE_OK(context,
+ tflite::micro::CopyOpInputsToSubgraphInputs(
+ context, node, graph_info, op_data->body_subgraph_index,
+ /*first_tensor_idx=*/0));
+ TF_LITE_ENSURE_OK(context,
+ tflite::micro::CopyOpInputsToOpOutputs(context, node));
+
+ while (cond_value == true) {
+ // Copy output of this iteration back to the body input.
+ TF_LITE_ENSURE_OK(
+ context, tflite::micro::CopyOpOutputsToSubgraphInputs(
+ context, node, graph_info, op_data->body_subgraph_index));
+ TF_LITE_ENSURE_OK(context,
+ graph_info->InvokeSubgraph(op_data->body_subgraph_index));
+
+ TF_LITE_ENSURE_OK(
+ context, tflite::micro::CopySubgraphOutputsToOpOutputs(
+ context, node, graph_info, op_data->body_subgraph_index));
+ TF_LITE_ENSURE_OK(
+ context, tflite::micro::CopyOpOutputsToSubgraphInputs(
+ context, node, graph_info, op_data->cond_subgraph_index));
+ TF_LITE_ENSURE_OK(context,
+ graph_info->InvokeSubgraph(op_data->cond_subgraph_index));
+
+ cond_subgraph_output = graph_info->GetSubgraphOutput(
+ op_data->cond_subgraph_index, /*tensor_idx=*/0);
+ cond_value = cond_subgraph_output->data.b[0];
+ }
+
+ return kTfLiteOk;
+}
+
+} // namespace.
+
+TfLiteRegistration Register_WHILE() {
+ return tflite::micro::RegisterOp(Init, Prepare, Eval);
+}
+
+} // namespace tflite
diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/zeros_like.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/zeros_like.cpp
new file mode 100644
index 0000000..c868341
--- /dev/null
+++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/kernels/zeros_like.cpp
@@ -0,0 +1,88 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +namespace tflite { +namespace { + +constexpr int kInputTensor = 0; +constexpr int kOutputTensor = 0; + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + MicroContext* micro_context = GetMicroContext(context); + + TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + TfLiteTensor* input = + micro_context->AllocateTempInputTensor(node, kInputTensor); + TF_LITE_ENSURE(context, input != nullptr); + TfLiteTensor* output = + micro_context->AllocateTempOutputTensor(node, kOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); + output->type = input->type; + + micro_context->DeallocateTempTfLiteTensor(input); + micro_context->DeallocateTempTfLiteTensor(output); + return kTfLiteOk; +} + +template +void resetZeros(T* out, const int num_elements) { + for (int i = 0; i < num_elements; ++i) { + out[i] = static_cast(0); + } +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + int flat_size = MatchingFlatSize(tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorShape(output)); + switch (input->type) { + case kTfLiteInt64: + resetZeros(tflite::micro::GetTensorData(output), flat_size); + break; + case kTfLiteInt32: + resetZeros(tflite::micro::GetTensorData(output), flat_size); + break; + case kTfLiteInt8: + resetZeros(tflite::micro::GetTensorData(output), flat_size); + break; + case kTfLiteFloat32: + resetZeros(tflite::micro::GetTensorData(output), flat_size); + break; + default: + MicroPrintf( + "ZerosLike only currently supports int64, int32, " + "and float32, got %d.", + input->type); + return kTfLiteError; + } + return kTfLiteOk; +} +} // namespace + +TfLiteRegistration Register_ZEROS_LIKE() { + return tflite::micro::RegisterOp(nullptr, Prepare, Eval); +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.cpp new file mode 100644 index 0000000..486b68e --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.cpp @@ -0,0 +1,171 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h" + +#include +#include + +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/flatbuffers.h" // from @flatbuffers +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/flatbuffer_conversions_bridge.h" +#include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h" + +namespace tflite { + +uint8_t* AlignPointerUp(uint8_t* data, size_t alignment) { + std::uintptr_t data_as_uintptr_t = reinterpret_cast(data); + uint8_t* aligned_result = reinterpret_cast( + ((data_as_uintptr_t + (alignment - 1)) / alignment) * alignment); + return aligned_result; +} + +uint8_t* AlignPointerDown(uint8_t* data, size_t alignment) { + std::uintptr_t data_as_uintptr_t = reinterpret_cast(data); + uint8_t* aligned_result = + reinterpret_cast((data_as_uintptr_t / alignment) * alignment); + return aligned_result; +} + +size_t AlignSizeUp(size_t size, size_t alignment) { + size_t aligned_size = (((size + (alignment - 1)) / alignment) * alignment); + return aligned_size; +} + +TfLiteStatus TfLiteTypeSizeOf(TfLiteType type, size_t* size) { + switch (type) { + case kTfLiteFloat16: + *size = sizeof(int16_t); + break; + case kTfLiteFloat32: + *size = sizeof(float); + break; + case kTfLiteFloat64: + *size = sizeof(double); + break; + case kTfLiteInt16: + *size = sizeof(int16_t); + break; + case kTfLiteInt32: + *size = sizeof(int32_t); + break; + case kTfLiteUInt32: + *size = sizeof(uint32_t); + break; + case kTfLiteUInt8: + *size = sizeof(uint8_t); + break; + case kTfLiteInt8: + *size = sizeof(int8_t); + break; + case kTfLiteInt64: + *size = sizeof(int64_t); + break; + case kTfLiteUInt64: + *size = sizeof(uint64_t); + break; + case kTfLiteBool: + *size = sizeof(bool); + break; + case kTfLiteResource: + *size = sizeof(int32_t); + break; + case kTfLiteComplex64: + *size = sizeof(float) * 2; + break; + case kTfLiteComplex128: + *size = sizeof(double) * 2; + break; + case kTfLiteInt4: + *size = sizeof(int8_t); + break; + default: + return kTfLiteError; + } + return kTfLiteOk; +} + +TfLiteStatus BytesRequiredForTensor(const tflite::Tensor& flatbuffer_tensor, + size_t* bytes, size_t* type_size) { + int element_count = 1; + // If flatbuffer_tensor.shape == nullptr, then flatbuffer_tensor is a scalar + // so has 1 element. 
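+ // For example, a FLOAT32 tensor of shape {2, 3} gives element_count = 6 and
+ // type_size = 4, so *bytes = 24; a scalar FLOAT32 tensor gives *bytes = 4.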
+ if (flatbuffer_tensor.shape() != nullptr) { + for (size_t n = 0; n < flatbuffer_tensor.shape()->Length(); ++n) { + element_count *= flatbuffer_tensor.shape()->Get(n); + } + } + + TfLiteType tf_lite_type; + TF_LITE_ENSURE_STATUS( + ConvertTensorType(flatbuffer_tensor.type(), &tf_lite_type)); + TF_LITE_ENSURE_STATUS(TfLiteTypeSizeOf(tf_lite_type, type_size)); + *bytes = element_count * (*type_size); + return kTfLiteOk; +} + +TfLiteStatus TfLiteEvalTensorByteLength(const TfLiteEvalTensor* eval_tensor, + size_t* out_bytes) { + TFLITE_DCHECK(out_bytes != nullptr); + + int element_count = 1; + // If eval_tensor->dims == nullptr, then tensor is a scalar so has 1 element. + if (eval_tensor->dims != nullptr) { + for (int n = 0; n < eval_tensor->dims->size; ++n) { + element_count *= eval_tensor->dims->data[n]; + } + } + size_t type_size; + TF_LITE_ENSURE_STATUS(TfLiteTypeSizeOf(eval_tensor->type, &type_size)); + *out_bytes = element_count * type_size; + return kTfLiteOk; +} + +TfLiteStatus AllocateOutputDimensionsFromInput(TfLiteContext* context, + const TfLiteTensor* input1, + const TfLiteTensor* input2, + TfLiteTensor* output) { + const TfLiteTensor* input = nullptr; + + TF_LITE_ENSURE(context, input1->dims != nullptr); + TF_LITE_ENSURE(context, input2->dims != nullptr); + TF_LITE_ENSURE(context, output->dims->size == 0); + + input = input1->dims->size > input2->dims->size ? input1 : input2; + TF_LITE_ENSURE(context, output->type == input->type); + + size_t size = 0; + TfLiteTypeSizeOf(input->type, &size); + const int dimensions_count = tflite::GetTensorShape(input).DimensionsCount(); + for (int i = 0; i < dimensions_count; i++) { + size *= input->dims->data[i]; + } + + output->bytes = size; + + output->dims = + reinterpret_cast(context->AllocatePersistentBuffer( + context, TfLiteIntArrayGetSizeInBytes(size))); + + output->dims->size = input->dims->size; + for (int i = 0; i < dimensions_count; i++) { + output->dims->data[i] = input->dims->data[i]; + } + + return kTfLiteOk; +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h new file mode 100644 index 0000000..2ceb2bc --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h @@ -0,0 +1,65 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_MICRO_MEMORY_HELPERS_H_ +#define TENSORFLOW_LITE_MICRO_MEMORY_HELPERS_H_ + +#include +#include + +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h" +#include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated_full.h" + +namespace tflite { + +// Returns the next pointer address aligned to the given alignment. 
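+// For example, with alignment 16, a pointer at address 0x1003 is aligned up
+// to 0x1010, while a pointer already at 0x1010 is returned unchanged.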
+uint8_t* AlignPointerUp(uint8_t* data, size_t alignment); + +// Returns the previous pointer address aligned to the given alignment. +uint8_t* AlignPointerDown(uint8_t* data, size_t alignment); + +// Returns an increased size that's a multiple of alignment. +size_t AlignSizeUp(size_t size, size_t alignment); + +// Templated version of AlignSizeUp +// Returns an increased size that's a multiple of alignment. +template +size_t AlignSizeUp(size_t count = 1) { + return AlignSizeUp(sizeof(T) * count, alignof(T)); +} + +// Returns size in bytes for a given TfLiteType. +TfLiteStatus TfLiteTypeSizeOf(TfLiteType type, size_t* size); + +// How many bytes are needed to hold a tensor's contents. +TfLiteStatus BytesRequiredForTensor(const tflite::Tensor& flatbuffer_tensor, + size_t* bytes, size_t* type_size); + +// How many bytes are used in a TfLiteEvalTensor instance. The byte length is +// returned in out_bytes. +TfLiteStatus TfLiteEvalTensorByteLength(const TfLiteEvalTensor* eval_tensor, + size_t* out_bytes); + +// Deduce output dimensions from input and allocate given size. +// Useful for operators with two inputs where the largest input should equal the +// output dimension. +TfLiteStatus AllocateOutputDimensionsFromInput(TfLiteContext* context, + const TfLiteTensor* input1, + const TfLiteTensor* input2, + TfLiteTensor* output); + +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_MEMORY_HELPERS_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/memory_planner/greedy_memory_planner.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/memory_planner/greedy_memory_planner.cpp new file mode 100644 index 0000000..ff98fc2 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/memory_planner/greedy_memory_planner.cpp @@ -0,0 +1,448 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/micro/memory_planner/greedy_memory_planner.h" + +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_string.h" + +namespace tflite { + +namespace { + +// Returns a character representing a numbered buffer +// for GreedyMemoryPlanner::PrintMemoryPlan() +char GetOrdinalCharacter(int i) { + if (i < 10) { + return '0' + i; + } else if (i < 36) { + return 'a' + (i - 10); + } else if (i < 62) { + return 'A' + (i - 36); + } + return '*'; +} + +} // namespace + +// Simple stable in-place sort function. Not time-efficient for large arrays. +// Would normally be in an anonymous namespace to keep it private, but we want +// to be able to test it externally. 
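+// For example, values = {10, 30, 20} with ids = {0, 1, 2} becomes
+// values = {30, 20, 10} and ids = {1, 2, 0}; equal values keep their original
+// relative order, so the sort is stable.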
+void ReverseSortInPlace(int* values, int* ids, int size) { + bool any_swapped; + do { + any_swapped = false; + for (int i = 1; i < size; ++i) { + if (values[i - 1] < values[i]) { + const int value_temp = values[i - 1]; + values[i - 1] = values[i]; + values[i] = value_temp; + const int id_temp = ids[i - 1]; + ids[i - 1] = ids[i]; + ids[i] = id_temp; + any_swapped = true; + } + } + } while (any_swapped); +} + +GreedyMemoryPlanner::GreedyMemoryPlanner() {} + +TfLiteStatus GreedyMemoryPlanner::Init(unsigned char* scratch_buffer, + int scratch_buffer_size) { + // Reset internal states + buffer_count_ = 0; + need_to_calculate_offsets_ = true; + + // Allocate the arrays we need within the scratch buffer arena. + max_buffer_count_ = scratch_buffer_size / per_buffer_size(); + + unsigned char* next_free = scratch_buffer; + requirements_ = reinterpret_cast(next_free); + next_free += sizeof(BufferRequirements) * max_buffer_count_; + + buffer_sizes_sorted_ = reinterpret_cast(next_free); + next_free += sizeof(int) * max_buffer_count_; + + buffer_ids_sorted_ = reinterpret_cast(next_free); + next_free += sizeof(int) * max_buffer_count_; + + buffers_sorted_by_offset_ = reinterpret_cast(next_free); + next_free += sizeof(ListEntry) * max_buffer_count_; + + buffer_offsets_ = reinterpret_cast(next_free); + return kTfLiteOk; +} + +GreedyMemoryPlanner::~GreedyMemoryPlanner() { + // We don't own the scratch buffer, so don't deallocate anything. +} + +TfLiteStatus GreedyMemoryPlanner::AddBuffer(int size, int first_time_used, + int last_time_used) { + if (buffer_count_ >= max_buffer_count_) { + MicroPrintf("Too many buffers (max is %d)", max_buffer_count_); + return kTfLiteError; + } + BufferRequirements* current = &requirements_[buffer_count_]; + current->size = size; + current->first_time_used = first_time_used; + current->last_time_used = last_time_used; + current->offline_offset = kOnlinePlannedBuffer; + ++buffer_count_; + need_to_calculate_offsets_ = true; + return kTfLiteOk; +} + +TfLiteStatus GreedyMemoryPlanner::AddBuffer(int size, int first_time_used, + int last_time_used, + int offline_offset) { + BufferRequirements* current = &requirements_[buffer_count_]; + if (AddBuffer(size, first_time_used, last_time_used) != kTfLiteOk) { + return kTfLiteError; + } + current->offline_offset = offline_offset; + return kTfLiteOk; +} + +bool GreedyMemoryPlanner::DoesEntryOverlapInTime( + const GreedyMemoryPlanner::ListEntry* entry, const int first_time_used, + const int last_time_used) const { + const BufferRequirements* entry_requirements = + &requirements_[entry->requirements_index]; + if (entry_requirements->first_time_used > last_time_used) { + return false; + } + if (first_time_used > entry_requirements->last_time_used) { + return false; + } + return true; +} + +GreedyMemoryPlanner::ListEntry* +GreedyMemoryPlanner::NextSimultaneouslyActiveBuffer( + const GreedyMemoryPlanner::ListEntry* start, const int first_time_used, + const int last_time_used) { + ListEntry* result = nullptr; + ListEntry* candidate_next_entry; + if (start == nullptr) { + candidate_next_entry = &buffers_sorted_by_offset_[first_entry_index_]; + } else { + if (start->next_entry_index == -1) { + return nullptr; + } + candidate_next_entry = &buffers_sorted_by_offset_[start->next_entry_index]; + } + do { + if (DoesEntryOverlapInTime(candidate_next_entry, first_time_used, + last_time_used)) { + result = candidate_next_entry; + break; + } + if (candidate_next_entry->next_entry_index == -1) { + break; + } + candidate_next_entry = + 
&buffers_sorted_by_offset_[candidate_next_entry->next_entry_index]; + } while (true); + return result; +} + +void GreedyMemoryPlanner::CalculateOffsetsIfNeeded() { + if (!need_to_calculate_offsets_ || (buffer_count_ == 0)) { + return; + } + need_to_calculate_offsets_ = false; + + // Start off by ordering the buffers in descending order of size. + // This helps find a more compact layout. Intuitively, you can think + // about putting the large buffers in place first, and then the + // smaller buffers can fit in the gaps, rather than fragmenting the + // gaps with small buffers at the beginning. Add offline planned offsets + // first in the list, since they have a predetermined offset. + int idx_from_tail = buffer_count_; + int idx_from_head = 0; + for (int i = 0; i < buffer_count_; ++i) { + if (requirements_[i].offline_offset == kOnlinePlannedBuffer) { + idx_from_tail--; + buffer_sizes_sorted_[idx_from_tail] = requirements_[i].size; + buffer_ids_sorted_[idx_from_tail] = i; + buffer_offsets_[i] = -1; + } else { + buffer_sizes_sorted_[idx_from_head] = requirements_[i].size; + buffer_ids_sorted_[idx_from_head] = i; + buffer_offsets_[i] = requirements_[i].offline_offset; + idx_from_head++; + } + } + + // This sorting algorithm is naive, and may end up taking a very long time + // with hundreds of buffers. Do not sort the offline planned offsets. + ReverseSortInPlace(&buffer_sizes_sorted_[idx_from_head], + &buffer_ids_sorted_[idx_from_head], + buffer_count_ - idx_from_head); + + // Initialize the first entry to the first buffer in + // buffer_ids_sorted_. + // - If there are no offline planned offsets, the largest buffer will be + // first, and the buffers will be handled in size order. + // - If offline offsets are present, these will be handled first in order + // for the greedy algorithm to utilized gaps in the offline plan. + first_entry_index_ = 0; + next_free_entry_ = 1; + ListEntry* first_entry = &buffers_sorted_by_offset_[first_entry_index_]; + first_entry->next_entry_index = -1; // to mark the entry as end of list + int buffer_id = buffer_ids_sorted_[0]; + first_entry->requirements_index = buffer_id; + if (requirements_[buffer_id].offline_offset == kOnlinePlannedBuffer) { + buffer_offsets_[buffer_id] = 0; + } + first_entry->offset = buffer_offsets_[buffer_id]; + + // Work through the rest of the buffers to find a good gap to place each one. + for (int i = 1; i < buffer_count_; ++i) { + // The id is the order the buffer was originally added by the client. + buffer_id = buffer_ids_sorted_[i]; + // Look at what size and time range the buffer needs to be active. + BufferRequirements* wanted_requirements = &requirements_[buffer_id]; + const int wanted_size = wanted_requirements->size; + const int wanted_first_time_used = wanted_requirements->first_time_used; + const int wanted_last_time_used = wanted_requirements->last_time_used; + + // Find the first buffer that's active in our time range. All placed + // buffers are stored in the order of their starting position in the arena + // so that it's easy to find the next buffer in memory, and so the gap. + // The candidate_entry variable holds the buffer that we're considering + // placing the current buffer after. + + int candidate_offset = 0; + // Loop through the offset-ordered list of buffers, looking for gaps. + if (wanted_requirements->offline_offset == kOnlinePlannedBuffer) { + ListEntry* prior_entry = nullptr; + while (true) { + // Find out what the next active buffer is. 
+ ListEntry* next_entry = NextSimultaneouslyActiveBuffer( + prior_entry, wanted_first_time_used, wanted_last_time_used); + + if (prior_entry) { + BufferRequirements* candidate_requirements = + &requirements_[prior_entry->requirements_index]; + const int prior_entry_offset = + prior_entry->offset + candidate_requirements->size; + if (prior_entry_offset > candidate_offset) { + candidate_offset = prior_entry_offset; + } + } + if (next_entry == nullptr) { + // We're at the end of the list, so we can always append the buffer + // here. + break; + } + // Find out how much space there is between us and the next buffer. + const int gap = next_entry->offset - candidate_offset; + if (gap >= wanted_size) { + // This entry has a big enough gap between it and the next, so + // use it! + break; + } + // The gap wasn't big enough, so move on to another candidate. + prior_entry = next_entry; + } + } else { + // Offline planned offset are to be considered constant + candidate_offset = wanted_requirements->offline_offset; + } + // At this point, we've either found a gap (possibly at the end of the + // list) and want to place the buffer there, or there are no other active + // buffers in this time range and so we can put it at offset zero. + // Record the buffer's offset in our plan. + buffer_offsets_[buffer_id] = candidate_offset; + // Add the newly-placed buffer to our offset-ordered list, so that + // subsequent passes can fit in their buffers around it. + ListEntry* new_entry = &buffers_sorted_by_offset_[next_free_entry_]; + new_entry->offset = candidate_offset; + new_entry->requirements_index = buffer_id; + const int new_entry_index = next_free_entry_; + ++next_free_entry_; + + if (first_entry->offset > candidate_offset) { + // The new entry offset is smaller than the first entry offset => + // replace the first entry + first_entry = new_entry; + first_entry->next_entry_index = first_entry_index_; + first_entry_index_ = new_entry_index; + } else { + ListEntry* current_entry = first_entry; + // Make sure that we insert the buffer at the correct place in the + // buffer-offset-ordered list + while (true) { + const int next_entry_index = current_entry->next_entry_index; + if (next_entry_index == -1) { + // We're at the end of the list, so just add the new entry here. + current_entry->next_entry_index = new_entry_index; + new_entry->next_entry_index = -1; + break; + } + // not at the end of the list -> take a look at next entry + ListEntry* next_entry = &buffers_sorted_by_offset_[next_entry_index]; + if (next_entry->offset > candidate_offset) { + // We're at the right spot to do an insertion and retain the sorting + // order, so place the new entry here. 
+ new_entry->next_entry_index = current_entry->next_entry_index; + current_entry->next_entry_index = new_entry_index; + break; + } + current_entry = next_entry; + } + } + } +} + +size_t GreedyMemoryPlanner::GetMaximumMemorySize() { + CalculateOffsetsIfNeeded(); + if (buffer_count_ == 0) { + return 0; + } + ListEntry* entry = &buffers_sorted_by_offset_[first_entry_index_]; + size_t max_size = 0; + while (entry) { + BufferRequirements* requirements = + &requirements_[entry->requirements_index]; + const size_t current_size = entry->offset + requirements->size; + if (current_size > max_size) { + max_size = current_size; + } + if (entry->next_entry_index == -1) { + break; + } + entry = &buffers_sorted_by_offset_[entry->next_entry_index]; + } + return max_size; +} + +void GreedyMemoryPlanner::PrintMemoryPlan() { + CalculateOffsetsIfNeeded(); + + for (int i = 0; i < buffer_count_; ++i) { + MicroPrintf("%c (id=%d): size=%d, offset=%d, first_used=%d last_used=%d", + GetOrdinalCharacter(i), i, requirements_[i].size, + buffer_offsets_[i], requirements_[i].first_time_used, + requirements_[i].last_time_used); + } + + constexpr int kLineWidth = 80; + int max_size = kLineWidth; + int max_time = 0; + for (int i = 0; i < buffer_count_; ++i) { + BufferRequirements* requirements = &requirements_[i]; + const int offset = buffer_offsets_[i]; + const int last_time_used = requirements->last_time_used; + const int size = offset + requirements->size; + if (size > max_size) { + max_size = size; + } + if (last_time_used > max_time) { + max_time = last_time_used; + } + } + + char line[kLineWidth + 1]; + for (int t = 0; t <= max_time; ++t) { + for (int c = 0; c < kLineWidth; ++c) { + line[c] = '.'; + } + int memory_use = 0; + for (int i = 0; i < buffer_count_; ++i) { + BufferRequirements* requirements = &requirements_[i]; + if ((t < requirements->first_time_used) || + (t > requirements->last_time_used)) { + continue; + } + const int offset = buffer_offsets_[i]; + if (offset == -1) { + continue; + } + const int size = requirements->size; + memory_use += size; + const int line_start = (offset * kLineWidth) / max_size; + const int line_end = ((offset + size) * kLineWidth) / max_size; + for (int n = line_start; n < line_end; ++n) { + if (line[n] == '.') { + line[n] = GetOrdinalCharacter(i); + } else { + line[n] = '!'; + } + } + } + line[kLineWidth] = 0; + + MicroPrintf("%s%d: %s (%dk)", t < 10 ? 
" " : "", t, (const char*)line, + (memory_use + 1023) / 1024); + } +} + +int GreedyMemoryPlanner::GetBufferCount() { return buffer_count_; } + +TfLiteStatus GreedyMemoryPlanner::GetOffsetForBuffer(int buffer_index, + int* offset) { + CalculateOffsetsIfNeeded(); + if ((buffer_index < 0) || (buffer_index >= buffer_count_)) { + MicroPrintf("buffer index %d is outside range 0 to %d", buffer_index, + buffer_count_); + return kTfLiteError; + } + *offset = buffer_offsets_[buffer_index]; + return kTfLiteOk; +} + +bool GreedyMemoryPlanner::DoAnyBuffersOverlap() { + CalculateOffsetsIfNeeded(); + bool were_overlaps_found = false; + for (int i = 0; i < buffer_count_; ++i) { + BufferRequirements* a_requirements = &requirements_[i]; + const int a_start_offset = buffer_offsets_[i]; + const int a_first_time_used = a_requirements->first_time_used; + const int a_last_time_used = a_requirements->last_time_used; + const int a_end_offset = a_start_offset + a_requirements->size; + for (int j = 0; j < buffer_count_; ++j) { + if (i == j) { + continue; + } + BufferRequirements* b_requirements = &requirements_[j]; + const int b_start_offset = buffer_offsets_[j]; + const int b_first_time_used = b_requirements->first_time_used; + const int b_last_time_used = b_requirements->last_time_used; + const int b_end_offset = b_start_offset + b_requirements->size; + if ((a_first_time_used > b_last_time_used) || + (b_first_time_used > a_last_time_used)) { + // Buffers don't overlap in time. + continue; + } + if ((a_start_offset >= b_end_offset) || + (b_start_offset >= a_end_offset)) { + // No overlap in memory. + continue; + } + were_overlaps_found = true; + MicroPrintf("Overlap: %d (%d=>%d, %d->%d) vs %d (%d=>%d, %d->%d)", i, + a_first_time_used, a_last_time_used, a_start_offset, + a_end_offset, j, b_first_time_used, b_last_time_used, + b_start_offset, b_end_offset); + } + } + return were_overlaps_found; +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/memory_planner/greedy_memory_planner.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/memory_planner/greedy_memory_planner.h new file mode 100644 index 0000000..d77a595 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/memory_planner/greedy_memory_planner.h @@ -0,0 +1,165 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_GREEDY_MEMORY_PLANNER_H_ +#define TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_GREEDY_MEMORY_PLANNER_H_ + +#include "edge-impulse-sdk/tensorflow/lite/micro/compatibility.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/memory_planner/micro_memory_planner.h" + +namespace tflite { + +constexpr int kOnlinePlannedBuffer = -1; + +// A memory planner that uses a greedy algorithm to arrange buffers in memory +// to minimize the overall arena size needed. 
+// +// The algorithm works like this: +// - The client enters the buffer information through AddBuffer(). +// - When a function like GetOffsetForBuffer() is called, the +// CalculateOffsetsIfNeeded() method is invoked. +// - If an up to date plan is not already present, one will be calculated. +// - The buffers are sorted in descending order of size. +// - The largest buffer is placed at offset zero. +// - The rest of the buffers are looped through in descending size order. +// - The other buffers that need to be in memory at the same time are found. +// - The first gap between simultaneously active buffers that the current +// buffer fits into will be used. +// - If no large-enough gap is found, the current buffer is placed after the +// last buffer that's simultaneously active. +// - This continues until all buffers are placed, and the offsets stored. +// +// This is not guaranteed to produce the best placement, since that's an +// NP-Complete problem, but in practice it should produce one that's decent. +class GreedyMemoryPlanner : public MicroMemoryPlanner { + public: + GreedyMemoryPlanner(); + ~GreedyMemoryPlanner() override; + + // You need to pass in an area of memory to be used for planning. The client + // should ensure the validity of the memory when it needs to use this object. + // This memory isn't owned by this object, so management should be handled by + // the client. This is so it can be stack or globally allocated if necessary + // on devices without dynamic memory allocation. How many buffers can be + // planned for will depend on the size of this scratch memory, so you should + // enlarge it if you see an error when calling AddBuffer(). The memory can be + // reused once you're done with the planner, as long as you copy the + // calculated offsets to another location. Each buffer requires about 36 bytes + // of scratch. + TfLiteStatus Init(unsigned char* scratch_buffer, + int scratch_buffer_size) override; + + // Record details of a buffer we want to place. + TfLiteStatus AddBuffer(int size, int first_time_used, + int last_time_used) override; + + // Record details of an offline planned buffer offset we want to place. + // offline_offset is the buffer offset from the start of the arena. + TfLiteStatus AddBuffer(int size, int first_time_used, int last_time_used, + int offline_offset) override; + + // Returns the high-water mark of used memory. This is the minimum size of a + // memory arena you'd need to allocate to hold these buffers. + size_t GetMaximumMemorySize() override; + + // How many buffers have been recorded. + int GetBufferCount() override; + + // Where a given buffer should be placed in the memory arena. + // This information is stored in the memory arena itself, so once the arena + // is used for inference, it will be overwritten. + TfLiteStatus GetOffsetForBuffer(int buffer_index, int* offset) override; + + // Prints an ascii-art diagram of the buffer layout plan. + void PrintMemoryPlan() override; + + // Debug method to check whether any buffer allocations are overlapping. This + // is an O(N^2) complexity operation, so only use for testing. + bool DoAnyBuffersOverlap(); + + // Used to store a list of buffers ordered by their offset. + struct ListEntry { + int offset; + int requirements_index; + int next_entry_index; + }; + + // Number of bytes required in order to plan a buffer. 
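+ // This is the per-buffer scratch overhead summed below: one
+ // BufferRequirements entry, one int each in the sorted size and id arrays,
+ // one ListEntry, and one int for the resulting offset.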
+ static size_t per_buffer_size() { + const int per_buffer_size = + sizeof(BufferRequirements) + // requirements_ + sizeof(int) + // buffer_sizes_sorted_ + sizeof(int) + // buffer_ids_sorted_ + sizeof(ListEntry) + // buffers_sorted_by_offset_ + sizeof(int); // buffer_offsets_; + return per_buffer_size; + } + + private: + // Whether a buffer is active in a given time range. + bool DoesEntryOverlapInTime(const ListEntry* entry, const int first_time_used, + const int last_time_used) const; + + // Walks the list to return the next buffer that is active in a given time + // range, or a null pointer if there are none. + ListEntry* NextSimultaneouslyActiveBuffer(const ListEntry* start, + const int first_time_used, + const int last_time_used); + + // If there isn't an up to date plan, calculate a new one. + void CalculateOffsetsIfNeeded(); + + // How many buffers we can plan for, based on the arena size we're given in + // the constructor. + int max_buffer_count_; + + // The number of buffers added so far. + int buffer_count_; + + // Records the client-provided information about each buffer. + struct BufferRequirements { + int size; + int offline_offset; + int first_time_used; + int last_time_used; + }; + + // Working arrays used during the layout algorithm. + BufferRequirements* requirements_; + // buffer_sizes_sorted_ and buffer_ids_sorted_ are sorted according to: + // { + // offline planned buffers, + // online planned buffers sorted by size + // } + int* buffer_sizes_sorted_; + int* buffer_ids_sorted_; + ListEntry* buffers_sorted_by_offset_; + int next_free_entry_; // Index of the next free entry of + // buffers_sorted_by_offset_ + int first_entry_index_; // Index of the first entry (smallest offset) of + // buffers_sorted_by_offset_ + + // Stores the outcome of the plan, the location of each buffer in the arena. + int* buffer_offsets_; + + // Whether buffers have been added since the last plan was calculated. + bool need_to_calculate_offsets_; + + TF_LITE_REMOVE_VIRTUAL_DELETE +}; + +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_GREEDY_MEMORY_PLANNER_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/memory_planner/linear_memory_planner.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/memory_planner/linear_memory_planner.cpp new file mode 100644 index 0000000..6e21eb6 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/memory_planner/linear_memory_planner.cpp @@ -0,0 +1,56 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/micro/memory_planner/linear_memory_planner.h" + +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +namespace tflite { + +// Patched by Edge Impulse +constexpr int LinearMemoryPlanner::kMaxBufferCount; + +LinearMemoryPlanner::LinearMemoryPlanner() + : current_buffer_count_(0), next_free_offset_(0) {} +LinearMemoryPlanner::~LinearMemoryPlanner() {} + +TfLiteStatus LinearMemoryPlanner::AddBuffer(int size, int first_time_used, + int last_time_used) { + if (current_buffer_count_ >= kMaxBufferCount) { + MicroPrintf("Too many buffers (max is %d)", kMaxBufferCount); + return kTfLiteError; + } + buffer_offsets_[current_buffer_count_] = next_free_offset_; + next_free_offset_ += size; + ++current_buffer_count_; + return kTfLiteOk; +} + +size_t LinearMemoryPlanner::GetMaximumMemorySize() { return next_free_offset_; } + +int LinearMemoryPlanner::GetBufferCount() { return current_buffer_count_; } + +TfLiteStatus LinearMemoryPlanner::GetOffsetForBuffer(int buffer_index, + int* offset) { + if ((buffer_index < 0) || (buffer_index >= current_buffer_count_)) { + MicroPrintf("buffer index %d is outside range 0 to %d", buffer_index, + current_buffer_count_); + return kTfLiteError; + } + *offset = buffer_offsets_[buffer_index]; + return kTfLiteOk; +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/memory_planner/linear_memory_planner.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/memory_planner/linear_memory_planner.h new file mode 100644 index 0000000..f699f8b --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/memory_planner/linear_memory_planner.h @@ -0,0 +1,49 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_LINEAR_MEMORY_PLANNER_H_ +#define TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_LINEAR_MEMORY_PLANNER_H_ + +#include "edge-impulse-sdk/tensorflow/lite/micro/compatibility.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/memory_planner/micro_memory_planner.h" + +namespace tflite { + +// The simplest possible memory planner that just lays out all buffers at +// increasing offsets without trying to reuse memory. 
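+// For example, three AddBuffer() calls of 64 bytes each are assigned offsets
+// 0, 64 and 128, and GetMaximumMemorySize() returns 192, regardless of the
+// buffers' lifetimes.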
+class LinearMemoryPlanner : public MicroMemoryPlanner { + public: + LinearMemoryPlanner(); + ~LinearMemoryPlanner() override; + + TfLiteStatus AddBuffer(int size, int first_time_used, + int last_time_used) override; + + size_t GetMaximumMemorySize() override; + int GetBufferCount() override; + TfLiteStatus GetOffsetForBuffer(int buffer_index, int* offset) override; + + private: + static constexpr int kMaxBufferCount = 1024; + size_t buffer_offsets_[kMaxBufferCount]; + int current_buffer_count_; + size_t next_free_offset_; + + TF_LITE_REMOVE_VIRTUAL_DELETE +}; + +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_LINEAR_MEMORY_PLANNER_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/memory_planner/memory_plan_struct.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/memory_planner/memory_plan_struct.h new file mode 100644 index 0000000..5f3b7ef --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/memory_planner/memory_plan_struct.h @@ -0,0 +1,73 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_MEMORY_PLAN_STRUCT_H_ +#define TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_MEMORY_PLAN_STRUCT_H_ + +#include +#include + +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h" + +namespace tflite { + +// This is an experimental feature and subjected to change. +// More description is available at +// tensorflow/lite/micro/docs/offline_memory_plan.md. + +// Describes a buffer's layout inside an arena. This struct should be kept as +// small as possible for memory footprint sensitive applications and should use +// only primitive fields, making it easy to adjust offline. +struct BufferDescriptor { + // Starting offset inside an arena for this buffer. + // Offset is the minimum information needed for the buffer. The user knows + // the model and the size of each buffer in order to lay out a valid buffer + // plan. + int32_t offset; +}; + +// A structure describing the lay out of buffers inside an arena. +struct BufferPlan { + // Number of buffers described in this plan. + int32_t buffer_count; + + // Each element describes one buffer. + // Buffer index is implicit by the order of AddBuffer() call. + // Specifically, indices of activation tensors are 0 … N-1 where N is the + // number of activation tensors. + // The rest are based on the order of OP requests. + // + // This is a flexible array member and should ideally be + // arena_entries[]; However, in order to support a variety + // of compilers (and without needing to add ifdef's), we + // are implementing the flexible array member with an array of + // length 1 as the last member of the struct. When the size of a BufferPlan + // is needed, use the provided SizeOfBufferPlan(buffer_count) that + // accounts for this implemenatation caveat. 
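+ // For example, a plan describing 3 buffers occupies
+ // SizeOfBufferPlan(3) = sizeof(BufferPlan) + 2 * sizeof(BufferDescriptor)
+ // bytes.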
+ BufferDescriptor buffer_plan_entries[1]; +}; + +// Returns size of a BufferPlan given a buffer count. This size is compile time +// known if buffer_count is a compile time constant. +constexpr size_t SizeOfBufferPlan(int32_t buffer_count) { + // Minus 1 because a BufferPlan struct have a BufferDescriptor already. + // Max to provide a lower bound for the corner case of buffer_count = 0. + return sizeof(BufferPlan) + + sizeof(BufferDescriptor) * Max(buffer_count - 1, 0); +} + +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_MEMORY_PLAN_STRUCT_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/memory_planner/micro_memory_planner.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/memory_planner/micro_memory_planner.h new file mode 100644 index 0000000..0d0d74f --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/memory_planner/micro_memory_planner.h @@ -0,0 +1,91 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_MICRO_MICRO_MEMORY_PLANNER_MEMORY_PLANNER_H_ +#define TENSORFLOW_LITE_MICRO_MICRO_MEMORY_PLANNER_MEMORY_PLANNER_H_ + +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" + +namespace tflite { + +// Interface class for planning the layout of memory buffers during the +// execution of a graph. +// It's designed to be used by a client that iterates in any order through the +// buffers it wants to lay out, and then calls the getter functions for +// information about the calculated layout. For example: +// +// SomeMemoryPlanner planner; +// planner.AddBuffer(100, 0, 1); // Buffer 0 +// planner.AddBuffer(50, 2, 3); // Buffer 1 +// planner.AddBuffer(50, 2, 3); // Buffer 2 +// +// int offset0; +// TF_EXPECT_OK(planner.GetOffsetForBuffer(0, &offset0)); +// int offset1; +// TF_EXPECT_OK(planner.GetOffsetForBuffer(1, &offset1)); +// int offset2; +// TF_EXPECT_OK(planner.GetOffsetForBuffer(2, &offset2)); +// const int arena_size_needed = planner.GetMaximumMemorySize(); +// +// The goal is for applications to be able to experiment with different layout +// strategies without changing their client code, by swapping out classes that +// implement this interface.= +class MicroMemoryPlanner { + public: + MicroMemoryPlanner() {} + virtual ~MicroMemoryPlanner() {} + + // Pass information about a buffer's size and lifetime to the layout + // algorithm. The order this is called implicitly assigns an index to the + // result, so the buffer information that's passed into the N-th call of + // this method will be used as the buffer_index argument to + // GetOffsetForBuffer(). + virtual TfLiteStatus AddBuffer(int size, int first_time_used, + int last_time_used) = 0; + + // Record details of an offline planned buffer offset we want to place. + // offline_offset is the buffer offset from the start of the arena. + // This is to support offline memory planning from the flatbuffer metadata. 
+  // By default, it returns an error.
+  virtual TfLiteStatus AddBuffer(int size, int first_time_used,
+                                 int last_time_used, int offline_offset) {
+    return kTfLiteError;
+  }
+
+  // The largest contiguous block of memory that's needed to hold the layout.
+  virtual size_t GetMaximumMemorySize() = 0;
+  // How many buffers have been added to the planner.
+  virtual int GetBufferCount() = 0;
+  // Calculated layout offset for the N-th buffer added to the planner.
+  virtual TfLiteStatus GetOffsetForBuffer(int buffer_index, int* offset) = 0;
+
+  // Provides the scratch buffer in case the memory planner needs it.
+  // The lifetime of the scratch buffer lasts until the static memory plan
+  // is committed.
+  // The default implementation is for memory planners that do not need a
+  // scratch buffer; it simply returns kTfLiteOk.
+  virtual TfLiteStatus Init(unsigned char* scratch_buffer,
+                            int scratch_buffer_size) {
+    return kTfLiteOk;
+  }
+
+  virtual void PrintMemoryPlan() {
+    // Default does nothing.
+  }
+};
+
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_MICRO_MICRO_MEMORY_PLANNER_MEMORY_PLANNER_H_
diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/memory_planner/non_persistent_buffer_planner_shim.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/memory_planner/non_persistent_buffer_planner_shim.cpp
new file mode 100644
index 0000000..0c1fd6d
--- /dev/null
+++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/memory_planner/non_persistent_buffer_planner_shim.cpp
@@ -0,0 +1,66 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "edge-impulse-sdk/tensorflow/lite/micro/memory_planner/non_persistent_buffer_planner_shim.h"
+
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
+
+namespace tflite {
+
+NonPersistentMemoryPlannerShim::NonPersistentMemoryPlannerShim(
+    const BufferPlan* buffer_plan)
+    : buffer_plan_(buffer_plan), buffer_request_count_(0) {}
+
+NonPersistentMemoryPlannerShim::~NonPersistentMemoryPlannerShim() {}
+
+TfLiteStatus NonPersistentMemoryPlannerShim::AddBuffer(int size,
+                                                       int first_time_used,
+                                                       int last_time_used) {
+  buffer_request_count_++;
+  if (buffer_request_count_ > buffer_plan_->buffer_count) {
+    MicroPrintf(
+        "Attempting to add buffer %d, but only %d buffers in given buffer "
+        "plan.",
+        buffer_request_count_, buffer_plan_->buffer_count);
+    return kTfLiteError;
+  }
+  return kTfLiteOk;
+}
+
+size_t NonPersistentMemoryPlannerShim::GetMaximumMemorySize() {
+  // Simply return 0 to let the framework accept this memory plan,
+  // because the client ensures the validity of the memory plan.
+  return 0;
+}
+
+// How many buffers are in the given memory plan.
+int NonPersistentMemoryPlannerShim::GetBufferCount() { + return buffer_plan_->buffer_count; +} + +TfLiteStatus NonPersistentMemoryPlannerShim::GetOffsetForBuffer( + int buffer_request_index, int* offset) { + if (buffer_request_index >= buffer_plan_->buffer_count) { + MicroPrintf( + "Attempting to get offset for buffer %d, but only %d buffers in given " + "buffer plan.", + buffer_request_index, buffer_plan_->buffer_count); + return kTfLiteError; + } + *offset = buffer_plan_->buffer_plan_entries[buffer_request_index].offset; + return kTfLiteOk; +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/memory_planner/non_persistent_buffer_planner_shim.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/memory_planner/non_persistent_buffer_planner_shim.h new file mode 100644 index 0000000..291c678 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/memory_planner/non_persistent_buffer_planner_shim.h @@ -0,0 +1,129 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_NON_PERSISTENT_MEMORY_PLANNER_SHIM_H__ +#define TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_NON_PERSISTENT_MEMORY_PLANNER_SHIM_H__ + +#include "edge-impulse-sdk/tensorflow/lite/micro/compatibility.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/memory_planner/memory_plan_struct.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/memory_planner/micro_memory_planner.h" + +namespace tflite { + +/* This is an experimental feature and subjected to change. + * +The NonPersistentMemoryPlannerShim enables TFLM to work with an external tooling +that can plan the offset of each non persistent buffer for the Model within the +TFLM arena. + +If the NonPersistentMemoryPlannerShim is used, then the final binary does not +have any of the symbols associated with the GreedyMemoryPlanner which results in +a reduced memory footprint. + +Additionally, the offline planning of the non-persistent buffers can be used to +have a more efficient utilization compared to the GreedyMemoryPlanner. 
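+
+Usage is a thin wrapper around a client-provided BufferPlan. As a rough,
+illustrative sketch (the variable names and offsets below are placeholders
+standing in for whatever the external tooling produces):
+
+  alignas(BufferPlan) static uint8_t plan_storage[SizeOfBufferPlan(2)];
+  BufferPlan* plan = reinterpret_cast<BufferPlan*>(plan_storage);
+  plan->buffer_count = 2;
+  plan->buffer_plan_entries[0].offset = 0;
+  plan->buffer_plan_entries[1].offset = 416;
+  NonPersistentMemoryPlannerShim planner(plan);
+
+The resulting planner can then be supplied wherever the framework accepts a
+MicroMemoryPlanner.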
+ +For example, consider the following hypothetical model: + +A1(400) A2(401) +──┬─────────┠┌─────────── + │ │ │ + │ │ │ + │ â–¼ â–¼ + │ ┌────────┠+ │ │ OP1 │ + │ └───┬────┘ A4(201) + │ A3(10) │ │ + │ │ │ + │ │ │ + │ ┌───┴────┠│ + │ │ OP2 │◄────────┤ + │ └───┬────┘ │ + │ A5(11) │ A6(202) │ + │ │ │ │ + │ â–¼ │ │ + │ ┌────────┠│ │ + │ │ OP3 │◄─┘ │ + │ └───┬────┘ │ + │ │ A8(200) │ + │ A7(12) │ │ │ + │ │ │ │ + │ ┌───┴────â”◄──┘ │ + └──────►│ OP4 │ │ + └───┬────┘◄────────┘ + │ + A9(13) │ + â–¼ + +The GreedyMemoryPlanner will give the following memory layout that requires 1012 +bytes of scratch arena size: + +┌─────────────────────────────────────────┬──────────────────────────┬────────┬───────┠+│ A2(401) │ A1(400) │ A4(201)│ +A3(10)│ +└─────────────────────────────────────────┴──────────────────────────┴────────┴───────┘ + +┌───────────┬──────┬──────┠+│ A6(202) │A5(11)│A7(12)│ +└───────────┴──────┴──────┘ + +┌──────────┬───────┠+│ A8(200) │A9(13) │ +└──────────┴───────┘ + +But a more efficient offline memory plan that requires only 826 bytes of scratch +arena size can be + +┌──────────────────────────────────────┬─────────────────────────────┬───────┬──────┠+│ A1(400) │ A2(401) │ +A3(10)│A5(11)│ +└──────────────────────────────────────┴─────────────────────────────┴───────┴──────┘ + + ┌────────────────┬────────────┬────────┬───────┠+ │A4(201) │ A8(200) │A9(13) +│A7(12) │ └────────────────┴────────────┴────────┴───────┘ + + ┌─────────────┠+ │ A6(202) │ + └─────────────┘ + +*/ +class NonPersistentMemoryPlannerShim : public MicroMemoryPlanner { + public: + // Does not take ownership of buffer_plan, which must refer to a valid + // BufferPlan that outlives this object. + explicit NonPersistentMemoryPlannerShim(const BufferPlan* buffer_plan); + ~NonPersistentMemoryPlannerShim() override; + + TfLiteStatus GetOffsetForBuffer(int buffer_request_index, + int* offset) override; + + TfLiteStatus AddBuffer(int size, int first_time_used, + int last_time_used) override; + size_t GetMaximumMemorySize() override; + int GetBufferCount() override; + + private: + const BufferPlan* buffer_plan_; // not owned, can't be null + + // The number of buffers requested so far. Used for error checking. + int buffer_request_count_; + + TF_LITE_REMOVE_VIRTUAL_DELETE +}; + +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_NON_PERSISTENT_MEMORY_PLANNER_SHIM_H__ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_allocation_info.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_allocation_info.cpp new file mode 100644 index 0000000..296a502 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_allocation_info.cpp @@ -0,0 +1,375 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_allocation_info.h" + +#include + +#include "edge-impulse-sdk/tensorflow/lite/c/c_api_types.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/memory_planner/greedy_memory_planner.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +namespace tflite { + +namespace { +constexpr char kOfflineMemAllocMetadata[] = "OfflineMemoryAllocation"; +constexpr int kUninitializedLifetime = -1; +} // namespace + +// Mark the given Allocation info as first created at the specified allocation +// scope count. Only the first creation must be recorded since the allocation +// scope count monotonically increases throughout the lifetime marking process. +void AllocationInfoBuilder::UpdateFirstCreated(AllocationInfo* current, + int allocation_scope_count) { + TFLITE_DCHECK(current->first_created <= allocation_scope_count); + if (current->first_created == kUninitializedLifetime) { + current->first_created = allocation_scope_count; + } +} + +// Mark the given AllocationInfo as last used at the specified allocation scope +// count. Update the last used marker every time, since the allocation scope +// count monotonically increases through the lifetime marking process. +void AllocationInfoBuilder::UpdateLastUsed(AllocationInfo* current, + int allocation_scope_count) { + TFLITE_DCHECK(current->last_used <= allocation_scope_count); + current->last_used = allocation_scope_count; +} + +TfLiteStatus AllocationInfoBuilder::MarkSubgraphLifetimesIfNecessary( + const Operator* op, internal::ScratchBufferRequest* scratch_buffer_requests, + ScratchBufferHandle* scratch_buffer_handles, + SubgraphAllocations* allocations) { + int first_subgraph_index = -1; + int second_subgraph_index = -1; + const OperatorCode* opcode = + model_->operator_codes()->Get(op->opcode_index()); + switch (opcode->builtin_code()) { + case BuiltinOperator_IF: { + first_subgraph_index = + op->builtin_options_as_IfOptions()->then_subgraph_index(); + second_subgraph_index = + op->builtin_options_as_IfOptions()->else_subgraph_index(); + break; + } + case BuiltinOperator_CALL_ONCE: { + first_subgraph_index = + op->builtin_options_as_CallOnceOptions()->init_subgraph_index(); + break; + } + case BuiltinOperator_WHILE: { + first_subgraph_index = + op->builtin_options_as_WhileOptions()->cond_subgraph_index(); + second_subgraph_index = + op->builtin_options_as_WhileOptions()->body_subgraph_index(); + break; + } + default: { + break; + } + } + if (first_subgraph_index != -1) { + // Enter a new allocation scope for each subgraph. + allocation_scope_count_++; + TF_LITE_ENSURE_STATUS( + MarkAllocationLifetimes(first_subgraph_index, scratch_buffer_requests, + scratch_buffer_handles, allocations)); + } + if (second_subgraph_index != -1) { + // Enter a new allocation scope for each subgraph. 
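+    // (For a WHILE operator, for instance, the first index handled above was
+    // the condition subgraph and this second index is the body subgraph; each
+    // one is walked in its own allocation scope.)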
+ allocation_scope_count_++; + TF_LITE_ENSURE_STATUS( + MarkAllocationLifetimes(second_subgraph_index, scratch_buffer_requests, + scratch_buffer_handles, allocations)); + } + return kTfLiteOk; +} + +TfLiteStatus AllocationInfoBuilder::CreateAllocationInfo( + int scratch_buffer_request_count) { + size_t subgraph_offsets_length = model_->subgraphs()->size() * sizeof(size_t); + info_.subgraph_offsets = + reinterpret_cast(non_persistent_allocator_->AllocateTemp( + subgraph_offsets_length, alignof(size_t))); + if (info_.subgraph_offsets == nullptr) { + MicroPrintf( + "Failed to allocate memory for memory planning, %d bytes required", + subgraph_offsets_length); + return kTfLiteError; + } + size_t tensor_count = 0; + for (size_t subgraph_idx = 0; subgraph_idx < model_->subgraphs()->size(); + subgraph_idx++) { + // Add all tensors in each subgraph to the AllocationInfo array. Even weight + // tensors are added but marked with needs_allocating = false. Including all + // tensors in the graph here simplifies logic. + info_.subgraph_offsets[subgraph_idx] = tensor_count; + tensor_count += model_->subgraphs()->Get(subgraph_idx)->tensors()->size(); + } + info_.tensor_count = tensor_count; + + // Scratch buffer allocations follow tensor allocations, so the scratch offset + // is equal to the number of tensor allocations. + info_.scratch_offset = tensor_count; + info_.allocation_info_count = tensor_count + scratch_buffer_request_count; + info_.scratch_buffer_count = scratch_buffer_request_count; + size_t bytes = sizeof(AllocationInfo) * info_.allocation_info_count; + + // Allocate an array of AllocationInfo structs from the temp section. This + // struct will be used by AllocationInfoBuilder to find buffer usage. + info_.allocation_info = reinterpret_cast( + non_persistent_allocator_->AllocateTemp(bytes, alignof(AllocationInfo))); + if (info_.allocation_info == nullptr) { + MicroPrintf( + "Failed to allocate memory for memory planning, %d bytes required", + bytes); + return kTfLiteError; + } + return kTfLiteOk; +} + +TfLiteStatus AllocationInfoBuilder::FreeAllocationInfo() { + non_persistent_allocator_->DeallocateTemp( + reinterpret_cast(info_.allocation_info)); + non_persistent_allocator_->DeallocateTemp( + reinterpret_cast(info_.subgraph_offsets)); + return kTfLiteOk; +} + +TfLiteStatus AllocationInfoBuilder::ValidateSubgraph( + const SubGraph* subgraph, TfLiteEvalTensor* eval_tensors) { + uint32_t operators_size = NumSubgraphOperators(subgraph); + + for (uint32_t i = 0; i < operators_size; i++) { + const auto op = subgraph->operators()->Get(i); + for (size_t n = 0; + op->intermediates() != nullptr && n < op->intermediates()->size(); + n++) { + const int tensor_index = op->intermediates()->Get(n); + size_t tensor_size = -1; + TF_LITE_ENSURE_STATUS(TfLiteEvalTensorByteLength( + &eval_tensors[tensor_index], &tensor_size)); + if (tensor_size != 0) { + MicroPrintf( + "Does not support intermediate tensor with non-zero size: %d", + tensor_size); + return kTfLiteError; + } + } + } + return kTfLiteOk; +} + +TfLiteStatus AllocationInfoBuilder::InitializeAllocationInfo( + const int32_t* offline_offsets, SubgraphAllocations* allocations) { + AllocationInfo* allocation_info = info_.allocation_info; + // Initialize allocation info for every tensor in every subgraph. 
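+  // Summarizing the loop below: a tensor participates in planning
+  // (needs_allocating == true) only when its data pointer was not already set
+  // from the flatbuffer, it is not a variable tensor, and its byte size is
+  // non-zero. When offline offsets are present they are recorded per tensor,
+  // and offline-planned variable tensors are additionally marked for
+  // allocation.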
+ for (size_t subgraph_idx = 0; subgraph_idx < model_->subgraphs()->size(); + subgraph_idx++) { + const SubGraph* subgraph = model_->subgraphs()->Get(subgraph_idx); + TfLiteEvalTensor* eval_tensors = allocations[subgraph_idx].tensors; + AllocationInfo* subgraph_allocation_info = + &allocation_info[info_.subgraph_offsets[subgraph_idx]]; + + // Ensure constraints are met. + TF_LITE_ENSURE_STATUS(ValidateSubgraph(subgraph, eval_tensors)); + + for (size_t i = 0; i < subgraph->tensors()->size(); ++i) { + AllocationInfo* current = &subgraph_allocation_info[i]; + current->output_ptr = &(eval_tensors[i].data.data); + + TF_LITE_ENSURE_STATUS( + TfLiteEvalTensorByteLength(&eval_tensors[i], ¤t->bytes)); + + current->first_created = kUninitializedLifetime; + current->last_used = kUninitializedLifetime; + current->needs_allocating = + (eval_tensors[i].data.data == nullptr) && + (!subgraph->tensors()->Get(i)->is_variable()) && + (current->bytes != 0); + if (offline_offsets) { + current->offline_offset = offline_offsets[i]; + + // Mark offline planned variable tensors so they can get an offline + // offset and be handled offline. + if (subgraph->tensors()->Get(i)->is_variable() && + current->offline_offset != kOnlinePlannedBuffer) { + current->needs_allocating = true; + } + + } else { + current->offline_offset = kOnlinePlannedBuffer; + } + } + } + // Initialize allocation info for every scratch buffer. + AllocationInfo* scratch_allocation_info = + &allocation_info[info_.scratch_offset]; + for (size_t i = 0; i < info_.scratch_buffer_count; i++) { + AllocationInfo* current = &scratch_allocation_info[i]; + current->first_created = kUninitializedLifetime; + current->last_used = kUninitializedLifetime; + current->needs_allocating = true; + current->offline_offset = kOnlinePlannedBuffer; + } + return kTfLiteOk; +} + +TfLiteStatus AllocationInfoBuilder::MarkAllocationLifetimes( + int subgraph_idx, internal::ScratchBufferRequest* scratch_buffer_requests, + ScratchBufferHandle* scratch_buffer_handles, + SubgraphAllocations* allocations) { + const SubGraph* subgraph = model_->subgraphs()->Get(subgraph_idx); + + AllocationInfo* allocation_info = info_.allocation_info; + // Each subgraph's tensor allocations are in a contiguous block starting at + // subgraph_offsets_[subgraph index] with one entry per tensor. + AllocationInfo* subgraph_allocation_info = + &allocation_info[info_.subgraph_offsets[subgraph_idx]]; + + uint32_t operators_size = NumSubgraphOperators(subgraph); + // Mark all inputs as created at the start of the subgraph invocation. + for (size_t i = 0; + subgraph->inputs() != nullptr && i < subgraph->inputs()->size(); ++i) { + const int tensor_index = subgraph->inputs()->Get(i); + AllocationInfo* current = &subgraph_allocation_info[tensor_index]; + UpdateFirstCreated(current, allocation_scope_count_); + // This will ensure that the tensors that are inputs to the subgraphs + // but not used in any ops also have a reasonable lifetime. + UpdateLastUsed(current, allocation_scope_count_); + } + + for (uint32_t i = 0; i < operators_size; i++) { + // Each operator has a new allocation scope. + allocation_scope_count_++; + const auto* op = subgraph->operators()->Get(i); + // Figure out when the first creation and use of each tensor is. 
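+    // (Outputs of this operator are marked as first created in this scope;
+    // input lifetimes are extended further below, after any subgraphs invoked
+    // by the operator have been walked.)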
+ for (size_t n = 0; op->outputs() != nullptr && n < op->outputs()->size(); + ++n) { + const int tensor_index = op->outputs()->Get(n); + AllocationInfo* current = &subgraph_allocation_info[tensor_index]; + UpdateFirstCreated(current, allocation_scope_count_); + } + + // Keep track of scope count before any subgraphs, so that scratch buffers' + // lifetime within a control flow op properly overlaps with all subgraphs. + int start_allocation_scope_count = allocation_scope_count_; + + // Control flow operators can invoke subgraphs. Plan these subgraphs + // before continuing on to the rest of the graph. + MarkSubgraphLifetimesIfNecessary(op, scratch_buffer_requests, + scratch_buffer_handles, allocations); + + // Figure out when the last use of each tensor is. + for (size_t n = 0; op->inputs() != nullptr && n < op->inputs()->size(); + ++n) { + const int tensor_index = op->inputs()->Get(n); + // Optional bias tensors can have an index of -1 when they are omitted. + if (tensor_index >= 0) { + AllocationInfo* current = &subgraph_allocation_info[tensor_index]; + // No need to update creation since it is either marked by the subgraph + // or producer op, or it is not part of the memory plan (weight, bias + // tensor). + UpdateLastUsed(current, allocation_scope_count_); + } + } + for (size_t n = 0; op->outputs() != nullptr && n < op->outputs()->size(); + ++n) { + const int tensor_index = op->outputs()->Get(n); + AllocationInfo* current = &subgraph_allocation_info[tensor_index]; + UpdateLastUsed(current, allocation_scope_count_); + } + + // Mark thse lifetime of scratch buffers belonging to the current node. This + // operation is O(N * M) where N is the total number of visited nodes and M + // is the total number of scratch buffers. + // TODO(b/217794030): Optimize this memory planning code. + AllocationInfo* scratch_allocation_info = + &allocation_info[info_.scratch_offset]; + for (size_t scratch_idx = 0; scratch_idx < info_.scratch_buffer_count; + scratch_idx++) { + internal::ScratchBufferRequest request = + scratch_buffer_requests[scratch_idx]; + AllocationInfo* current = &scratch_allocation_info[scratch_idx]; + if (request.node_idx == static_cast(i) && + request.subgraph_idx == static_cast(subgraph_idx)) { + ScratchBufferHandle* current_handle = + &(scratch_buffer_handles[scratch_idx]); + current->output_ptr = reinterpret_cast(¤t_handle->data); + current->bytes = request.bytes; + UpdateFirstCreated(current, start_allocation_scope_count); + UpdateLastUsed(current, allocation_scope_count_); + } + } + } + + // Mark all outputs as persistent to the end of the subgraph invocation. + for (size_t i = 0; + subgraph->outputs() != nullptr && i < subgraph->outputs()->size(); ++i) { + const int tensor_index = subgraph->outputs()->Get(i); + AllocationInfo* current = &subgraph_allocation_info[tensor_index]; + // Make sure to assign the First created value of the subgraph output + // This will handle the case where the subgraph is empty. This helps + // ensure all tensors have valid lifetimes before those are used by the + // memory planner. + UpdateFirstCreated(current, allocation_scope_count_); + UpdateLastUsed(current, allocation_scope_count_); + } + return kTfLiteOk; +} + +// Get offline tensors allocation plan. See +// micro/docs/memory_management.md for more info. 
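+// As read below, the "OfflineMemoryAllocation" metadata buffer is treated as
+// an array of 32-bit words: word 2 holds the number of tensors and the
+// per-tensor arena offsets (int32_t values) start at word 3. The first two
+// words are not inspected here; see the referenced docs for the full format.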
+TfLiteStatus AllocationInfoBuilder::GetOfflinePlannedOffsets( + const int32_t** offline_planner_offsets) { + if (model_->metadata()) { + for (size_t i = 0; i < model_->metadata()->size(); ++i) { + auto metadata = model_->metadata()->Get(i); + + if (metadata->name()) { + const size_t metadata_name_size = metadata->name()->size(); + + if ((strncmp(metadata->name()->c_str(), kOfflineMemAllocMetadata, + std::min(metadata_name_size, + strlen(kOfflineMemAllocMetadata))) == 0) && + metadata_name_size == strlen(kOfflineMemAllocMetadata)) { + const flatbuffers::Vector>* buffers = + model_->buffers(); + auto* buffer = (*buffers)[metadata->buffer()]; + auto* array = buffer->data(); + const uint32_t* metadata_buffer = + reinterpret_cast(array->data()); + const size_t nbr_tensors = static_cast(metadata_buffer[2]); + *offline_planner_offsets = + reinterpret_cast(&metadata_buffer[3]); + + if (info_.tensor_count != nbr_tensors) { + MicroPrintf( + "Nbr of offline buffer offsets (%d) in metadata " + "not equal nbr tensors (%d)\n", + nbr_tensors, info_.tensor_count); + return kTfLiteError; + } + } + } + } + } + return kTfLiteOk; +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_allocation_info.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_allocation_info.h new file mode 100644 index 0000000..a02503e --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_allocation_info.h @@ -0,0 +1,139 @@ +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_MICRO_MICRO_ALLOCATION_INFO_H_ +#define TENSORFLOW_LITE_MICRO_MICRO_ALLOCATION_INFO_H_ + +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/compatibility.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/flatbuffer_utils.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_allocator.h" +#include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h" +#include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated_full.h" + +namespace tflite { + +// Used to hold information used during allocation calculations. +struct AllocationInfo { + size_t bytes; + void** output_ptr; + int first_created; + int last_used; + int32_t offline_offset; + bool needs_allocating; +}; + +// Used to hold the allocation info list and related metadata for the entire +// graph (including subgraphs). Since all subgraphs are planned together, the +// allocation info list contains allocations for all subgraphs. Track the offset +// into this list for each subgraph then reserve space to track all allocations. +// +// The AllocationInfo list is a contiguous list of allocations across all +// subgraphs and scratch buffers. Each element here is marked as +// st. 
The following is a possible +// AllocationInfo list: +// [s0t0, s0t1, s1t0, s2t1, s1t2, s3t0, s3t1, scratch0, scratch1, scratch2] +// +// For this example, the subgraph offsets would be [0, 2, 5] and the scratch +// offset would be 7. +struct GraphAllocationInfo { + AllocationInfo* allocation_info; + size_t allocation_info_count; + size_t* subgraph_offsets; + size_t scratch_offset; + size_t tensor_count; + size_t scratch_buffer_count; +}; + +// A helper class to construct AllocationInfo array. This array contains the +// lifetime of tensors / scratch_buffer and will be used to calculate the memory +// plan. Methods need to be called in order from `Create`, Init`, `Add*`, to +// `Finish`. +class AllocationInfoBuilder { + public: + AllocationInfoBuilder(const Model* model, + INonPersistentBufferAllocator* non_persistent_allocator) + : model_(model), non_persistent_allocator_(non_persistent_allocator) {} + + // Check if model contains offline planned buffer offsets. + // - If there's no metadata available, offline_planner_offsets is not set + // - If there's metadata available, offline_planner_offsets will point to the + // first offset in the metadata buffer list. + TfLiteStatus GetOfflinePlannedOffsets( + const int32_t** offline_planner_offsets); + + // Allocate memory for the allocation info array as well as offsets into that + // array for each subgraph. + TfLiteStatus CreateAllocationInfo(int scratch_buffer_request_count); + + // Release memory used for the allocation info array. + TfLiteStatus FreeAllocationInfo(); + + // Initialize AllocationInfo for all tensors and scratch buffers in the graph. + TfLiteStatus InitializeAllocationInfo(const int32_t* offline_offsets, + SubgraphAllocations* allocations); + + // Mark the scope of each tensor and scratch buffer across the graph. Enter + // all possible subgraphs invoked by each control flow operator. This method + // marks the maximum lifetime of each buffer so that tensors are correctly + // planned for all valid invocation flows. + TfLiteStatus MarkAllocationLifetimes( + int subgraph_idx, internal::ScratchBufferRequest* scratch_buffer_request, + ScratchBufferHandle* scratch_buffer_handles, + SubgraphAllocations* allocations); + + // Identify control flow operators and recursively mark all subgraphs which + // that operator can invoke. The lifetime of all tensors within a subgraph + // can only be extended. The order of subgraph invocation does not matter + // since subgraphs within the same control flow operator are executed + // within their own allocation scope (planned buffers in a subgraph cannot + // persist beyond the end of that subgraph's invocation). + TfLiteStatus MarkSubgraphLifetimesIfNecessary( + const Operator* op, + internal::ScratchBufferRequest* scratch_buffer_requests, + ScratchBufferHandle* scratch_buffer_handles, + SubgraphAllocations* allocations); + + // Returns the number of allocations. + int AllocationCount() const { return info_.allocation_info_count; } + + // Returns a pointer to the built AllocationInfo array. + AllocationInfo* Finish() const { return info_.allocation_info; } + + private: + // Mark the given Allocation info as first created at the specified allocation + // scope count. Only the first creation must be recorded since the allocation + // scope count monotonically increases throughout the lifetime marking + // process. + void UpdateFirstCreated(AllocationInfo* current, int allocation_scope_count); + + // Mark the given AllocationInfo as last used at the specified allocation + // scope + // count. 
Update the last used marker every time, since the allocation scope + // count monotonically increases through the lifetime marking process. + void UpdateLastUsed(AllocationInfo* current, int allocation_scope_count); + + // Validate if a subgraph satisfies assumptions. + TfLiteStatus ValidateSubgraph(const SubGraph* subgraph, + TfLiteEvalTensor* eval_tensors); + + const tflite::Model* model_ = nullptr; + INonPersistentBufferAllocator* non_persistent_allocator_ = nullptr; + GraphAllocationInfo info_; + int allocation_scope_count_ = 0; +}; + +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_MICRO_ALLOCATION_INFO_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_allocator.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_allocator.cpp new file mode 100644 index 0000000..2d4b858 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_allocator.cpp @@ -0,0 +1,938 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_allocator.h" + +#include +#include + +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/flatbuffers.h" // from @flatbuffers +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/non_persistent_arena_buffer_allocator.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/persistent_arena_buffer_allocator.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/single_arena_buffer_allocator.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/compatibility.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/flatbuffer_utils.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/memory_planner/greedy_memory_planner.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/memory_planner/micro_memory_planner.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_allocation_info.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_arena_constants.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/flatbuffer_conversions_bridge.h" +#include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h" +#include "edge-impulse-sdk/tensorflow/lite/schema/schema_utils.h" + +namespace tflite { + +namespace { + +// Maximum number of scratch buffer requests per operator. Operator kernels that +// request more than this value will receive an exception. +constexpr size_t kMaxScratchBuffersPerOp = 12; + +// Sentinel value used as a placeholder to mark a ScratchBufferRequest request +// needs a node id assignment. 
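+// (RequestScratchBufferInArena files new requests with this sentinel value,
+// and FinishPrepareNodeAllocations later overwrites it with the id of the
+// node that was being prepared.)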
+constexpr int kUnassignedScratchBufferRequestIndex = -1; + +const TfLiteIntArray kZeroLengthIntArray = {}; + +class MicroBuiltinDataAllocator : public TfLiteBridgeBuiltinDataAllocator { + public: + explicit MicroBuiltinDataAllocator( + IPersistentBufferAllocator* persistent_allocator) + : persistent_allocator_(persistent_allocator) {} + + void* Allocate(size_t size, size_t alignment_hint) override { + return persistent_allocator_->AllocatePersistentBuffer(size, + alignment_hint); + } + void Deallocate(void* data) override { + // Do not deallocate, builtin data needs to be available for the life time + // of the model. + } + + TF_LITE_REMOVE_VIRTUAL_DELETE + + private: + IPersistentBufferAllocator* persistent_allocator_; +}; + +TfLiteStatus CreatePlan(MicroMemoryPlanner* planner, + const AllocationInfo* allocation_info, + size_t allocation_info_size) { + // Add the tensors to our allocation plan. + for (size_t i = 0; i < allocation_info_size; ++i) { + const AllocationInfo* current = &allocation_info[i]; + if (current->needs_allocating) { + size_t aligned_bytes_required = + AlignSizeUp(current->bytes, MicroArenaBufferAlignment()); + if (current->offline_offset == kOnlinePlannedBuffer) { + TF_LITE_ENSURE_STATUS(planner->AddBuffer(aligned_bytes_required, + current->first_created, + current->last_used)); + } else { + TF_LITE_ENSURE_STATUS( + planner->AddBuffer(aligned_bytes_required, current->first_created, + current->last_used, current->offline_offset)); + } + } + } + return kTfLiteOk; +} + +TfLiteStatus CommitPlan(MicroMemoryPlanner* planner, uint8_t* starting_point, + const AllocationInfo* allocation_info, + size_t allocation_info_size) { + // Figure out the actual memory addresses for each buffer, based on the plan. + int planner_index = 0; + for (size_t i = 0; i < allocation_info_size; ++i) { + const AllocationInfo* current = &allocation_info[i]; + if (current->needs_allocating) { + int offset = -1; + TF_LITE_ENSURE_STATUS( + planner->GetOffsetForBuffer(planner_index, &offset)); + *current->output_ptr = reinterpret_cast(starting_point + offset); + ++planner_index; + } + } + return kTfLiteOk; +} + +IPersistentBufferAllocator* CreatePersistentArenaAllocator(uint8_t* buffer_head, + size_t buffer_size) { + // Align the actually used area by the tail because persistent buffer grows + // from the bottom to top. + uint8_t* aligned_buffer_tail = + AlignPointerDown(buffer_head + buffer_size, MicroArenaBufferAlignment()); + size_t aligned_buffer_size = aligned_buffer_tail - buffer_head; + PersistentArenaBufferAllocator tmp = + PersistentArenaBufferAllocator(buffer_head, aligned_buffer_size); + + // Allocate enough bytes from the buffer to create a + // SingleArenaBufferAllocator. The new instance will use the current adjusted + // tail buffer from the tmp allocator instance. + uint8_t* allocator_buffer = + tmp.AllocatePersistentBuffer(sizeof(PersistentArenaBufferAllocator), + alignof(PersistentArenaBufferAllocator)); + // Use the default copy constructor to populate internal states. + return new (allocator_buffer) PersistentArenaBufferAllocator(tmp); +} + +// NonPersistentBufferAllocator instance is created in the persistent buffer +// because it has to be persistent to keep track of the non-persistent buffer +// information. 
+INonPersistentBufferAllocator* CreateNonPersistentArenaAllocator( + uint8_t* buffer_head, size_t buffer_size, + IPersistentBufferAllocator* persistent_buffer_allocator) { + uint8_t* allocator_buffer = + persistent_buffer_allocator->AllocatePersistentBuffer( + sizeof(NonPersistentArenaBufferAllocator), + alignof(NonPersistentArenaBufferAllocator)); + // Align the actually used area by the head because persistent buffer grows + // from the head to bottom. + uint8_t* aligned_buffer_head = + AlignPointerUp(buffer_head, MicroArenaBufferAlignment()); + size_t aligned_buffer_size = buffer_head + buffer_size - aligned_buffer_head; + + INonPersistentBufferAllocator* non_persistent_buffer_allocator = + new (allocator_buffer) NonPersistentArenaBufferAllocator( + aligned_buffer_head, aligned_buffer_size); + return non_persistent_buffer_allocator; +} + +} // namespace + +namespace internal { + +// Returns a pointer to any buffer associated with the flatbuffer tensor. Can +// return nullptr if no buffer is found. +void* GetFlatbufferTensorBuffer( + const tflite::Tensor& flatbuffer_tensor, + const flatbuffers::Vector>* buffers) { + // We need to figure out where the actual contents of this tensor are stored + // in memory. We'll check to see if there's a serialized buffer (pretty much + // the same as a constant op in TensorFlow) associated with this tensor first, + // and if there is update the runtime structure to point to its location in + // memory. + // First see if there's any buffer information in the serialized tensor. + // TODO(b/170379532): Add better unit tests to validate flatbuffer values. + void* out_buffer = nullptr; + if (auto* buffer = (*buffers)[flatbuffer_tensor.buffer()]) { + // If we've found a buffer, does it have any data? + if (auto* array = buffer->data()) { + // If it has any data, is the data size larger than zero? + if (array->size()) { + // We've found a buffer with valid data, so update the runtime tensor + // data structure to point to it. + out_buffer = const_cast(static_cast(array->data())); + } + } + // TODO(petewarden): It's not clear in what circumstances we could have a + // buffer in the serialized tensor, but it doesn't have any data in it. Is + // that a validly-generated file, and if so what does it mean, or is it an + // error condition? It would be good to tighten up the specification to make + // it less ambiguous. + } + return out_buffer; +} + +TfLiteStatus InitializeTfLiteTensorFromFlatbuffer( + IPersistentBufferAllocator* persistent_buffer_allocator, + INonPersistentBufferAllocator* non_persistent_buffer_allocator, + bool allocate_temp, const tflite::Tensor& flatbuffer_tensor, + const flatbuffers::Vector>* buffers, + TfLiteTensor* result) { + TFLITE_DCHECK(result != nullptr); + + *result = {}; + // Make sure the serialized type is one we know how to deal with, and convert + // it from a flatbuffer enum into a constant used by the kernel C API. + TF_LITE_ENSURE_STATUS( + tflite::ConvertTensorType(flatbuffer_tensor.type(), &result->type)); + // Make sure we remember if the serialized tensor is designated as a variable. + result->is_variable = flatbuffer_tensor.is_variable(); + + result->data.data = GetFlatbufferTensorBuffer(flatbuffer_tensor, buffers); + + // TODO(petewarden): Some of these paths aren't getting enough testing + // coverage, so we should figure out some tests that exercise them. + if (result->data.data == nullptr) { + // The tensor contents haven't been set from a serialized buffer, so + // make a note that they will be allocated from memory. 
The actual + // allocation won't happen until later. + result->allocation_type = kTfLiteArenaRw; + } else { + // We set the data from a serialized buffer, so record tha. + result->allocation_type = kTfLiteMmapRo; + } + + // Figure out what the size in bytes of the buffer is and store it. + size_t type_size; + TF_LITE_ENSURE_STATUS( + BytesRequiredForTensor(flatbuffer_tensor, &result->bytes, &type_size)); + + if (flatbuffer_tensor.shape() == nullptr) { + // flatbuffer_tensor.shape() can return a nullptr in the case of a scalar + // tensor. + // TODO(b/188459715): figure out why const_cast is required here. + result->dims = const_cast(&kZeroLengthIntArray); + } else { + // TFLM doesn't allow reshaping the tensor which requires dynamic memory + // allocation so it is safe to drop the const qualifier. In the future, if + // we really want to update the tensor shape, we can always pass in a new + // TfLiteIntArray - especially we have to do so if the dimension is + result->dims = FlatBufferVectorToTfLiteTypeArray(flatbuffer_tensor.shape()); + } + + // Copy the quantization information from the serialized data. + const auto* src_quantization = flatbuffer_tensor.quantization(); + if (src_quantization && src_quantization->scale() && + (src_quantization->scale()->size() > 0) && + src_quantization->zero_point() && + (src_quantization->zero_point()->size() > 0)) { + // Always populate the TfLiteTensor.params field, even if there are + // per-channel quantization parameters. + result->params.scale = src_quantization->scale()->Get(0); + // Note that the zero_point field in the FlatBuffers schema is a 64-bit + // integer, but the zero_point field in the TfLiteQuantizationParams struct + // is a 32-bit integer. + result->params.zero_point = + static_cast(src_quantization->zero_point()->Get(0)); + + // Populate per-channel quantization params. + int channels = src_quantization->scale()->size(); + TfLiteAffineQuantization* quantization = + allocate_temp + ? reinterpret_cast( + non_persistent_buffer_allocator->AllocateTemp( + sizeof(TfLiteAffineQuantization), + alignof(TfLiteAffineQuantization))) + : reinterpret_cast( + persistent_buffer_allocator->AllocatePersistentBuffer( + sizeof(TfLiteAffineQuantization), + alignof(TfLiteAffineQuantization))); + if (quantization == nullptr) { + MicroPrintf("Unable to allocate TfLiteAffineQuantization.\n"); + return kTfLiteError; + } + + // TODO(b/153688719): Reduce tail allocation by using a global zero-point + // buffer. This value can not be reused from the flatbuffer since the + // zero_point is stored as a int64_t. + quantization->zero_point = + allocate_temp + ? reinterpret_cast( + non_persistent_buffer_allocator->AllocateTemp( + TfLiteIntArrayGetSizeInBytes(channels), + alignof(TfLiteIntArray))) + : reinterpret_cast( + persistent_buffer_allocator->AllocatePersistentBuffer( + TfLiteIntArrayGetSizeInBytes(channels), + alignof(TfLiteIntArray))); + if (quantization->zero_point == nullptr) { + MicroPrintf("Unable to allocate quantization->zero_point.\n"); + return kTfLiteError; + } + + quantization->scale = + FlatBufferVectorToTfLiteTypeArray(src_quantization->scale()); + + quantization->zero_point->size = channels; + int* zero_point_data = quantization->zero_point->data; + for (int i = 0; i < channels; i++) { + // As a space-saving optimization, zero point arrays for weights can be + // reduced to a single value, since all zero points for weights are 0. + zero_point_data[i] = src_quantization->zero_point()->size() == + src_quantization->scale()->size() + ? 
src_quantization->zero_point()->Get(i) + : src_quantization->zero_point()->Get(0); + } + // TODO(rocky): Need to add a micro_allocator test case that fails when + // this is not copied: + quantization->quantized_dimension = src_quantization->quantized_dimension(); + + result->quantization = {kTfLiteAffineQuantization, quantization}; + } + return kTfLiteOk; +} + +TfLiteStatus InitializeTfLiteEvalTensorFromFlatbuffer( + const tflite::Tensor& flatbuffer_tensor, + const flatbuffers::Vector>* buffers, + TfLiteEvalTensor* result) { + *result = {}; + // Make sure the serialized type is one we know how to deal with, and convert + // it from a flatbuffer enum into a constant used by the kernel C API. + TF_LITE_ENSURE_STATUS( + tflite::ConvertTensorType(flatbuffer_tensor.type(), &result->type)); + + result->data.data = GetFlatbufferTensorBuffer(flatbuffer_tensor, buffers); + + if (flatbuffer_tensor.shape() == nullptr) { + // flatbuffer_tensor.shape() can return a nullptr in the case of a scalar + // tensor. + result->dims = const_cast(&kZeroLengthIntArray); + } else { + result->dims = FlatBufferVectorToTfLiteTypeArray(flatbuffer_tensor.shape()); + } + return kTfLiteOk; +} + +} // namespace internal + +size_t MicroAllocator::GetDefaultTailUsage(bool is_memory_planner_given) { + size_t total_size = AlignSizeUp() + + AlignSizeUp() + + AlignSizeUp() + + AlignSizeUp(); + if (!is_memory_planner_given) { + total_size += AlignSizeUp(); + } + return total_size; +} + +MicroAllocator::MicroAllocator(SingleArenaBufferAllocator* memory_allocator, + MicroMemoryPlanner* memory_planner) + : non_persistent_buffer_allocator_(memory_allocator), + persistent_buffer_allocator_(memory_allocator), + memory_planner_(memory_planner), + model_is_allocating_(false) {} + +MicroAllocator::MicroAllocator( + IPersistentBufferAllocator* persistent_buffer_allocator, + INonPersistentBufferAllocator* non_persistent_buffer_allocator, + MicroMemoryPlanner* memory_planner) + : non_persistent_buffer_allocator_(non_persistent_buffer_allocator), + persistent_buffer_allocator_(persistent_buffer_allocator), + memory_planner_(memory_planner), + model_is_allocating_(false) {} + +MicroAllocator::~MicroAllocator() {} + +MicroAllocator* MicroAllocator::Create(uint8_t* tensor_arena, size_t arena_size, + MicroMemoryPlanner* memory_planner) { + uint8_t* aligned_arena = + AlignPointerUp(tensor_arena, MicroArenaBufferAlignment()); + size_t aligned_arena_size = tensor_arena + arena_size - aligned_arena; + SingleArenaBufferAllocator* memory_allocator = + SingleArenaBufferAllocator::Create(aligned_arena, aligned_arena_size); + + return Create(memory_allocator, memory_planner); +} + +MicroAllocator* MicroAllocator::Create(uint8_t* tensor_arena, + size_t arena_size) { + uint8_t* aligned_arena = + AlignPointerUp(tensor_arena, MicroArenaBufferAlignment()); + size_t aligned_arena_size = tensor_arena + arena_size - aligned_arena; + SingleArenaBufferAllocator* memory_allocator = + SingleArenaBufferAllocator::Create(aligned_arena, aligned_arena_size); + + // By default create GreedyMemoryPlanner. + // If a different MemoryPlanner is needed, use the other api. 
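+  // An illustrative sketch of that alternative (the planner type and variable
+  // names are only examples):
+  //
+  //   static LinearMemoryPlanner planner;
+  //   MicroAllocator* allocator =
+  //       MicroAllocator::Create(tensor_arena, arena_size, &planner);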
+ uint8_t* memory_planner_buffer = memory_allocator->AllocatePersistentBuffer( + sizeof(GreedyMemoryPlanner), alignof(GreedyMemoryPlanner)); + GreedyMemoryPlanner* memory_planner = + new (memory_planner_buffer) GreedyMemoryPlanner(); + + return Create(memory_allocator, memory_planner); +} + +MicroAllocator* MicroAllocator::Create( + SingleArenaBufferAllocator* memory_allocator, + MicroMemoryPlanner* memory_planner) { + TFLITE_DCHECK(memory_allocator != nullptr); + TFLITE_DCHECK(memory_planner != nullptr); + + uint8_t* allocator_buffer = memory_allocator->AllocatePersistentBuffer( + sizeof(MicroAllocator), alignof(MicroAllocator)); + MicroAllocator* allocator = new (allocator_buffer) + MicroAllocator(memory_allocator, memory_allocator, memory_planner); + return allocator; +} + +MicroAllocator* MicroAllocator::Create(uint8_t* persistent_tensor_arena, + size_t persistent_arena_size, + uint8_t* non_persistent_tensor_arena, + size_t non_persistent_arena_size) { + TFLITE_DCHECK(persistent_tensor_arena != nullptr); + TFLITE_DCHECK(non_persistent_tensor_arena != nullptr); + TFLITE_DCHECK(persistent_tensor_arena != non_persistent_tensor_arena); + + IPersistentBufferAllocator* persistent_buffer_allocator = + CreatePersistentArenaAllocator(persistent_tensor_arena, + persistent_arena_size); + INonPersistentBufferAllocator* non_persistent_buffer_allocator = + CreateNonPersistentArenaAllocator(non_persistent_tensor_arena, + non_persistent_arena_size, + persistent_buffer_allocator); + + uint8_t* memory_planner_buffer = + persistent_buffer_allocator->AllocatePersistentBuffer( + sizeof(GreedyMemoryPlanner), alignof(GreedyMemoryPlanner)); + GreedyMemoryPlanner* memory_planner = + new (memory_planner_buffer) GreedyMemoryPlanner(); + + uint8_t* micro_allocator_buffer = + persistent_buffer_allocator->AllocatePersistentBuffer( + sizeof(MicroAllocator), alignof(MicroAllocator)); + MicroAllocator* allocator = new (micro_allocator_buffer) + MicroAllocator(persistent_buffer_allocator, + non_persistent_buffer_allocator, memory_planner); + return allocator; +} + +SubgraphAllocations* MicroAllocator::StartModelAllocation(const Model* model) { + TFLITE_DCHECK(model != nullptr); + + if (model_is_allocating_) { + MicroPrintf( + "MicroAllocator: Model allocation started before " + "finishing previously allocated model"); + return nullptr; + } + + model_is_allocating_ = true; + + uint8_t* data_allocator_buffer = + persistent_buffer_allocator_->AllocatePersistentBuffer( + sizeof(MicroBuiltinDataAllocator), + alignof(MicroBuiltinDataAllocator)); + builtin_data_allocator_ = new (data_allocator_buffer) + MicroBuiltinDataAllocator(persistent_buffer_allocator_); + + if (InitScratchBufferData() != kTfLiteOk) { + return nullptr; + } + + // Allocate struct to store eval tensors, nodes and registrations. 
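+  // (One SubgraphAllocations entry is allocated per subgraph in the model.)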
+ SubgraphAllocations* output = reinterpret_cast( + persistent_buffer_allocator_->AllocatePersistentBuffer( + sizeof(SubgraphAllocations) * model->subgraphs()->size(), + alignof(SubgraphAllocations))); + if (output == nullptr) { + MicroPrintf("Failed to allocate memory for model metadata."); + return nullptr; + } + + if (AllocateTfLiteEvalTensors(model, output) != kTfLiteOk || + AllocateNodeAndRegistrations(model, output) != kTfLiteOk) { + return nullptr; + } + return output; +} + +TfLiteStatus MicroAllocator::FinishModelAllocation( + const Model* model, SubgraphAllocations* subgraph_allocations, + ScratchBufferHandle** scratch_buffer_handles) { + if (!model_is_allocating_) { + MicroPrintf( + "MicroAllocator: Model allocation finished before " + "starting allocating model"); + return kTfLiteError; + } + + // Allocate scratch buffer metadata. + TF_LITE_ENSURE_STATUS(AllocateScratchBufferHandles( + scratch_buffer_handles, scratch_buffer_request_count_)); + + // Plan all subgraphs and scratch buffers together. + TF_LITE_ENSURE_STATUS(CommitStaticMemoryPlan(model, subgraph_allocations, + *scratch_buffer_handles)); + model_is_allocating_ = false; + return kTfLiteOk; +} + +void* MicroAllocator::AllocatePersistentBuffer(size_t bytes) { + return persistent_buffer_allocator_->AllocatePersistentBuffer( + bytes, MicroArenaBufferAlignment()); +} + +TfLiteStatus MicroAllocator::RequestScratchBufferInArena(size_t bytes, + int subgraph_idx, + int* buffer_idx) { + // All scratch buffer requests are stored in the head section of the arena + // when a model is in the prepare phase. First align a scratch buffer request + // pointer to the start of the head: + internal::ScratchBufferRequest* requests = GetScratchBufferRequests(); + + // Count the number of requested scratch buffers for the current node: + size_t current_node_request_count = 0; + for (size_t i = 0; i < scratch_buffer_request_count_; ++i) { + if (requests[i].node_idx == kUnassignedScratchBufferRequestIndex) { + ++current_node_request_count; + } + } + + // First, ensure that the per-kernel request has not exceeded the limit: + if (current_node_request_count >= kMaxScratchBuffersPerOp) { + MicroPrintf("Scratch buffer request exeeds limit per operator (%d)", + kMaxScratchBuffersPerOp); + return kTfLiteError; + } + + // Initialize and assign values for the request at the current index: + internal::ScratchBufferRequest* current_request = + &requests[scratch_buffer_request_count_]; + *current_request = {}; + // Assign -1 as a sentinel value that will be updated when the node finishes + // allocating: + current_request->bytes = bytes; + current_request->node_idx = kUnassignedScratchBufferRequestIndex; + current_request->subgraph_idx = subgraph_idx; + + // Assign the current request index to the out-param: + *buffer_idx = scratch_buffer_request_count_; + + // Bump the request count to prepare for the next request: + ++scratch_buffer_request_count_; + return kTfLiteOk; +} + +TfLiteStatus MicroAllocator::FinishPrepareNodeAllocations(int node_id) { + // When a node has finished preparing, all temp allocations performed by the + // kernel should be cleaned up: + TF_LITE_ENSURE_STATUS(ResetTempAllocations()); + + // Find and update any new scratch buffer requests for the current node: + internal::ScratchBufferRequest* requests = GetScratchBufferRequests(); + + for (size_t i = 0; i < scratch_buffer_request_count_; ++i) { + // A request with a node_idx of -1 is a sentinel value used to indicate this + // was a new request for the current node. 
The allocator finally knows the + // node index at this point. Assign the value and update the list of new + // requests so the head section can be adjusted to allow for the next kernel + // to allocate at most kMaxScratchBuffersPerOp requests: + if (requests[i].node_idx == kUnassignedScratchBufferRequestIndex) { + requests[i].node_idx = node_id; + } + } + + // Ensure that the head is re-adjusted to allow for another at-most + // kMaxScratchBuffersPerOp scratch buffer requests in the next operator: + TF_LITE_ENSURE_STATUS(non_persistent_buffer_allocator_->ResizeBuffer( + scratch_buffer_head_, + sizeof(internal::ScratchBufferRequest) * + (scratch_buffer_request_count_ + kMaxScratchBuffersPerOp), + alignof(internal::ScratchBufferRequest))); + + return kTfLiteOk; +} + +size_t MicroAllocator::used_bytes() const { + return non_persistent_buffer_allocator_->GetNonPersistentUsedBytes() + + persistent_buffer_allocator_->GetPersistentUsedBytes(); +} + +TfLiteStatus MicroAllocator::AllocateNodeAndRegistrations( + const Model* model, SubgraphAllocations* subgraph_allocations) { + TFLITE_DCHECK(subgraph_allocations != nullptr); + + for (size_t subgraph_idx = 0; subgraph_idx < model->subgraphs()->size(); + subgraph_idx++) { + const SubGraph* subgraph = model->subgraphs()->Get(subgraph_idx); + TFLITE_DCHECK(subgraph != nullptr); + + uint32_t operators_size = NumSubgraphOperators(subgraph); + + // Initialize NodeAndRegistrations for the subgraph. + NodeAndRegistration* output = reinterpret_cast( + persistent_buffer_allocator_->AllocatePersistentBuffer( + sizeof(NodeAndRegistration) * operators_size, + alignof(NodeAndRegistration))); + if (output == nullptr) { + MicroPrintf("Failed to allocate memory for node_and_registrations."); + return kTfLiteError; + } + subgraph_allocations[subgraph_idx].node_and_registrations = output; + } + return kTfLiteOk; +} + +TfLiteTensor* MicroAllocator::AllocatePersistentTfLiteTensor( + const Model* model, const SubgraphAllocations* subgraph_allocations, + int tensor_index, int subgraph_index) { + const SubGraph* subgraph = model->subgraphs()->Get(subgraph_index); + TFLITE_DCHECK(subgraph != nullptr); + + // This value is allocated from persistent arena space. It is guaranteed to be + // around for the lifetime of the application. + TfLiteTensor* tensor = AllocatePersistentTfLiteTensorInternal(); + + // Populate any fields from the flatbuffer, since this TfLiteTensor struct is + // allocated in the persistent section of the arena, ensure that additional + // allocations also take place in that section of the arena. + if (PopulateTfLiteTensorFromFlatbuffer( + model, tensor, tensor_index, subgraph_index, + /*allocate_temp=*/false) != kTfLiteOk) { + MicroPrintf( + "Failed to populate a persistent TfLiteTensor struct " + "from flatbuffer data!"); + return nullptr; + } + + if (subgraph_allocations != nullptr) { + // Tensor buffers that are allocated at runtime (e.g. non-weight buffers) + // and not located in the flatbuffer are stored on the pre-allocated list of + // TfLiteEvalTensors structs. These structs are the source of truth, simply + // point the corresponding buffer to the new TfLiteTensor data value. + tensor->data.data = + subgraph_allocations[subgraph_index].tensors[tensor_index].data.data; + // TfLiteEvalTensor structs must also be the source of truth for the + // TfLiteTensor dims. 
+ tensor->dims = + subgraph_allocations[subgraph_index].tensors[tensor_index].dims; + } + return tensor; +} + +void MicroAllocator::DeallocateTempTfLiteTensor(TfLiteTensor* tensor) { + TFLITE_DCHECK(tensor != nullptr); + + if (tensor->quantization.type == kTfLiteAffineQuantization) { + TFLITE_DCHECK(tensor->quantization.params != nullptr); + TfLiteAffineQuantization* quantization = + reinterpret_cast( + tensor->quantization.params); + + non_persistent_buffer_allocator_->DeallocateTemp( + reinterpret_cast(quantization->zero_point)); + non_persistent_buffer_allocator_->DeallocateTemp( + reinterpret_cast(quantization)); + } + + // Clear the data in case someone still access tensor arena by mistake + tensor->quantization.type = kTfLiteNoQuantization; + tensor->quantization.params = nullptr; + tensor->data.data = nullptr; + tensor->dims = nullptr; + non_persistent_buffer_allocator_->DeallocateTemp( + reinterpret_cast(tensor)); +} + +TfLiteTensor* MicroAllocator::AllocateTempTfLiteTensor( + const Model* model, const SubgraphAllocations* subgraph_allocations, + int tensor_index, int subgraph_index) { + const SubGraph* subgraph = model->subgraphs()->Get(subgraph_index); + TFLITE_DCHECK(subgraph != nullptr); + + // This value is allocated from temporary arena space. It is guaranteed to be + // around for at least the scope of the calling function. Since this struct + // allocation takes place in temp space, no need to own or cleanup. + TfLiteTensor* tensor = reinterpret_cast( + non_persistent_buffer_allocator_->AllocateTemp(sizeof(TfLiteTensor), + alignof(TfLiteTensor))); + + // Populate any fields from the flatbuffer, since this TfLiteTensor struct is + // allocated in the temp section of the arena, ensure that additional + // allocations also take place in that section of the arena. + if (PopulateTfLiteTensorFromFlatbuffer(model, tensor, tensor_index, + subgraph_index, + /*allocate_temp=*/true) != kTfLiteOk) { + MicroPrintf( + "Failed to populate a temp TfLiteTensor struct from flatbuffer data!"); + return nullptr; + } + + if (subgraph_allocations != nullptr) { + // Tensor buffers that are allocated at runtime (e.g. non-weight buffers) + // and not located in the flatbuffer are stored on the pre-allocated list of + // TfLiteEvalTensors structs. These structs are the source of truth, simply + // point the corresponding buffer to the new TfLiteTensor data value. + tensor->data.data = + subgraph_allocations[subgraph_index].tensors[tensor_index].data.data; + // TfLiteEvalTensor structs must also be the source of truth for the + // TfLiteTensor dims. 
+ tensor->dims = + subgraph_allocations[subgraph_index].tensors[tensor_index].dims; + } + return tensor; +} + +TfLiteStatus MicroAllocator::ResetTempAllocations() { + return non_persistent_buffer_allocator_->ResetTempAllocations(); +} + +bool MicroAllocator::IsAllTempDeallocated() { + return non_persistent_buffer_allocator_->IsAllTempDeallocated(); +} + +TfLiteStatus MicroAllocator::AllocateTfLiteEvalTensors( + const Model* model, SubgraphAllocations* subgraph_allocations) { + TFLITE_DCHECK(subgraph_allocations != nullptr); + + for (size_t subgraph_idx = 0; subgraph_idx < model->subgraphs()->size(); + subgraph_idx++) { + const SubGraph* subgraph = model->subgraphs()->Get(subgraph_idx); + TFLITE_DCHECK(subgraph != nullptr); + + size_t alloc_count = subgraph->tensors()->size(); + TfLiteEvalTensor* tensors = reinterpret_cast( + persistent_buffer_allocator_->AllocatePersistentBuffer( + sizeof(TfLiteEvalTensor) * alloc_count, alignof(TfLiteEvalTensor))); + if (tensors == nullptr) { + MicroPrintf( + "Failed to allocate memory for context->eval_tensors, " + "%d bytes required", + sizeof(TfLiteEvalTensor) * alloc_count); + return kTfLiteError; + } + + for (size_t i = 0; i < alloc_count; ++i) { + TfLiteStatus status = internal::InitializeTfLiteEvalTensorFromFlatbuffer( + *subgraph->tensors()->Get(i), model->buffers(), &tensors[i]); + if (status != kTfLiteOk) { + MicroPrintf("Failed to initialize tensor %d", i); + return kTfLiteError; + } + } + subgraph_allocations[subgraph_idx].tensors = tensors; + } + return kTfLiteOk; +} + +TfLiteStatus MicroAllocator::AllocateVariables( + const SubGraph* subgraph, TfLiteEvalTensor* eval_tensors, + const int32_t* offline_planner_offsets) { + for (size_t i = 0; i < subgraph->tensors()->size(); ++i) { + auto* tensor = subgraph->tensors()->Get(i); + if (tensor->is_variable()) { + if (offline_planner_offsets == nullptr || + offline_planner_offsets[i] == kOnlinePlannedBuffer) { + size_t buffer_size; + TF_LITE_ENSURE_STATUS( + TfLiteEvalTensorByteLength(&eval_tensors[i], &buffer_size)); + + eval_tensors[i].data.data = + persistent_buffer_allocator_->AllocatePersistentBuffer( + buffer_size, MicroArenaBufferAlignment()); + + if (eval_tensors[i].data.data == nullptr) { + MicroPrintf("Failed to allocate variable tensor of size %d", + buffer_size); + return kTfLiteError; + } + } + } + } + return kTfLiteOk; +} + +TfLiteTensor* MicroAllocator::AllocatePersistentTfLiteTensorInternal() { + return reinterpret_cast( + persistent_buffer_allocator_->AllocatePersistentBuffer( + sizeof(TfLiteTensor), alignof(TfLiteTensor))); +} + +TfLiteStatus MicroAllocator::PopulateTfLiteTensorFromFlatbuffer( + const Model* model, TfLiteTensor* tensor, int tensor_index, + int subgraph_idx, bool allocate_temp) { + // TODO(b/162311891): This method serves as a stub to ensure quantized + // allocations in the tail can be recorded. Once the interpreter has APIs for + // accessing buffers on TfLiteEvalTensor this method can be dropped. + return internal::InitializeTfLiteTensorFromFlatbuffer( + persistent_buffer_allocator_, non_persistent_buffer_allocator_, + allocate_temp, + *model->subgraphs()->Get(subgraph_idx)->tensors()->Get(tensor_index), + model->buffers(), tensor); +} + +TfLiteStatus MicroAllocator::CommitStaticMemoryPlan( + const Model* model, SubgraphAllocations* allocations, + ScratchBufferHandle* scratch_buffer_handles) { + size_t head_usage = 0; + // Create static memory plan + // 1. Calculate AllocationInfo to know the lifetime of each tensor/buffer. + // 2. 
Add them into the planner (such as the GreedyMemoryPlanner). + // 3. Static memory planning using the planner. + // 4. Set tensor/buffer pointers based on the offsets from the previous step. + // + // Note that AllocationInfo is only needed for creating the plan. It will be + // allocated from the temp section and cleaned up at the bottom of this + // function. + + // Use the AllocationInfoBuilder class to help determine where buffers are + // used in the subgraph. + AllocationInfoBuilder builder(model, non_persistent_buffer_allocator_); + TF_LITE_ENSURE_STATUS( + builder.CreateAllocationInfo(scratch_buffer_request_count_)); + + const int32_t* offline_planner_offsets = nullptr; + TF_LITE_ENSURE_STATUS( + builder.GetOfflinePlannedOffsets(&offline_planner_offsets)); + + // We allocate buffers for variable tensors here since the offline planner + // offsets are conviently available here. + for (size_t subgraph_idx = 0; subgraph_idx < model->subgraphs()->size(); + subgraph_idx++) { + const SubGraph* subgraph = model->subgraphs()->Get(subgraph_idx); + TFLITE_DCHECK(subgraph != nullptr); + TF_LITE_ENSURE_STATUS(AllocateVariables( + subgraph, allocations[subgraph_idx].tensors, offline_planner_offsets)); + } + + TF_LITE_ENSURE_STATUS( + builder.InitializeAllocationInfo(offline_planner_offsets, allocations)); + + internal::ScratchBufferRequest* scratch_buffer_requests = + GetScratchBufferRequests(); + TF_LITE_ENSURE_STATUS(builder.MarkAllocationLifetimes( + 0, scratch_buffer_requests, scratch_buffer_handles, allocations)); + int allocation_info_count = builder.AllocationCount(); + AllocationInfo* allocation_info = builder.Finish(); + + // Remaining arena size that memory planner can use for calculating offsets. + size_t remaining_arena_size = + non_persistent_buffer_allocator_->GetAvailableMemory( + MicroArenaBufferAlignment()); + uint8_t* planner_arena = non_persistent_buffer_allocator_->AllocateTemp( + remaining_arena_size, MicroArenaBufferAlignment()); + + if (planner_arena == nullptr) { + return kTfLiteError; + } + + memory_planner_->Init(planner_arena, remaining_arena_size); + TF_LITE_ENSURE_STATUS( + CreatePlan(memory_planner_, allocation_info, allocation_info_count)); + + // Commit the plan. + TF_LITE_ENSURE_STATUS( + CommitPlan(memory_planner_, + non_persistent_buffer_allocator_->GetOverlayMemoryAddress(), + allocation_info, allocation_info_count)); + + // Reset all temp allocations used above: + builder.FreeAllocationInfo(); + non_persistent_buffer_allocator_->DeallocateTemp(planner_arena); + TF_LITE_ENSURE_STATUS( + non_persistent_buffer_allocator_->ResetTempAllocations()); + TF_LITE_ENSURE_STATUS( + non_persistent_buffer_allocator_->DeallocateResizableBuffer( + scratch_buffer_head_)); + +#ifdef TF_LITE_SHOW_MEMORY_USE + memory_planner_->PrintMemoryPlan(); +#endif + head_usage = memory_planner_->GetMaximumMemorySize(); + + // The head is used to store memory plans for one model at a time during the + // model preparation stage, and is re-purposed to store scratch buffer handles + // during model invocation. The head must be as large as the greater of the + // largest model memory plan's size and the total space required for all + // scratch buffer handles. + if (max_head_buffer_usage_ < head_usage) { + max_head_buffer_usage_ = head_usage; + } + + // The head is used for storing scratch buffer allocations before finalizing a + // memory plan in this function. 
Ensure that the head is set to the largest + // memory plan sent through the allocator: + TF_LITE_ENSURE_STATUS( + non_persistent_buffer_allocator_->ReserveNonPersistentOverlayMemory( + max_head_buffer_usage_, MicroArenaBufferAlignment())); + return kTfLiteOk; +} + +TfLiteStatus MicroAllocator::AllocateScratchBufferHandles( + ScratchBufferHandle** scratch_buffer_handles, size_t handle_count) { + TFLITE_DCHECK(scratch_buffer_handles != nullptr); + + if (scratch_buffer_request_count_ == 0) { + // No scratch buffer requests were requested during model allocation. + return kTfLiteOk; + } + + // Allocate a consecutive block of memory store the scratch buffer handles. + // This alignment ensures quick lookup during inference time for the model: + *scratch_buffer_handles = reinterpret_cast( + persistent_buffer_allocator_->AllocatePersistentBuffer( + sizeof(ScratchBufferHandle) * handle_count, + alignof(ScratchBufferHandle))); + + return kTfLiteOk; +} + +TfLiteStatus MicroAllocator::InitScratchBufferData() { + // A model is preparing to allocate resources, ensure that scratch buffer + // request counter is cleared: + scratch_buffer_request_count_ = 0; + + // All requests will be stored in the head section. Each kernel is allowed at + // most kMaxScratchBuffersPerOp requests. Adjust the head to reserve at most + // that many requests to begin: + scratch_buffer_head_ = + non_persistent_buffer_allocator_->AllocateResizableBuffer( + sizeof(internal::ScratchBufferRequest) * kMaxScratchBuffersPerOp, + alignof(internal::ScratchBufferRequest)); + if (scratch_buffer_head_ == nullptr) { + return kTfLiteError; + } + + return kTfLiteOk; +} + +internal::ScratchBufferRequest* MicroAllocator::GetScratchBufferRequests() { + return reinterpret_cast(AlignPointerUp( + scratch_buffer_head_, alignof(internal::ScratchBufferRequest))); +} + +TfLiteBridgeBuiltinDataAllocator* MicroAllocator::GetBuiltinDataAllocator() { + return builtin_data_allocator_; +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_allocator.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_allocator.h new file mode 100644 index 0000000..ca2e27e --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_allocator.h @@ -0,0 +1,325 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef TENSORFLOW_LITE_MICRO_MICRO_ALLOCATOR_H_ +#define TENSORFLOW_LITE_MICRO_MICRO_ALLOCATOR_H_ + +#include +#include + +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/single_arena_buffer_allocator.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/compatibility.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/flatbuffer_utils.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/memory_planner/micro_memory_planner.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/flatbuffer_conversions_bridge.h" +#include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h" + +namespace tflite { + +// TODO(b/199402574): rename to tflite_internal or just remove internal +// namespace. +namespace internal { + +// Sets up all of the data structure members for a TfLiteTensor based on the +// contents of a serialized tensor in the flatbuffer. +// TODO(b/162311891): Drop this method when the interpreter has an API for +// returning buffers on TfLiteEvalTensor. +TfLiteStatus InitializeTfLiteTensorFromFlatbuffer( + IPersistentBufferAllocator* persistent_buffer_allocator, + INonPersistentBufferAllocator* non_persistent_buffer_allocator, + bool allocate_temp, const tflite::Tensor& flatbuffer_tensor, + const flatbuffers::Vector>* buffers, + TfLiteTensor* result); + +// Holds placeholder information for a scratch buffer request from a kernel. +// This struct is only used during the model prepare stage. Each request from a +// kernel is stored in the head section. During the prepare stage, the head +// section will at least hold kMaxScratchBuffersPerOp number of requests plus +// any requests from previous kernel requests. +// +// When the memory plan is finalized, these structs are no longer used in favor +// of a sequential, array of ScratchBufferHandle allocations in the tail +// section. These allocations are indexed by the request API defined in the +// TfLiteContext struct. +struct ScratchBufferRequest { + // Number of bytes required by the buffer. The actual allocated size might be + // greater than `bytes` due to buffer alignment. + size_t bytes; + // Node where the buffer is allocated for. This provides useful information to + // determine the lifetime of the buffer. In AllocationInfo, this buffer will + // have `before` = node_idx and `after` = node_idx. + int node_idx; + int subgraph_idx; +}; + +} // namespace internal + +struct NodeAndRegistration { + TfLiteNode node; + const TfLiteRegistration* registration; +}; + +// Holds a pointer to a buffer for a scratch buffer requested by a kernel during +// the model prepare stage. This struct is allocated in-place and allows for +// quick pointer-indexed lookup for speed during model inference. +struct ScratchBufferHandle { + // Pointer to location of the scratch buffer: + uint8_t* data; +}; + +// Stores all per-subgraph allocations. This includes the node and registration +// array, and tensor list for each subgraph. +struct SubgraphAllocations { + NodeAndRegistration* node_and_registrations; + TfLiteEvalTensor* tensors; +}; + +// Allocator responsible for allocating memory for all intermediate tensors +// necessary to invoke a model. +// +// The lifetime of the model, tensor arena and error reporter must be at +// least as long as that of the allocator object, since the allocator needs +// them to be accessible during its entire lifetime. 
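+//
+// A minimal creation sketch (illustrative only; the arena name and size below
+// are placeholders, not part of this SDK):
+//
+//   // alignas(16) avoids wasting head room (see the note on Create()).
+//   alignas(16) static uint8_t g_tensor_arena[16 * 1024];
+//   tflite::MicroAllocator* allocator = tflite::MicroAllocator::Create(
+//       g_tensor_arena, sizeof(g_tensor_arena));
+//   // The returned object is constructed inside the arena itself (see
+//   // GetDefaultTailUsage()), so it must not be deleted.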
+// +// The MicroAllocator simply plans out additional allocations that are required +// to standup a model for inference in TF Micro. This class currently relies on +// an additional allocator - SingleArenaBufferAllocator - for all allocations +// from an arena. These allocations are divided into head (non-persistent) and +// tail (persistent) regions: +// +// Memory layout to help understand how it works +// This information could change in the future version. +// ************** .memory_allocator->GetBuffer() +// Tensors/Scratch buffers (head) +// ************** .head_watermark +// unused memory +// ************** .memory_allocator->GetBuffer() + ->GetMaxBufferSize() +// - ->GetDataSize() +// persistent area (tail) +// ************** .memory_allocator->GetBuffer() + ->GetMaxBufferSize() +class MicroAllocator { + public: + // Creates a MicroAllocator instance from a given tensor arena. This arena + // will be managed by the created instance. The GreedyMemoryPlanner will + // by default be used and created on the arena. + // Note: Please use alignas(16) to make sure tensor_arena is 16 + // bytes aligned, otherwise some head room will be wasted. + // TODO(b/157615197): Cleanup constructor + factory usage. + static MicroAllocator* Create(uint8_t* tensor_arena, size_t arena_size); + + // Creates a MicroAllocator instance from a given tensor arena and a given + // MemoryPlanner. This arena will be managed by the created instance. Note: + // Please use alignas(16) to make sure tensor_arena is 16 bytes + // aligned, otherwise some head room will be wasted. + static MicroAllocator* Create(uint8_t* tensor_arena, size_t arena_size, + MicroMemoryPlanner* memory_planner); + + // Creates a MicroAllocator instance using the provided + // SingleArenaBufferAllocator instance and the MemoryPlanner. This allocator + // instance will use the SingleArenaBufferAllocator instance to manage + // allocations internally. + static MicroAllocator* Create(SingleArenaBufferAllocator* memory_allocator, + MicroMemoryPlanner* memory_planner); + + // Creates a MicroAllocator instance using the provided + // SingleArenaBufferAllocator instance and the MemoryPlanner. This allocator + // instance will use the SingleArenaBufferAllocator instance to manage + // allocations internally. + static MicroAllocator* Create(uint8_t* persistent_tensor_arena, + size_t persistent_arena_size, + uint8_t* non_persistent_tensor_arena, + size_t non_persistent_arena_size); + + // Returns the fixed amount of memory overhead of MicroAllocator. + static size_t GetDefaultTailUsage(bool is_memory_planner_given); + + // Allocates internal resources required for model inference for each subgraph + // from the arena. + // + // This method will run through the flatbuffer data supplied in the model to + // properly allocate tensor, node, and op registration data. This method is + // expected to be followed with a call to FinishModelAllocation() Returns a + // pointer to an array of SubgraphAllocations (also stored in the tail of the + // arena) where each index corresponds to a different subgraph in the model. + // Return value is nullptr if the allocations failed. + SubgraphAllocations* StartModelAllocation(const Model* model); + + // Finish allocating internal resources required for model inference. + // + // -Plan the memory for activation tensors and scratch buffers. + // -Update eval tensors for each subgraph based on planned offsets. + // -Allocate scratch buffer handles array and update based on planned offsets. 
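+  //
+  // A rough calling sequence, as driven by MicroInterpreter::AllocateTensors()
+  // (error handling elided; illustrative only):
+  //
+  //   SubgraphAllocations* allocations =
+  //       allocator->StartModelAllocation(model);
+  //   // ... set up node/registration data and run kernel Init/Prepare ...
+  //   ScratchBufferHandle* scratch_buffer_handles = nullptr;
+  //   allocator->FinishModelAllocation(model, allocations,
+  //                                    &scratch_buffer_handles);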
+ // + // This method should be called after assigning model resources + // in StartModelAllocation(). The subgraph_allocations pointer should be the + // value passed into this class during StartModelAllocation(). Scratch buffer + // handles are stored in the out-param `scratch_buffer_handles` array which is + // allocated in this method. This value will be used in `GetScratchBuffer` + // call to retrieve scratch buffers. + TfLiteStatus FinishModelAllocation( + const Model* model, SubgraphAllocations* subgraph_allocations, + ScratchBufferHandle** scratch_buffer_handles); + + // Allocates a TfLiteTensor struct and populates the returned value with + // properties from the model flatbuffer. This struct is allocated from + // persistent arena memory is only guaranteed for the lifetime of the + // application. The eval_tensors pointer should be the value passed into this + // class during StartModelAllocation() and contains the source-of-truth for + // buffers. + virtual TfLiteTensor* AllocatePersistentTfLiteTensor( + const Model* model, const SubgraphAllocations* subgraph_allocations, + int tensor_index, int subgraph_index); + + // Allocates a TfLiteTensor struct and populates the returned value with + // properties from the model flatbuffer. This struct is allocated from + // temporary arena memory is only guaranteed until a call is made to + // ResetTempAllocations(). Subgraph_allocaitons contains the array of + // TfLiteEvalTensors. If the newly allocated temp at the specified subgraph + // and tensor index is already present int the TfLiteEvalTensor array, its + // data buffer will be re-used. + virtual TfLiteTensor* AllocateTempTfLiteTensor( + const Model* model, const SubgraphAllocations* subgraph_allocations, + int tensor_index, int subgraph_index); + + virtual void DeallocateTempTfLiteTensor(TfLiteTensor*); + + // Resets all temporary allocations. This method should be called after a + // chain of temp allocations (e.g. chain of TfLiteTensor objects via + // AllocateTfLiteTensor()). + virtual TfLiteStatus ResetTempAllocations(); + + // Returns true if all temporary buffers including temp TfLiteTensor are + // already deallocated. + virtual bool IsAllTempDeallocated(); + + // Allocates persistent buffer which has the same life time as the allocator. + // The memory is immediately available and is allocated from the tail of the + // arena. + virtual void* AllocatePersistentBuffer(size_t bytes); + + // Register a scratch buffer of size `bytes` for Node with `node_id`. + // This method only requests a buffer with a given size to be used after a + // model has finished allocation via FinishModelAllocation(). All requested + // buffers will be accessible by the out-param in that method. + TfLiteStatus RequestScratchBufferInArena(size_t bytes, int subgraph_idx, + int* buffer_idx); + + // Finish allocating a specific NodeAndRegistration prepare block (kernel + // entry for a model) with a given node ID. This call ensures that any scratch + // buffer requests and temporary allocations are handled and ready for the + // next node prepare block. + TfLiteStatus FinishPrepareNodeAllocations(int node_id); + + // Returns the arena usage in bytes, only available after + // `FinishModelAllocation`. Otherwise, it will return 0. 
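+  //
+  // For example, once FinishModelAllocation() has completed (illustrative):
+  //   MicroPrintf("Arena used: %u bytes",
+  //               static_cast<unsigned int>(allocator->used_bytes()));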
+ size_t used_bytes() const; + + TfLiteBridgeBuiltinDataAllocator* GetBuiltinDataAllocator(); + + protected: + MicroAllocator(SingleArenaBufferAllocator* memory_allocator, + MicroMemoryPlanner* memory_planner); + MicroAllocator(IPersistentBufferAllocator* persistent_buffer_allocator, + INonPersistentBufferAllocator* non_persistent_buffer_allocator, + MicroMemoryPlanner* memory_planner); + virtual ~MicroAllocator(); + + // Allocates an array in the arena to hold pointers to the node and + // registration pointers required to represent the inference graph of the + // model. + virtual TfLiteStatus AllocateNodeAndRegistrations( + const Model* model, SubgraphAllocations* subgraph_allocations); + + // Allocates the list of persistent TfLiteEvalTensors that are used for the + // "eval" phase of model inference. These structs will be the source of truth + // for all tensor buffers. + virtual TfLiteStatus AllocateTfLiteEvalTensors( + const Model* model, SubgraphAllocations* subgraph_allocations); + + // Allocates persistent tensor buffers for variable tensors in the subgraph. + // Online and offline variable tensors are handled differently hence the + // offline_planner_offsets parameter is needed. + virtual TfLiteStatus AllocateVariables( + const SubGraph* subgraph, TfLiteEvalTensor* eval_tensors, + const int32_t* offline_planner_offsets); + + // Allocate and return a persistent TfLiteTensor. + // TODO(b/162311891): Drop this method when the interpreter has an API for + // accessing TfLiteEvalTensor structs. + virtual TfLiteTensor* AllocatePersistentTfLiteTensorInternal(); + + // Populates a TfLiteTensor struct with data from the model flatbuffer. Any + // quantization data is allocated from either the tail (persistent) or temp + // sections of the arena based on the allocation flag. + virtual TfLiteStatus PopulateTfLiteTensorFromFlatbuffer(const Model* model, + TfLiteTensor* tensor, + int tensor_index, + int subgraph_idx, + bool allocate_temp); + + private: + // Commits a memory plan for all non-persistent buffer allocations in the + // 'head' section of the memory arena. The eval_tensors pointer is the list of + // pre-allocated TfLiteEvalTensor structs that will point to the buffers that + // will be allocated into the head section in this function call. The + // scratch_buffer_handles pointer is the array of pre-allocated + // ScratchBufferHandle structs that will point to allocated buffers also in + // the head section. + virtual TfLiteStatus CommitStaticMemoryPlan( + const Model* model, SubgraphAllocations* allocations, + ScratchBufferHandle* scratch_buffer_handles); + + // Allocates an array of ScratchBufferHandle structs in the tail section for a + // given number of handles. + virtual TfLiteStatus AllocateScratchBufferHandles( + ScratchBufferHandle** scratch_buffer_handles, size_t handle_count); + + // Clears all internal scratch buffer request counts and resets the head to + // prepare for kernels to request scratch buffer data when a model is + // preparing. + TfLiteStatus InitScratchBufferData(); + + // Returns the pointer for the array of ScratchBufferRequest allocations in + // the head section. + internal::ScratchBufferRequest* GetScratchBufferRequests(); + + // A simple memory allocator that always allocate from the arena tail or head. + INonPersistentBufferAllocator* non_persistent_buffer_allocator_; + IPersistentBufferAllocator* persistent_buffer_allocator_; + + // Allocator used to allocate persistent builtin data. 
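+  // Builtin operator parameters parsed from the flatbuffer (see the
+  // CallBuiltinParseFunction usage in MicroInterpreter) are allocated through
+  // it so that they stay valid for the lifetime of the model.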
+ TfLiteBridgeBuiltinDataAllocator* builtin_data_allocator_; + + // Activation buffer memory planner. + MicroMemoryPlanner* memory_planner_; + + bool model_is_allocating_; + + // Holds the number of ScratchBufferRequest instances stored in the head + // section when a model is allocating. + size_t scratch_buffer_request_count_ = 0; + + // Holds ScratchBufferRequest when a model is allocating + uint8_t* scratch_buffer_head_ = nullptr; + + // Holds the byte length of the memory plan with the largest head usage. Used + // to ensure that multi-tenant allocations can share the head for buffers. + size_t max_head_buffer_usage_ = 0; + + TF_LITE_REMOVE_VIRTUAL_DELETE +}; + +} // namespace tflite +#endif // TENSORFLOW_LITE_MICRO_MICRO_ALLOCATOR_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_arena_constants.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_arena_constants.h new file mode 100644 index 0000000..8282817 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_arena_constants.h @@ -0,0 +1,28 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_MICRO_MICRO_ARENA_CONSTANTS_H_ +#define TENSORFLOW_LITE_MICRO_MICRO_ARENA_CONSTANTS_H_ + +namespace tflite { + +// The default buffer alignment requirement. +// We align tensor buffers to 16-byte boundaries, since this is a common +// requirement for SIMD extensions. +constexpr int MicroArenaBufferAlignment() { return 16; } + +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_MICRO_ARENA_CONSTANTS_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_context.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_context.cpp new file mode 100644 index 0000000..b0a4244 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_context.cpp @@ -0,0 +1,129 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_context.h" + +#include +#include +#include + +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +namespace tflite { +MicroContext::MicroContext(MicroAllocator* allocator, const Model* model, + MicroGraph* graph) + : allocator_(*allocator), graph_(*graph), model_(model) {} + +MicroContext::~MicroContext() {} + +void* MicroContext::AllocatePersistentBuffer(size_t bytes) { + return allocator_.AllocatePersistentBuffer(bytes); +} + +TfLiteStatus MicroContext::RequestScratchBufferInArena(size_t bytes, + int* buffer_idx) { + return allocator_.RequestScratchBufferInArena( + bytes, graph_.GetCurrentSubgraphIndex(), buffer_idx); +} + +void* MicroContext::GetScratchBuffer(int buffer_idx) { + ScratchBufferHandle* handle = scratch_buffer_handles_ + buffer_idx; + return handle->data; +} + +TfLiteTensor* MicroContext::AllocateTempTfLiteTensor(int tensor_idx) { + return allocator_.AllocateTempTfLiteTensor(model_, graph_.GetAllocations(), + tensor_idx, + graph_.GetCurrentSubgraphIndex()); +} + +int MicroContext::GetTensorIndex(int index, int max_size, + const int* tensor_indices) { + if (index >= 0 && index < max_size) { + const int tensor_index = tensor_indices[index]; + if (tensor_index != kTfLiteOptionalTensor) { + return tensor_index; + } + } + return -1; +} + +TfLiteTensor* MicroContext::AllocateTempInputTensor(const TfLiteNode* node, + int index) { + const int tensor_index = + GetTensorIndex(index, node->inputs->size, node->inputs->data); + if (tensor_index < 0) { + return nullptr; + } + return AllocateTempTfLiteTensor(tensor_index); +} + +TfLiteTensor* MicroContext::AllocateTempOutputTensor(const TfLiteNode* node, + int index) { + const int tensor_index = + GetTensorIndex(index, node->outputs->size, node->outputs->data); + if (tensor_index < 0) { + return nullptr; + } + return AllocateTempTfLiteTensor(tensor_index); +} + +TfLiteTensor* MicroContext::AllocateTempIntermediateTensor( + const TfLiteNode* node, int index) { + const int tensor_index = GetTensorIndex(index, node->intermediates->size, + node->intermediates->data); + if (tensor_index < 0) { + return nullptr; + } + return AllocateTempTfLiteTensor(tensor_index); +} + +void MicroContext::DeallocateTempTfLiteTensor(TfLiteTensor* tensor) { + return allocator_.DeallocateTempTfLiteTensor(tensor); +} + +TfLiteEvalTensor* MicroContext::GetEvalTensor(int tensor_idx) { + return &graph_.GetAllocations()[graph_.GetCurrentSubgraphIndex()] + .tensors[tensor_idx]; +} + +void MicroContext::SetScratchBufferHandles( + ScratchBufferHandle* scratch_buffer_handles) { + scratch_buffer_handles_ = scratch_buffer_handles; +} + +TfLiteStatus MicroContext::set_external_context( + void* external_context_payload) { + if (external_context_payload == nullptr || + external_context_payload_ != nullptr) { + MicroPrintf( + "Attempting to set external context to %x but it was %x already", + external_context_payload, external_context_payload_); + return kTfLiteError; + } + + external_context_payload_ = external_context_payload; + return kTfLiteOk; +} + +void MicroContextReportOpError(struct TfLiteContext* context, + const char* format, ...) 
{ + va_list args; + va_start(args, format); + Log(format, args); + va_end(args); +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_context.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_context.h new file mode 100644 index 0000000..65a64b2 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_context.h @@ -0,0 +1,161 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_MICRO_MICRO_CONTEXT_H_ +#define TENSORFLOW_LITE_MICRO_MICRO_CONTEXT_H_ + +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_allocator.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_graph.h" + +namespace tflite { +// MicroContext is eventually going to become the API between TFLM and the +// kernels, replacing all the functions in TfLiteContext. The end state is code +// kernels to have code like: +// +// MicroContext* micro_context = GetMicroContext(context); +// micro_context-> +class MicroContext { + public: + // Does not take any ownership, and all pointers must refer to valid objects + // that outlive the one constructed. + explicit MicroContext(MicroAllocator* allocator, const Model* model, + MicroGraph* graph); + virtual ~MicroContext(); + + // Allocate persistent buffer which has the same life time as the interpreter. + // Returns nullptr on failure. + // The memory is allocated from the tail. + // This method is only available in Init or Prepare stage. + // Virtual so that it can be faked for kernel tests. + virtual void* AllocatePersistentBuffer(size_t bytes); + + // Request a scratch buffer in the arena through static memory planning. + // This method is only available in Prepare stage and the buffer is allocated + // by the interpreter between Prepare and Eval stage. In Eval stage, + // GetScratchBuffer API can be used to fetch the address. + // Virtual so that it can be faked for kernel tests. + virtual TfLiteStatus RequestScratchBufferInArena(size_t bytes, + int* buffer_idx); + + // Get the scratch buffer pointer. + // This method is only available in Eval stage. + // Virtual so that it can be faked for kernel tests. + virtual void* GetScratchBuffer(int buffer_idx); + + // Returns a temporary TfLiteTensor struct for a given index. + // Virtual so that it can be faked for kernel tests. + virtual TfLiteTensor* AllocateTempTfLiteTensor(int tensor_idx); + + // Returns a temporary TfLiteTensor struct for the specified input tensor of a + // given mode. This is the recommended API over the deprecated + // GetInput/GetInputSafe to get a temp input tensor. The returned tensor shall + // be freed via calling DeallocateTempTfLiteTensor. + virtual TfLiteTensor* AllocateTempInputTensor(const TfLiteNode* node, + int index); + + // Returns a temporary TfLiteTensor struct for the specified output tensor of + // a given mode. 
This is the recommended API over the deprecated + // GetOutput/GetOutputSafe to get a temp output tensor. The returned tensor + // shall be freed via calling DeallocateTempTfLiteTensor. + virtual TfLiteTensor* AllocateTempOutputTensor(const TfLiteNode* node, + int index); + + // Returns a temporary TfLiteTensor struct for the specified intermediate + // tensor of a given mode. This is the recommended API over the deprecated + // GetIntermediates/GetIntermediatesSafe to get a temp intermediate tensor. + // The returned tensor shall be freed via calling DeallocateTempTfLiteTensor. + virtual TfLiteTensor* AllocateTempIntermediateTensor(const TfLiteNode* node, + int index); + + // Deallocates a temp TfLiteTensor. + // Virtual so that it can be faked for kernel tests. + virtual void DeallocateTempTfLiteTensor(TfLiteTensor* tensor); + + // Returns a TfLiteEvalTensor struct for a given index. + // Virtual so that it can be faked for kernel tests. + virtual TfLiteEvalTensor* GetEvalTensor(int tensor_idx); + + // Does not take ownership of the pointer and the pointer must refer to valid + // an object that outlive this class instance. + // This can only be called once to set one external context. + TfLiteStatus set_external_context(void* external_context_payload); + + void* external_context() { return external_context_payload_; } + + MicroGraph& graph() { return graph_; } + + // Sets the pointer to a list of ScratchBufferHandle instances. + // Not API between TFLM and kernels. Primarily used by the framework for + // housekeeping in MicroContext. + void SetScratchBufferHandles(ScratchBufferHandle* scratch_buffer_handles); + + private: + // Return the tensor index as tensor_indices[index]. tensor_indices is of + // max_size. Return -1 if index is not in the valid range of tensor_indices. + int GetTensorIndex(int index, int max_size, const int* tensor_indices); + + MicroAllocator& allocator_; + MicroGraph& graph_; + const Model* model_; + + ScratchBufferHandle* scratch_buffer_handles_ = nullptr; + void* external_context_payload_ = nullptr; + + TF_LITE_REMOVE_VIRTUAL_DELETE +}; + +inline MicroContext* GetMicroContext(const struct TfLiteContext* context) { + return reinterpret_cast(context->impl_); +} + +// Deprecated API. Prefer to using the MicroContext API directly from the +// kernels. +// TODO(b/213010668): migrate all existing kernels to use MicroContext, delete +// these functions, and remove corresponding members from the TfLiteContext +// struct for TFLM. 
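+//
+// A sketch of the preferred kernel-side pattern (illustrative; the function
+// name and tensor/buffer indices are placeholders, not part of this SDK):
+//
+//   TfLiteStatus SamplePrepare(TfLiteContext* context, TfLiteNode* node) {
+//     MicroContext* micro_context = GetMicroContext(context);
+//     TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, 0);
+//     if (input == nullptr) return kTfLiteError;
+//     int scratch_idx = -1;
+//     TfLiteStatus status = micro_context->RequestScratchBufferInArena(
+//         input->bytes, &scratch_idx);
+//     // Persist scratch_idx in the op's user data; fetch the buffer in Eval
+//     // via micro_context->GetScratchBuffer(scratch_idx).
+//     micro_context->DeallocateTempTfLiteTensor(input);
+//     return status;
+//   }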
+inline void* MicroContextAllocatePersistentBuffer(TfLiteContext* ctx, + size_t bytes) { + return GetMicroContext(ctx)->AllocatePersistentBuffer(bytes); +} +inline TfLiteStatus MicroContextRequestScratchBufferInArena(TfLiteContext* ctx, + size_t bytes, + int* buffer_idx) { + return GetMicroContext(ctx)->RequestScratchBufferInArena(bytes, buffer_idx); +} +inline void* MicroContextGetScratchBuffer(TfLiteContext* ctx, int buffer_idx) { + return GetMicroContext(ctx)->GetScratchBuffer(buffer_idx); +} +inline TfLiteTensor* MicroContextGetTensor(const struct TfLiteContext* context, + int tensor_idx) { + return GetMicroContext(context)->AllocateTempTfLiteTensor(tensor_idx); +} +inline TfLiteEvalTensor* MicroContextGetEvalTensor( + const struct TfLiteContext* context, int tensor_idx) { + return GetMicroContext(context)->GetEvalTensor(tensor_idx); +} +inline TfLiteExternalContext* MicroContextGetExternalContext( + TfLiteContext* context, TfLiteExternalContextType unused) { + return reinterpret_cast( + GetMicroContext(context)->external_context()); +} + +// Requests that an error be reported with format string msg. +void MicroContextReportOpError(struct TfLiteContext* context, + const char* format, ...); + +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_MICRO_CONTEXT_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_error_reporter.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_error_reporter.cpp new file mode 100644 index 0000000..f15cfcc --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_error_reporter.cpp @@ -0,0 +1,43 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_error_reporter.h" + +#include +#include +#include + +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +namespace { +uint8_t micro_error_reporter_buffer[sizeof(tflite::MicroErrorReporter)]; +tflite::MicroErrorReporter* error_reporter_ = nullptr; + +} // namespace + +namespace tflite { +ErrorReporter* GetMicroErrorReporter() { + if (error_reporter_ == nullptr) { + error_reporter_ = new (micro_error_reporter_buffer) MicroErrorReporter(); + } + return error_reporter_; +} + +int MicroErrorReporter::Report(const char* format, va_list args) { + Log(format, args); + return 0; +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_error_reporter.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_error_reporter.h new file mode 100644 index 0000000..20a2423 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_error_reporter.h @@ -0,0 +1,37 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_MICRO_TFLITE_BRIDGE_MICRO_ERROR_REPORTER_H_ +#define TENSORFLOW_LITE_MICRO_TFLITE_BRIDGE_MICRO_ERROR_REPORTER_H_ + +#include + +#include "edge-impulse-sdk/tensorflow/lite/core/api/error_reporter.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/compatibility.h" + +namespace tflite { +// Get a pointer to a singleton global error reporter. +ErrorReporter* GetMicroErrorReporter(); +class MicroErrorReporter : public ErrorReporter { + public: + ~MicroErrorReporter() override {} + int Report(const char* format, va_list args) override; + + private: + TF_LITE_REMOVE_VIRTUAL_DELETE +}; + +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_TFLITE_BRIDGE_MICRO_ERROR_REPORTER_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_graph.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_graph.cpp new file mode 100644 index 0000000..fa43d6c --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_graph.cpp @@ -0,0 +1,258 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_graph.h" + +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/flatbuffers.h" // from @flatbuffers +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/flatbuffer_utils.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_profiler.h" +#include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h" + +namespace tflite { +namespace { + +const char* OpNameFromRegistration(const TfLiteRegistration* registration) { + if (registration->builtin_code == BuiltinOperator_CUSTOM) { + return registration->custom_name; + } else { + return EnumNameBuiltinOperator(BuiltinOperator(registration->builtin_code)); + } +} + +} // namespace + +MicroGraph::MicroGraph(TfLiteContext* context, const Model* model, + MicroAllocator* allocator, + MicroResourceVariables* resource_variables) + : context_(context), + model_(model), + allocator_(allocator), + current_subgraph_index_(0), + resource_variables_(resource_variables) { + if (model != nullptr) { + subgraphs_ = model->subgraphs(); + } +} + +MicroGraph::~MicroGraph() {} + +TfLiteStatus MicroGraph::InitSubgraphs() { + int previous_subgraph_idx = current_subgraph_index_; + + for (size_t subgraph_idx = 0; subgraph_idx < subgraphs_->size(); + subgraph_idx++) { + current_subgraph_index_ = subgraph_idx; + uint32_t operators_size = NumSubgraphOperators(model_, subgraph_idx); + for (size_t i = 0; i < operators_size; ++i) { + TfLiteNode* node = + &(subgraph_allocations_[subgraph_idx].node_and_registrations[i].node); + const TfLiteRegistration* registration = + subgraph_allocations_[subgraph_idx] + .node_and_registrations[i] + .registration; + size_t init_data_size; + const char* init_data; + if (registration->builtin_code == BuiltinOperator_CUSTOM) { + init_data = reinterpret_cast(node->custom_initial_data); + init_data_size = node->custom_initial_data_size; + } else { + init_data = reinterpret_cast(node->builtin_data); + init_data_size = 0; + } + if (registration->init) { + node->user_data = + registration->init(context_, init_data, init_data_size); + } + } + } + current_subgraph_index_ = previous_subgraph_idx; + + return kTfLiteOk; +} + +TfLiteStatus MicroGraph::PrepareSubgraphs(bool run_all_prep_ops) { + int previous_subgraph_idx = current_subgraph_index_; + bool all_prep_ops_ok = true; + + for (size_t subgraph_idx = 0; subgraph_idx < subgraphs_->size(); + subgraph_idx++) { + current_subgraph_index_ = subgraph_idx; + uint32_t operators_size = NumSubgraphOperators(model_, subgraph_idx); + for (size_t i = 0; i < operators_size; ++i) { + TfLiteNode* node = + &(subgraph_allocations_[subgraph_idx].node_and_registrations[i].node); + const TfLiteRegistration* registration = + subgraph_allocations_[subgraph_idx] + .node_and_registrations[i] + .registration; + if (registration->prepare != nullptr) { + TfLiteStatus prepare_status = registration->prepare(context_, node); + if (prepare_status != kTfLiteOk) { + MicroPrintf("Node %s (number %df) failed to prepare with status %d", + OpNameFromRegistration(registration), i, prepare_status); + + all_prep_ops_ok = false; + if (!run_all_prep_ops) { + return kTfLiteError; + } + } + } + 
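+      // After this node's Prepare has run, let the allocator finalize the
+      // node's bookkeeping: any scratch buffer requests made by the kernel are
+      // tagged with this node id, and the head section is re-sized so that the
+      // next operator can request up to kMaxScratchBuffersPerOp buffers.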
allocator_->FinishPrepareNodeAllocations(/*node_id=*/i); + } + + if (!all_prep_ops_ok) { + return kTfLiteError; + } + + } + current_subgraph_index_ = previous_subgraph_idx; + + return kTfLiteOk; +} + +TfLiteStatus MicroGraph::FreeSubgraphs() { + int previous_subgraph_idx = current_subgraph_index_; + + for (size_t subgraph_idx = 0; subgraph_idx < subgraphs_->size(); + subgraph_idx++) { + current_subgraph_index_ = subgraph_idx; + uint32_t operators_size = NumSubgraphOperators(model_, subgraph_idx); + for (size_t i = 0; i < operators_size; ++i) { + TfLiteNode* node = + &(subgraph_allocations_[subgraph_idx].node_and_registrations[i].node); + const TfLiteRegistration* registration = + subgraph_allocations_[subgraph_idx] + .node_and_registrations[i] + .registration; + // registration is allocated outside the interpreter, so double check to + // make sure it's not nullptr; + if (registration != nullptr && registration->free != nullptr) { + registration->free(context_, node->user_data); + } + } + } + current_subgraph_index_ = previous_subgraph_idx; + + return kTfLiteOk; +} + +TfLiteStatus MicroGraph::InvokeSubgraph(int subgraph_idx) { + int previous_subgraph_idx = current_subgraph_index_; + current_subgraph_index_ = subgraph_idx; + + if (static_cast(subgraph_idx) >= subgraphs_->size()) { + MicroPrintf("Accessing subgraph %d but only %d subgraphs found", + subgraph_idx, subgraphs_->size()); + return kTfLiteError; + } + uint32_t operators_size = NumSubgraphOperators(model_, subgraph_idx); + for (size_t i = 0; i < operators_size; ++i) { + TfLiteNode* node = + &(subgraph_allocations_[subgraph_idx].node_and_registrations[i].node); + const TfLiteRegistration* registration = subgraph_allocations_[subgraph_idx] + .node_and_registrations[i] + .registration; + +// This ifdef is needed (even though ScopedMicroProfiler itself is a no-op with +// -DTF_LITE_STRIP_ERROR_STRINGS) because the function OpNameFromRegistration is +// only defined for builds with the error strings. +#if !defined(TF_LITE_STRIP_ERROR_STRINGS) + ScopedMicroProfiler scoped_profiler( + OpNameFromRegistration(registration), + reinterpret_cast(context_->profiler)); +#endif + + TFLITE_DCHECK(registration->invoke); + TfLiteStatus invoke_status = registration->invoke(context_, node); + + // All TfLiteTensor structs used in the kernel are allocated from temp + // memory in the allocator. This creates a chain of allocations in the + // temp section. The call below resets the chain of allocations to + // prepare for the next call. 
+ allocator_->ResetTempAllocations(); + + if (invoke_status == kTfLiteError) { + MicroPrintf("Node %s (number %d) failed to invoke with status %d", + OpNameFromRegistration(registration), i, invoke_status); + return kTfLiteError; + } else if (invoke_status != kTfLiteOk) { + return invoke_status; + } + } + current_subgraph_index_ = previous_subgraph_idx; + return kTfLiteOk; +} + +TfLiteStatus MicroGraph::ResetVariableTensors() { + for (size_t subgraph_idx = 0; subgraph_idx < subgraphs_->size(); + subgraph_idx++) { + const SubGraph* subgraph = (*subgraphs_)[subgraph_idx]; + for (size_t i = 0; i < subgraph->tensors()->size(); ++i) { + auto* tensor = subgraph->tensors()->Get(i); + if (tensor->is_variable()) { + size_t buffer_size; + TF_LITE_ENSURE_STATUS(TfLiteEvalTensorByteLength( + &subgraph_allocations_[subgraph_idx].tensors[i], &buffer_size)); + + int value = 0; + if (tensor->type() == tflite::TensorType_INT8) { + value = tensor->quantization()->zero_point()->Get(0); + } + memset(subgraph_allocations_[subgraph_idx].tensors[i].data.raw, value, + buffer_size); + } + } + } + if (resource_variables_ != nullptr) { + resource_variables_->ResetAll(); + } + + return kTfLiteOk; +} + +int MicroGraph::NumSubgraphs() { return model_->subgraphs()->size(); } + +void MicroGraph::SetSubgraphAllocations( + SubgraphAllocations* subgraph_allocations) { + subgraph_allocations_ = subgraph_allocations; +} + +size_t MicroGraph::NumSubgraphInputs(int subgraph_idx) { + return model_->subgraphs()->Get(subgraph_idx)->inputs()->size(); +} + +TfLiteEvalTensor* MicroGraph::GetSubgraphInput(int subgraph_idx, + int input_idx) { + int tensor_idx = + model_->subgraphs()->Get(subgraph_idx)->inputs()->Get(input_idx); + return &subgraph_allocations_[subgraph_idx].tensors[tensor_idx]; +} + +size_t MicroGraph::NumSubgraphOutputs(int subgraph_idx) { + return model_->subgraphs()->Get(subgraph_idx)->outputs()->size(); +} + +TfLiteEvalTensor* MicroGraph::GetSubgraphOutput(int subgraph_idx, + int output_idx) { + int tensor_idx = + model_->subgraphs()->Get(subgraph_idx)->outputs()->Get(output_idx); + return &subgraph_allocations_[subgraph_idx].tensors[tensor_idx]; +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_graph.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_graph.h new file mode 100644 index 0000000..082b898 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_graph.h @@ -0,0 +1,104 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_LITE_MICRO_MICRO_GRAPH_H_ +#define TENSORFLOW_LITE_MICRO_MICRO_GRAPH_H_ + +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_allocator.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_resource_variable.h" +#include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h" + +namespace tflite { + +// Abstracts the details of interacting with the tflite::Model. +// +// Provides methods to access, initialize, prepare, invoke and free any +// subgraph in the tflite::Graph. +class MicroGraph { + public: + // The lifetime of the context, model, allocator and resource_variables must + // be at least as long as that of the graph object, since the this class may + // need to access them at any time. If resource_variables is a nullptr, + // GetResourceVariables will return a nullptr. + MicroGraph(TfLiteContext* context, const Model* model, + MicroAllocator* allocator, + MicroResourceVariables* resource_variables); + virtual ~MicroGraph(); + + // Sets up builtin data and calls TfLiteRegistration->Init for every operator + // in every subgraph in the model. + virtual TfLiteStatus InitSubgraphs(); + + // Calls TfLiteRegistration->Prepare for every operator in every subgraph in + // the model. + virtual TfLiteStatus PrepareSubgraphs(bool run_all_prep_ops); + + // Calls TfLiteRegistration->Free for every operator in every subgraph in the + // model. + virtual TfLiteStatus FreeSubgraphs(); + + // Calls TfLiteRegistration->Invoke for every operator in a single subgraph in + // the model. + virtual TfLiteStatus InvokeSubgraph(int subgraph_idx); + + // Zeros out all variable tensors in all subgraphs in the model. + virtual TfLiteStatus ResetVariableTensors(); + + // Number of tensor inputs to a specified subgraph in the model. + virtual size_t NumSubgraphInputs(int subgraph_idx); + + // Get the specified input tensor of a specified subgraph in the model. + virtual TfLiteEvalTensor* GetSubgraphInput(int subgraph_idx, int input_idx); + + // Number of tensor outputs from a specified subgraph in the model. + virtual size_t NumSubgraphOutputs(int subgraph_idx); + + // Get the specified output tensor of a specified subgraph in the model. + virtual TfLiteEvalTensor* GetSubgraphOutput(int subgraph_idx, int output_idx); + + // Number of subgraphs in the model. + virtual int NumSubgraphs(); + + // Hook to pass in subgraph allocations tracked within the interpreter, + // allowing MicroGraph to init / prepare / invoke subgraphs in the model. + void SetSubgraphAllocations(SubgraphAllocations* subgraph_allocations); + + // Get the current subgraph index. Within an on operator, this is guaranteed + // to be the subgraph of that operator. + int GetCurrentSubgraphIndex() { return current_subgraph_index_; } + + // Gets the list of alloctions for each subgraph. This is the source of truth + // for all per-subgraph allocation data. + SubgraphAllocations* GetAllocations() { return subgraph_allocations_; } + + // Get the resource variables for this TFLM graph. 
+ MicroResourceVariables* GetResourceVariables() { return resource_variables_; } + + private: + TfLiteContext* context_; + const Model* model_; + MicroAllocator* allocator_; + SubgraphAllocations* subgraph_allocations_ = nullptr; + int current_subgraph_index_; + MicroResourceVariables* resource_variables_; + const flatbuffers::Vector>* subgraphs_; + + TF_LITE_REMOVE_VIRTUAL_DELETE +}; + +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_MICRO_GRAPH_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_interpreter.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_interpreter.cpp new file mode 100644 index 0000000..8877a8d --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_interpreter.cpp @@ -0,0 +1,348 @@ +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_interpreter.h" + +#include +#include +#include + +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/flatbuffers.h" // from @flatbuffers +#include "edge-impulse-sdk/tensorflow/lite/c/c_api_types.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/flatbuffer_utils.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_allocator.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_op_resolver.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_profiler_interface.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/flatbuffer_conversions_bridge.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/op_resolver_bridge.h" +#include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h" +#include "edge-impulse-sdk/tensorflow/lite/schema/schema_utils.h" + +namespace tflite { + +MicroInterpreter::MicroInterpreter(const Model* model, + const MicroOpResolver& op_resolver, + uint8_t* tensor_arena, + size_t tensor_arena_size, + MicroResourceVariables* resource_variables, + MicroProfilerInterface* profiler) + : model_(model), + op_resolver_(op_resolver), + allocator_(*MicroAllocator::Create(tensor_arena, tensor_arena_size)), + + graph_(&context_, model, &allocator_, resource_variables), + tensors_allocated_(false), + initialization_status_(kTfLiteError), + input_tensors_(nullptr), + output_tensors_(nullptr), + micro_context_(&allocator_, model_, &graph_) { + Init(profiler); +} + +MicroInterpreter::MicroInterpreter(const Model* model, + const MicroOpResolver& op_resolver, + MicroAllocator* allocator, + MicroResourceVariables* resource_variables, + MicroProfilerInterface* profiler) + : model_(model), + op_resolver_(op_resolver), + allocator_(*allocator), + graph_(&context_, model, allocator, resource_variables), + tensors_allocated_(false), + initialization_status_(kTfLiteError), + 
input_tensors_(nullptr), + output_tensors_(nullptr), + micro_context_(&allocator_, model_, &graph_) { + Init(profiler); +} + +MicroInterpreter::~MicroInterpreter() { + if (graph_.GetAllocations() != nullptr) { + graph_.FreeSubgraphs(); + } +#ifdef EON_COMPILER_RUN + if (node_and_registrations_ != nullptr) { + for (size_t i = 0; i < model_->subgraphs()->Get(0)->operators()->size(); ++i) { + TfLiteNode* node = &(node_and_registrations_[i].node); + const TfLiteRegistration* registration = + node_and_registrations_[i].registration; + // registration is allocated outside the interpreter, so double check to + // make sure it's not nullptr; + if (registration != nullptr && registration->free != nullptr) { + registration->free(&context_, node->user_data); + } + } + } +#endif +} + +void MicroInterpreter::Init(MicroProfilerInterface* profiler) { + context_.impl_ = static_cast(µ_context_); + context_.ReportError = MicroContextReportOpError; + context_.GetTensor = MicroContextGetTensor; + context_.GetEvalTensor = MicroContextGetEvalTensor; + context_.profiler = profiler; + + initialization_status_ = kTfLiteOk; +} + +TfLiteStatus MicroInterpreter::PrepareNodeAndRegistrationDataFromFlatbuffer() { + for (int subgraph_idx = 0; subgraph_idx < graph_.NumSubgraphs(); + subgraph_idx++) { + const SubGraph* subgraph = model_->subgraphs()->Get(subgraph_idx); + TFLITE_DCHECK(subgraph != nullptr); + + auto* opcodes = model_->operator_codes(); + TfLiteBridgeBuiltinDataAllocator* builtin_data_allocator = + allocator_.GetBuiltinDataAllocator(); + uint32_t operators_size = NumSubgraphOperators(subgraph); + for (size_t i = 0; i < operators_size; ++i) { + const auto* op = subgraph->operators()->Get(i); + const size_t index = op->opcode_index(); + if (index >= opcodes->size()) { + MicroPrintf("Missing registration for opcode_index %d\n", index); + return kTfLiteError; + } + const auto* opcode = opcodes->Get(index); + TfLiteStatus status = + GetRegistrationFromOpCode(opcode, op_resolver_, + &(graph_.GetAllocations()[subgraph_idx] + .node_and_registrations[i] + .registration)); + if (status != kTfLiteOk) { + MicroPrintf("Failed to get registration from op code %s\n ", + EnumNameBuiltinOperator(GetBuiltinCode(opcode))); + return status; + } + const auto* registration = graph_.GetAllocations()[subgraph_idx] + .node_and_registrations[i] + .registration; + if (registration == nullptr) { + MicroPrintf("Skipping op for opcode_index %d\n", index); + return kTfLiteError; + } + BuiltinOperator op_type = + static_cast(registration->builtin_code); + + const char* custom_data = nullptr; + size_t custom_data_size = 0; + unsigned char* builtin_data = nullptr; + + if (op_type == BuiltinOperator_CUSTOM) { + // Custom Ops may or may not have a non-null custom_options field. 
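+        // The raw bytes captured here are handed back to the custom kernel's
+        // init hook as its (buffer, length) pair; decoding is entirely up to
+        // the kernel. A common approach is flexbuffers (sketch only;
+        // MyCustomInit and the "stride" key are hypothetical):
+        //
+        //   void* MyCustomInit(TfLiteContext* ctx, const char* buffer,
+        //                      size_t length) {
+        //     auto m = flexbuffers::GetRoot(
+        //         reinterpret_cast<const uint8_t*>(buffer), length).AsMap();
+        //     int stride = m["stride"].AsInt32();
+        //     // ... use the decoded options to build the op's user data ...
+        //   }
+        //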
+ if (op->custom_options() != nullptr) { + custom_data = + reinterpret_cast(op->custom_options()->data()); + custom_data_size = op->custom_options()->size(); + } + } else { + if (op->custom_options() != nullptr) { + MicroPrintf( + "Unsupported behavior: found builtin operator %s with custom " + "options.\n", + EnumNameBuiltinOperator(op_type)); + return kTfLiteError; + } + + TfLiteBridgeBuiltinParseFunction parser = + op_resolver_.GetOpDataParser(op_type); + if (parser == nullptr) { + MicroPrintf("Did not find a parser for %s", + EnumNameBuiltinOperator(op_type)); + + return kTfLiteError; + } + TF_LITE_ENSURE_STATUS(CallBuiltinParseFunction( + parser, op, builtin_data_allocator, (void**)(&builtin_data))); + } + + TfLiteIntArray* inputs_array = + FlatBufferVectorToTfLiteTypeArray(op->inputs()); + TfLiteIntArray* outputs_array = + FlatBufferVectorToTfLiteTypeArray(op->outputs()); + + TfLiteNode* node = &( + graph_.GetAllocations()[subgraph_idx].node_and_registrations[i].node); + *node = {}; + node->inputs = inputs_array; + node->outputs = outputs_array; + node->builtin_data = reinterpret_cast(builtin_data); + node->custom_initial_data = custom_data; + node->custom_initial_data_size = custom_data_size; + + if (op->intermediates() && (op->intermediates()->size() > 0)) { + node->intermediates = + FlatBufferVectorToTfLiteTypeArray(op->intermediates()); + } + } + } + return kTfLiteOk; +} + +TfLiteStatus MicroInterpreter::AllocateTensors(bool run_all_prep_ops) { + SubgraphAllocations* allocations = allocator_.StartModelAllocation(model_); + + if (allocations == nullptr) { + MicroPrintf("Failed starting model allocation.\n"); + initialization_status_ = kTfLiteError; + return kTfLiteError; + } + + graph_.SetSubgraphAllocations(allocations); + + TF_LITE_ENSURE_STATUS(PrepareNodeAndRegistrationDataFromFlatbuffer()); + + // Only allow AllocatePersistentBuffer in Init stage. + context_.AllocatePersistentBuffer = MicroContextAllocatePersistentBuffer; + context_.RequestScratchBufferInArena = nullptr; + context_.GetScratchBuffer = nullptr; + context_.GetExternalContext = nullptr; + TF_LITE_ENSURE_STATUS(graph_.InitSubgraphs()); + + // Both AllocatePersistentBuffer and RequestScratchBufferInArena is + // available in Prepare stage. + context_.RequestScratchBufferInArena = + MicroContextRequestScratchBufferInArena; + // external_context become available in Prepare stage. + context_.GetExternalContext = MicroContextGetExternalContext; + + TF_LITE_ENSURE_STATUS(graph_.PrepareSubgraphs(run_all_prep_ops)); + + // Prepare is done, we're ready for Invoke. Memory allocation is no longer + // allowed. Kernels can only fetch scratch buffers via GetScratchBuffer. + context_.AllocatePersistentBuffer = nullptr; + context_.RequestScratchBufferInArena = nullptr; + context_.GetScratchBuffer = MicroContextGetScratchBuffer; + + TF_LITE_ENSURE_OK(&context_, allocator_.FinishModelAllocation( + model_, graph_.GetAllocations(), + &scratch_buffer_handles_)); + + micro_context_.SetScratchBufferHandles(scratch_buffer_handles_); + + // TODO(b/162311891): Drop these allocations when the interpreter supports + // handling buffers from TfLiteEvalTensor. 
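+  // The staged TfLiteContext setup above is what individual kernels observe
+  // across their lifecycle hooks. A sketch of the intended pattern (OpData and
+  // kScratchBytes are hypothetical names):
+  //
+  //   void* Init(TfLiteContext* ctx, const char* buffer, size_t length) {
+  //     return ctx->AllocatePersistentBuffer(ctx, sizeof(OpData));  // Init only.
+  //   }
+  //   TfLiteStatus Prepare(TfLiteContext* ctx, TfLiteNode* node) {
+  //     OpData* data = static_cast<OpData*>(node->user_data);
+  //     return ctx->RequestScratchBufferInArena(        // Prepare only.
+  //         ctx, kScratchBytes, &data->scratch_index);
+  //   }
+  //   TfLiteStatus Eval(TfLiteContext* ctx, TfLiteNode* node) {
+  //     OpData* data = static_cast<OpData*>(node->user_data);
+  //     void* scratch = ctx->GetScratchBuffer(ctx, data->scratch_index);  // Invoke only.
+  //     return kTfLiteOk;
+  //   }
+  //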
+ input_tensors_ = + reinterpret_cast(allocator_.AllocatePersistentBuffer( + sizeof(TfLiteTensor*) * inputs_size())); + if (input_tensors_ == nullptr) { + MicroPrintf( + "Failed to allocate memory for context->input_tensors_, " + "%d bytes required", + sizeof(TfLiteTensor*) * inputs_size()); + return kTfLiteError; + } + + for (size_t i = 0; i < inputs_size(); ++i) { + input_tensors_[i] = allocator_.AllocatePersistentTfLiteTensor( + model_, graph_.GetAllocations(), inputs().Get(i), 0); + if (input_tensors_[i] == nullptr) { + MicroPrintf("Failed to initialize input tensor %d", i); + return kTfLiteError; + } + } + + // TODO(b/162311891): Drop these allocations when the interpreter supports + // handling buffers from TfLiteEvalTensor. + output_tensors_ = + reinterpret_cast(allocator_.AllocatePersistentBuffer( + sizeof(TfLiteTensor*) * outputs_size())); + if (output_tensors_ == nullptr) { + MicroPrintf( + "Failed to allocate memory for context->output_tensors_, " + "%d bytes required", + sizeof(TfLiteTensor*) * outputs_size()); + return kTfLiteError; + } + + for (size_t i = 0; i < outputs_size(); ++i) { + output_tensors_[i] = allocator_.AllocatePersistentTfLiteTensor( + model_, graph_.GetAllocations(), outputs().Get(i), 0); + if (output_tensors_[i] == nullptr) { + MicroPrintf("Failed to initialize output tensor %d", i); + return kTfLiteError; + } + } + + TF_LITE_ENSURE_STATUS(Reset()); + +#ifdef EON_COMPILER_RUN + node_and_registrations_ = allocations->node_and_registrations; +#endif + + tensors_allocated_ = true; + return kTfLiteOk; +} + +TfLiteStatus MicroInterpreter::Invoke() { + if (initialization_status_ != kTfLiteOk) { + MicroPrintf("Invoke() called after initialization failed\n"); + return kTfLiteError; + } + + // Ensure tensors are allocated before the interpreter is invoked to avoid + // difficult to debug segfaults. + if (!tensors_allocated_) { + TF_LITE_ENSURE_OK(&context_, AllocateTensors(true)); + } + return graph_.InvokeSubgraph(0); +} + +TfLiteTensor* MicroInterpreter::input(size_t index) { + const size_t length = inputs_size(); + if (index >= length) { + MicroPrintf("Input index %d out of range (length is %d)", index, length); + return nullptr; + } + return input_tensors_[index]; +} + +TfLiteTensor* MicroInterpreter::output(size_t index) { + const size_t length = outputs_size(); + if (index >= length) { + MicroPrintf("Output index %d out of range (length is %d)", index, length); + return nullptr; + } + return output_tensors_[index]; +} + +TfLiteTensor* MicroInterpreter::tensor(size_t index) { + const size_t length = tensors_size(); + if (index >= length) { + MicroPrintf("Tensor index %d out of range (length is %d)", index, length); + return nullptr; + } + return allocator_.AllocatePersistentTfLiteTensor(model_, graph_.GetAllocations(), index, 0); +} + + +// Repurposing free subgraphs to reset state for some ops for now +// will reset api is made. See b/220940833#comment25 for more context. 
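+// Typical end-to-end use of the accessors above, as a sketch only;
+// kTensorArenaSize, g_model_data and the registered op list are
+// application-provided placeholders:
+//
+//   const tflite::Model* model = tflite::GetModel(g_model_data);
+//   static tflite::MicroMutableOpResolver<2> resolver;
+//   resolver.AddFullyConnected();
+//   resolver.AddSoftmax();
+//   static uint8_t arena[kTensorArenaSize];
+//   tflite::MicroInterpreter interpreter(model, resolver, arena,
+//                                        kTensorArenaSize);
+//   interpreter.AllocateTensors(true);
+//   interpreter.input(0)->data.f[0] = 1.0f;
+//   interpreter.Invoke();
+//   float y = interpreter.output(0)->data.f[0];
+//   interpreter.Reset();  // For stateful graphs, clear variables between runs.
+//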
+TfLiteStatus MicroInterpreter::Reset() { + TfLiteStatus status = graph_.FreeSubgraphs(); + if (status != kTfLiteOk) { + return status; + } + return graph_.ResetVariableTensors(); +} + +TfLiteStatus MicroInterpreter::SetMicroExternalContext( + void* external_context_payload) { + return micro_context_.set_external_context(external_context_payload); +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_interpreter.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_interpreter.h new file mode 100644 index 0000000..051490b --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_interpreter.h @@ -0,0 +1,195 @@ +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_MICRO_MICRO_INTERPRETER_H_ +#define TENSORFLOW_LITE_MICRO_MICRO_INTERPRETER_H_ + +#include +#include + +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/flatbuffers.h" // from @flatbuffers +#include "edge-impulse-sdk/tensorflow/lite/c/c_api_types.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/core/api/error_reporter.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_allocator.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_context.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_graph.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_op_resolver.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_profiler_interface.h" +#include "edge-impulse-sdk/tensorflow/lite/portable_type_to_tflitetype.h" +#include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h" +#include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated_full.h" + +/// Copied from tensorflow/lite/version.h to avoid a dependency chain into +// tensorflow/core. +#define TFLITE_SCHEMA_VERSION (3) + +namespace tflite { + +class MicroInterpreter { + public: + // The lifetime of the model, op resolver, tensor arena, error reporter, + // resource variables, and profiler must be at least as long as that of the + // interpreter object, since the interpreter may need to access them at any + // time. This means that you should usually create them with the same scope as + // each other, for example having them all allocated on the stack as local + // variables through a top-level function. The interpreter doesn't do any + // deallocation of any of the pointed-to objects, ownership remains with the + // caller. + MicroInterpreter(const Model* model, const MicroOpResolver& op_resolver, + uint8_t* tensor_arena, size_t tensor_arena_size, + MicroResourceVariables* resource_variables = nullptr, + MicroProfilerInterface* profiler = nullptr); + + // Create an interpreter instance using an existing MicroAllocator instance. 
+ // This constructor should be used when creating an allocator that needs to + // have allocation handled in more than one interpreter or for recording + // allocations inside the interpreter. The lifetime of the allocator must be + // as long as that of the interpreter object. + MicroInterpreter(const Model* model, const MicroOpResolver& op_resolver, + MicroAllocator* allocator, + MicroResourceVariables* resource_variables = nullptr, + MicroProfilerInterface* profiler = nullptr); + + ~MicroInterpreter(); + + // Runs through the model and allocates all necessary input, output and + // intermediate tensors. + TfLiteStatus AllocateTensors(bool run_all_prep_ops); + + // In order to support partial graph runs for strided models, this can return + // values other than kTfLiteOk and kTfLiteError. + // TODO(b/149795762): Add this to the TfLiteStatus enum. + TfLiteStatus Invoke(); + + // This is the recommended API for an application to pass an external payload + // pointer as an external context to kernels. The life time of the payload + // pointer should be at least as long as this interpreter. TFLM supports only + // one external context. + TfLiteStatus SetMicroExternalContext(void* external_context_payload); + + size_t tensors_size() const { return model_->subgraphs()->Get(0)->tensors()->size(); } + + TfLiteTensor* tensor(size_t tensor_index); + template + T* typed_tensor(int tensor_index) { + if (TfLiteTensor* tensor_ptr = tensor(tensor_index)) { + if (tensor_ptr->type == typeToTfLiteType()) { + return GetTensorData(tensor_ptr); + } + } + return nullptr; + } + + TfLiteTensor* input(size_t index); + size_t inputs_size() const { + return model_->subgraphs()->Get(0)->inputs()->size(); + } + const flatbuffers::Vector& inputs() const { + return *model_->subgraphs()->Get(0)->inputs(); + } + TfLiteTensor* input_tensor(size_t index) { return input(index); } + template + T* typed_input_tensor(int tensor_index) { + if (TfLiteTensor* tensor_ptr = input_tensor(tensor_index)) { + if (tensor_ptr->type == typeToTfLiteType()) { + return GetTensorData(tensor_ptr); + } + } + return nullptr; + } + + TfLiteTensor* output(size_t index); + size_t outputs_size() const { + return model_->subgraphs()->Get(0)->outputs()->size(); + } + const flatbuffers::Vector& outputs() const { + return *model_->subgraphs()->Get(0)->outputs(); + } + TfLiteTensor* output_tensor(size_t index) { return output(index); } + template + T* typed_output_tensor(int tensor_index) { + if (TfLiteTensor* tensor_ptr = output_tensor(tensor_index)) { + if (tensor_ptr->type == typeToTfLiteType()) { + return GetTensorData(tensor_ptr); + } + } + return nullptr; + } + + // Reset the state to be what you would expect when the interpreter is first + // created. i.e. after Init and Prepare is called for the very first time. + TfLiteStatus Reset(); + + TfLiteStatus initialization_status() const { return initialization_status_; } + + size_t operators_size() const { return model_->subgraphs()->Get(0)->operators()->size(); } + +#ifdef EON_COMPILER_RUN + NodeAndRegistration* node_and_registrations_ = nullptr; + + const NodeAndRegistration node_and_registration(int node_index) const { + return node_and_registrations_[node_index]; + } +#endif + + // Populates node and registration pointers representing the inference graph + // of the model from values inside the flatbuffer (loaded from the TfLiteModel + // instance). Persistent data (e.g. operator data) is allocated from the + // arena. 
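+  // Note: AllocateTensors() calls this as part of graph setup, so application
+  // code normally does not invoke it directly. The rough internal order inside
+  // AllocateTensors(bool) is:
+  //
+  //   allocator_.StartModelAllocation(model_);
+  //   PrepareNodeAndRegistrationDataFromFlatbuffer();
+  //   graph_.InitSubgraphs();
+  //   graph_.PrepareSubgraphs(run_all_prep_ops);
+  //   allocator_.FinishModelAllocation(...);
+  //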
+ TfLiteStatus PrepareNodeAndRegistrationDataFromFlatbuffer(); + + // For debugging only. + // Returns the actual used arena in bytes. This method gives the optimal arena + // size. It's only available after `AllocateTensors` has been called. + // Note that normally `tensor_arena` requires 16 bytes alignment to fully + // utilize the space. If it's not the case, the optimial arena size would be + // arena_used_bytes() + 16. + size_t arena_used_bytes() const { return allocator_.used_bytes(); } + + protected: + const MicroAllocator& allocator() const { return allocator_; } + const TfLiteContext& context() const { return context_; } + + private: + // TODO(b/158263161): Consider switching to Create() function to enable better + // error reporting during initialization. + void Init(MicroProfilerInterface* profiler); + + // Gets the current subgraph index used from within context methods. + int get_subgraph_index() { return graph_.GetCurrentSubgraphIndex(); } + + const Model* model_; + const MicroOpResolver& op_resolver_; + TfLiteContext context_ = {}; + MicroAllocator& allocator_; + MicroGraph graph_; + bool tensors_allocated_; + + TfLiteStatus initialization_status_; + + ScratchBufferHandle* scratch_buffer_handles_ = nullptr; + + // TODO(b/162311891): Clean these pointers up when this class supports buffers + // from TfLiteEvalTensor. + TfLiteTensor** input_tensors_; + TfLiteTensor** output_tensors_; + + MicroContext micro_context_; +}; + +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_MICRO_INTERPRETER_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_log.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_log.cpp new file mode 100644 index 0000000..26282ca --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_log.cpp @@ -0,0 +1,47 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +#include +#include +#include + +#if !defined(TF_LITE_STRIP_ERROR_STRINGS) +#include "edge-impulse-sdk/tensorflow/lite/micro/debug_log.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_string.h" +#endif + +void Log(const char* format, va_list args) { +#if !defined(TF_LITE_STRIP_ERROR_STRINGS) + // Only pulling in the implementation of this function for builds where we + // expect to make use of it to be extra cautious about not increasing the code + // size. + static constexpr int kMaxLogLen = 256; + char log_buffer[kMaxLogLen]; + MicroVsnprintf(log_buffer, kMaxLogLen, format, args); + DebugLog(log_buffer); + DebugLog("\r\n"); +#endif +} + +#if !defined(TF_LITE_STRIP_ERROR_STRINGS) +void MicroPrintf(const char* format, ...) 
{ + va_list args; + va_start(args, format); + Log(format, args); + va_end(args); +} +#endif diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_log.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_log.h new file mode 100644 index 0000000..22cceb2 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_log.h @@ -0,0 +1,49 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_MICRO_MICRO_LOG_H_ +#define TENSORFLOW_LITE_MICRO_MICRO_LOG_H_ + +#include + +// do this by default except when running EON compiler +#ifndef EON_COMPILER_RUN +#define TF_LITE_STRIP_ERROR_STRINGS +#endif + +// This is a free function used to perform the actual logging. +// This function will be used by MicroPrintf and MicroErrorReporter::Report() +void Log(const char* format, va_list args); + +#if !defined(TF_LITE_STRIP_ERROR_STRINGS) +// This function can be used independent of the MicroErrorReporter to get +// printf-like functionalitys and are common to all target platforms. +void MicroPrintf(const char* format, ...); +#else +// We use a #define to ensure that the strings are completely stripped, to +// prevent an unnecessary increase in the binary size. +#define MicroPrintf(...) tflite::Unused(__VA_ARGS__) +#endif + +namespace tflite { + +// From +// https://stackoverflow.com/questions/23235910/variadic-unused-function-macro +template +void Unused(Args&&... args) { + (void)(sizeof...(args)); +} +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_MICRO_LOG_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_mutable_op_resolver.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_mutable_op_resolver.h new file mode 100644 index 0000000..798787a --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_mutable_op_resolver.h @@ -0,0 +1,676 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef TENSORFLOW_LITE_MICRO_MICRO_MUTABLE_OP_RESOLVER_H_ +#define TENSORFLOW_LITE_MICRO_MICRO_MUTABLE_OP_RESOLVER_H_ + +#include +#include + +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/core/api/flatbuffer_conversions.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/compatibility.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/add.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/conv.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/depthwise_conv.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/ethosu.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/tree_ensemble_classifier.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/fully_connected.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/micro_ops.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/pooling.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/reduce.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/softmax.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_op_resolver.h" +#include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h" + +namespace tflite { +TfLiteRegistration* Register_DETECTION_POSTPROCESS(); + +template +class MicroMutableOpResolver : public MicroOpResolver { + public: + TF_LITE_REMOVE_VIRTUAL_DELETE + + explicit MicroMutableOpResolver() {} + + const TfLiteRegistration* FindOp(tflite::BuiltinOperator op) const override { + if (op == BuiltinOperator_CUSTOM) return nullptr; + + for (unsigned int i = 0; i < registrations_len_; ++i) { + const TfLiteRegistration& registration = registrations_[i]; + if (registration.builtin_code == op) { + return ®istration; + } + } + return nullptr; + } + + const TfLiteRegistration* FindOp(const char* op) const override { + for (unsigned int i = 0; i < registrations_len_; ++i) { + const TfLiteRegistration& registration = registrations_[i]; + if ((registration.builtin_code == BuiltinOperator_CUSTOM) && + (strcmp(registration.custom_name, op) == 0)) { + return ®istration; + } + } + return nullptr; + } + + TfLiteBridgeBuiltinParseFunction GetOpDataParser( + BuiltinOperator op) const override { + TFLITE_DCHECK(num_buitin_ops_ <= tOpCount); + for (unsigned int i = 0; i < num_buitin_ops_; ++i) { + if (builtin_codes_[i] == op) return builtin_parsers_[i]; + } + return nullptr; + } + + // Registers a Custom Operator with the MicroOpResolver. + // + // Only the first call for a given name will be successful. i.e. if this + // function is called again for a previously added Custom Operator, the + // MicroOpResolver will be unchanged and this function will return + // kTfLiteError. 
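+  //
+  // For example (sketch; Register_MY_OP() and "MY_OP" stand in for a
+  // user-supplied custom kernel and are not symbols provided by this SDK):
+  //
+  //   static tflite::MicroMutableOpResolver<3> resolver;
+  //   resolver.AddFullyConnected();
+  //   resolver.AddSoftmax();
+  //   resolver.AddCustom("MY_OP", Register_MY_OP());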
+ TfLiteStatus AddCustom(const char* name, TfLiteRegistration* registration) { + if (registrations_len_ >= tOpCount) { + MicroPrintf( + "Couldn't register custom op '%s', resolver size is too" + "small (%d)", + name, tOpCount); + return kTfLiteError; + } + + if (FindOp(name) != nullptr) { + MicroPrintf("Calling AddCustom for the same op more than once "); + MicroPrintf("is not supported (Op: %s).", name); + return kTfLiteError; + } + + TfLiteRegistration* new_registration = ®istrations_[registrations_len_]; + registrations_len_ += 1; + + *new_registration = *registration; + new_registration->builtin_code = BuiltinOperator_CUSTOM; + new_registration->custom_name = name; + return kTfLiteOk; + } + + // The Add* functions below add the various Builtin operators to the + // MicroMutableOpResolver object. + + TfLiteStatus AddAbs() { + return AddBuiltin(BuiltinOperator_ABS, tflite::ops::micro::Register_ABS(), + ParseAbs); + } + + TfLiteStatus AddAdd(const TfLiteRegistration& registration = Register_ADD()) { + return AddBuiltin(BuiltinOperator_ADD, registration, ParseAdd); + } + + TfLiteStatus AddAddN() { + return AddBuiltin(BuiltinOperator_ADD_N, tflite::Register_ADD_N(), + ParseAddN); + } + + TfLiteStatus AddArgMax() { + return AddBuiltin(BuiltinOperator_ARG_MAX, Register_ARG_MAX(), ParseArgMax); + } + + TfLiteStatus AddArgMin() { + return AddBuiltin(BuiltinOperator_ARG_MIN, Register_ARG_MIN(), ParseArgMin); + } + + TfLiteStatus AddAssignVariable() { + return AddBuiltin(BuiltinOperator_ASSIGN_VARIABLE, + tflite::Register_ASSIGN_VARIABLE(), ParseAssignVariable); + } + + TfLiteStatus AddAveragePool2D( + const TfLiteRegistration& registration = Register_AVERAGE_POOL_2D()) { + return AddBuiltin(BuiltinOperator_AVERAGE_POOL_2D, registration, ParsePool); + } + + TfLiteStatus AddBatchMatMul() { + return AddBuiltin(BuiltinOperator_BATCH_MATMUL, + Register_BATCH_MATMUL(), ParseBatchMatMul); + } + + TfLiteStatus AddBatchToSpaceNd() { + return AddBuiltin(BuiltinOperator_BATCH_TO_SPACE_ND, + Register_BATCH_TO_SPACE_ND(), ParseBatchToSpaceNd); + } + + TfLiteStatus AddBroadcastArgs() { + return AddBuiltin(BuiltinOperator_BROADCAST_ARGS, Register_BROADCAST_ARGS(), + ParseBroadcastArgs); + } + + TfLiteStatus AddBroadcastTo() { + return AddBuiltin(BuiltinOperator_BROADCAST_TO, Register_BROADCAST_TO(), + ParseBroadcastTo); + } + + TfLiteStatus AddCallOnce() { + return AddBuiltin(BuiltinOperator_CALL_ONCE, Register_CALL_ONCE(), + ParseCallOnce); + } + + TfLiteStatus AddCast() { + return AddBuiltin(BuiltinOperator_CAST, Register_CAST(), ParseCast); + } + + TfLiteStatus AddCeil() { + return AddBuiltin(BuiltinOperator_CEIL, Register_CEIL(), ParseCeil); + } + + TfLiteStatus AddComplexAbs() { + return AddBuiltin(BuiltinOperator_COMPLEX_ABS, Register_COMPLEX_ABS(), + ParseComplexAbs); + } + + TfLiteStatus AddCircularBuffer() { + return AddCustom("CIRCULAR_BUFFER", tflite::Register_CIRCULAR_BUFFER()); + } + + TfLiteStatus AddConcatenation() { + return AddBuiltin(BuiltinOperator_CONCATENATION, Register_CONCATENATION(), + ParseConcatenation); + } + + TfLiteStatus AddConv2D( + const TfLiteRegistration& registration = Register_CONV_2D()) { + return AddBuiltin(BuiltinOperator_CONV_2D, registration, ParseConv2D); + } + + TfLiteStatus AddCos() { + return AddBuiltin(BuiltinOperator_COS, tflite::ops::micro::Register_COS(), + ParseCos); + } + + TfLiteStatus AddCumSum() { + return AddBuiltin(BuiltinOperator_CUMSUM, tflite::Register_CUMSUM(), + ParseCumsum); + } + + TfLiteStatus AddDepthToSpace() { + return 
AddBuiltin(BuiltinOperator_DEPTH_TO_SPACE, + tflite::Register_DEPTH_TO_SPACE(), ParseDepthToSpace); + } + + TfLiteStatus AddDepthwiseConv2D( + const TfLiteRegistration& registration = Register_DEPTHWISE_CONV_2D()) { + return AddBuiltin(BuiltinOperator_DEPTHWISE_CONV_2D, registration, + ParseDepthwiseConv2D); + } + + TfLiteStatus AddDequantize() { + return AddBuiltin(BuiltinOperator_DEQUANTIZE, tflite::Register_DEQUANTIZE(), + ParseDequantize); + } + + TfLiteStatus AddDetectionPostprocess() { + return AddCustom("TFLite_Detection_PostProcess", + tflite::Register_DETECTION_POSTPROCESS()); + } + + TfLiteStatus AddDiv() { + return AddBuiltin(BuiltinOperator_DIV, tflite::Register_DIV(), ParseDiv); + } + + TfLiteStatus AddElu() { + return AddBuiltin(BuiltinOperator_ELU, tflite::Register_ELU(), ParseElu); + } + + TfLiteStatus AddEqual() { + return AddBuiltin(BuiltinOperator_EQUAL, Register_EQUAL(), ParseEqual); + } + + TfLiteStatus AddEthosU() { + TfLiteRegistration* registration = tflite::Register_ETHOSU(); + if (registration) { + return AddCustom(tflite::GetString_ETHOSU(), registration); + } + return kTfLiteOk; + } + + TfLiteStatus AddExp() { + return AddBuiltin(BuiltinOperator_EXP, Register_EXP(), ParseExp); + } + + TfLiteStatus AddExpandDims() { + return AddBuiltin(BuiltinOperator_EXPAND_DIMS, Register_EXPAND_DIMS(), + ParseExpandDims); + } + + TfLiteStatus AddFill() { + return AddBuiltin(BuiltinOperator_FILL, tflite::Register_FILL(), ParseFill); + } + + TfLiteStatus AddFloor() { + return AddBuiltin(BuiltinOperator_FLOOR, Register_FLOOR(), ParseFloor); + } + + TfLiteStatus AddFloorDiv() { + return AddBuiltin(BuiltinOperator_FLOOR_DIV, tflite::Register_FLOOR_DIV(), + ParseFloorDiv); + } + + TfLiteStatus AddFloorMod() { + return AddBuiltin(BuiltinOperator_FLOOR_MOD, tflite::Register_FLOOR_MOD(), + ParseFloorMod); + } + + TfLiteStatus AddFullyConnected( + const TfLiteRegistration& registration = Register_FULLY_CONNECTED()) { + return AddBuiltin(BuiltinOperator_FULLY_CONNECTED, registration, + ParseFullyConnected); + } + +#ifndef TF_LITE_STATIC_MEMORY + TfLiteStatus AddGather() { + return AddBuiltin(BuiltinOperator_GATHER, tflite::Register_GATHER(), + ParseGather); + } +#endif + + TfLiteStatus AddGatherNd() { + return AddBuiltin(BuiltinOperator_GATHER_ND, tflite::Register_GATHER_ND(), + ParseGatherNd); + } + + TfLiteStatus AddGreater() { + return AddBuiltin(BuiltinOperator_GREATER, Register_GREATER(), + ParseGreater); + } + + TfLiteStatus AddGreaterEqual() { + return AddBuiltin(BuiltinOperator_GREATER_EQUAL, Register_GREATER_EQUAL(), + ParseGreaterEqual); + } + + TfLiteStatus AddHardSwish() { + return AddBuiltin(BuiltinOperator_HARD_SWISH, tflite::Register_HARD_SWISH(), + ParseHardSwish); + } + + TfLiteStatus AddImag() { + return AddBuiltin(BuiltinOperator_IMAG, Register_IMAG(), + ParseImag); + } + + TfLiteStatus AddIf() { + return AddBuiltin(BuiltinOperator_IF, tflite::Register_IF(), ParseIf); + } + + TfLiteStatus AddL2Normalization() { + return AddBuiltin(BuiltinOperator_L2_NORMALIZATION, + tflite::ops::micro::Register_L2_NORMALIZATION(), + ParseL2Normalization); + } + + TfLiteStatus AddL2Pool2D() { + return AddBuiltin(BuiltinOperator_L2_POOL_2D, tflite::Register_L2_POOL_2D(), + ParsePool); + } + + TfLiteStatus AddLeakyRelu() { + return AddBuiltin(BuiltinOperator_LEAKY_RELU, tflite::Register_LEAKY_RELU(), + ParseLeakyRelu); + } + + TfLiteStatus AddLess() { + return AddBuiltin(BuiltinOperator_LESS, Register_LESS(), ParseLess); + } + + TfLiteStatus AddLessEqual() { + return 
AddBuiltin(BuiltinOperator_LESS_EQUAL, Register_LESS_EQUAL(), + ParseLessEqual); + } + + TfLiteStatus AddLog() { + return AddBuiltin(BuiltinOperator_LOG, tflite::ops::micro::Register_LOG(), + ParseLog); + } + + TfLiteStatus AddLogicalAnd() { + return AddBuiltin(BuiltinOperator_LOGICAL_AND, + tflite::Register_LOGICAL_AND(), ParseLogicalAnd); + } + + TfLiteStatus AddLogicalNot() { + return AddBuiltin(BuiltinOperator_LOGICAL_NOT, + tflite::ops::micro::Register_LOGICAL_NOT(), + ParseLogicalNot); + } + + TfLiteStatus AddLogicalOr() { + return AddBuiltin(BuiltinOperator_LOGICAL_OR, tflite::Register_LOGICAL_OR(), + ParseLogicalOr); + } + + TfLiteStatus AddLogistic() { + return AddBuiltin(BuiltinOperator_LOGISTIC, tflite::Register_LOGISTIC(), + ParseLogistic); + } + + TfLiteStatus AddLogSoftmax() { + return AddBuiltin(BuiltinOperator_LOG_SOFTMAX, + tflite::Register_LOG_SOFTMAX(), ParseLogSoftmax); + } + + TfLiteStatus AddMaximum() { + return AddBuiltin(BuiltinOperator_MAXIMUM, Register_MAXIMUM(), + ParseMaximum); + } + + TfLiteStatus AddMaxPool2D( + const TfLiteRegistration& registration = Register_MAX_POOL_2D()) { + return AddBuiltin(BuiltinOperator_MAX_POOL_2D, registration, ParsePool); + } + + TfLiteStatus AddMirrorPad() { + return AddBuiltin(BuiltinOperator_MIRROR_PAD, tflite::Register_MIRROR_PAD(), + ParseMirrorPad); + } + + TfLiteStatus AddMean() { + return AddBuiltin(BuiltinOperator_MEAN, Register_MEAN(), ParseReducer); + } + + TfLiteStatus AddMinimum() { + return AddBuiltin(BuiltinOperator_MINIMUM, Register_MINIMUM(), + ParseMinimum); + } + + TfLiteStatus AddMul(const TfLiteRegistration& registration = Register_MUL()) { + return AddBuiltin(BuiltinOperator_MUL, registration, ParseMul); + } + + TfLiteStatus AddNeg() { + return AddBuiltin(BuiltinOperator_NEG, Register_NEG(), ParseNeg); + } + + TfLiteStatus AddNotEqual() { + return AddBuiltin(BuiltinOperator_NOT_EQUAL, Register_NOT_EQUAL(), + ParseNotEqual); + } + + TfLiteStatus AddPack() { + return AddBuiltin(BuiltinOperator_PACK, Register_PACK(), ParsePack); + } + + TfLiteStatus AddPad(const TfLiteRegistration& registration = Register_PAD()) { + return AddBuiltin(BuiltinOperator_PAD, registration, ParsePad); + } + + TfLiteStatus AddPadV2() { + return AddBuiltin(BuiltinOperator_PADV2, Register_PADV2(), ParsePadV2); + } + + TfLiteStatus AddPrelu() { + return AddBuiltin(BuiltinOperator_PRELU, tflite::Register_PRELU(), + ParsePrelu); + } + + TfLiteStatus AddQuantize() { + return AddBuiltin(BuiltinOperator_QUANTIZE, Register_QUANTIZE(), + ParseQuantize); + } + + TfLiteStatus AddReal() { + return AddBuiltin(BuiltinOperator_REAL, Register_REAL(), + ParseReal); + } + + TfLiteStatus AddReadVariable() { + return AddBuiltin(BuiltinOperator_READ_VARIABLE, + tflite::Register_READ_VARIABLE(), ParseReadVariable); + } + + TfLiteStatus AddReduceMax() { + return AddBuiltin(BuiltinOperator_REDUCE_MAX, Register_REDUCE_MAX(), + ParseReducer); + } + + TfLiteStatus AddReduceMin() { + return AddBuiltin(BuiltinOperator_REDUCE_MIN, Register_REDUCE_MIN(), + ParseReducer); + } + + TfLiteStatus AddRelu() { + return AddBuiltin(BuiltinOperator_RELU, tflite::Register_RELU(), ParseRelu); + } + + TfLiteStatus AddRelu6() { + return AddBuiltin(BuiltinOperator_RELU6, tflite::Register_RELU6(), + ParseRelu6); + } + + TfLiteStatus AddReshape() { + return AddBuiltin(BuiltinOperator_RESHAPE, + tflite::ops::micro::Register_RESHAPE(), ParseReshape); + } + + TfLiteStatus AddResizeBilinear() { + return AddBuiltin(BuiltinOperator_RESIZE_BILINEAR, + Register_RESIZE_BILINEAR(), 
ParseResizeBilinear); + } + + TfLiteStatus AddResizeNearestNeighbor() { + return AddBuiltin(BuiltinOperator_RESIZE_NEAREST_NEIGHBOR, + tflite::ops::micro::Register_RESIZE_NEAREST_NEIGHBOR(), + ParseResizeNearestNeighbor); + } + + TfLiteStatus AddRfft2D() { + return AddBuiltin(BuiltinOperator_RFFT2D, Register_RFFT2D(), + ParseRfft2D); + } + + TfLiteStatus AddRound() { + return AddBuiltin(BuiltinOperator_ROUND, + tflite::ops::micro::Register_ROUND(), ParseRound); + } + + TfLiteStatus AddRsqrt() { + return AddBuiltin(BuiltinOperator_RSQRT, + tflite::ops::micro::Register_RSQRT(), ParseRsqrt); + } + +#ifndef TF_LITE_STATIC_MEMORY + TfLiteStatus AddSelect() { + return AddBuiltin(BuiltinOperator_SELECT, Register_SELECT(), + ParseSelect); + } + + TfLiteStatus AddSelectV2() { + return AddBuiltin(BuiltinOperator_SELECT_V2, Register_SELECT_V2(), + ParseSelect); + } +#endif // TF_LITE_STATIC_MEMORY + + TfLiteStatus AddShape() { + return AddBuiltin(BuiltinOperator_SHAPE, Register_SHAPE(), ParseShape); + } + + TfLiteStatus AddSin() { + return AddBuiltin(BuiltinOperator_SIN, tflite::ops::micro::Register_SIN(), + ParseSin); + } + + TfLiteStatus AddSlice() { + return AddBuiltin(BuiltinOperator_SLICE, Register_SLICE(), ParseSlice); + } + + TfLiteStatus AddSoftmax( + const TfLiteRegistration& registration = Register_SOFTMAX()) { + return AddBuiltin(BuiltinOperator_SOFTMAX, registration, ParseSoftmax); + } + + TfLiteStatus AddSpaceToBatchNd() { + return AddBuiltin(BuiltinOperator_SPACE_TO_BATCH_ND, + Register_SPACE_TO_BATCH_ND(), ParseSpaceToBatchNd); + } + + TfLiteStatus AddSpaceToDepth() { + return AddBuiltin(BuiltinOperator_SPACE_TO_DEPTH, Register_SPACE_TO_DEPTH(), + ParseSpaceToDepth); + } + + TfLiteStatus AddSplit() { + return AddBuiltin(BuiltinOperator_SPLIT, + tflite::ops::micro::Register_SPLIT(), ParseSplit); + } + + TfLiteStatus AddSplitV() { + return AddBuiltin(BuiltinOperator_SPLIT_V, Register_SPLIT_V(), ParseSplitV); + } + + TfLiteStatus AddSqueeze() { + return AddBuiltin(BuiltinOperator_SQUEEZE, Register_SQUEEZE(), + ParseSqueeze); + } + + TfLiteStatus AddSqrt() { + return AddBuiltin(BuiltinOperator_SQRT, tflite::ops::micro::Register_SQRT(), + ParseSqrt); + } + + TfLiteStatus AddSquare() { + return AddBuiltin(BuiltinOperator_SQUARE, + tflite::ops::micro::Register_SQUARE(), ParseSquare); + } + + TfLiteStatus AddSquaredDifference() { + return AddBuiltin(BuiltinOperator_SQUARED_DIFFERENCE, + tflite::Register_SQUARED_DIFFERENCE(), + ParseSquaredDifference); + } + + TfLiteStatus AddStridedSlice() { + return AddBuiltin(BuiltinOperator_STRIDED_SLICE, Register_STRIDED_SLICE(), + ParseStridedSlice); + } + + TfLiteStatus AddSub() { + return AddBuiltin(BuiltinOperator_SUB, tflite::Register_SUB(), ParseSub); + } + + TfLiteStatus AddSum() { + return AddBuiltin(BuiltinOperator_SUM, Register_SUM(), ParseReducer); + } + + TfLiteStatus AddSvdf( + const TfLiteRegistration& registration = Register_SVDF()) { + return AddBuiltin(BuiltinOperator_SVDF, registration, ParseSvdf); + } + + TfLiteStatus AddTanh() { + return AddBuiltin(BuiltinOperator_TANH, tflite::ops::micro::Register_TANH(), + ParseTanh); + } + + TfLiteStatus AddTransposeConv() { + return AddBuiltin(BuiltinOperator_TRANSPOSE_CONV, + tflite::Register_TRANSPOSE_CONV(), ParseTransposeConv); + } + + TfLiteStatus AddTranspose() { + return AddBuiltin(BuiltinOperator_TRANSPOSE, Register_TRANSPOSE(), + ParseTranspose); + } + + TfLiteStatus AddTreeEnsembleClassifier() { + return AddCustom(tflite::GetString_TreeEnsembleClassifier(), + 
tflite::Register_TreeEnsembleClassifier()); + } + + TfLiteStatus AddUnpack() { + return AddBuiltin(BuiltinOperator_UNPACK, + tflite::ops::micro::Register_UNPACK(), ParseUnpack); + } + + TfLiteStatus AddUnidirectionalSequenceLstm() { + return AddBuiltin(BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM, + Register_UNIDIRECTIONAL_SEQUENCE_LSTM(), + ParseUnidirectionalSequenceLSTM); + } + + TfLiteStatus AddVarHandle() { + return AddBuiltin(BuiltinOperator_VAR_HANDLE, Register_VAR_HANDLE(), + ParseVarHandle); + } + + TfLiteStatus AddWhile() { + return AddBuiltin(BuiltinOperator_WHILE, Register_WHILE(), ParseWhile); + } + + TfLiteStatus AddZerosLike() { + return AddBuiltin(BuiltinOperator_ZEROS_LIKE, Register_ZEROS_LIKE(), + ParseZerosLike); + } + + unsigned int GetRegistrationLength() { return registrations_len_; } + + private: + TfLiteStatus AddBuiltin(tflite::BuiltinOperator op, + const TfLiteRegistration& registration, + TfLiteBridgeBuiltinParseFunction parser) { + if (op == BuiltinOperator_CUSTOM) { + MicroPrintf("Invalid parameter BuiltinOperator_CUSTOM to the "); + MicroPrintf("AddBuiltin function."); + return kTfLiteError; + } + + if (FindOp(op) != nullptr) { + MicroPrintf("Calling AddBuiltin with the same op more than "); + MicroPrintf("once is not supported (Op: #%d).", op); + return kTfLiteError; + } + + if (registrations_len_ >= tOpCount) { + MicroPrintf("Couldn't register builtin op #%d, resolver size ", op); + MicroPrintf("is too small (%d).", tOpCount); + return kTfLiteError; + } + + registrations_[registrations_len_] = registration; + // Strictly speaking, the builtin_code is not necessary for TFLM but filling + // it in regardless. + registrations_[registrations_len_].builtin_code = op; + registrations_len_++; + + builtin_codes_[num_buitin_ops_] = op; + builtin_parsers_[num_buitin_ops_] = parser; + num_buitin_ops_++; + + return kTfLiteOk; + } + + TfLiteRegistration registrations_[tOpCount]; + unsigned int registrations_len_ = 0; + + // Arrays (and counter) to store the builtin codes and their corresponding + // parse functions as these are registered with the Op Resolver. + BuiltinOperator builtin_codes_[tOpCount]; + TfLiteBridgeBuiltinParseFunction builtin_parsers_[tOpCount]; + unsigned int num_buitin_ops_ = 0; +}; + +}; // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_MICRO_MUTABLE_OP_RESOLVER_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_op_resolver.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_op_resolver.h new file mode 100644 index 0000000..1bd3f4b --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_op_resolver.h @@ -0,0 +1,68 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef TENSORFLOW_LITE_MICRO_MICRO_OP_RESOLVER_H_ +#define TENSORFLOW_LITE_MICRO_MICRO_OP_RESOLVER_H_ + +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/flatbuffer_conversions_bridge.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/op_resolver_bridge.h" +#include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h" + +namespace tflite { + +// This is an interface for the OpResolver for TFLiteMicro. The differences from +// the TFLite OpResolver base class are to: +// * explicitly remove support for Op versions +// * allow for finer grained registration of the Builtin Ops to reduce code +// size for TFLiteMicro. +// +// We need an interface class instead of directly using MicroMutableOpResolver +// because MicroMutableOpResolver is a class template with the number of +// registered Ops as the template parameter. +class MicroOpResolver : public TfLiteBridgeOpResolver { + public: + // Returns the Op registration struct corresponding to the enum code from the + // flatbuffer schema. Returns nullptr if the op is not found or if op == + // BuiltinOperator_CUSTOM. + virtual const TfLiteRegistration* FindOp(BuiltinOperator op) const = 0; + + // Returns the Op registration struct corresponding to the custom operator by + // name. + virtual const TfLiteRegistration* FindOp(const char* op) const = 0; + + // This implementation exists for compatibility with the OpResolver base class + // and disregards the version parameter. + const TfLiteRegistration* FindOp(BuiltinOperator op, + int version) const final { + return FindOp(op); + } + + // This implementation exists for compatibility with the OpResolver base class + // and disregards the version parameter. + const TfLiteRegistration* FindOp(const char* op, int version) const final { + return FindOp(op); + } + + // Returns the operator specific parsing function for the OpData for a + // BuiltinOperator (if registered), else nullptr. + virtual TfLiteBridgeBuiltinParseFunction GetOpDataParser( + BuiltinOperator op) const = 0; + + ~MicroOpResolver() override {} +}; + +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_MICRO_OP_RESOLVER_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_profiler.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_profiler.cpp new file mode 100644 index 0000000..63306ce --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_profiler.cpp @@ -0,0 +1,115 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_profiler.h" + +#include +#include +#include + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_time.h" + +namespace tflite { + +uint32_t MicroProfiler::BeginEvent(const char* tag) { + if (num_events_ == kMaxEvents) { + num_events_ = 0; + } + + tags_[num_events_] = tag; + start_ticks_[num_events_] = GetCurrentTimeTicks(); + end_ticks_[num_events_] = start_ticks_[num_events_] - 1; + return num_events_++; +} + +void MicroProfiler::EndEvent(uint32_t event_handle) { + TFLITE_DCHECK(event_handle < kMaxEvents); + end_ticks_[event_handle] = GetCurrentTimeTicks(); +} + +uint32_t MicroProfiler::GetTotalTicks() const { + int32_t ticks = 0; + for (int i = 0; i < num_events_; ++i) { + ticks += end_ticks_[i] - start_ticks_[i]; + } + return ticks; +} + +void MicroProfiler::Log() const { +#if !defined(TF_LITE_STRIP_ERROR_STRINGS) + for (int i = 0; i < num_events_; ++i) { + uint32_t ticks = end_ticks_[i] - start_ticks_[i]; + MicroPrintf("%s took %" PRIu32 " ticks (%d ms).", tags_[i], ticks, + TicksToMs(ticks)); + } +#endif +} + +void MicroProfiler::LogCsv() const { +#if !defined(TF_LITE_STRIP_ERROR_STRINGS) + MicroPrintf("\"Event\",\"Tag\",\"Ticks\""); + for (int i = 0; i < num_events_; ++i) { + uint32_t ticks = end_ticks_[i] - start_ticks_[i]; + MicroPrintf("%d,%s,%" PRIu32, i, tags_[i], ticks); + } +#endif +} + +void MicroProfiler::LogTicksPerTagCsv() { +#if !defined(TF_LITE_STRIP_ERROR_STRINGS) + MicroPrintf( + "\"Unique Tag\",\"Total ticks across all events with that tag.\""); + int total_ticks = 0; + for (int i = 0; i < num_events_; ++i) { + uint32_t ticks = end_ticks_[i] - start_ticks_[i]; + TFLITE_DCHECK(tags_[i] != nullptr); + int position = FindExistingOrNextPosition(tags_[i]); + TFLITE_DCHECK(position >= 0); + total_ticks_per_tag[position].tag = tags_[i]; + total_ticks_per_tag[position].ticks = + total_ticks_per_tag[position].ticks + ticks; + total_ticks += ticks; + } + + for (int i = 0; i < num_events_; ++i) { + TicksPerTag each_tag_entry = total_ticks_per_tag[i]; + if (each_tag_entry.tag == nullptr) { + break; + } + MicroPrintf("%s, %d", each_tag_entry.tag, each_tag_entry.ticks); + } + MicroPrintf("total number of ticks, %d", total_ticks); +#endif +} + +// This method finds a particular array element in the total_ticks_per_tag array +// with the matching tag_name passed in the method. If it can find a +// matching array element that has the same tag_name, then it will return the +// position of the matching element. But if it unable to find a matching element +// with the given tag_name, it will return the next available empty position +// from the array. +int MicroProfiler::FindExistingOrNextPosition(const char* tag_name) { + int pos = 0; + for (; pos < num_events_; pos++) { + TicksPerTag each_tag_entry = total_ticks_per_tag[pos]; + if (each_tag_entry.tag == nullptr || + strcmp(each_tag_entry.tag, tag_name) == 0) { + return pos; + } + } + return pos < num_events_ ? 
pos : -1; +} +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_profiler.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_profiler.h new file mode 100644 index 0000000..d940398 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_profiler.h @@ -0,0 +1,140 @@ +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_MICRO_MICRO_PROFILER_H_ +#define TENSORFLOW_LITE_MICRO_MICRO_PROFILER_H_ + +#include "edge-impulse-sdk/tensorflow/lite/micro/compatibility.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_profiler_interface.h" + +namespace tflite { + +// MicroProfiler creates a common way to gain fine-grained insight into runtime +// performance. Bottleck operators can be identified along with slow code +// sections. This can be used in conjunction with running the relevant micro +// benchmark to evaluate end-to-end performance. +class MicroProfiler : public MicroProfilerInterface { + public: + MicroProfiler() = default; + virtual ~MicroProfiler() = default; + + // Marks the start of a new event and returns an event handle that can be used + // to mark the end of the event via EndEvent. The lifetime of the tag + // parameter must exceed that of the MicroProfiler. + virtual uint32_t BeginEvent(const char* tag) override; + + // Marks the end of an event associated with event_handle. It is the + // responsibility of the caller to ensure than EndEvent is called once and + // only once per event_handle. + // + // If EndEvent is called more than once for the same event_handle, the last + // call will be used as the end of event marker.If EndEvent is called 0 times + // for a particular event_handle, the duration of that event will be 0 ticks. + virtual void EndEvent(uint32_t event_handle) override; + + // Clears all the events that have been currently profiled. + void ClearEvents() { num_events_ = 0; } + + // Returns the sum of the ticks taken across all the events. This number + // is only meaningful if all of the events are disjoint (the end time of + // event[i] <= start time of event[i+1]). + uint32_t GetTotalTicks() const; + + // Prints the profiling information of each of the events in human readable + // form. + void Log() const; + + // Prints the profiling information of each of the events in CSV (Comma + // Separated Value) form. + void LogCsv() const; + + // Prints total ticks for each unique tag in CSV format. + // Output will have one row for each unique tag along with the + // total ticks summed across all events with that particular tag. + void LogTicksPerTagCsv(); + + private: + // Maximum number of events that this class can keep track of. If we call + // AddEvent more than kMaxEvents number of times, then the oldest event's + // profiling information will be overwritten. 
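+  // For reference, a typical way to collect and dump events (sketch only;
+  // model, resolver, arena and arena_size are application-provided):
+  //
+  //   tflite::MicroProfiler profiler;
+  //   tflite::MicroInterpreter interpreter(model, resolver, arena, arena_size,
+  //                                        nullptr, &profiler);
+  //   interpreter.AllocateTensors(true);
+  //   interpreter.Invoke();
+  //   profiler.Log();  // or LogCsv() / LogTicksPerTagCsv()
+  //   profiler.ClearEvents();
+  //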
+ static constexpr int kMaxEvents = 1024; + + const char* tags_[kMaxEvents]; + uint32_t start_ticks_[kMaxEvents]; + uint32_t end_ticks_[kMaxEvents]; + int num_events_ = 0; + + struct TicksPerTag { + const char* tag; + uint32_t ticks; + }; + // In practice, the number of tags will be much lower than the number of + // events. But it is theoretically possible that each event to be unique and + // hence we allow total_ticks_per_tag to have kMaxEvents entries. + TicksPerTag total_ticks_per_tag[kMaxEvents] = {}; + + int FindExistingOrNextPosition(const char* tag_name); + + TF_LITE_REMOVE_VIRTUAL_DELETE; +}; + +#if defined(TF_LITE_STRIP_ERROR_STRINGS) +// For release builds, the ScopedMicroProfiler is a noop. +// +// This is done because the ScipedProfiler is used as part of the +// MicroInterpreter and we want to ensure zero overhead for the release builds. +class ScopedMicroProfiler { + public: + explicit ScopedMicroProfiler(const char* tag, + MicroProfilerInterface* profiler) {} +}; + +#else + +// This class can be used to add events to a MicroProfiler object that span the +// lifetime of the ScopedMicroProfiler object. +// Usage example: +// +// MicroProfiler profiler(); +// ... +// { +// ScopedMicroProfiler scoped_profiler("custom_tag", profiler); +// work_to_profile(); +// } +class ScopedMicroProfiler { + public: + explicit ScopedMicroProfiler(const char* tag, + MicroProfilerInterface* profiler) + : profiler_(profiler) { + if (profiler_ != nullptr) { + event_handle_ = profiler_->BeginEvent(tag); + } + } + + ~ScopedMicroProfiler() { + if (profiler_ != nullptr) { + profiler_->EndEvent(event_handle_); + } + } + + private: + uint32_t event_handle_ = 0; + MicroProfilerInterface* profiler_ = nullptr; +}; +#endif // !defined(TF_LITE_STRIP_ERROR_STRINGS) + +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_MICRO_PROFILER_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_profiler_interface.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_profiler_interface.h new file mode 100644 index 0000000..f839a74 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_profiler_interface.h @@ -0,0 +1,38 @@ +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_MICRO_MICRO_PROFILER_INTERFACE_H_ +#define TENSORFLOW_LITE_MICRO_MICRO_PROFILER_INTERFACE_H_ + +#include + +namespace tflite { + +// Interface class that the TFLM framework relies on for profiling. +class MicroProfilerInterface { + public: + virtual ~MicroProfilerInterface() {} + + // Marks the start of a new event and returns an event handle that can be used + // to mark the end of the event via EndEvent. + virtual uint32_t BeginEvent(const char* tag) = 0; + + // Marks the end of an event associated with event_handle. 
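+  // A minimal custom implementation might look like the sketch below;
+  // GetPlatformCycleCount() is a hypothetical platform hook, not part of this
+  // SDK:
+  //
+  //   class CycleCountProfiler : public tflite::MicroProfilerInterface {
+  //    public:
+  //     uint32_t BeginEvent(const char* tag) override {
+  //       start_ = GetPlatformCycleCount();
+  //       return 0;
+  //     }
+  //     void EndEvent(uint32_t event_handle) override {
+  //       total_ += GetPlatformCycleCount() - start_;
+  //     }
+  //    private:
+  //     uint32_t start_ = 0;
+  //     uint32_t total_ = 0;
+  //   };
+  //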
+ virtual void EndEvent(uint32_t event_handle) = 0; +}; + +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_MICRO_PROFILER_INTERFACE_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_resource_variable.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_resource_variable.cpp new file mode 100644 index 0000000..c07d111 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_resource_variable.cpp @@ -0,0 +1,148 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_resource_variable.h" + +#include + +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h" + +namespace tflite { + +namespace {} // namespace + +MicroResourceVariables* MicroResourceVariables::Create( + MicroAllocator* allocator, int max_num_variables) { + TFLITE_DCHECK(allocator != nullptr); + + uint8_t* allocator_buffer = static_cast( + allocator->AllocatePersistentBuffer(sizeof(MicroResourceVariables))); + MicroResourceVariable* variable_array = + static_cast(allocator->AllocatePersistentBuffer( + sizeof(MicroResourceVariable) * max_num_variables)); + MicroResourceVariables* variables = new (allocator_buffer) + MicroResourceVariables(variable_array, max_num_variables); + return variables; +} + +int MicroResourceVariables::CreateIdIfNoneFound(const char* container, + const char* shared_name) { + int resource_id = FindId(container, shared_name); + if (resource_id >= 0) { + return resource_id; + } + + // no existing variable found for the given container and shared name pair. + if (num_resource_variables_ >= max_variable_count_) { + MicroPrintf( + "Failed to allocate resource variable. 
Maximum resource variable count " + "(%d) " + "reached.", + max_variable_count_); + return -1; + } + + resource_id = num_resource_variables_++; + resource_variables_[resource_id].container = container; + resource_variables_[resource_id].shared_name = shared_name; + resource_variables_[resource_id].resource_buffer = nullptr; + resource_variables_[resource_id].bytes = 0; + return resource_id; +} + +TfLiteStatus MicroResourceVariables::Read(int id, + const TfLiteEvalTensor* tensor) { + if (id < 0 || id >= num_resource_variables_) { + MicroPrintf("Attempting to read non-existent resource variable %d", id); + return kTfLiteError; + } + MicroResourceVariable variable = resource_variables_[id]; + TFLITE_DCHECK(EvalTensorBytes(tensor) == variable.bytes); + TFLITE_DCHECK(variable.resource_buffer != nullptr); + memcpy(tensor->data.raw, variable.resource_buffer, variable.bytes); + return kTfLiteOk; +} + +TfLiteStatus MicroResourceVariables::Allocate(int id, TfLiteContext* context, + const TfLiteTensor* tensor) { + if (id < 0 || id >= num_resource_variables_) { + MicroPrintf("Attempting to read non-existent resource variable %d", id); + return kTfLiteError; + } + + MicroResourceVariable& variable = resource_variables_[id]; + + if (variable.resource_buffer == nullptr) { + variable.bytes = tensor->bytes; + variable.resource_buffer = + context->AllocatePersistentBuffer(context, tensor->bytes); + if (variable.resource_buffer == nullptr) { + MicroPrintf("Failed to allocate resource buffer."); + return kTfLiteError; + } + // Zero out resource buffers by deafult. Buffers can be initialized to + // nonzero values using ASSIGN_VARIABLE. + memset(variable.resource_buffer, 0, variable.bytes); + } + + return kTfLiteOk; +} + +TfLiteStatus MicroResourceVariables::Assign(int id, + const TfLiteEvalTensor* tensor) { + if (id < 0 || id >= num_resource_variables_) { + MicroPrintf("Attempting to read non-existent resource variable %d", id); + return kTfLiteError; + } + MicroResourceVariable variable = resource_variables_[id]; + + if (variable.resource_buffer == nullptr) { + MicroPrintf( + "Attempting to assign from a TfLiteEvalTensor before the resource " + "buffer has been allocated. Make sure to call AssignResourceVariable " + "with a TfLiteTensor first."); + return kTfLiteError; + } + TFLITE_DCHECK(EvalTensorBytes(tensor) == variable.bytes); + memcpy(variable.resource_buffer, tensor->data.raw, variable.bytes); + return kTfLiteOk; +} + +TfLiteStatus MicroResourceVariables::ResetAll() { + for (int i = 0; i < num_resource_variables_; i++) { + MicroResourceVariable variable = resource_variables_[i]; + memset(variable.resource_buffer, 0, variable.bytes); + } + return kTfLiteOk; +} + +int MicroResourceVariables::FindId(const char* container, + const char* shared_name) { + for (int i = 0; i < num_resource_variables_; i++) { + // Some TFLite flatbuffers contain null container names to save space. + if ((container == nullptr || + !strcmp(container, resource_variables_[i].container)) && + !strcmp(shared_name, resource_variables_[i].shared_name)) { + return i; + } + } + return -1; +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_resource_variable.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_resource_variable.h new file mode 100644 index 0000000..d2ebb35 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_resource_variable.h @@ -0,0 +1,87 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TFLITE_MICRO_TENSORFLOW_LITE_MICRO_MICRO_RESOURCE_H_ +#define TFLITE_MICRO_TENSORFLOW_LITE_MICRO_MICRO_RESOURCE_H_ + +#include + +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_allocator.h" + +namespace tflite { + +class MicroResourceVariables { + public: + // Create + static MicroResourceVariables* Create(MicroAllocator* allocator, + int num_variables); + + // Creates a resource variable if none is available for the given container + // and shared name pair. Returns the resource ID corresponding to the + // container and shared name pair. If allocation fails, the returned resource + // ID will be negative. The the container and shared_name must outlive this + // class. + int CreateIdIfNoneFound(const char* container, const char* shared_name); + + // Read the resource buffer associated with the given ID into the given + // tensor. + TfLiteStatus Read(int id, const TfLiteEvalTensor* tensor); + + // Allocates the resource buffer if none has been allocated, based on the + // length of the input tensor. Copies input tensor contents to the resource + // buffer. + TfLiteStatus Allocate(int id, TfLiteContext* context, + const TfLiteTensor* tensor); + + // Copies input tensor contents to the resource buffer. + // AllocateResourceVariable with a TFLite tensor must have been called first + // in order to allocate the resource buffer. + TfLiteStatus Assign(int id, const TfLiteEvalTensor* tensor); + + // Zeros out all resource buffers. + TfLiteStatus ResetAll(); + + private: + int FindId(const char* container, const char* shared_name); + + // Micro resource contains the mapping between resource container/name strings + // and resouce IDs. Each resource ID corresponds to a resource buffer pointer. + // The resouce ID is created during the VAR_HANDLE operator preparation stage. + // The resource buffer pointer is created during ASSIGN_VARIABLE preparation + // stage based on the size of the TFLiteTensor being assigned. + struct MicroResourceVariable { + const char* container; + const char* shared_name; + void* resource_buffer; + + // This is only for verifying read size. + size_t bytes; + }; + + MicroResourceVariables(MicroResourceVariable* variables, + int max_variable_count) + : resource_variables_(variables), + max_variable_count_(max_variable_count), + num_resource_variables_(0) {} + + MicroResourceVariable* resource_variables_; + int max_variable_count_; + int num_resource_variables_; +}; + +} // namespace tflite + +#endif // TFLITE_MICRO_TENSORFLOW_LITE_MICRO_MICRO_RESOURCE_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_string.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_string.cpp new file mode 100644 index 0000000..39746f9 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_string.cpp @@ -0,0 +1,317 @@ +/* Copyright 2018 The TensorFlow Authors. 
All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Implements debug logging for numbers by converting them into strings and then +// calling the main DebugLog(char*) function. These are separated into a +// different file so that platforms can just implement the string output version +// of DebugLog() and then get the numerical variations without requiring any +// more code. + +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_string.h" + +#include +#include +#include + +namespace { + +// Int formats can need up to 10 bytes for the value plus a single byte for the +// sign. +constexpr int kMaxIntCharsNeeded = 10 + 1; +// Hex formats can need up to 8 bytes for the value plus two bytes for the "0x". +constexpr int kMaxHexCharsNeeded = 8 + 2; + +// Float formats can need up to 7 bytes for the fraction plus 3 bytes for "x2^" +// plus 3 bytes for the exponent and a single sign bit. +constexpr float kMaxFloatCharsNeeded = 7 + 3 + 3 + 1; + +// All input buffers to the number conversion functions must be this long. +const int kFastToBufferSize = 48; + +// Reverses a zero-terminated string in-place. +char* ReverseStringInPlace(char* start, char* end) { + char* p1 = start; + char* p2 = end - 1; + while (p1 < p2) { + char tmp = *p1; + *p1++ = *p2; + *p2-- = tmp; + } + return start; +} + +// Appends a string to a string, in-place. You need to pass in the maximum +// string length as the second argument. +char* StrCatStr(char* main, int main_max_length, const char* to_append) { + char* current = main; + while (*current != 0) { + ++current; + } + char* current_end = main + (main_max_length - 1); + while ((*to_append != 0) && (current < current_end)) { + *current = *to_append; + ++current; + ++to_append; + } + *current = 0; + return current; +} + +// Populates the provided buffer with an ASCII representation of the number. +char* FastUInt32ToBufferLeft(uint32_t i, char* buffer, int base) { + char* start = buffer; + do { + int32_t digit = i % base; + char character; + if (digit < 10) { + character = '0' + digit; + } else { + character = 'a' + (digit - 10); + } + *buffer++ = character; + i /= base; + } while (i > 0); + *buffer = 0; + ReverseStringInPlace(start, buffer); + return buffer; +} + +// Populates the provided buffer with an ASCII representation of the number. +char* FastInt32ToBufferLeft(int32_t i, char* buffer) { + uint32_t u = i; + if (i < 0) { + *buffer++ = '-'; + u = -u; + } + return FastUInt32ToBufferLeft(u, buffer, 10); +} + +// Converts a number to a string and appends it to another. +char* StrCatInt32(char* main, int main_max_length, int32_t number) { + char number_string[kFastToBufferSize]; + FastInt32ToBufferLeft(number, number_string); + return StrCatStr(main, main_max_length, number_string); +} + +// Converts a number to a string and appends it to another. 
+char* StrCatUInt32(char* main, int main_max_length, uint32_t number, int base) { + char number_string[kFastToBufferSize]; + FastUInt32ToBufferLeft(number, number_string, base); + return StrCatStr(main, main_max_length, number_string); +} + +// Populates the provided buffer with ASCII representation of the float number. +// Avoids the use of any floating point instructions (since these aren't +// supported on many microcontrollers) and as a consequence prints values with +// power-of-two exponents. +char* FastFloatToBufferLeft(float f, char* buffer) { + char* current = buffer; + char* current_end = buffer + (kFastToBufferSize - 1); + // Access the bit fields of the floating point value to avoid requiring any + // float instructions. These constants are derived from IEEE 754. + const uint32_t sign_mask = 0x80000000; + const uint32_t exponent_mask = 0x7f800000; + const int32_t exponent_shift = 23; + const int32_t exponent_bias = 127; + const uint32_t fraction_mask = 0x007fffff; + uint32_t u; + memcpy(&u, &f, sizeof(int32_t)); + const int32_t exponent = + ((u & exponent_mask) >> exponent_shift) - exponent_bias; + const uint32_t fraction = (u & fraction_mask); + // Expect ~0x2B1B9D3 for fraction. + if (u & sign_mask) { + *current = '-'; + current += 1; + } + *current = 0; + // These are special cases for infinities and not-a-numbers. + if (exponent == 128) { + if (fraction == 0) { + current = StrCatStr(current, (current_end - current), "Inf"); + return current; + } else { + current = StrCatStr(current, (current_end - current), "NaN"); + return current; + } + } + // 0x007fffff (8388607) represents 0.99... for the fraction, so to print the + // correct decimal digits we need to scale our value before passing it to the + // conversion function. This scale should be 10000000/8388608 = 1.1920928955. + // We can approximate this using multiply-adds and right-shifts using the + // values in this array. The 1. portion of the number string is printed out + // in a fixed way before the fraction, below. + const int32_t scale_shifts_size = 13; + const int8_t scale_shifts[13] = {3, 4, 8, 11, 13, 14, 17, + 18, 19, 20, 21, 22, 23}; + uint32_t scaled_fraction = fraction; + for (int i = 0; i < scale_shifts_size; ++i) { + scaled_fraction += (fraction >> scale_shifts[i]); + } + *current = '1'; + current += 1; + *current = '.'; + current += 1; + *current = 0; + + // Prepend leading zeros to fill in all 7 bytes of the fraction. Truncate + // zeros off the end of the fraction. Every fractional value takes 7 bytes. + // For example, 2500 would be written into the buffer as 0002500 since it + // represents .00025. + constexpr int kMaxFractionalDigits = 7; + + // Abort early if there is not enough space in the buffer. + if (current_end - current <= kMaxFractionalDigits) { + return current; + } + + // Pre-fill buffer with zeros to ensure zero-truncation works properly. + for (int i = 1; i < kMaxFractionalDigits; i++) { + *(current + i) = '0'; + } + + // Track how large the fraction is to add leading zeros. + char* previous = current; + current = StrCatUInt32(current, (current_end - current), scaled_fraction, 10); + int fraction_digits = current - previous; + int leading_zeros = kMaxFractionalDigits - fraction_digits; + + // Overwrite the null terminator from StrCatUInt32 to ensure zero-trunctaion + // works properly. + *current = '0'; + + // Shift fraction values and prepend zeros if necessary. 
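+  // (Continuing the example above: "2500" printed with fraction_digits == 4
+  // leaves leading_zeros == 3, so each digit is walked backwards, copied three
+  // places to the right and replaced with '0', giving "0002500".)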
+ if (leading_zeros != 0) { + for (int i = 0; i < fraction_digits; i++) { + current--; + *(current + leading_zeros) = *current; + *current = '0'; + } + current += kMaxFractionalDigits; + } + + // Truncate trailing zeros for cleaner logs. Ensure we leave at least one + // fractional character for the case when scaled_fraction is 0. + while (*(current - 1) == '0' && (current - 1) > previous) { + current--; + } + *current = 0; + current = StrCatStr(current, (current_end - current), "*2^"); + current = StrCatInt32(current, (current_end - current), exponent); + return current; +} + +int FormatInt32(char* output, int32_t i) { + return static_cast(FastInt32ToBufferLeft(i, output) - output); +} + +int FormatUInt32(char* output, uint32_t i) { + return static_cast(FastUInt32ToBufferLeft(i, output, 10) - output); +} + +int FormatHex(char* output, uint32_t i) { + return static_cast(FastUInt32ToBufferLeft(i, output, 16) - output); +} + +int FormatFloat(char* output, float i) { + return static_cast(FastFloatToBufferLeft(i, output) - output); +} + +} // namespace + +extern "C" int MicroVsnprintf(char* output, int len, const char* format, + va_list args) { + int output_index = 0; + const char* current = format; + // One extra character must be left for the null terminator. + const int usable_length = len - 1; + while (*current != '\0' && output_index < usable_length) { + if (*current == '%') { + current++; + switch (*current) { + case 'd': + // Cut off log message if format could exceed log buffer length. + if (usable_length - output_index < kMaxIntCharsNeeded) { + output[output_index++] = '\0'; + return output_index; + } + output_index += + FormatInt32(&output[output_index], va_arg(args, int32_t)); + current++; + break; + case 'u': + if (usable_length - output_index < kMaxIntCharsNeeded) { + output[output_index++] = '\0'; + return output_index; + } + output_index += + FormatUInt32(&output[output_index], va_arg(args, uint32_t)); + current++; + break; + case 'x': + if (usable_length - output_index < kMaxHexCharsNeeded) { + output[output_index++] = '\0'; + return output_index; + } + output[output_index++] = '0'; + output[output_index++] = 'x'; + output_index += + FormatHex(&output[output_index], va_arg(args, uint32_t)); + current++; + break; + case 'f': + if (usable_length - output_index < kMaxFloatCharsNeeded) { + output[output_index++] = '\0'; + return output_index; + } + output_index += + FormatFloat(&output[output_index], va_arg(args, double)); + current++; + break; + case '%': + output[output_index++] = *current++; + break; + case 'c': + if (usable_length - output_index < 1) { + output[output_index++] = '\0'; + return output_index; + } + output[output_index++] = va_arg(args, int32_t); + current++; + break; + case 's': + char* string = va_arg(args, char*); + int string_idx = 0; + while (string_idx + output_index < usable_length && + string[string_idx] != '\0') { + output[output_index++] = string[string_idx++]; + } + current++; + } + } else { + output[output_index++] = *current++; + } + } + output[output_index++] = '\0'; + return output_index; +} + +extern "C" int MicroSnprintf(char* output, int len, const char* format, ...) 
{ + va_list args; + va_start(args, format); + int bytes_written = MicroVsnprintf(output, len, format, args); + va_end(args); + return bytes_written; +} diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_string.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_string.h new file mode 100644 index 0000000..59303e8 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_string.h @@ -0,0 +1,33 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_MICRO_MICRO_STRING_H_ +#define TENSORFLOW_LITE_MICRO_MICRO_STRING_H_ + +#include + +// Implements simple string formatting for numeric types. Returns the number of +// bytes written to output. +extern "C" { +// Functionally equivalent to vsnprintf, trimmed down for TFLite Micro. +// MicroSnprintf() is implemented using MicroVsnprintf(). +int MicroVsnprintf(char* output, int len, const char* format, va_list args); +// Functionally equavalent to snprintf, trimmed down for TFLite Micro. +// For example, MicroSnprintf(buffer, 10, "int %d", 10) will put the string +// "int 10" in the buffer. +// Floating point values are logged in exponent notation (1.XXX*2^N). +int MicroSnprintf(char* output, int len, const char* format, ...); +} + +#endif // TENSORFLOW_LITE_MICRO_MICRO_STRING_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_time.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_time.cpp new file mode 100644 index 0000000..d418509 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_time.cpp @@ -0,0 +1,58 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Reference implementation of timer functions. Platforms are not required to +// implement these timer methods, but they are required to enable profiling. + +// On platforms that have a POSIX stack or C library, it can be written using +// methods from or clock() from . + +// To add an equivalent function for your own platform, create your own +// implementation file, and place it in a subfolder with named after the OS +// you're targeting. 
For example, see the Cortex M bare metal version in +// tensorflow/lite/micro/bluepill/micro_time.cc + +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_time.h" + +#if defined(TF_LITE_USE_CTIME) +#include +#endif + +namespace tflite { + +#if !defined(TF_LITE_USE_CTIME) + +// Reference implementation of the ticks_per_second() function that's required +// for a platform to support Tensorflow Lite for Microcontrollers profiling. +// This returns 0 by default because timing is an optional feature that builds +// without errors on platforms that do not need it. +uint32_t ticks_per_second() { return 0; } + +// Reference implementation of the GetCurrentTimeTicks() function that's +// required for a platform to support Tensorflow Lite for Microcontrollers +// profiling. This returns 0 by default because timing is an optional feature +// that builds without errors on platforms that do not need it. +uint32_t GetCurrentTimeTicks() { return 0; } + +#else // defined(TF_LITE_USE_CTIME) + +// For platforms that support ctime, we implment the micro_time interface in +// this central location. +uint32_t ticks_per_second() { return CLOCKS_PER_SEC; } + +uint32_t GetCurrentTimeTicks() { return clock(); } +#endif + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_time.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_time.h new file mode 100644 index 0000000..7a8ab45 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_time.h @@ -0,0 +1,36 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_MICRO_MICRO_TIME_H_ +#define TENSORFLOW_LITE_MICRO_MICRO_TIME_H_ + +#include + +namespace tflite { + +// These functions should be implemented by each target platform, and provide an +// accurate tick count along with how many ticks there are per second. +uint32_t ticks_per_second(); + +// Return time in ticks. The meaning of a tick varies per platform. +uint32_t GetCurrentTimeTicks(); + +inline uint32_t TicksToMs(int32_t ticks) { + return static_cast(1000.0f * static_cast(ticks) / + static_cast(ticks_per_second())); +} + +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_MICRO_TIME_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_utils.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_utils.cpp new file mode 100644 index 0000000..4f7eba7 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_utils.cpp @@ -0,0 +1,90 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h" + +#include +#include +#include + +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +namespace tflite { + +int ElementCount(const TfLiteIntArray& dims) { + int result = 1; + for (int i = 0; i < dims.size; ++i) { + result *= dims.data[i]; + } + return result; +} + +size_t EvalTensorBytes(const TfLiteEvalTensor* tensor) { + size_t bytes_per_element; + TFLITE_DCHECK(kTfLiteOk == + TfLiteTypeSizeOf(tensor->type, &bytes_per_element)); + return ElementCount(*tensor->dims) * bytes_per_element; +} + +void SignedSymmetricPerChannelQuantize( + const float* values, TfLiteIntArray* dims, int quantized_dimension, + int8_t* quantized_values, float* scaling_factors, TfLiteType type) { + int input_size = ElementCount(*dims); + int channel_count = dims->data[quantized_dimension]; + int per_channel_size = input_size / channel_count; + + int stride; + int channel_stride; + + int qmin = QMinFromTfLiteType(type); + int qmax = QMaxFromTfLiteType(type); + + if (quantized_dimension == 0) { + stride = 1; + channel_stride = per_channel_size; + } else if (quantized_dimension == 3) { + stride = channel_count; + channel_stride = 1; + } else { + MicroPrintf("quantized dimension must be 0 or 3"); + TFLITE_ABORT; + } + + // Calculate scales for each channel. + for (int channel = 0; channel < channel_count; channel++) { + float min = 0; + float max = 0; + + for (int i = 0; i < per_channel_size; i++) { + int idx = channel * channel_stride + i * stride; + min = fminf(min, values[idx]); + max = fmaxf(max, values[idx]); + } + scaling_factors[channel] = fmaxf(fabs(min), fabs(max)) / qmax; + for (int i = 0; i < per_channel_size; i++) { + int idx = channel * channel_stride + i * stride; + const int32_t quantized_value = + static_cast(roundf(values[idx] / scaling_factors[channel])); + // Clamp: just in case some odd numeric offset. + quantized_values[idx] = fminf(qmax, fmaxf(qmin + 1, quantized_value)); + } + } +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h new file mode 100644 index 0000000..73de1dc --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h @@ -0,0 +1,170 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_MICRO_MICRO_UTILS_H_
+#define TENSORFLOW_LITE_MICRO_MICRO_UTILS_H_
+
+// Patched by Edge Impulse
+// Arduino build defines abs as a macro here. That is invalid C++, and breaks
+// libc++'s header, undefine it.
+// TODO investigate if this belongs to global patch or Arduino lib one
+#ifdef abs
+#undef abs
+#endif
+
+#include <algorithm>
+#include <cmath>
+#include <cstdint>
+#include <limits>
+
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+
+namespace tflite {
+
+// Returns the number of elements in the shape array.
+int ElementCount(const TfLiteIntArray& dims);
+
+size_t EvalTensorBytes(const TfLiteEvalTensor* tensor);
+
+// C++11 does not support constexpr max; hence, use a ternary conditional to
+// create our own constexpr Max function.
+constexpr int Max(int a, int b) { return a >= b ? a : b; }
+
+// Converts a float value into a quantized value. Note that large values (close
+// to max int and min int) may see significant error due to a lack of floating
+// point granularity for large values.
+template <typename T>
+T FloatToQuantizedType(const float value, const float scale, int zero_point) {
+  int32_t result = round(value / scale) + zero_point;
+  result =
+      std::max(static_cast<int32_t>(std::numeric_limits<T>::min()), result);
+  result =
+      std::min(static_cast<int32_t>(std::numeric_limits<T>::max()), result);
+  return result;
+}
+
+template <typename T>
+T FloatToSymmetricQuantizedType(const float value, const float scale) {
+  // 64-bit values are required since 8x16 conv accumulates to int64, meaning
+  // an int64 bias is required.
+  std::int64_t result = round(value / scale);
+  result = std::max(
+      static_cast<std::int64_t>(std::numeric_limits<T>::min() + 1), result);
+  result = std::min(static_cast<std::int64_t>(std::numeric_limits<T>::max()),
+                    result);
+  return result;
+}
+
+// Helper methods to quantize arrays of floats to the desired format.
+//
+// There are several key flavors of quantization in TfLite:
+//          asymmetric | symmetric | per channel
+// int8_t  |     X     |     X     |      X     |
+// uint8_t |     X     |     X     |            |
+// int16_t |     X     |           |            |
+// int32_t |           |     X     |      X     |
+//
+// The per-op quantization spec can be found here:
+// https://www.tensorflow.org/lite/performance/quantization_spec
+template <typename T>
+void Quantize(const float* input, T* output, int num_elements, float scale,
+              int zero_point) {
+  for (int i = 0; i < num_elements; i++) {
+    output[i] = FloatToQuantizedType<T>(input[i], scale, zero_point);
+  }
+}
+
+template <typename T>
+void SymmetricQuantize(const float* input, T* output, int num_elements,
+                       float scale) {
+  for (int i = 0; i < num_elements; i++) {
+    output[i] = FloatToSymmetricQuantizedType<T>(input[i], scale);
+  }
+}
+
+template <typename T>
+void SymmetricPerChannelQuantize(const float* input, T* output,
+                                 int num_elements, int num_channels,
+                                 float* scales) {
+  int elements_per_channel = num_elements / num_channels;
+  for (int i = 0; i < num_channels; i++) {
+    for (int j = 0; j < elements_per_channel; j++) {
+      output[i * elements_per_channel + j] = FloatToSymmetricQuantizedType<T>(
+          input[i * elements_per_channel + j], scales[i]);
+    }
+  }
+}
+
+void SignedSymmetricPerChannelQuantize(const float* values,
+                                       TfLiteIntArray* dims,
+                                       int quantized_dimension,
+                                       int8_t* quantized_values,
+                                       float* scaling_factor,
+                                       TfLiteType type = kTfLiteNoType);
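The quantize and dequantize templates above are easiest to see in a tiny round trip. The sketch below is illustrative only and lives in a hypothetical separate source file: the buffer contents, the scale, the zero point and the function name are all invented for the example, and the header is assumed to be on the include path.

#include <cstdint>

#include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h"

// Round-trips a small float buffer through int8 quantization.
void QuantizeRoundTripExample() {
  float input[4] = {0.0f, 0.25f, -0.5f, 1.0f};
  int8_t quantized[4];
  float recovered[4];

  const float scale = 1.0f / 128.0f;  // maps +/-1.0 into the int8 range
  const int zero_point = 0;           // symmetric around zero

  tflite::Quantize<int8_t>(input, quantized, 4, scale, zero_point);
  tflite::Dequantize<int8_t>(quantized, 4, scale, zero_point, recovered);
  // Each recovered[i] now lies within about one step (scale) of input[i].
}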
+// Quantizes inputs based on the values provided, choosing the smallest range
+// which includes all input values.
+template <typename T>
+void SymmetricQuantizeCalculateScales(const float* values, TfLiteIntArray* dims,
+                                      T* output, float* scale) {
+  int input_size = ElementCount(*dims);
+
+  float min = 0;
+  float max = 0;
+  for (int i = 0; i < input_size; i++) {
+    min = fminf(min, values[i]);
+    max = fmaxf(max, values[i]);
+  }
+  *scale = fmaxf(std::abs(min), std::abs(max)) / std::numeric_limits<T>::max();
+  for (int i = 0; i < input_size; i++) {
+    int32_t quantized_value =
+        static_cast<int32_t>(roundf(values[i] / *scale));
+    // Clamp: just in case some odd numeric offset.
+    quantized_value = fminf(std::numeric_limits<T>::max(), quantized_value);
+    quantized_value = fmaxf(std::numeric_limits<T>::min() + 1, quantized_value);
+    output[i] = quantized_value;
+  }
+}
+
+template <typename T>
+void Dequantize(const T* values, const int size, const float scale,
+                int zero_point, float* dequantized_values) {
+  for (int i = 0; i < size; ++i) {
+    dequantized_values[i] = (values[i] - zero_point) * scale;
+  }
+}
+
+// Based on the TfLiteType passed in to these functions, the corresponding max
+// or min int for that type is returned.
+inline int QMinFromTfLiteType(TfLiteType type) {
+  if (type == kTfLiteInt4) {
+    return -8;
+  } else {
+    return std::numeric_limits<int8_t>::min();
+  }
+}
+
+inline int QMaxFromTfLiteType(TfLiteType type) {
+  if (type == kTfLiteInt4) {
+    return 7;
+  } else {
+    return std::numeric_limits<int8_t>::max();
+  }
+}
+
+} // namespace tflite
+
+#endif  // TENSORFLOW_LITE_MICRO_MICRO_UTILS_H_
diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/mock_micro_graph.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/mock_micro_graph.cpp
new file mode 100644
index 0000000..8ad3864
--- /dev/null
+++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/mock_micro_graph.cpp
@@ -0,0 +1,66 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/micro/mock_micro_graph.h" + +#include "edge-impulse-sdk/tensorflow/lite/micro/test_helpers.h" + +namespace tflite { + +MockMicroGraph::MockMicroGraph(SingleArenaBufferAllocator* allocator) + : MicroGraph(nullptr, nullptr, nullptr, nullptr), + allocator_(allocator), + init_count_(0), + prepare_count_(0), + free_count_(0) { + memset(invoke_counts_, 0, sizeof(invoke_counts_)); + mock_tensor_ = + reinterpret_cast(allocator_->AllocatePersistentBuffer( + sizeof(TfLiteEvalTensor), alignof(TfLiteEvalTensor))); + int* dims_array = reinterpret_cast( + allocator_->AllocatePersistentBuffer(3 * sizeof(int), alignof(int))); + float* data_array = reinterpret_cast( + allocator_->AllocatePersistentBuffer(2 * sizeof(float), alignof(float))); + int dims[] = {2, 1, 2}; + memcpy(dims_array, dims, 3 * sizeof(int)); + mock_tensor_->dims = testing::IntArrayFromInts(dims_array); + mock_tensor_->data.f = data_array; + mock_tensor_->type = kTfLiteFloat32; +} + +TfLiteStatus MockMicroGraph::InvokeSubgraph(int subgraph_idx) { + invoke_counts_[subgraph_idx]++; + return kTfLiteOk; +} + +TfLiteStatus MockMicroGraph::ResetVariableTensors() { return kTfLiteOk; } + +size_t MockMicroGraph::NumSubgraphInputs(int subgraph_idx) { return 1; } + +TfLiteEvalTensor* MockMicroGraph::GetSubgraphInput(int subgraph_idx, + int tensor_idx) { + return mock_tensor_; +} + +size_t MockMicroGraph::NumSubgraphOutputs(int subgraph_idx) { return 1; } + +TfLiteEvalTensor* MockMicroGraph::GetSubgraphOutput(int subgraph_idx, + int tensor_idx) { + return mock_tensor_; +} + +int MockMicroGraph::NumSubgraphs() { return kMaxSubgraphs; } + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/mock_micro_graph.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/mock_micro_graph.h new file mode 100644 index 0000000..b1aeb20 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/mock_micro_graph.h @@ -0,0 +1,60 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_MICRO_MOCK_MICRO_GRAPH_H_ +#define TENSORFLOW_LITE_MICRO_MOCK_MICRO_GRAPH_H_ + +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_allocator.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_graph.h" +#include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h" + +namespace tflite { + +// MockMicroGraph stubs out all MicroGraph methods used during invoke. A count +// of the number of calls to invoke for each subgraph is maintained for +// validation of control flow operators. 
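As a rough sketch of how a kernel test might exercise this mock (illustrative only: the arena size, the function name and the allocator setup are invented here, and a SingleArenaBufferAllocator constructor taking a buffer and its size is assumed to be available from the SDK's allocator headers):

#include <cstddef>
#include <cstdint>

#include "edge-impulse-sdk/tensorflow/lite/micro/mock_micro_graph.h"

// Checks that a control-flow kernel invoked subgraph 1 exactly once.
bool InvokedSubgraphOnceExample() {
  constexpr size_t kArenaSize = 1024;          // invented for this sketch
  static uint8_t arena[kArenaSize];
  // Assumption: (buffer, size) constructor as declared in the TFLM allocator.
  tflite::SingleArenaBufferAllocator allocator(arena, kArenaSize);

  tflite::MockMicroGraph mock_graph(&allocator);
  mock_graph.InvokeSubgraph(1);                // what the kernel under test would do
  return mock_graph.get_invoke_count(1) == 1;
}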
+class MockMicroGraph : public MicroGraph { + public: + explicit MockMicroGraph(SingleArenaBufferAllocator* allocator); + TfLiteStatus InvokeSubgraph(int subgraph_idx) override; + TfLiteStatus ResetVariableTensors() override; + size_t NumSubgraphInputs(int subgraph_idx) override; + TfLiteEvalTensor* GetSubgraphInput(int subgraph_idx, int tensor_idx) override; + size_t NumSubgraphOutputs(int subgraph_idx) override; + TfLiteEvalTensor* GetSubgraphOutput(int subgraph_idx, + int tensor_idx) override; + int NumSubgraphs() override; + int get_init_count() const { return init_count_; } + int get_prepare_count() const { return prepare_count_; } + int get_free_count() const { return free_count_; } + int get_invoke_count(int subgraph_idx) const { + return invoke_counts_[subgraph_idx]; + } + + private: + static constexpr int kMaxSubgraphs = 10; + SingleArenaBufferAllocator* allocator_; + TfLiteEvalTensor* mock_tensor_; + int init_count_; + int prepare_count_; + int free_count_; + int invoke_counts_[kMaxSubgraphs]; + TF_LITE_REMOVE_VIRTUAL_DELETE +}; + +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_MOCK_MICRO_GRAPH_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/non_persistent_arena_buffer_allocator.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/non_persistent_arena_buffer_allocator.cpp new file mode 100644 index 0000000..65515ff --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/non_persistent_arena_buffer_allocator.cpp @@ -0,0 +1,170 @@ +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "edge-impulse-sdk/tensorflow/lite/micro/non_persistent_arena_buffer_allocator.h" + +#include "edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +namespace tflite { + +NonPersistentArenaBufferAllocator::NonPersistentArenaBufferAllocator( + uint8_t* buffer, size_t buffer_size) + : buffer_head_(buffer), + buffer_tail_(buffer + buffer_size), + head_temp_(buffer), + next_temp_(buffer) {} + +NonPersistentArenaBufferAllocator::~NonPersistentArenaBufferAllocator() {} + +// Allocates a temporary buffer. This buffer is not resizable. +uint8_t* NonPersistentArenaBufferAllocator::AllocateTemp(size_t size, + size_t alignment) { + uint8_t* const aligned_result = AlignPointerUp(next_temp_, alignment); + const size_t available_memory = buffer_tail_ - aligned_result; + if (available_memory < size) { + MicroPrintf( + "Failed to allocate temp memory. Requested: %u, " + "available %u, missing: %u", + size, available_memory, size - available_memory); + return nullptr; + } + next_temp_ = aligned_result + size; + temp_buffer_ptr_check_sum_ ^= reinterpret_cast(aligned_result); + temp_buffer_count_++; + return aligned_result; +} + +// Signals that a temporary buffer is no longer needed. 
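The temp-buffer contract implemented below expects every AllocateTemp to be paired with a DeallocateTemp before the temp region is reset. A minimal sketch, with the arena size and function name invented for illustration:

#include <cstdint>

#include "edge-impulse-sdk/tensorflow/lite/micro/non_persistent_arena_buffer_allocator.h"

void TempBufferLifecycleExample() {
  static uint8_t arena[512];  // invented size for this sketch
  tflite::NonPersistentArenaBufferAllocator allocator(arena, sizeof(arena));

  // Temp buffers must be individually deallocated...
  uint8_t* scratch = allocator.AllocateTemp(/*size=*/64, /*alignment=*/4);
  allocator.DeallocateTemp(scratch);

  // ...before the temp region as a whole can be reclaimed and reused.
  allocator.ResetTempAllocations();
}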
+void NonPersistentArenaBufferAllocator::DeallocateTemp(uint8_t* temp_buf) { + temp_buffer_ptr_check_sum_ ^= reinterpret_cast(temp_buf); + temp_buffer_count_--; +} + +// Returns true if all temporary buffers are already deallocated. +bool NonPersistentArenaBufferAllocator::IsAllTempDeallocated() { + if (temp_buffer_count_ != 0 || temp_buffer_ptr_check_sum_ != 0) { + MicroPrintf( + "Number of allocated temp buffers: %d. Checksum passing status: %d", + temp_buffer_count_, !temp_buffer_ptr_check_sum_); + return false; + } + return true; +} + +// Signals that all temporary allocations can be reclaimed. TFLM calls this +// API when it knows that all temporary buffers that it requested has been +// deallocated. The goal of API is to facilitate implementations of +// INonPersistentBufferAllocator can reuse buffer with some reasonable +// complexity. +TfLiteStatus NonPersistentArenaBufferAllocator::ResetTempAllocations() { + if (!IsAllTempDeallocated()) { + MicroPrintf( + "All temp buffers must be freed before calling ResetTempAllocations()"); + return kTfLiteError; + } + next_temp_ = head_temp_; + return kTfLiteOk; +} + +// Returns a buffer that is resizable viable ResizeBuffer(). +uint8_t* NonPersistentArenaBufferAllocator::AllocateResizableBuffer( + size_t size, size_t alignment) { + // Only supports one resizable buffer, which starts at the buffer head. + uint8_t* expected_resizable_buf = AlignPointerUp(buffer_head_, alignment); + + if (resizable_buffer_allocated_) { + MicroPrintf( + "Cannot allocate a new resizable buffer when one is already allocated"); + return nullptr; + } + + if (ResizeBuffer(expected_resizable_buf, size, alignment) == kTfLiteOk) { + resizable_buffer_allocated_ = true; + return expected_resizable_buf; + } + return nullptr; +} + +// Resizes a buffer that is previously returned by the AllocateResizableBuffer. +// Note that ResizeBuffer(old_resizable_buf, 0, 1) effectively deallocates +// a previous allocated resizable buffer. +TfLiteStatus NonPersistentArenaBufferAllocator::ResizeBuffer( + uint8_t* resizable_buf, size_t size, size_t alignment) { + // Only supports one resizable buffer, which starts at the buffer head. + uint8_t* expect_resizable_buf = AlignPointerUp(buffer_head_, alignment); + if (resizable_buf != expect_resizable_buf) { + MicroPrintf("Internal error: buffer is not resizable"); + return kTfLiteError; + } + if (head_temp_ != next_temp_) { + MicroPrintf("ResetTempAllocations() is not called before ResizeBuffer()."); + return kTfLiteError; + } + + const size_t available_memory = buffer_tail_ - expect_resizable_buf; + if (available_memory < size) { + MicroPrintf( + "Failed to resize buffer. Requested: %u, available %u, missing: %u", + size, available_memory, size - available_memory); + return kTfLiteError; + } + head_temp_ = expect_resizable_buf + size; + next_temp_ = head_temp_; + + return kTfLiteOk; +} + +// Frees up the memory occupied by the resizable buffer. +TfLiteStatus NonPersistentArenaBufferAllocator::DeallocateResizableBuffer( + uint8_t* resizable_buf) { + TfLiteStatus status = ResizeBuffer(resizable_buf, 0, 1); + if (status == kTfLiteOk) { + resizable_buffer_allocated_ = false; + } + return status; +} + +// Returns a pointer pointing to the start of the overlay memory, which is +// used for activation tensors and scratch buffers by kernels at Invoke stage. +uint8_t* NonPersistentArenaBufferAllocator::GetOverlayMemoryAddress() const { + return buffer_head_; +} + +// Reserves the size of the overlay memory. 
This overlay is reserved for the +// kernels at Invoke stage. This is referred to as the overlay because before +// Invoket state, the same memory can be used for temp buffers. The layout of +// the memory is planned by the memory planner separately at Invoke stage. +TfLiteStatus +NonPersistentArenaBufferAllocator::ReserveNonPersistentOverlayMemory( + size_t size, size_t alignment) { + uint8_t* expect_resizable_buf = AlignPointerUp(buffer_head_, alignment); + return ResizeBuffer(expect_resizable_buf, size, alignment); +} + +// Returns the size of non-persistent buffer in use. +size_t NonPersistentArenaBufferAllocator::GetNonPersistentUsedBytes() const { + return (next_temp_ - buffer_head_); +} + +// Returns the number of bytes available with a given alignment. This number +// takes in account any temporary allocations. +size_t NonPersistentArenaBufferAllocator::GetAvailableMemory( + size_t alignment) const { + uint8_t* const aligned_temp = AlignPointerUp(next_temp_, alignment); + uint8_t* const aligned_tail = AlignPointerDown(buffer_tail_, alignment); + return aligned_tail - aligned_temp; +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/non_persistent_arena_buffer_allocator.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/non_persistent_arena_buffer_allocator.h new file mode 100644 index 0000000..2a3d639 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/non_persistent_arena_buffer_allocator.h @@ -0,0 +1,104 @@ +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_NON_PERSISTENT_ARENA_BUFFER_ALLOCATOR_H_ +#define TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_NON_PERSISTENT_ARENA_BUFFER_ALLOCATOR_H_ + +#include +#include + +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/ibuffer_allocator.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/compatibility.h" + +namespace tflite { + +// Implement INonPersistentBufferAllocator on an arena that is dedicated for +// non-persistent buffers. +class NonPersistentArenaBufferAllocator : public INonPersistentBufferAllocator { + public: + NonPersistentArenaBufferAllocator(uint8_t* buffer, size_t buffer_size); + virtual ~NonPersistentArenaBufferAllocator(); + + // Allocates a temporary buffer. This buffer is not resizable. + uint8_t* AllocateTemp(size_t size, size_t alignment) override; + + // Signals that a temporary buffer is no longer needed. + void DeallocateTemp(uint8_t* buf) override; + + // Returns true if all temporary buffers are already deallocated. + bool IsAllTempDeallocated() override; + + // Signals that all temporary allocations can be reclaimed. TFLM calls this + // API when it knows that all temporary buffers that it requested has been + // deallocated. 
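+  // After a successful reset, the temp region starting at head_temp_ can be
+  // handed out again from the beginning.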
+ TfLiteStatus ResetTempAllocations() override; + + // Returns a buffer that is resizable viable ResizeBuffer(). + uint8_t* AllocateResizableBuffer(size_t size, size_t alignment) override; + + // Resizes a buffer that is previously returned by the + // AllocateResizableBuffer. + TfLiteStatus ResizeBuffer(uint8_t* resizable_buf, size_t size, + size_t alignment) override; + + // Frees up the memory occupied by the resizable buffer. + TfLiteStatus DeallocateResizableBuffer(uint8_t* resizable_buf) override; + + // Returns a pointer pointing to the start of the overlay memory, which is + // used for activation tensors and scratch buffers by kernels at Invoke stage. + uint8_t* GetOverlayMemoryAddress() const override; + + // Reserves the size of the overlay memory. This overlay is reserved for the + // kernels at Invoke stage. This is referred to as the overlay because before + // Invoket state, the same memory can be used for temp buffers. The layout of + // the memory is planned by the memory planner separately at Invoke stage. + TfLiteStatus ReserveNonPersistentOverlayMemory(size_t size, + size_t alignment) override; + + // Returns the size of non-persistent buffer in use. + size_t GetNonPersistentUsedBytes() const override; + + // Returns the number of bytes available with a given alignment. This number + // takes in account any temporary allocations. + size_t GetAvailableMemory(size_t alignment) const override; + + TF_LITE_REMOVE_VIRTUAL_DELETE + + private: + // The memory arena that this allocator manages. + uint8_t* const buffer_head_; + uint8_t* const buffer_tail_; + + // The whole region is split into two parts: + // buffer_head_ to head_temp_ - 1 belongs to the only resizable buffer. + // head_temp_ to buffer_tail_ can be used for (non-resizable) temp buffers. + uint8_t* head_temp_; + + // next_temp_ points to the next available temp buffer allocation address and + // its range is between head_temp_ and buffer_tail_ + uint8_t* next_temp_; + + // XOR Check sum for outstanding temp buffers. + // If all temp buffers are deallocated OR no temp buffers are allocated, + // temp_buffer_ptr_check_sum_ == nullptr. + intptr_t temp_buffer_ptr_check_sum_ = 0; + // Count of outstanding temp buffers. + int temp_buffer_count_ = 0; + bool resizable_buffer_allocated_ = false; +}; + +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_NON_PERSISTENT_ARENA_BUFFER_ALLOCATOR_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/op_resolver_bridge.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/op_resolver_bridge.cpp new file mode 100644 index 0000000..a60b626 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/op_resolver_bridge.cpp @@ -0,0 +1,32 @@ +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/micro/op_resolver_bridge.h" + +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/core/api/error_reporter.h" +#include "edge-impulse-sdk/tensorflow/lite/core/api/op_resolver.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_error_reporter.h" +#include "edge-impulse-sdk/tensorflow/lite/schema/schema_utils.h" + +namespace tflite { + +TfLiteStatus GetRegistrationFromOpCode( + const OperatorCode* opcode, const OpResolver& op_resolver, + const TfLiteRegistration** registration) { + return GetRegistrationFromOpCode( + opcode, op_resolver, tflite::GetMicroErrorReporter(), registration); +} +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/op_resolver_bridge.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/op_resolver_bridge.h new file mode 100644 index 0000000..bf6a2db --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/op_resolver_bridge.h @@ -0,0 +1,38 @@ +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_MICRO_TFLITE_BRIDGE_OP_RESOLVER_BRIDGE_H_ +#define TENSORFLOW_LITE_MICRO_TFLITE_BRIDGE_OP_RESOLVER_BRIDGE_H_ + +#include "edge-impulse-sdk/tensorflow/lite/c/c_api_types.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/core/api/op_resolver.h" // needed for the Using declarative + +namespace tflite { + +// Forward declaration of the classes and structs used here. +struct OperatorCode; + +using TfLiteBridgeOpResolver = OpResolver; + +// Handles the logic for converting between an OperatorCode structure extracted +// from a flatbuffer and information about a registered operator +// implementation. +TfLiteStatus GetRegistrationFromOpCode(const OperatorCode* opcode, + const OpResolver& op_resolver, + const TfLiteRegistration** registration); + +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_TFLITE_BRIDGE_OP_RESOLVER_BRIDGE_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/persistent_arena_buffer_allocator.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/persistent_arena_buffer_allocator.cpp new file mode 100644 index 0000000..9237691 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/persistent_arena_buffer_allocator.cpp @@ -0,0 +1,52 @@ +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "edge-impulse-sdk/tensorflow/lite/micro/persistent_arena_buffer_allocator.h" + +#include "edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +namespace tflite { + +PersistentArenaBufferAllocator::PersistentArenaBufferAllocator( + uint8_t* buffer, size_t buffer_size) + : buffer_head_(buffer), + buffer_tail_(buffer + buffer_size), + tail_temp_(buffer_tail_) {} + +PersistentArenaBufferAllocator::~PersistentArenaBufferAllocator() {} + +uint8_t* PersistentArenaBufferAllocator::AllocatePersistentBuffer( + size_t size, size_t alignment) { + uint8_t* const aligned_result = + AlignPointerDown(tail_temp_ - size, alignment); + if (aligned_result < buffer_head_) { +#ifndef TF_LITE_STRIP_ERROR_STRINGS + const size_t missing_memory = buffer_head_ - aligned_result; + MicroPrintf( + "Failed to allocate tail memory. Requested: %u, " + "available %u, missing: %u", + size, size - missing_memory, missing_memory); +#endif + return nullptr; + } + tail_temp_ = aligned_result; + return aligned_result; +} + +size_t PersistentArenaBufferAllocator::GetPersistentUsedBytes() const { + return buffer_tail_ - tail_temp_; +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/persistent_arena_buffer_allocator.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/persistent_arena_buffer_allocator.h new file mode 100644 index 0000000..911c486 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/persistent_arena_buffer_allocator.h @@ -0,0 +1,58 @@ +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_PERSISTENT_ARENA_BUFFER_ALLOCATOR_H_ +#define TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_PERSISTENT_ARENA_BUFFER_ALLOCATOR_H_ + +#include +#include + +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/ibuffer_allocator.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/compatibility.h" + +namespace tflite { + +// PersistentArenaBufferAllocator is an implementatation of +// IPersistentBufferAllocator interface on an arena that is dedicated for +// persistent buffers. +class PersistentArenaBufferAllocator : public IPersistentBufferAllocator { + public: + PersistentArenaBufferAllocator(uint8_t* buffer, size_t buffer_size); + virtual ~PersistentArenaBufferAllocator(); + + // Allocates persistent memory. 
+  // The persistent buffer is never freed.
+  // Returns nullptr if errors occurred.
+  uint8_t* AllocatePersistentBuffer(size_t size, size_t alignment) override;
+
+  // Returns the size of all persistent allocations in bytes.
+  size_t GetPersistentUsedBytes() const override;
+
+  TF_LITE_REMOVE_VIRTUAL_DELETE
+ private:
+  // The memory arena that this allocator manages.
+  uint8_t* const buffer_head_;
+  uint8_t* const buffer_tail_;
+
+  // The whole region is split into two parts:
+  // tail_temp_ to buffer_tail_ contains allocated buffers;
+  // buffer_head_ to tail_temp_ - 1 is still-available space.
+  // So in essence, the allocated region grows from the top down and emulates
+  // SingleArenaBufferAllocator's persistent part.
+  uint8_t* tail_temp_;
+};
+
+} // namespace tflite
+
+#endif // TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_PERSISTENT_ARENA_BUFFER_ALLOCATOR_H_
diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/recording_micro_allocator.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/recording_micro_allocator.cpp
new file mode 100644
index 0000000..11e4d1b
--- /dev/null
+++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/recording_micro_allocator.cpp
@@ -0,0 +1,251 @@
+/* Copyright 2023 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "edge-impulse-sdk/tensorflow/lite/micro/recording_micro_allocator.h"
+
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/recording_single_arena_buffer_allocator.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/compatibility.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/memory_planner/greedy_memory_planner.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_allocator.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
+
+namespace tflite {
+
+size_t RecordingMicroAllocator::GetDefaultTailUsage() {
+  // RecordingMicroAllocator inherits from MicroAllocator, and its tail usage
+  // is similar to MicroAllocator's, with SingleArenaBufferAllocator and
+  // MicroAllocator replaced by their recording counterparts.
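+  // The computation below uses the templated AlignSizeUp<T>() helper; in
+  // upstream TFLite Micro it reads, roughly:
+  //   MicroAllocator::GetDefaultTailUsage(/*is_memory_planner_given=*/false)
+  //       + AlignSizeUp<RecordingSingleArenaBufferAllocator>()
+  //       - AlignSizeUp<SingleArenaBufferAllocator>()
+  //       + AlignSizeUp<RecordingMicroAllocator>()
+  //       - AlignSizeUp<MicroAllocator>()
+  // i.e. the extra tail bytes come from the recording variants taking the
+  // place of the base allocator objects in the arena tail.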
+ return MicroAllocator::GetDefaultTailUsage( + /*is_memory_planner_given=*/false) + + AlignSizeUp() - + AlignSizeUp() + + AlignSizeUp() - AlignSizeUp(); +} + +RecordingMicroAllocator::RecordingMicroAllocator( + RecordingSingleArenaBufferAllocator* recording_memory_allocator, + MicroMemoryPlanner* memory_planner) + : MicroAllocator(recording_memory_allocator, memory_planner), + recording_memory_allocator_(recording_memory_allocator) {} + +RecordingMicroAllocator* RecordingMicroAllocator::Create(uint8_t* tensor_arena, + size_t arena_size) { + RecordingSingleArenaBufferAllocator* simple_memory_allocator = + RecordingSingleArenaBufferAllocator::Create(tensor_arena, arena_size); + TFLITE_DCHECK(simple_memory_allocator != nullptr); + + uint8_t* memory_planner_buffer = + simple_memory_allocator->AllocatePersistentBuffer( + sizeof(GreedyMemoryPlanner), alignof(GreedyMemoryPlanner)); + GreedyMemoryPlanner* memory_planner = + new (memory_planner_buffer) GreedyMemoryPlanner(); + + uint8_t* allocator_buffer = simple_memory_allocator->AllocatePersistentBuffer( + sizeof(RecordingMicroAllocator), alignof(RecordingMicroAllocator)); + RecordingMicroAllocator* allocator = new (allocator_buffer) + RecordingMicroAllocator(simple_memory_allocator, memory_planner); + return allocator; +} + +RecordedAllocation RecordingMicroAllocator::GetRecordedAllocation( + RecordedAllocationType allocation_type) const { + switch (allocation_type) { + case RecordedAllocationType::kTfLiteEvalTensorData: + return recorded_tflite_eval_tensor_data_; + case RecordedAllocationType::kPersistentTfLiteTensorData: + return recorded_persistent_tflite_tensor_data_; + case RecordedAllocationType::kPersistentTfLiteTensorQuantizationData: + return recorded_persistent_tflite_tensor_quantization_data_; + case RecordedAllocationType::kPersistentBufferData: + return recorded_persistent_buffer_data_; + case RecordedAllocationType::kTfLiteTensorVariableBufferData: + return recorded_tflite_tensor_variable_buffer_data_; + case RecordedAllocationType::kNodeAndRegistrationArray: + return recorded_node_and_registration_array_data_; + case RecordedAllocationType::kOpData: + return recorded_op_data_; + } + MicroPrintf("Invalid allocation type supplied: %d", allocation_type); + return RecordedAllocation(); +} + +const RecordingSingleArenaBufferAllocator* +RecordingMicroAllocator::GetSimpleMemoryAllocator() const { + return recording_memory_allocator_; +} + +void RecordingMicroAllocator::PrintAllocations() const { + MicroPrintf("[RecordingMicroAllocator] Arena allocation total %d bytes", + recording_memory_allocator_->GetUsedBytes()); + MicroPrintf("[RecordingMicroAllocator] Arena allocation head %d bytes", + recording_memory_allocator_->GetNonPersistentUsedBytes()); + MicroPrintf("[RecordingMicroAllocator] Arena allocation tail %d bytes", + recording_memory_allocator_->GetPersistentUsedBytes()); + PrintRecordedAllocation(RecordedAllocationType::kTfLiteEvalTensorData, + "TfLiteEvalTensor data", "allocations"); + PrintRecordedAllocation(RecordedAllocationType::kPersistentTfLiteTensorData, + "Persistent TfLiteTensor data", "tensors"); + PrintRecordedAllocation( + RecordedAllocationType::kPersistentTfLiteTensorQuantizationData, + "Persistent TfLiteTensor quantization data", "allocations"); + PrintRecordedAllocation(RecordedAllocationType::kPersistentBufferData, + "Persistent buffer data", "allocations"); + PrintRecordedAllocation( + RecordedAllocationType::kTfLiteTensorVariableBufferData, + "TfLiteTensor variable buffer data", "allocations"); + 
PrintRecordedAllocation(RecordedAllocationType::kNodeAndRegistrationArray, + "NodeAndRegistration struct", + "NodeAndRegistration structs"); + PrintRecordedAllocation(RecordedAllocationType::kOpData, + "Operator runtime data", "OpData structs"); +} + +void* RecordingMicroAllocator::AllocatePersistentBuffer(size_t bytes) { + RecordedAllocation allocations = SnapshotAllocationUsage(); + void* buffer = MicroAllocator::AllocatePersistentBuffer(bytes); + RecordAllocationUsage(allocations, recorded_persistent_buffer_data_); + + return buffer; +} + +void RecordingMicroAllocator::PrintRecordedAllocation( + RecordedAllocationType allocation_type, const char* allocation_name, + const char* allocation_description) const { +#ifndef TF_LITE_STRIP_ERROR_STRINGS + RecordedAllocation allocation = GetRecordedAllocation(allocation_type); + if (allocation.used_bytes > 0 || allocation.requested_bytes > 0) { + MicroPrintf( + "[RecordingMicroAllocator] '%s' used %d bytes with alignment overhead " + "(requested %d bytes for %d %s)", + allocation_name, allocation.used_bytes, allocation.requested_bytes, + allocation.count, allocation_description); + } +#endif +} + +TfLiteStatus RecordingMicroAllocator::AllocateNodeAndRegistrations( + const Model* model, SubgraphAllocations* subgraph_allocations) { + RecordedAllocation allocations = SnapshotAllocationUsage(); + + TfLiteStatus status = + MicroAllocator::AllocateNodeAndRegistrations(model, subgraph_allocations); + + RecordAllocationUsage(allocations, + recorded_node_and_registration_array_data_); + + for (size_t subgraph_idx = 0; subgraph_idx < model->subgraphs()->size(); + subgraph_idx++) { + // The allocation count in SingleArenaBufferAllocator will only be 1. To + // provide better logging, decrement by 1 and add in the actual number of + // operators used in the graph: The allocation for this recording will + // always be 1. This is because the parent class mallocs one large + // allocation for the number of nodes in the graph (e.g. + // sizeof(NodeAndRegistration) * num_nodes). To prevent extra overhead and + // potential for fragmentation, manually adjust the accounting by + // decrementing by 1 and adding the actual number of nodes used in the + // graph: + if (model->subgraphs()->Get(subgraph_idx)->operators()) { + recorded_node_and_registration_array_data_.count += + model->subgraphs()->Get(subgraph_idx)->operators()->size() - 1; + } else { + recorded_node_and_registration_array_data_.count -= 1; + } + } + return status; +} + +TfLiteStatus RecordingMicroAllocator::AllocateTfLiteEvalTensors( + const Model* model, SubgraphAllocations* subgraph_allocations) { + RecordedAllocation allocations = SnapshotAllocationUsage(); + + TfLiteStatus status = + MicroAllocator::AllocateTfLiteEvalTensors(model, subgraph_allocations); + + RecordAllocationUsage(allocations, recorded_tflite_eval_tensor_data_); + + for (size_t subgraph_idx = 0; subgraph_idx < model->subgraphs()->size(); + subgraph_idx++) { + // The allocation for this recording will always be 1. This is because the + // parent class mallocs one large allocation for the number of tensors in + // the graph (e.g. sizeof(TfLiteEvalTensor) * num_tensors). 
To prevent extra + // overhead and potential for fragmentation, manually adjust the accounting + // by decrementing by 1 and adding the actual number of tensors used in the + // graph: + recorded_tflite_eval_tensor_data_.count += + model->subgraphs()->Get(subgraph_idx)->tensors()->size() - 1; + } + return status; +} + +TfLiteStatus RecordingMicroAllocator::AllocateVariables( + const SubGraph* subgraph, TfLiteEvalTensor* eval_tensors, + const int32_t* offline_planner_offsets) { + RecordedAllocation allocations = SnapshotAllocationUsage(); + + TfLiteStatus status = MicroAllocator::AllocateVariables( + subgraph, eval_tensors, offline_planner_offsets); + + RecordAllocationUsage(allocations, + recorded_tflite_tensor_variable_buffer_data_); + return status; +} + +TfLiteTensor* +RecordingMicroAllocator::AllocatePersistentTfLiteTensorInternal() { + RecordedAllocation allocations = SnapshotAllocationUsage(); + + TfLiteTensor* result = + MicroAllocator::AllocatePersistentTfLiteTensorInternal(); + + RecordAllocationUsage(allocations, recorded_persistent_tflite_tensor_data_); + return result; +} + +TfLiteStatus RecordingMicroAllocator::PopulateTfLiteTensorFromFlatbuffer( + const Model* model, TfLiteTensor* tensor, int tensor_index, + int subgraph_index, bool allocate_temp) { + RecordedAllocation allocations = SnapshotAllocationUsage(); + + TfLiteStatus status = MicroAllocator::PopulateTfLiteTensorFromFlatbuffer( + model, tensor, tensor_index, subgraph_index, allocate_temp); + + RecordAllocationUsage(allocations, + recorded_persistent_tflite_tensor_quantization_data_); + return status; +} + +RecordedAllocation RecordingMicroAllocator::SnapshotAllocationUsage() const { + return {/*requested_bytes=*/recording_memory_allocator_->GetRequestedBytes(), + /*used_bytes=*/recording_memory_allocator_->GetUsedBytes(), + /*count=*/recording_memory_allocator_->GetAllocatedCount()}; +} + +void RecordingMicroAllocator::RecordAllocationUsage( + const RecordedAllocation& snapshotted_allocation, + RecordedAllocation& recorded_allocation) { + recorded_allocation.requested_bytes += + recording_memory_allocator_->GetRequestedBytes() - + snapshotted_allocation.requested_bytes; + recorded_allocation.used_bytes += + recording_memory_allocator_->GetUsedBytes() - + snapshotted_allocation.used_bytes; + recorded_allocation.count += + recording_memory_allocator_->GetAllocatedCount() - + snapshotted_allocation.count; +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/recording_micro_allocator.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/recording_micro_allocator.h new file mode 100644 index 0000000..9d694af --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/recording_micro_allocator.h @@ -0,0 +1,125 @@ +/* Copyright 2023 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_LITE_MICRO_RECORDING_MICRO_ALLOCATOR_H_ +#define TENSORFLOW_LITE_MICRO_RECORDING_MICRO_ALLOCATOR_H_ + +#include "edge-impulse-sdk/tensorflow/lite/micro/recording_single_arena_buffer_allocator.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/compatibility.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_allocator.h" + +namespace tflite { + +// List of buckets currently recorded by this class. Each type keeps a list of +// allocated information during model initialization. +// TODO(b/169834511): Add tracking for scratch buffer allocations. +enum class RecordedAllocationType { + kTfLiteEvalTensorData, + kPersistentTfLiteTensorData, + kPersistentTfLiteTensorQuantizationData, + kPersistentBufferData, + kTfLiteTensorVariableBufferData, + kNodeAndRegistrationArray, + kOpData, +}; + +// Container for holding information about allocation recordings by a given +// type. Each recording contains the number of bytes requested, the actual bytes +// allocated (can defer from requested by alignment), and the number of items +// allocated. +struct RecordedAllocation { + size_t requested_bytes; + size_t used_bytes; + size_t count; +}; + +// Utility subclass of MicroAllocator that records all allocations +// inside the arena. A summary of allocations can be logged through the +// ErrorReporter by invoking LogAllocations(). This special allocator requires +// an instance of RecordingSingleArenaBufferAllocator to capture allocations in +// the head and tail. Arena allocation recording can be retrieved by type +// through the GetRecordedAllocation() function. This class should only be used +// for auditing memory usage or integration testing. +class RecordingMicroAllocator : public MicroAllocator { + public: + static RecordingMicroAllocator* Create(uint8_t* tensor_arena, + size_t arena_size); + + // Returns the fixed amount of memory overhead of RecordingMicroAllocator. + static size_t GetDefaultTailUsage(); + + // Returns the recorded allocations information for a given allocation type. + RecordedAllocation GetRecordedAllocation( + RecordedAllocationType allocation_type) const; + + const RecordingSingleArenaBufferAllocator* GetSimpleMemoryAllocator() const; + + // Logs out through the ErrorReporter all allocation recordings by type + // defined in RecordedAllocationType. + void PrintAllocations() const; + + void* AllocatePersistentBuffer(size_t bytes) override; + + protected: + TfLiteStatus AllocateNodeAndRegistrations( + const Model* model, SubgraphAllocations* subgraph_allocations) override; + TfLiteStatus AllocateTfLiteEvalTensors( + const Model* model, SubgraphAllocations* subgraph_allocations) override; + TfLiteStatus AllocateVariables( + const SubGraph* subgraph, TfLiteEvalTensor* eval_tensors, + const int32_t* offline_planner_offsets) override; + // TODO(b/162311891): Once all kernels have been updated to the new API drop + // this method. It is only used to record TfLiteTensor persistent allocations. + TfLiteTensor* AllocatePersistentTfLiteTensorInternal() override; + + // TODO(b/162311891): Once all kernels have been updated to the new API drop + // this function since all allocations for quantized data will take place in + // the temp section. 
+ TfLiteStatus PopulateTfLiteTensorFromFlatbuffer(const Model* model, + TfLiteTensor* tensor, + int tensor_index, + int subgraph_index, + bool allocate_temp) override; + + private: + RecordingMicroAllocator(RecordingSingleArenaBufferAllocator* memory_allocator, + MicroMemoryPlanner* memory_planner); + + void PrintRecordedAllocation(RecordedAllocationType allocation_type, + const char* allocation_name, + const char* allocation_description) const; + + RecordedAllocation SnapshotAllocationUsage() const; + void RecordAllocationUsage(const RecordedAllocation& snapshotted_allocation, + RecordedAllocation& recorded_allocation); + + const RecordingSingleArenaBufferAllocator* recording_memory_allocator_; + + RecordedAllocation recorded_tflite_eval_tensor_data_ = {}; + RecordedAllocation recorded_persistent_tflite_tensor_data_ = {}; + RecordedAllocation recorded_persistent_tflite_tensor_quantization_data_ = {}; + RecordedAllocation recorded_persistent_buffer_data_ = {}; + RecordedAllocation recorded_tflite_tensor_variable_buffer_data_ = {}; + RecordedAllocation recorded_node_and_registration_array_data_ = {}; + + // TODO(b/187993291): Re-enable OpData allocating tracking. + RecordedAllocation recorded_op_data_ = {}; + + TF_LITE_REMOVE_VIRTUAL_DELETE +}; + +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_RECORDING_MICRO_ALLOCATOR_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/recording_micro_interpreter.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/recording_micro_interpreter.h new file mode 100644 index 0000000..ce44fbd --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/recording_micro_interpreter.h @@ -0,0 +1,69 @@ +/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_MICRO_RECORDING_MICRO_INTERPRETER_H_ +#define TENSORFLOW_LITE_MICRO_RECORDING_MICRO_INTERPRETER_H_ + +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_interpreter.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_profiler_interface.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/recording_micro_allocator.h" + +namespace tflite { + +// Utility subclass that enables internal recordings of the MicroInterpreter. +// This class should be used to audit and analyze memory arena usage for a given +// model and interpreter. +// +// After construction and the first Invoke() or AllocateTensors() call - the +// memory usage is recorded and available through the GetMicroAllocator() +// function. See RecordingMicroAlloctor for more details on what is currently +// recorded from arena allocations. +// +// It is recommended for users to increase the tensor arena size by at least 1kb +// to ensure enough additional memory is available for internal recordings. 
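+// A typical audit, sketched with placeholder names (the model, op resolver
+// and kTensorArenaSize are assumed to come from the surrounding application):
+//
+//   uint8_t tensor_arena[kTensorArenaSize];
+//   tflite::RecordingMicroInterpreter interpreter(
+//       model, op_resolver, tensor_arena, kTensorArenaSize);
+//   interpreter.AllocateTensors();
+//   interpreter.GetMicroAllocator().PrintAllocations();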
+class RecordingMicroInterpreter : public MicroInterpreter { + public: + RecordingMicroInterpreter(const Model* model, + const MicroOpResolver& op_resolver, + uint8_t* tensor_arena, size_t tensor_arena_size, + MicroResourceVariables* resource_variable = nullptr, + MicroProfilerInterface* profiler = nullptr) + : MicroInterpreter( + model, op_resolver, + RecordingMicroAllocator::Create(tensor_arena, tensor_arena_size), + resource_variable, profiler), + recording_micro_allocator_( + static_cast(allocator())) {} + + RecordingMicroInterpreter(const Model* model, + const MicroOpResolver& op_resolver, + RecordingMicroAllocator* allocator, + MicroResourceVariables* resource_variable = nullptr, + MicroProfilerInterface* profiler = nullptr) + : MicroInterpreter(model, op_resolver, allocator, resource_variable, + profiler), + recording_micro_allocator_(*allocator) {} + + const RecordingMicroAllocator& GetMicroAllocator() const { + return recording_micro_allocator_; + } + + private: + const RecordingMicroAllocator& recording_micro_allocator_; +}; + +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_RECORDING_MICRO_INTERPRETER_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/recording_single_arena_buffer_allocator.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/recording_single_arena_buffer_allocator.cpp new file mode 100644 index 0000000..746561c --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/recording_single_arena_buffer_allocator.cpp @@ -0,0 +1,85 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/micro/recording_single_arena_buffer_allocator.h" + +#include + +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h" + +namespace tflite { + +RecordingSingleArenaBufferAllocator::RecordingSingleArenaBufferAllocator( + uint8_t* buffer_head, size_t buffer_size) + : SingleArenaBufferAllocator(buffer_head, buffer_size), + requested_head_bytes_(0), + requested_tail_bytes_(0), + used_bytes_(0), + alloc_count_(0) {} + +RecordingSingleArenaBufferAllocator::~RecordingSingleArenaBufferAllocator() {} + +RecordingSingleArenaBufferAllocator* +RecordingSingleArenaBufferAllocator::Create(uint8_t* buffer_head, + size_t buffer_size) { + TFLITE_DCHECK(buffer_head != nullptr); + RecordingSingleArenaBufferAllocator tmp = + RecordingSingleArenaBufferAllocator(buffer_head, buffer_size); + + uint8_t* allocator_buffer = tmp.AllocatePersistentBuffer( + sizeof(RecordingSingleArenaBufferAllocator), + alignof(RecordingSingleArenaBufferAllocator)); + // Use the default copy constructor to populate internal states. 
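+  // Note how the allocator hosts itself inside the arena it manages: `tmp`
+  // lives on the stack only long enough to carve out space at the arena tail,
+  // and the placement-new below copies its state into that space, so no heap
+  // allocation is ever required.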
+ return new (allocator_buffer) RecordingSingleArenaBufferAllocator(tmp); +} + +size_t RecordingSingleArenaBufferAllocator::GetRequestedBytes() const { + return requested_head_bytes_ + requested_tail_bytes_; +} + +size_t RecordingSingleArenaBufferAllocator::GetUsedBytes() const { + return used_bytes_; +} + +size_t RecordingSingleArenaBufferAllocator::GetAllocatedCount() const { + return alloc_count_; +} + +TfLiteStatus RecordingSingleArenaBufferAllocator::ResizeBuffer( + uint8_t* resizable_buf, size_t size, size_t alignment) { + const uint8_t* previous_head = head(); + TfLiteStatus status = + SingleArenaBufferAllocator::ResizeBuffer(resizable_buf, size, alignment); + if (status == kTfLiteOk) { + used_bytes_ += head() - previous_head; + requested_head_bytes_ = size; + } + return status; +} + +uint8_t* RecordingSingleArenaBufferAllocator::AllocatePersistentBuffer( + size_t size, size_t alignment) { + const uint8_t* previous_tail = tail(); + uint8_t* result = + SingleArenaBufferAllocator::AllocatePersistentBuffer(size, alignment); + if (result != nullptr) { + used_bytes_ += previous_tail - tail(); + requested_tail_bytes_ += size; + alloc_count_++; + } + return result; +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/recording_single_arena_buffer_allocator.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/recording_single_arena_buffer_allocator.h new file mode 100644 index 0000000..cb58a8b --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/recording_single_arena_buffer_allocator.h @@ -0,0 +1,63 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_RECORDING_SINGLE_ARENA_BUFFER_ALLOCATOR_H_ +#define TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_RECORDING_SINGLE_ARENA_BUFFER_ALLOCATOR_H_ + +#include "edge-impulse-sdk/tensorflow/lite/micro/single_arena_buffer_allocator.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/compatibility.h" + +namespace tflite { + +// Utility class used to log allocations of a SingleArenaBufferAllocator. Should +// only be used in debug/evaluation settings or unit tests to evaluate +// allocation usage. +class RecordingSingleArenaBufferAllocator : public SingleArenaBufferAllocator { + public: + RecordingSingleArenaBufferAllocator(uint8_t* buffer_head, size_t buffer_size); + // TODO(b/157615197): Cleanup constructors/destructor and use factory + // functions. + ~RecordingSingleArenaBufferAllocator() override; + + static RecordingSingleArenaBufferAllocator* Create(uint8_t* buffer_head, + size_t buffer_size); + + // Returns the number of bytes requested from the head or tail. + size_t GetRequestedBytes() const; + + // Returns the number of bytes actually allocated from the head or tail. This + // value will be >= to the number of requested bytes due to padding and + // alignment. 
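+  // For example, a request of 10 bytes with 16-byte alignment can consume up
+  // to 25 bytes once the padding needed to reach the alignment boundary is
+  // counted.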
+  size_t GetUsedBytes() const;
+
+  // Returns the number of alloc calls from the head or tail.
+  size_t GetAllocatedCount() const;
+
+  TfLiteStatus ResizeBuffer(uint8_t* resizable_buf, size_t size,
+                            size_t alignment) override;
+  uint8_t* AllocatePersistentBuffer(size_t size, size_t alignment) override;
+
+ private:
+  size_t requested_head_bytes_;
+  size_t requested_tail_bytes_;
+  size_t used_bytes_;
+  size_t alloc_count_;
+
+  TF_LITE_REMOVE_VIRTUAL_DELETE
+};
+
+} // namespace tflite
+
+#endif // TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_RECORDING_SINGLE_ARENA_BUFFER_ALLOCATOR_H_
diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/schema_utils.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/schema_utils.cpp
new file mode 100644
index 0000000..7588028
--- /dev/null
+++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/schema_utils.cpp
@@ -0,0 +1,62 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "edge-impulse-sdk/tensorflow/lite/schema/schema_utils.h"
+
+#include <algorithm>
+
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h"
+
+namespace tflite {
+
+// The following GetBuiltinCode methods are utility methods for reading the
+// builtin operator code while maintaining compatibility between the v3 and
+// v3a schemas. The maximum of the two fields is always the correct value, as
+// follows:
+//
+// - Supporting schema version v3 models
+//
+// The `builtin_code` field is not available in v3 models. The flatbuffer
+// library will feed in zero, which is the default value in the v3a schema.
+// The actual builtin operator code value will exist in the
+// `deprecated_builtin_code` field. At the same time, this implies that
+// `deprecated_builtin_code` >= `builtin_code`, so the maximum of the two
+// fields will be the same as `deprecated_builtin_code`.
+//
+// - Supporting builtin operator codes beyond 127
+//
+// New builtin operators, whose operator code is larger than 127, cannot be
+// assigned to the `deprecated_builtin_code` field. In such cases, the
+// value of the `builtin_code` field should be used for the builtin operator
+// code, and the maximum of the two fields will be the value of
+// `builtin_code`.
+
+BuiltinOperator GetBuiltinCode(const OperatorCode* op_code) {
+  // Caller should guarantee that the given argument value is not a nullptr.
+  TFLITE_DCHECK(op_code != nullptr);
+
+  return std::max(
+      op_code->builtin_code(),
+      static_cast<BuiltinOperator>(op_code->deprecated_builtin_code()));
+}
+
+BuiltinOperator GetBuiltinCode(const OperatorCodeT* op_code) {
+  // Caller should guarantee that the given argument value is not a nullptr.
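+  // As an illustration of the max() rule: a v3 model that stores CONV_2D (3)
+  // sets deprecated_builtin_code to 3 while builtin_code reads back as its
+  // default of 0, so the maximum resolves to CONV_2D; a v3a model whose
+  // operator code is above 127 stores it only in builtin_code, which is then
+  // the larger of the two fields.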
+ TFLITE_DCHECK(op_code != nullptr); + + return std::max(op_code->builtin_code, static_cast( + op_code->deprecated_builtin_code)); +} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/single_arena_buffer_allocator.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/single_arena_buffer_allocator.cpp new file mode 100644 index 0000000..1015b53 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/single_arena_buffer_allocator.cpp @@ -0,0 +1,199 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/micro/single_arena_buffer_allocator.h" + +#include +#include +#include + +#include "edge-impulse-sdk/tensorflow/lite/c/c_api_types.h" +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h" + +namespace tflite { + +SingleArenaBufferAllocator::SingleArenaBufferAllocator(uint8_t* buffer_head, + uint8_t* buffer_tail) + : buffer_head_(buffer_head), + buffer_tail_(buffer_tail), + head_(buffer_head), + tail_(buffer_tail), + temp_(buffer_head_) {} + +SingleArenaBufferAllocator::SingleArenaBufferAllocator(uint8_t* buffer, + size_t buffer_size) + : SingleArenaBufferAllocator(buffer, buffer + buffer_size) {} + +/* static */ +SingleArenaBufferAllocator* SingleArenaBufferAllocator::Create( + uint8_t* buffer_head, size_t buffer_size) { + TFLITE_DCHECK(buffer_head != nullptr); + SingleArenaBufferAllocator tmp = + SingleArenaBufferAllocator(buffer_head, buffer_size); + + // Allocate enough bytes from the buffer to create a + // SingleArenaBufferAllocator. The new instance will use the current adjusted + // tail buffer from the tmp allocator instance. + uint8_t* allocator_buffer = tmp.AllocatePersistentBuffer( + sizeof(SingleArenaBufferAllocator), alignof(SingleArenaBufferAllocator)); + // Use the default copy constructor to populate internal states. + return new (allocator_buffer) SingleArenaBufferAllocator(tmp); +} + +SingleArenaBufferAllocator::~SingleArenaBufferAllocator() {} + +uint8_t* SingleArenaBufferAllocator::AllocateResizableBuffer(size_t size, + size_t alignment) { + // Only supports one resizable buffer, which starts at the buffer head. 
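+  // Arena layout recap (addresses increase to the right): the resizable,
+  // non-persistent region grows upward from buffer_head_ to head_, temporary
+  // allocations are staged between head_ and temp_, persistent buffers grow
+  // downward from buffer_tail_ to tail_, and the bytes between temp_ and
+  // tail_ are still free.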
+ uint8_t* expect_resizable_buf = AlignPointerUp(buffer_head_, alignment); + if (ResizeBuffer(expect_resizable_buf, size, alignment) == kTfLiteOk) { + return expect_resizable_buf; + } + return nullptr; +} + +TfLiteStatus SingleArenaBufferAllocator::DeallocateResizableBuffer( + uint8_t* resizable_buf) { + return ResizeBuffer(resizable_buf, 0, 1); +} + +TfLiteStatus SingleArenaBufferAllocator::ReserveNonPersistentOverlayMemory( + size_t size, size_t alignment) { + uint8_t* expect_resizable_buf = AlignPointerUp(buffer_head_, alignment); + return ResizeBuffer(expect_resizable_buf, size, alignment); +} + +TfLiteStatus SingleArenaBufferAllocator::ResizeBuffer(uint8_t* resizable_buf, + size_t size, + size_t alignment) { + // Only supports one resizable buffer, which starts at the buffer head. + uint8_t* expect_resizable_buf = AlignPointerUp(buffer_head_, alignment); + if (head_ != temp_ || resizable_buf != expect_resizable_buf) { + MicroPrintf( + "Internal error: either buffer is not resizable or " + "ResetTempAllocations() is not called before ResizeBuffer()."); + return kTfLiteError; + } + + uint8_t* const aligned_result = AlignPointerUp(buffer_head_, alignment); + const size_t available_memory = tail_ - aligned_result; + if (available_memory < size) { + MicroPrintf( + "Failed to resize buffer. Requested: %u, available %u, missing: %u", + size, available_memory, size - available_memory); + return kTfLiteError; + } + head_ = aligned_result + size; + temp_ = head_; + + return kTfLiteOk; +} + +uint8_t* SingleArenaBufferAllocator::AllocatePersistentBuffer( + size_t size, size_t alignment) { + uint8_t* const aligned_result = AlignPointerDown(tail_ - size, alignment); + if (aligned_result < head_) { +#ifndef TF_LITE_STRIP_ERROR_STRINGS + const size_t missing_memory = head_ - aligned_result; + MicroPrintf( + "Failed to allocate tail memory. Requested: %u, " + "available %u, missing: %u", + size, size - missing_memory, missing_memory); +#endif + return nullptr; + } + tail_ = aligned_result; + return aligned_result; +} + +uint8_t* SingleArenaBufferAllocator::AllocateTemp(size_t size, + size_t alignment) { + uint8_t* const aligned_result = AlignPointerUp(temp_, alignment); + const size_t available_memory = tail_ - aligned_result; + if (available_memory < size) { + MicroPrintf( + "Failed to allocate temp memory. Requested: %u, " + "available %u, missing: %u", + size, available_memory, size - available_memory); + return nullptr; + } + temp_ = aligned_result + size; + temp_buffer_ptr_check_sum_ ^= (reinterpret_cast(aligned_result)); + temp_buffer_count_++; + return aligned_result; +} + +void SingleArenaBufferAllocator::DeallocateTemp(uint8_t* temp_buf) { + temp_buffer_ptr_check_sum_ ^= (reinterpret_cast(temp_buf)); + temp_buffer_count_--; +} + +bool SingleArenaBufferAllocator::IsAllTempDeallocated() { + if (temp_buffer_count_ != 0 || temp_buffer_ptr_check_sum_ != 0) { + MicroPrintf( + "Number of allocated temp buffers: %d. 
Checksum passing status: %d", + temp_buffer_count_, !temp_buffer_ptr_check_sum_); + return false; + } + return true; +} + +TfLiteStatus SingleArenaBufferAllocator::ResetTempAllocations() { + // TODO(b/209453859): enable error check based on IsAllTempDeallocated after + // all AllocateTemp have been paird with DeallocateTemp + if (!IsAllTempDeallocated()) { + MicroPrintf( + "All temp buffers must be freed before calling ResetTempAllocations()"); + return kTfLiteError; + } + temp_ = head_; + return kTfLiteOk; +} + +uint8_t* SingleArenaBufferAllocator::GetOverlayMemoryAddress() const { + return buffer_head_; +} + +size_t SingleArenaBufferAllocator::GetNonPersistentUsedBytes() const { + return std::max(head_ - buffer_head_, temp_ - buffer_head_); +} + +size_t SingleArenaBufferAllocator::GetPersistentUsedBytes() const { + return buffer_tail_ - tail_; +} + +size_t SingleArenaBufferAllocator::GetAvailableMemory(size_t alignment) const { + uint8_t* const aligned_temp = AlignPointerUp(temp_, alignment); + uint8_t* const aligned_tail = AlignPointerDown(tail_, alignment); + return aligned_tail - aligned_temp; +} + +size_t SingleArenaBufferAllocator::GetUsedBytes() const { + return GetPersistentUsedBytes() + GetNonPersistentUsedBytes(); +} + +size_t SingleArenaBufferAllocator::GetBufferSize() const { + return buffer_tail_ - buffer_head_; +} + +uint8_t* SingleArenaBufferAllocator::head() const { return head_; } + +uint8_t* SingleArenaBufferAllocator::tail() const { return tail_; } + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/single_arena_buffer_allocator.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/single_arena_buffer_allocator.h new file mode 100644 index 0000000..730ee73 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/single_arena_buffer_allocator.h @@ -0,0 +1,144 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_SINGLE_ARENA_BUFFER_ALLOCATOR_H_ +#define TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_SINGLE_ARENA_BUFFER_ALLOCATOR_H_ + +#include +#include + +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/ibuffer_allocator.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/compatibility.h" + +namespace tflite { + +// TODO(petewarden): This allocator never frees up or reuses any memory, even +// though we have enough information about lifetimes of the tensors to do so. +// This makes it pretty wasteful, so we should use a more intelligent method. +class SingleArenaBufferAllocator : public INonPersistentBufferAllocator, + public IPersistentBufferAllocator { + public: + // TODO(b/157615197): Cleanup constructors/destructor and use factory + // functions. 
+ SingleArenaBufferAllocator(uint8_t* buffer_head, uint8_t* buffer_tail); + SingleArenaBufferAllocator(uint8_t* buffer, size_t buffer_size); + virtual ~SingleArenaBufferAllocator(); + + // Creates a new SingleArenaBufferAllocator from a given buffer head and size. + static SingleArenaBufferAllocator* Create(uint8_t* buffer_head, + size_t buffer_size); + + // Resizes a buffer that is previously returned by the + // AllocateResizableBuffer. In current implementation, it Adjusts the head + // (lowest address and moving upwards) memory allocation to a given size. + // Calls to this method will also invalidate all temporary allocation values + // (it sets the location of temp space at the end of the head section). This + // call will fail if a chain of allocations through AllocateTemp() have not + // been cleaned up with a call to ResetTempAllocations(). + virtual TfLiteStatus ResizeBuffer(uint8_t* resizable_buf, size_t size, + size_t alignment) override; + + // Returns a buffer that is resizable viable ResizeBuffer(). Only one + // resizable buffer is currently supported. + virtual uint8_t* AllocateResizableBuffer(size_t size, + size_t alignment) override; + + // Frees up the memory occupied by the resizable buffer + virtual TfLiteStatus DeallocateResizableBuffer( + uint8_t* resizable_buf) override; + + // Reserves the non-persistent memory that is planned by the memory planner. + virtual TfLiteStatus ReserveNonPersistentOverlayMemory( + size_t size, size_t alignment) override; + + // Allocates persistent memory starting at the tail of the arena (highest + // address and moving downwards). + virtual uint8_t* AllocatePersistentBuffer(size_t size, + size_t alignment) override; + + // Allocates a temporary buffer from the head of the arena (lowest address and + // moving upwards) but does not update the actual head allocation size or + // position. The returned buffer is guaranteed until either + // ResetTempAllocations() is called or another call to AllocateFromHead(). + // Repeat calls to this function will create a chain of temp allocations. All + // calls to AllocateTemp() must end with a call to ResetTempAllocations(). If + // AllocateFromHead() is called before a call to ResetTempAllocations(), it + // will fail with an error message. + virtual uint8_t* AllocateTemp(size_t size, size_t alignment) override; + + // Signals that a temporary buffer is no longer needed. This is currently for + // book-keeping purpose and the memory region are not immediately available + // for re-use. The deallocated memory region are only reclaimed after + // ResetTempAllocations is called as it is right now. + virtual void DeallocateTemp(uint8_t* buf) override; + + // Returns true if all temporary buffers are already deallocated. + virtual bool IsAllTempDeallocated() override; + + // Resets a chain of temporary allocations back to the current head of the + // arena (lowest address). + virtual TfLiteStatus ResetTempAllocations() override; + + // Returns a pointer to the buffer currently assigned to the head section. + // This buffer is set by calling SetHeadSize(). + uint8_t* GetOverlayMemoryAddress() const override; + + // Returns the size of the head section in bytes. + size_t GetNonPersistentUsedBytes() const override; + + // Returns the size of all allocations in the tail section in bytes. + size_t GetPersistentUsedBytes() const override; + + // Returns the number of bytes available with a given alignment. This number + // takes in account any temporary allocations. 
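+  // Because both ends of the free gap are rounded inward to the requested
+  // alignment, up to (alignment - 1) bytes can be excluded at the temp_ end
+  // and another (alignment - 1) bytes at the tail_ end of the reported value.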
+ size_t GetAvailableMemory(size_t alignment) const override; + + // Returns the number of used bytes in the allocator. This number takes in + // account any temporary allocations. + size_t GetUsedBytes() const; + + TF_LITE_REMOVE_VIRTUAL_DELETE + + protected: + // Returns a pointer to the current end of the head buffer. + uint8_t* head() const; + + // Returns a pointer to the current end of the tail buffer. + uint8_t* tail() const; + + private: + size_t GetBufferSize() const; + uint8_t* buffer_head_; + uint8_t* buffer_tail_; + uint8_t* head_; + uint8_t* tail_; + uint8_t* temp_; + + // The combination of the checksum of outstanding temporary buffer pointers + // AND the count of outstanding temporary buffer provide a low cost mechanism + // to audit temporary buffers' allocation and deallocation. + // + // XOR Check sum for outstanding temp buffers. + // If all temp buffers are deallocated OR no temp buffers are allocated, + // temp_buffer_ptr_check_sum_ == nullptr. + intptr_t temp_buffer_ptr_check_sum_ = 0; + // Count of outstanding temp buffers. + int temp_buffer_count_ = 0; +}; + +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_SINGLE_ARENA_BUFFER_ALLOCATOR_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/system_setup.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/system_setup.cpp new file mode 100644 index 0000000..86815b9 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/system_setup.cpp @@ -0,0 +1,25 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/micro/system_setup.h" + +namespace tflite { + +// To add an equivalent function for your own platform, create your own +// implementation file, and place it in a subfolder named after the target. See +// tensorflow/lite/micro/debug_log.cc for a similar example. +void InitializeTarget() {} + +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/system_setup.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/system_setup.h new file mode 100644 index 0000000..71ab13a --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/system_setup.h @@ -0,0 +1,27 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef TENSORFLOW_LITE_MICRO_SYSTEM_SETUP_H_ +#define TENSORFLOW_LITE_MICRO_SYSTEM_SETUP_H_ + +namespace tflite { + +// This should called during initialization of TFLM binaries and tests. It can +// be specialized if there is a need for custom target-specific intialization. +// For more information, see tensorflow/lite/micro/system_setup.cc. +void InitializeTarget(); + +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_SYSTEM_SETUP_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/test_helper_custom_ops.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/test_helper_custom_ops.cpp new file mode 100644 index 0000000..fe4c836 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/test_helper_custom_ops.cpp @@ -0,0 +1,112 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/micro/test_helper_custom_ops.h" + +#include +#include +#include +#include +#include + +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/flatbuffers.h" // from @flatbuffers +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/all_ops_resolver.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h" +#include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h" + +// TODO(b/170464050): Use TFLM test only version of schema_utils. + +namespace tflite { +namespace testing { + +const TfLiteRegistration* PackerOp::getRegistration() { + return GetMutableRegistration(); +} + +TfLiteRegistration* PackerOp::GetMutableRegistration() { + static TfLiteRegistration r; + r.init = Init; + r.prepare = Prepare; + r.invoke = Invoke; + r.free = Free; + return &r; +} + +void* PackerOp::Init(TfLiteContext* context, const char* buffer, + size_t length) { + freed_ = false; + // Do nothing. 
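+  // freed_ is a static, test-only flag: Init() clears it and Free() sets it,
+  // so tests using this op can verify that the framework actually released
+  // the op's resources.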
+ return nullptr; +} + +void PackerOp::Free(TfLiteContext* context, void* buffer) { freed_ = true; } + +TfLiteStatus PackerOp::Prepare(TfLiteContext* context, TfLiteNode* node) { + return kTfLiteOk; +} + +TfLiteStatus PackerOp::Invoke(TfLiteContext* context, TfLiteNode* node) { + const TfLiteEvalTensor* input1 = + tflite::micro::GetEvalInput(context, node, 0); + TF_LITE_ENSURE(context, input1 != nullptr); + const int32_t* input1_data = input1->data.i32; + TF_LITE_ENSURE_EQ(context, input1->dims->size, 1); + const int32_t input1_len = input1->dims->data[0]; + + const TfLiteEvalTensor* input2 = + tflite::micro::GetEvalInput(context, node, 1); + TF_LITE_ENSURE(context, input2 != nullptr); + const int32_t* input2_data = input2->data.i32; + TF_LITE_ENSURE_EQ(context, input2->dims->size, 1); + const int32_t input2_len = input2->dims->data[0]; + + TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0); + TF_LITE_ENSURE(context, output != nullptr); + int32_t* output_data = output->data.i32; + int32_t output_len = output->dims->data[0]; + + // Fill output with input: first with the first tensor, then with the second + // tensor up to the size of the output tensor. + int cnt = 0; + int i; + for (i = 0; i < input1_len && cnt < output_len; i++, cnt++) { + output_data[cnt] = input1_data[i]; + } + if (cnt >= output_len) { + return kTfLiteOk; + } + + for (i = 0; i < input2_len && cnt < output_len; i++, cnt++) { + output_data[cnt] = input2_data[i]; + } + if (cnt >= output_len) { + return kTfLiteOk; + } + + for (; cnt < output_len; cnt++) { + output_data[cnt] = 0; + } + return kTfLiteOk; +} + +bool PackerOp::freed_ = false; + +} // namespace testing +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/test_helper_custom_ops.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/test_helper_custom_ops.h new file mode 100644 index 0000000..cbbbcec --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/test_helper_custom_ops.h @@ -0,0 +1,50 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_LITE_MICRO_TEST_HELPER_CUSTOM_OPS_H_ +#define TENSORFLOW_LITE_MICRO_TEST_HELPER_CUSTOM_OPS_H_ + +#include +#include + +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/flatbuffers.h" // from @flatbuffers +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/all_ops_resolver.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h" +#include "edge-impulse-sdk/tensorflow/lite/portable_type_to_tflitetype.h" +#include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h" + +namespace tflite { +namespace testing { + +class PackerOp { + public: + static const TfLiteRegistration* getRegistration(); + static TfLiteRegistration* GetMutableRegistration(); + static void* Init(TfLiteContext* context, const char* buffer, size_t length); + static void Free(TfLiteContext* context, void* buffer); + static TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node); + static TfLiteStatus Invoke(TfLiteContext* context, TfLiteNode* node); + + private: + static bool freed_; +}; + +} // namespace testing +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_TEST_HELPER_CUSTOM_OPS_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/test_helpers.cpp b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/test_helpers.cpp new file mode 100644 index 0000000..d97caca --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/test_helpers.cpp @@ -0,0 +1,2035 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "edge-impulse-sdk/tensorflow/lite/micro/test_helpers.h" + +#include +#include +#include +#include +#include + +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/flatbuffers.h" // from @flatbuffers +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/all_ops_resolver.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_arena_constants.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/test_helper_custom_ops.h" +#include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h" + +// TODO(b/170464050): Use TFLM test only version of schema_utils. 
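+
+// The helpers in this file assemble small flatbuffer models inside a
+// fixed-size stack buffer (see StackAllocator below), so unit tests can run
+// on bare-metal targets without any dynamic memory. A test typically fetches
+// one of the canned models and drives it through an interpreter; sketched
+// here with placeholder names, and assuming the referenced custom ops (e.g.
+// "mock_custom") are registered with the resolver used by the test:
+//
+//   const tflite::Model* model = tflite::testing::BuildSimpleModelWithBranch();
+//   tflite::MicroInterpreter interpreter(model, op_resolver, tensor_arena,
+//                                        kTensorArenaSize);
+//   interpreter.AllocateTensors();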
+ +namespace tflite { +namespace testing { +namespace { + +class StackAllocator : public flatbuffers::Allocator { + public: + StackAllocator(size_t alignment) : data_size_(0) { + data_ = AlignPointerUp(data_backing_, alignment); + } + + uint8_t* allocate(size_t size) override { + TFLITE_DCHECK((data_size_ + size) <= kStackAllocatorSize); + uint8_t* result = data_; + data_ += size; + data_size_ += size; + return result; + } + + void deallocate(uint8_t* p, size_t) override {} + + static StackAllocator& instance(size_t alignment = 1) { + // Avoid using true dynamic memory allocation to be portable to bare metal. + static char inst_memory[sizeof(StackAllocator)]; + static StackAllocator* inst = new (inst_memory) StackAllocator(alignment); + return *inst; + } + + static constexpr size_t kStackAllocatorSize = 8192; + + private: + uint8_t data_backing_[kStackAllocatorSize]; + uint8_t* data_; + int data_size_; + + TF_LITE_REMOVE_VIRTUAL_DELETE +}; + +flatbuffers::FlatBufferBuilder* BuilderInstance() { + static char inst_memory[sizeof(flatbuffers::FlatBufferBuilder)]; + static flatbuffers::FlatBufferBuilder* inst = + new (inst_memory) flatbuffers::FlatBufferBuilder( + StackAllocator::kStackAllocatorSize, + &StackAllocator::instance(MicroArenaBufferAlignment())); + return inst; +} + +// A wrapper around FlatBuffer API to help build model easily. +class ModelBuilder { + public: + typedef int32_t Tensor; + typedef int Operator; + typedef int Node; + + // `builder` needs to be available until BuildModel is called. + explicit ModelBuilder(flatbuffers::FlatBufferBuilder* builder) + : builder_(builder) {} + + // Registers an operator that will be used in the model. + Operator RegisterOp(BuiltinOperator op, const char* custom_code); + + // Adds a tensor to the model. + Tensor AddTensor(TensorType type, std::initializer_list shape) { + return AddTensorImpl(type, /* is_variable */ false, shape); + } + + // Adds a variable tensor to the model. + Tensor AddVariableTensor(TensorType type, + std::initializer_list shape) { + return AddTensorImpl(type, /* is_variable */ true, shape); + } + + // Adds a node to the model with given input and output Tensors. + Node AddNode(Operator op, std::initializer_list inputs, + std::initializer_list outputs, + std::initializer_list intermediates = + std::initializer_list{}); + + void AddMetadata(const char* description_string, + const int32_t* metadata_buffer_data, size_t num_elements); + + // Constructs the flatbuffer model using `builder_` and return a pointer to + // it. The returned model has the same lifetime as `builder_`. + // Note the default value of 0 for num_subgraph_inputs means all tensor inputs + // are in subgraph input list. + const Model* BuildModel(std::initializer_list inputs, + std::initializer_list outputs, + size_t num_subgraph_inputs = 0); + + private: + // Adds a tensor to the model. 
+ Tensor AddTensorImpl(TensorType type, bool is_variable, + std::initializer_list shape); + + flatbuffers::FlatBufferBuilder* builder_; + + static constexpr int kMaxOperatorCodes = 10; + flatbuffers::Offset operator_codes_[kMaxOperatorCodes]; + int next_operator_code_id_ = 0; + + static constexpr int kMaxOperators = 50; + flatbuffers::Offset operators_[kMaxOperators]; + int next_operator_id_ = 0; + + static constexpr int kMaxTensors = 50; + flatbuffers::Offset tensors_[kMaxTensors]; + + static constexpr int kMaxMetadataBuffers = 10; + + static constexpr int kMaxMetadatas = 10; + flatbuffers::Offset metadata_[kMaxMetadatas]; + + flatbuffers::Offset metadata_buffers_[kMaxMetadataBuffers]; + + int nbr_of_metadata_buffers_ = 0; + + int next_tensor_id_ = 0; +}; + +ModelBuilder::Operator ModelBuilder::RegisterOp(BuiltinOperator op, + const char* custom_code) { + TFLITE_DCHECK(next_operator_code_id_ <= kMaxOperatorCodes); + operator_codes_[next_operator_code_id_] = tflite::CreateOperatorCodeDirect( + *builder_, /*deprecated_builtin_code=*/0, custom_code, /*version=*/0, op); + next_operator_code_id_++; + return next_operator_code_id_ - 1; +} + +ModelBuilder::Node ModelBuilder::AddNode( + ModelBuilder::Operator op, + std::initializer_list inputs, + std::initializer_list outputs, + std::initializer_list intermediates) { + TFLITE_DCHECK(next_operator_id_ <= kMaxOperators); + operators_[next_operator_id_] = tflite::CreateOperator( + *builder_, op, builder_->CreateVector(inputs.begin(), inputs.size()), + builder_->CreateVector(outputs.begin(), outputs.size()), + BuiltinOptions_NONE, + /*builtin_options=*/0, + /*custom_options=*/0, tflite::CustomOptionsFormat_FLEXBUFFERS, + /*mutating_variable_inputs =*/0, + builder_->CreateVector(intermediates.begin(), intermediates.size())); + next_operator_id_++; + return next_operator_id_ - 1; +} + +void ModelBuilder::AddMetadata(const char* description_string, + const int32_t* metadata_buffer_data, + size_t num_elements) { + metadata_[ModelBuilder::nbr_of_metadata_buffers_] = + CreateMetadata(*builder_, builder_->CreateString(description_string), + 1 + ModelBuilder::nbr_of_metadata_buffers_); + + metadata_buffers_[nbr_of_metadata_buffers_] = tflite::CreateBuffer( + *builder_, builder_->CreateVector((uint8_t*)metadata_buffer_data, + sizeof(uint32_t) * num_elements)); + + ModelBuilder::nbr_of_metadata_buffers_++; +} + +const Model* ModelBuilder::BuildModel( + std::initializer_list inputs, + std::initializer_list outputs, + size_t num_subgraph_inputs) { + // Model schema requires an empty buffer at idx 0. + size_t buffer_size = 1 + ModelBuilder::nbr_of_metadata_buffers_; + flatbuffers::Offset buffers[kMaxMetadataBuffers]; + buffers[0] = tflite::CreateBuffer(*builder_); + + // Place the metadata buffers first in the buffer since the indices for them + // have already been set in AddMetadata() + for (int i = 1; i < ModelBuilder::nbr_of_metadata_buffers_ + 1; ++i) { + buffers[i] = metadata_buffers_[i - 1]; + } + + // Default to single subgraph model. + constexpr size_t subgraphs_size = 1; + + // Find out number of subgraph inputs. + if (num_subgraph_inputs == 0) { + // This is the default case. + num_subgraph_inputs = inputs.size(); + } else { + // A non-zero value of num_subgraph_inputs means that some of + // the operator input tensors are not subgraph inputs. 
+ TFLITE_DCHECK(num_subgraph_inputs <= inputs.size()); + } + + const flatbuffers::Offset subgraphs[subgraphs_size] = { + tflite::CreateSubGraph( + *builder_, builder_->CreateVector(tensors_, next_tensor_id_), + builder_->CreateVector(inputs.begin(), num_subgraph_inputs), + builder_->CreateVector(outputs.begin(), outputs.size()), + builder_->CreateVector(operators_, next_operator_id_), + builder_->CreateString("test_subgraph"))}; + + flatbuffers::Offset model_offset; + if (ModelBuilder::nbr_of_metadata_buffers_ > 0) { + model_offset = tflite::CreateModel( + *builder_, 0, + builder_->CreateVector(operator_codes_, next_operator_code_id_), + builder_->CreateVector(subgraphs, subgraphs_size), + builder_->CreateString("teset_model"), + builder_->CreateVector(buffers, buffer_size), 0, + builder_->CreateVector(metadata_, + ModelBuilder::nbr_of_metadata_buffers_)); + } else { + model_offset = tflite::CreateModel( + *builder_, 0, + builder_->CreateVector(operator_codes_, next_operator_code_id_), + builder_->CreateVector(subgraphs, subgraphs_size), + builder_->CreateString("teset_model"), + builder_->CreateVector(buffers, buffer_size)); + } + + tflite::FinishModelBuffer(*builder_, model_offset); + void* model_pointer = builder_->GetBufferPointer(); + const Model* model = flatbuffers::GetRoot(model_pointer); + return model; +} + +ModelBuilder::Tensor ModelBuilder::AddTensorImpl( + TensorType type, bool is_variable, std::initializer_list shape) { + TFLITE_DCHECK(next_tensor_id_ <= kMaxTensors); + tensors_[next_tensor_id_] = tflite::CreateTensor( + *builder_, builder_->CreateVector(shape.begin(), shape.size()), type, + /* buffer */ 0, /* name */ 0, /* quantization */ 0, + /* is_variable */ is_variable, + /* sparsity */ 0); + next_tensor_id_++; + return next_tensor_id_ - 1; +} + +const Model* BuildSimpleStatefulModel() { + using flatbuffers::Offset; + flatbuffers::FlatBufferBuilder* fb_builder = BuilderInstance(); + + ModelBuilder model_builder(fb_builder); + + const int op_id = + model_builder.RegisterOp(BuiltinOperator_CUSTOM, "simple_stateful_op"); + const int input_tensor = model_builder.AddTensor(TensorType_INT8, {3}); + const int median_tensor = model_builder.AddTensor(TensorType_INT8, {3}); + const int invoke_count_tensor = + model_builder.AddTensor(TensorType_INT32, {1}); + const int intermediate_tensor = + model_builder.AddTensor(TensorType_FLOAT32, {0}); + + model_builder.AddNode(op_id, {input_tensor}, + {median_tensor, invoke_count_tensor}, + {intermediate_tensor}); + return model_builder.BuildModel({input_tensor}, + {median_tensor, invoke_count_tensor}); +} + +const Model* BuildSimpleModelWithBranch() { + using flatbuffers::Offset; + flatbuffers::FlatBufferBuilder* fb_builder = BuilderInstance(); + + ModelBuilder model_builder(fb_builder); + /* Model structure + | t0 + +------| + | v + | +---------+ + | | n0 | + | | | + | +---------+ + v + + | + +---------+ | t1 + | n1 | | + | | | + +---------+ | + | | + t2 | v + | +---------+ + +-->| n2 | + | | + +-------|-+ + |t3 + v + */ + const int op_id = + model_builder.RegisterOp(BuiltinOperator_CUSTOM, "mock_custom"); + const int t0 = model_builder.AddTensor(TensorType_FLOAT32, {2, 2, 3}); + const int t1 = model_builder.AddTensor(TensorType_FLOAT32, {2, 2, 3}); + const int t2 = model_builder.AddTensor(TensorType_FLOAT32, {2, 2, 3}); + const int t3 = model_builder.AddTensor(TensorType_FLOAT32, {2, 2, 3}); + model_builder.AddNode(op_id, {t0}, {t1}); // n0 + model_builder.AddNode(op_id, {t0}, {t2}); // n1 + model_builder.AddNode(op_id, {t1, t2}, {t3}); 
// n2 + return model_builder.BuildModel({t0}, {t3}); +} + +const Model* BuildModelWithOfflinePlanning(int number_of_tensors, + const int32_t* metadata_buffer, + NodeConnection* node_conn, + int num_conns, + int num_subgraph_inputs) { + using flatbuffers::Offset; + flatbuffers::FlatBufferBuilder* fb_builder = BuilderInstance(); + + ModelBuilder model_builder(fb_builder); + + const int op_id = + model_builder.RegisterOp(BuiltinOperator_CUSTOM, "mock_custom"); + + for (int i = 0; i < number_of_tensors; ++i) { + model_builder.AddTensor(TensorType_FLOAT32, {2, 2, 3}); + } + + for (int i = 0; i < num_conns; ++i) { + model_builder.AddNode(op_id, node_conn[i].input, node_conn[i].output); + } + + model_builder.AddMetadata( + "OfflineMemoryAllocation", metadata_buffer, + number_of_tensors + tflite::testing::kOfflinePlannerHeaderSize); + + return model_builder.BuildModel( + node_conn[0].input, node_conn[num_conns - 1].output, num_subgraph_inputs); +} + +const Model* BuildModelWithUnusedInputs() { + using flatbuffers::Offset; + flatbuffers::FlatBufferBuilder* builder = BuilderInstance(); + + constexpr size_t buffers_size = 1; + const Offset buffers[buffers_size] = {CreateBuffer(*builder)}; + constexpr size_t tensor_shape_size = 2; + const int32_t tensor_shape[tensor_shape_size] = {1, 64}; + constexpr size_t tensors_size = 4; + const Offset tensors[tensors_size] = { + CreateTensor(*builder, + builder->CreateVector(tensor_shape, tensor_shape_size), + TensorType_INT8, 0, + builder->CreateString("test_input_tensor"), 0, false), + CreateTensor(*builder, + builder->CreateVector(tensor_shape, tensor_shape_size), + TensorType_INT8, 0, + builder->CreateString("test_unused_input_tensor"), 0, false), + CreateTensor(*builder, + builder->CreateVector(tensor_shape, tensor_shape_size), + TensorType_INT8, 0, + builder->CreateString("test_output_tensor"), 0, false), + CreateTensor(*builder, + builder->CreateVector(tensor_shape, tensor_shape_size), + TensorType_INT8, 0, + builder->CreateString("test_unused_tensor"), 0, false), + }; + constexpr size_t inputs_size = 2; + const int32_t inputs[inputs_size] = {0, 1}; + constexpr size_t outputs_size = 1; + const int32_t outputs[outputs_size] = {2}; + constexpr size_t operator_inputs_size = 1; + const int32_t operator_inputs[operator_inputs_size] = {0}; + constexpr size_t operator_outputs_size = 1; + const int32_t operator_outputs[operator_outputs_size] = {2}; + constexpr size_t operators_size = 1; + const Offset operators[operators_size] = { + CreateOperator( + *builder, 0, + builder->CreateVector(operator_inputs, operator_inputs_size), + builder->CreateVector(operator_outputs, operator_outputs_size), + BuiltinOptions_NONE), + }; + constexpr size_t subgraphs_size = 1; + const Offset subgraphs[subgraphs_size] = { + CreateSubGraph(*builder, builder->CreateVector(tensors, tensors_size), + builder->CreateVector(inputs, inputs_size), + builder->CreateVector(outputs, outputs_size), + builder->CreateVector(operators, operators_size), + builder->CreateString("test_subgraph"))}; + constexpr size_t operator_codes_size = 1; + const Offset operator_codes[operator_codes_size] = { + CreateOperatorCodeDirect(*builder, /*deprecated_builtin_code=*/0, + "mock_custom", + /*version=*/0, BuiltinOperator_CUSTOM)}; + const Offset model_offset = CreateModel( + *builder, 0, builder->CreateVector(operator_codes, operator_codes_size), + builder->CreateVector(subgraphs, subgraphs_size), + builder->CreateString("test_model"), + builder->CreateVector(buffers, buffers_size)); + FinishModelBuffer(*builder, 
model_offset); + void* model_pointer = builder->GetBufferPointer(); + const Model* model = flatbuffers::GetRoot(model_pointer); + return model; +} + +const Model* BuildModelWithUnusedOperatorOutputs() { + using flatbuffers::Offset; + flatbuffers::FlatBufferBuilder* builder = BuilderInstance(); + + constexpr size_t buffers_size = 1; + const Offset buffers[buffers_size] = {CreateBuffer(*builder)}; + constexpr size_t tensor_shape_size = 2; + const int32_t tensor_shape[tensor_shape_size] = {1, 64}; + constexpr size_t tensors_size = 2; + const Offset tensors[tensors_size] = { + CreateTensor(*builder, + builder->CreateVector(tensor_shape, tensor_shape_size), + TensorType_INT8, 0, + builder->CreateString("test_input_tensor"), 0, false), + CreateTensor( + *builder, builder->CreateVector(tensor_shape, tensor_shape_size), + TensorType_INT8, 0, + builder->CreateString("test_unused_output_tensor"), 0, false)}; + constexpr size_t inputs_size = 1; + const int32_t inputs[inputs_size] = {}; + constexpr size_t outputs_size = 1; + const int32_t outputs[outputs_size] = {0}; + constexpr size_t operator_inputs_size = 1; + const int32_t operator_inputs[operator_inputs_size] = {}; + constexpr size_t operator_outputs_size = 2; + const int32_t operator_outputs[operator_outputs_size] = {0, 1}; + constexpr size_t operators_size = 1; + const Offset operators[operators_size] = { + CreateOperator( + *builder, 0, + builder->CreateVector(operator_inputs, operator_inputs_size), + builder->CreateVector(operator_outputs, operator_outputs_size), + BuiltinOptions_NONE), + }; + constexpr size_t subgraphs_size = 1; + const Offset subgraphs[subgraphs_size] = { + CreateSubGraph(*builder, builder->CreateVector(tensors, tensors_size), + builder->CreateVector(inputs, inputs_size), + builder->CreateVector(outputs, outputs_size), + builder->CreateVector(operators, operators_size), + builder->CreateString("test_subgraph"))}; + constexpr size_t operator_codes_size = 1; + const Offset operator_codes[operator_codes_size] = { + CreateOperatorCodeDirect(*builder, /*deprecated_builtin_code=*/0, + "mock_custom", + /*version=*/0, BuiltinOperator_CUSTOM)}; + const Offset model_offset = CreateModel( + *builder, 0, builder->CreateVector(operator_codes, operator_codes_size), + builder->CreateVector(subgraphs, subgraphs_size), + builder->CreateString("test_model"), + builder->CreateVector(buffers, buffers_size)); + FinishModelBuffer(*builder, model_offset); + void* model_pointer = builder->GetBufferPointer(); + const Model* model = flatbuffers::GetRoot(model_pointer); + return model; +} + +const Model* BuildModelWith256x256Tensor() { + using flatbuffers::Offset; + flatbuffers::FlatBufferBuilder* fb_builder = BuilderInstance(); + + ModelBuilder model_builder(fb_builder); + + const int op_id = + model_builder.RegisterOp(BuiltinOperator_CUSTOM, "mock_custom"); + const int input1_tensor = + model_builder.AddTensor(TensorType_INT8, {256, 256}); + const int input2_tensor = + model_builder.AddTensor(TensorType_INT8, {256, 256}); + const int output_tensor = + model_builder.AddTensor(TensorType_INT8, {256, 256}); + + model_builder.AddNode(op_id, {input1_tensor, input2_tensor}, {output_tensor}); + return model_builder.BuildModel({input1_tensor, input2_tensor}, + {output_tensor}); +} + +const Model* BuildSimpleMockModel() { + using flatbuffers::Offset; + flatbuffers::FlatBufferBuilder* builder = BuilderInstance(); + + constexpr size_t buffer_data_size = 1; + const uint8_t buffer_data[buffer_data_size] = {21}; + constexpr size_t buffers_size = 2; + const 
Offset buffers[buffers_size] = { + CreateBuffer(*builder), + CreateBuffer(*builder, + builder->CreateVector(buffer_data, buffer_data_size))}; + constexpr size_t tensor_shape_size = 1; + const int32_t tensor_shape[tensor_shape_size] = {1}; + constexpr size_t tensors_size = 4; + const Offset tensors[tensors_size] = { + CreateTensor(*builder, + builder->CreateVector(tensor_shape, tensor_shape_size), + TensorType_INT32, 0, + builder->CreateString("test_input_tensor"), 0, false), + CreateTensor(*builder, + builder->CreateVector(tensor_shape, tensor_shape_size), + TensorType_INT8, 1, + builder->CreateString("test_weight_tensor"), 0, false), + CreateTensor(*builder, + builder->CreateVector(tensor_shape, tensor_shape_size), + TensorType_INT32, 0, + builder->CreateString("test_output_tensor"), 0, false), + CreateTensor(*builder, + builder->CreateVector(tensor_shape, tensor_shape_size), + TensorType_INT32, 0, + builder->CreateString("test_output2_tensor"), 0, false), + }; + constexpr size_t inputs_size = 1; + const int32_t inputs[inputs_size] = {0}; + constexpr size_t outputs_size = 2; + const int32_t outputs[outputs_size] = {2, 3}; + constexpr size_t operator_inputs_size = 2; + const int32_t operator_inputs[operator_inputs_size] = {0, 1}; + constexpr size_t operator_outputs_size = 1; + const int32_t operator_outputs[operator_outputs_size] = {2}; + const int32_t operator2_outputs[operator_outputs_size] = {3}; + constexpr size_t operators_size = 2; + const Offset operators[operators_size] = { + CreateOperator( + *builder, 0, + builder->CreateVector(operator_inputs, operator_inputs_size), + builder->CreateVector(operator_outputs, operator_outputs_size), + BuiltinOptions_NONE), + CreateOperator( + *builder, 0, + builder->CreateVector(operator_inputs, operator_inputs_size), + builder->CreateVector(operator2_outputs, operator_outputs_size), + BuiltinOptions_NONE), + }; + constexpr size_t subgraphs_size = 1; + const Offset subgraphs[subgraphs_size] = { + CreateSubGraph(*builder, builder->CreateVector(tensors, tensors_size), + builder->CreateVector(inputs, inputs_size), + builder->CreateVector(outputs, outputs_size), + builder->CreateVector(operators, operators_size), + builder->CreateString("test_subgraph"))}; + constexpr size_t operator_codes_size = 1; + const Offset operator_codes[operator_codes_size] = { + CreateOperatorCodeDirect(*builder, /*deprecated_builtin_code=*/0, + "mock_custom", + /*version=*/0, BuiltinOperator_CUSTOM)}; + const Offset model_offset = CreateModel( + *builder, 0, builder->CreateVector(operator_codes, operator_codes_size), + builder->CreateVector(subgraphs, subgraphs_size), + builder->CreateString("test_model"), + builder->CreateVector(buffers, buffers_size)); + FinishModelBuffer(*builder, model_offset); + void* model_pointer = builder->GetBufferPointer(); + const Model* model = flatbuffers::GetRoot(model_pointer); + return model; +} + +const Model* BuildComplexMockModel() { + using flatbuffers::Offset; + flatbuffers::FlatBufferBuilder* builder = BuilderInstance(); + + constexpr size_t buffer_data_size = 1; + const uint8_t buffer_data_1[buffer_data_size] = {21}; + const uint8_t buffer_data_2[buffer_data_size] = {21}; + const uint8_t buffer_data_3[buffer_data_size] = {21}; + constexpr size_t buffers_size = 7; + const Offset buffers[buffers_size] = { + // Op 1 buffers: + CreateBuffer(*builder), + CreateBuffer(*builder), + CreateBuffer(*builder, + builder->CreateVector(buffer_data_1, buffer_data_size)), + // Op 2 buffers: + CreateBuffer(*builder), + CreateBuffer(*builder, + 
builder->CreateVector(buffer_data_2, buffer_data_size)), + // Op 3 buffers: + CreateBuffer(*builder), + CreateBuffer(*builder, + builder->CreateVector(buffer_data_3, buffer_data_size)), + }; + constexpr size_t tensor_shape_size = 1; + const int32_t tensor_shape[tensor_shape_size] = {1}; + + constexpr size_t tensors_size = 10; + const Offset tensors[tensors_size] = { + // Op 1 inputs: + CreateTensor( + *builder, builder->CreateVector(tensor_shape, tensor_shape_size), + TensorType_INT32, 0, builder->CreateString("test_input_tensor_1"), 0, + false /* is_variable */), + CreateTensor( + *builder, builder->CreateVector(tensor_shape, tensor_shape_size), + TensorType_INT32, 1, builder->CreateString("test_variable_tensor_1"), + 0, true /* is_variable */), + CreateTensor( + *builder, builder->CreateVector(tensor_shape, tensor_shape_size), + TensorType_INT8, 2, builder->CreateString("test_weight_tensor_1"), 0, + false /* is_variable */), + // Op 1 output / Op 2 input: + CreateTensor( + *builder, builder->CreateVector(tensor_shape, tensor_shape_size), + TensorType_INT32, 0, builder->CreateString("test_output_tensor_1"), 0, + false /* is_variable */), + // Op 2 inputs: + CreateTensor( + *builder, builder->CreateVector(tensor_shape, tensor_shape_size), + TensorType_INT32, 1, builder->CreateString("test_variable_tensor_2"), + 0, true /* is_variable */), + CreateTensor( + *builder, builder->CreateVector(tensor_shape, tensor_shape_size), + TensorType_INT8, 2, builder->CreateString("test_weight_tensor_2"), 0, + false /* is_variable */), + // Op 2 output / Op 3 input: + CreateTensor( + *builder, builder->CreateVector(tensor_shape, tensor_shape_size), + TensorType_INT32, 0, builder->CreateString("test_output_tensor_2"), 0, + false /* is_variable */), + // Op 3 inputs: + CreateTensor( + *builder, builder->CreateVector(tensor_shape, tensor_shape_size), + TensorType_INT32, 1, builder->CreateString("test_variable_tensor_3"), + 0, true /* is_variable */), + CreateTensor( + *builder, builder->CreateVector(tensor_shape, tensor_shape_size), + TensorType_INT8, 2, builder->CreateString("test_weight_tensor_3"), 0, + false /* is_variable */), + // Op 3 output: + CreateTensor( + *builder, builder->CreateVector(tensor_shape, tensor_shape_size), + TensorType_INT32, 0, builder->CreateString("test_output_tensor_3"), 0, + false /* is_variable */), + }; + + constexpr size_t operators_size = 3; + Offset operators[operators_size]; + { + // Set Op 1 attributes: + constexpr size_t operator_inputs_size = 3; + const int32_t operator_inputs[operator_inputs_size] = {0, 1, 2}; + constexpr size_t operator_outputs_size = 1; + const int32_t operator_outputs[operator_outputs_size] = {3}; + + operators[0] = {CreateOperator( + *builder, 0, + builder->CreateVector(operator_inputs, operator_inputs_size), + builder->CreateVector(operator_outputs, operator_outputs_size), + BuiltinOptions_NONE)}; + } + + { + // Set Op 2 attributes + constexpr size_t operator_inputs_size = 3; + const int32_t operator_inputs[operator_inputs_size] = {3, 4, 5}; + constexpr size_t operator_outputs_size = 1; + const int32_t operator_outputs[operator_outputs_size] = {6}; + + operators[1] = {CreateOperator( + *builder, 0, + builder->CreateVector(operator_inputs, operator_inputs_size), + builder->CreateVector(operator_outputs, operator_outputs_size), + BuiltinOptions_NONE)}; + } + + { + // Set Op 3 attributes + constexpr size_t operator_inputs_size = 3; + const int32_t operator_inputs[operator_inputs_size] = {6, 7, 8}; + constexpr size_t operator_outputs_size = 1; + const 
int32_t operator_outputs[operator_outputs_size] = {9}; + + operators[2] = {CreateOperator( + *builder, 0, + builder->CreateVector(operator_inputs, operator_inputs_size), + builder->CreateVector(operator_outputs, operator_outputs_size), + BuiltinOptions_NONE)}; + } + + constexpr size_t inputs_size = 1; + const int32_t inputs[inputs_size] = {0}; + constexpr size_t outputs_size = 1; + const int32_t outputs[outputs_size] = {9}; + + constexpr size_t subgraphs_size = 1; + const Offset subgraphs[subgraphs_size] = { + CreateSubGraph(*builder, builder->CreateVector(tensors, tensors_size), + builder->CreateVector(inputs, inputs_size), + builder->CreateVector(outputs, outputs_size), + builder->CreateVector(operators, operators_size), + builder->CreateString("test_subgraph"))}; + + constexpr size_t operator_codes_size = 1; + const Offset operator_codes[operator_codes_size] = { + CreateOperatorCodeDirect(*builder, /*deprecated_builtin_code=*/0, + "mock_custom", + /*version=*/0, BuiltinOperator_CUSTOM)}; + + const Offset model_offset = CreateModel( + *builder, 0, builder->CreateVector(operator_codes, operator_codes_size), + builder->CreateVector(subgraphs, subgraphs_size), + builder->CreateString("test_model"), + builder->CreateVector(buffers, buffers_size)); + + FinishModelBuffer(*builder, model_offset); + void* model_pointer = builder->GetBufferPointer(); + const Model* model = flatbuffers::GetRoot(model_pointer); + return model; +} + +const Model* BuildSimpleMultipleInputsModel() { + using flatbuffers::Offset; + flatbuffers::FlatBufferBuilder* builder = BuilderInstance(); + + constexpr size_t buffers_size = 1; + const Offset buffers[buffers_size] = { + CreateBuffer(*builder), + }; + constexpr size_t tensor_shape_size = 1; + const int32_t tensor_shape[tensor_shape_size] = {1}; + constexpr size_t tensors_size = 4; + const Offset tensors[tensors_size] = { + CreateTensor(*builder, + builder->CreateVector(tensor_shape, tensor_shape_size), + TensorType_INT32, 0, + builder->CreateString("test_input_tensor1"), 0, false), + CreateTensor(*builder, + builder->CreateVector(tensor_shape, tensor_shape_size), + TensorType_INT8, 0, + builder->CreateString("test_input_tensor2"), 0, false), + CreateTensor(*builder, + builder->CreateVector(tensor_shape, tensor_shape_size), + TensorType_INT32, 0, + builder->CreateString("test_input_tensor3"), 0, false), + CreateTensor(*builder, + builder->CreateVector(tensor_shape, tensor_shape_size), + TensorType_INT32, 0, + builder->CreateString("test_output_tensor"), 0, false), + }; + constexpr size_t inputs_size = 3; + const int32_t inputs[inputs_size] = {0, 1, 2}; + constexpr size_t outputs_size = 1; + const int32_t outputs[outputs_size] = {3}; + constexpr size_t operator_inputs_size = 3; + const int32_t operator_inputs[operator_inputs_size] = {0, 1, 2}; + constexpr size_t operator_outputs_size = 1; + const int32_t operator_outputs[operator_outputs_size] = {3}; + constexpr size_t operators_size = 1; + const Offset operators[operators_size] = { + CreateOperator( + *builder, 0, + builder->CreateVector(operator_inputs, operator_inputs_size), + builder->CreateVector(operator_outputs, operator_outputs_size), + BuiltinOptions_NONE), + }; + constexpr size_t subgraphs_size = 1; + const Offset subgraphs[subgraphs_size] = { + CreateSubGraph(*builder, builder->CreateVector(tensors, tensors_size), + builder->CreateVector(inputs, inputs_size), + builder->CreateVector(outputs, outputs_size), + builder->CreateVector(operators, operators_size), + builder->CreateString("test_subgraph"))}; + 
constexpr size_t operator_codes_size = 1; + const Offset operator_codes[operator_codes_size] = { + CreateOperatorCodeDirect(*builder, /*deprecated_builtin_code=*/0, + "multiple_inputs_op", + /*version=*/0, BuiltinOperator_CUSTOM)}; + const Offset model_offset = CreateModel( + *builder, 0, builder->CreateVector(operator_codes, operator_codes_size), + builder->CreateVector(subgraphs, subgraphs_size), + builder->CreateString("test_model"), + builder->CreateVector(buffers, buffers_size)); + FinishModelBuffer(*builder, model_offset); + void* model_pointer = builder->GetBufferPointer(); + const Model* model = flatbuffers::GetRoot(model_pointer); + return model; +} + +const Model* BuildSimpleModelWithSubgraphsAndIf() { + using flatbuffers::Offset; + flatbuffers::FlatBufferBuilder* builder = BuilderInstance(); + + constexpr size_t buffers_size = 1; + const Offset buffers[buffers_size] = { + CreateBuffer(*builder), + }; + const int32_t condition_tensor_shape[] = {1}; + const int32_t data_tensor_shape[] = {1, 2}; + constexpr size_t tensors_size = 4; + const Offset subgraph1_tensors[tensors_size] = { + CreateTensor(*builder, builder->CreateVector(condition_tensor_shape, 1), + TensorType_BOOL, 0, + builder->CreateString("condition tensor"), 0, false), + CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 2), + TensorType_FLOAT32, 0, + builder->CreateString("input_tensor1"), 0, false), + CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 2), + TensorType_FLOAT32, 0, + builder->CreateString("input_tensor2"), 0, false), + CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 2), + TensorType_FLOAT32, 0, + builder->CreateString("output_tensor"), 0, false), + }; + const Offset subgraph2_tensors[tensors_size] = { + CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 2), + TensorType_FLOAT32, 0, + builder->CreateString("input_tensor1"), 0, false), + CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 2), + TensorType_FLOAT32, 0, + builder->CreateString("input_tensor2"), 0, false), + CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 2), + TensorType_FLOAT32, 0, + builder->CreateString("output_tensor"), 0, false), + }; + const Offset subgraph3_tensors[tensors_size] = { + CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 2), + TensorType_FLOAT32, 0, + builder->CreateString("input_tensor1"), 0, false), + CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 2), + TensorType_FLOAT32, 0, + builder->CreateString("input_tensor2"), 0, false), + CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 2), + TensorType_FLOAT32, 0, + builder->CreateString("output_tensor"), 0, false), + }; + + constexpr size_t if_inputs_size = 3; + const int32_t if_inputs[if_inputs_size] = {0, 1, 2}; + constexpr size_t outputs_size = 1; + const int32_t if_outputs[outputs_size] = {3}; + constexpr size_t operator_inputs_size = 2; + const int32_t operator_inputs[operator_inputs_size] = {0, 1}; + const int32_t operator_outputs[outputs_size] = {2}; + constexpr size_t operators_size = 1; + const Offset subgraph1_operators[operators_size] = { + CreateOperator( + *builder, 0, builder->CreateVector(if_inputs, if_inputs_size), + builder->CreateVector(if_outputs, outputs_size), + BuiltinOptions_IfOptions, CreateIfOptions(*builder, 1, 2).Union()), + }; + const Offset subgraph2_operators[operators_size] = { + CreateOperator( + *builder, 1, + builder->CreateVector(operator_inputs, operator_inputs_size), + builder->CreateVector(operator_outputs, 
outputs_size), + BuiltinOptions_NONE), + }; + const Offset subgraph3_operators[operators_size] = { + CreateOperator( + *builder, 2, + builder->CreateVector(operator_inputs, operator_inputs_size), + builder->CreateVector(operator_outputs, outputs_size), + BuiltinOptions_NONE), + }; + constexpr size_t subgraphs_size = 3; + const Offset subgraphs[subgraphs_size] = { + CreateSubGraph(*builder, builder->CreateVector(subgraph1_tensors, 4), + builder->CreateVector(if_inputs, if_inputs_size), + builder->CreateVector(if_outputs, outputs_size), + builder->CreateVector(subgraph1_operators, operators_size), + builder->CreateString("if_subgraph")), + CreateSubGraph( + *builder, builder->CreateVector(subgraph2_tensors, 3), + builder->CreateVector(operator_inputs, operator_inputs_size), + builder->CreateVector(operator_outputs, outputs_size), + builder->CreateVector(subgraph2_operators, operators_size), + builder->CreateString("then_subgraph")), + CreateSubGraph( + *builder, builder->CreateVector(subgraph3_tensors, 3), + builder->CreateVector(operator_inputs, operator_inputs_size), + builder->CreateVector(operator_outputs, outputs_size), + builder->CreateVector(subgraph3_operators, operators_size), + builder->CreateString("else_subgraph")), + }; + constexpr size_t operator_codes_size = 3; + const Offset operator_codes[operator_codes_size] = { + CreateOperatorCodeDirect(*builder, /*deprecated_builtin_code=*/0, + "multiple_inputs_op", + /*version=*/0, BuiltinOperator_IF), + CreateOperatorCodeDirect(*builder, /*deprecated_builtin_code=*/0, + "multiple_inputs_op", + /*version=*/0, BuiltinOperator_ADD), + CreateOperatorCodeDirect(*builder, /*deprecated_builtin_code=*/0, + "multiple_inputs_op", + /*version=*/0, BuiltinOperator_MUL), + }; + const Offset model_offset = CreateModel( + *builder, 0, builder->CreateVector(operator_codes, operator_codes_size), + builder->CreateVector(subgraphs, subgraphs_size), + builder->CreateString("test_model"), + builder->CreateVector(buffers, buffers_size)); + FinishModelBuffer(*builder, model_offset); + void* model_pointer = builder->GetBufferPointer(); + const Model* model = flatbuffers::GetRoot(model_pointer); + return model; +} + +const Model* BuildSimpleModelWithIfAndEmptySubgraph() { + using flatbuffers::Offset; + flatbuffers::FlatBufferBuilder* builder = BuilderInstance(); + + constexpr size_t buffers_size = 1; + const Offset buffers[buffers_size] = { + CreateBuffer(*builder), + }; + const int32_t condition_tensor_shape[] = {1}; + const int32_t data_tensor_shape[] = {1, 2}; + constexpr size_t tensors_size = 4; + const Offset subgraph1_tensors[tensors_size] = { + CreateTensor(*builder, builder->CreateVector(condition_tensor_shape, 1), + TensorType_BOOL, 0, + builder->CreateString("condition tensor"), 0, false), + CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 2), + TensorType_FLOAT32, 0, + builder->CreateString("input_tensor1"), 0, false), + CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 2), + TensorType_FLOAT32, 0, + builder->CreateString("input_tensor2"), 0, false), + CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 2), + TensorType_FLOAT32, 0, + builder->CreateString("output_tensor"), 0, false), + }; + const Offset subgraph2_tensors[tensors_size] = { + CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 2), + TensorType_FLOAT32, 0, + builder->CreateString("input_tensor1"), 0, false), + CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 2), + TensorType_FLOAT32, 0, + 
builder->CreateString("input_tensor2"), 0, false), + CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 2), + TensorType_FLOAT32, 0, + builder->CreateString("output_tensor"), 0, false), + }; + const Offset subgraph3_tensors[tensors_size] = { + CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 2), + TensorType_FLOAT32, 0, + builder->CreateString("input_tensor1"), 0, false), + CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 2), + TensorType_FLOAT32, 0, + builder->CreateString("input_tensor2"), 0, false), + CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 2), + TensorType_FLOAT32, 0, + builder->CreateString("output_tensor"), 0, false), + }; + + constexpr size_t if_inputs_size = 3; + const int32_t if_inputs[if_inputs_size] = {0, 1, 2}; + constexpr size_t outputs_size = 1; + const int32_t if_outputs[outputs_size] = {3}; + constexpr size_t operator_inputs_size = 2; + const int32_t operator_inputs[operator_inputs_size] = {0, 1}; + const int32_t operator_outputs[outputs_size] = {2}; + constexpr size_t operators_size = 1; + const Offset subgraph1_operators[operators_size] = { + CreateOperator( + *builder, 0, builder->CreateVector(if_inputs, if_inputs_size), + builder->CreateVector(if_outputs, outputs_size), + BuiltinOptions_IfOptions, CreateIfOptions(*builder, 1, 2).Union()), + }; + const Offset subgraph2_operators[operators_size] = { + CreateOperator( + *builder, 1, + builder->CreateVector(operator_inputs, operator_inputs_size), + builder->CreateVector(operator_outputs, outputs_size), + BuiltinOptions_NONE), + }; + constexpr size_t subgraphs_size = 3; + const Offset subgraphs[subgraphs_size] = { + CreateSubGraph(*builder, builder->CreateVector(subgraph1_tensors, 4), + builder->CreateVector(if_inputs, if_inputs_size), + builder->CreateVector(if_outputs, outputs_size), + builder->CreateVector(subgraph1_operators, operators_size), + builder->CreateString("if_subgraph")), + CreateSubGraph( + *builder, builder->CreateVector(subgraph2_tensors, 3), + builder->CreateVector(operator_inputs, operator_inputs_size), + builder->CreateVector(operator_outputs, outputs_size), + builder->CreateVector(subgraph2_operators, operators_size), + builder->CreateString("then_subgraph")), + CreateSubGraph( + *builder, builder->CreateVector(subgraph3_tensors, 3), + builder->CreateVector(operator_inputs, operator_inputs_size), + builder->CreateVector(operator_outputs, outputs_size), 0, + builder->CreateString("else_subgraph")), + }; + constexpr size_t operator_codes_size = 3; + const Offset operator_codes[operator_codes_size] = { + CreateOperatorCodeDirect(*builder, /*deprecated_builtin_code=*/0, + "multiple_inputs_op", + /*version=*/0, BuiltinOperator_IF), + CreateOperatorCodeDirect(*builder, /*deprecated_builtin_code=*/0, + "multiple_inputs_op", + /*version=*/0, BuiltinOperator_ADD), + CreateOperatorCodeDirect(*builder, /*deprecated_builtin_code=*/0, + "multiple_inputs_op", + /*version=*/0, BuiltinOperator_MUL), + }; + const Offset model_offset = CreateModel( + *builder, 0, builder->CreateVector(operator_codes, operator_codes_size), + builder->CreateVector(subgraphs, subgraphs_size), + builder->CreateString("test_model"), + builder->CreateVector(buffers, buffers_size)); + FinishModelBuffer(*builder, model_offset); + void* model_pointer = builder->GetBufferPointer(); + const Model* model = flatbuffers::GetRoot(model_pointer); + return model; +} + +const Model* BuildSimpleModelWithSubgraphsAndWhile() { + using flatbuffers::Offset; + flatbuffers::FlatBufferBuilder* 
builder = BuilderInstance(); + + constexpr size_t buffers_size = 1; + const Offset buffers[buffers_size] = { + CreateBuffer(*builder), + }; + const int32_t data_tensor_shape[] = {1, 1}; + constexpr size_t while_tensors_size = 4; + constexpr size_t op_tensors_size = 3; + const Offset subgraph0_tensors[while_tensors_size] = { + CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 1), + TensorType_FLOAT32, 0, + builder->CreateString("input_tensor0"), 0, false), + CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 1), + TensorType_FLOAT32, 0, + builder->CreateString("input_tensor1"), 0, false), + CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 1), + TensorType_FLOAT32, 0, + builder->CreateString("output_tensor0"), 0, false), + CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 1), + TensorType_FLOAT32, 0, + builder->CreateString("output_tensor1"), 0, false), + }; + const Offset subgraph1_tensors[op_tensors_size] = { + CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 1), + TensorType_FLOAT32, 0, + builder->CreateString("input_tensor1"), 0, false), + CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 1), + TensorType_FLOAT32, 0, + builder->CreateString("input_tensor2"), 0, false), + CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 1), + TensorType_BOOL, 0, + builder->CreateString("condition_tensor"), 0, false), + }; + const Offset subgraph2_tensors[op_tensors_size] = { + CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 1), + TensorType_FLOAT32, 0, + builder->CreateString("input_tensor0"), 0, false), + CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 1), + TensorType_FLOAT32, 0, + builder->CreateString("input_tensor1"), 0, false), + CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 1), + TensorType_FLOAT32, 0, + builder->CreateString("output_tensor0"), 0, false), + }; + + constexpr size_t inputs_size = 2; + const int32_t inputs[inputs_size] = {0, 1}; + constexpr size_t while_outputs_size = 2; + const int32_t while_outputs[while_outputs_size] = {2, 3}; + constexpr size_t cond_outputs_size = 1; + const int32_t cond_outputs[cond_outputs_size] = {2}; + constexpr size_t add_outputs_size = 1; + const int32_t add_outputs[add_outputs_size] = {2}; + constexpr size_t add_subgraph_outputs_size = 2; + const int32_t add_subgraph_outputs[add_subgraph_outputs_size] = {2, 1}; + constexpr size_t operators_size = 1; + const Offset subgraph0_operators[operators_size] = { + CreateOperator(*builder, 0, builder->CreateVector(inputs, inputs_size), + builder->CreateVector(while_outputs, while_outputs_size), + BuiltinOptions_WhileOptions, + CreateWhileOptions(*builder, 1, 2).Union()), + }; + const Offset subgraph1_operators[operators_size] = { + CreateOperator(*builder, 1, builder->CreateVector(inputs, inputs_size), + builder->CreateVector(cond_outputs, cond_outputs_size), + BuiltinOptions_NONE), + }; + const Offset subgraph2_operators[operators_size] = { + CreateOperator(*builder, 2, builder->CreateVector(inputs, inputs_size), + builder->CreateVector(add_outputs, add_outputs_size), + BuiltinOptions_NONE), + }; + constexpr size_t subgraphs_size = 3; + const Offset subgraphs[subgraphs_size] = { + CreateSubGraph(*builder, builder->CreateVector(subgraph0_tensors, 4), + builder->CreateVector(inputs, inputs_size), + builder->CreateVector(while_outputs, while_outputs_size), + builder->CreateVector(subgraph0_operators, operators_size), + builder->CreateString("while_subgraph")), + 
CreateSubGraph(*builder, builder->CreateVector(subgraph1_tensors, 3), + builder->CreateVector(inputs, inputs_size), + builder->CreateVector(cond_outputs, cond_outputs_size), + builder->CreateVector(subgraph1_operators, operators_size), + builder->CreateString("cond_subgraph")), + CreateSubGraph(*builder, builder->CreateVector(subgraph2_tensors, 3), + builder->CreateVector(inputs, inputs_size), + builder->CreateVector(add_subgraph_outputs, + add_subgraph_outputs_size), + builder->CreateVector(subgraph2_operators, operators_size), + builder->CreateString("body_subgraph")), + }; + constexpr size_t operator_codes_size = 3; + const Offset operator_codes[operator_codes_size] = { + CreateOperatorCodeDirect(*builder, /*deprecated_builtin_code=*/0, + "multiple_inputs_op", + /*version=*/0, BuiltinOperator_WHILE), + CreateOperatorCodeDirect(*builder, /*deprecated_builtin_code=*/0, + "multiple_inputs_op", + /*version=*/0, BuiltinOperator_LESS), + CreateOperatorCodeDirect(*builder, /*deprecated_builtin_code=*/0, + "multiple_inputs_op", + /*version=*/0, BuiltinOperator_ADD), + }; + const Offset model_offset = CreateModel( + *builder, 0, builder->CreateVector(operator_codes, operator_codes_size), + builder->CreateVector(subgraphs, subgraphs_size), + builder->CreateString("test_model"), + builder->CreateVector(buffers, buffers_size)); + FinishModelBuffer(*builder, model_offset); + void* model_pointer = builder->GetBufferPointer(); + const Model* model = flatbuffers::GetRoot(model_pointer); + return model; +} + +// Build a model with If and two subgraphs: two data tensors A1 of size 2, A2 of +// size 4 are first concatenated, then cut to a new tensor A3 of size 3; the new +// tensor A3 of size 3 is then concatenated with A2 tensor of size 4 to produce +// a final output tensor A4. This model is specially crafted to capture the +// corner case outlined in go/avoid-memory-corruption-in-if-operator. +// +// Subgraph0 +// A0(1) A2_0(4) A1_0(2) +// | | | ---+ +// v v v | +// +--------------+ | +// | IF | | +// +------+-------+ | +// | A3_0(3) | +// v | +// +--------------+ | +// | CUSTOM |<---+ +// +------+-------+ +// | +// v +// A4_0(8) +// +// Subgraph1/2 +// A1_1(2) A2_1(4) +// | | +// v v +// +---------------+ +// | CUSTOM | +// +-------+-------+ +// | +// v A3_1(3) +// +// And it leads to memory plan as below +// +// Subgraph0 Layout +// +// +// <------------A4_0 -------------> <----- A2_0-------> <----A3_0 ---> +// +----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+ +// | | | | | | | | | 3 | 4 | 5 | 6 | | | | +// +----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+ +// +// +----+----+----+ +// | 1 | 2 | A0 | +// +----+----+----+ +// <---A1_0--> +// +// Subgraph 1 Layout +// +// +----+----+----+----+----+----+----+----+----+ +// | | | | | | | | | | +// +----+----+----+----+----+----+----+----+----+ +// +// +// <------A2_1 -------><----A3_1 ---><--A1_1---> +// +// +// A1_1 of subgraph 1 will overlap with A2_0 of subgraph 0. +// In a buggy implementation of IF, two overwrite may happen: +// 1. copying input from A1_0 to A1_1 overwrites A2_0 before A2_0 is copied to +// A2_1; thus subgraph 1 produce incorrect output. +// 2. 
copying output from A3_1 to A4_0 overwrites A1_0, which should remain +// intact so that it can be used by the OP after the IF operator in subgraph 0 +// + +const Model* BuildModelWithIfAndSubgraphInputTensorOverlap() { + using flatbuffers::Offset; + flatbuffers::FlatBufferBuilder* builder = BuilderInstance(); + + constexpr TensorType kTensorType = TensorType_INT32; + constexpr int kBlockSize = + tflite::MicroArenaBufferAlignment() / sizeof(int32_t); + constexpr size_t kBuffersCount = 1; + const Offset buffers[kBuffersCount] = { + CreateBuffer(*builder), + }; + const int32_t kConditionTensorShape[] = {1}; + const int32_t kIfInput1TensorShape[] = {2 * kBlockSize}; + const int32_t kIfInput2TensorShape[] = {4 * kBlockSize}; + const int32_t kIfOutputTensorShape[] = {3 * kBlockSize}; + const int32_t kFinalOutputTensorShape[] = {8 * kBlockSize}; + constexpr size_t kSubgraph0TensorsCount = 5; + const Offset kSubgraph0Tensors[kSubgraph0TensorsCount] = { + CreateTensor(*builder, builder->CreateVector(kConditionTensorShape, 1), + TensorType_BOOL, 0, + builder->CreateString("condition tensor"), 0, false), + CreateTensor(*builder, builder->CreateVector(kIfInput1TensorShape, 1), + kTensorType, 0, builder->CreateString("if_input_tensor1"), 0, + false), + CreateTensor(*builder, builder->CreateVector(kIfInput2TensorShape, 1), + kTensorType, 0, builder->CreateString("if_input_tensor2"), 0, + false), + CreateTensor(*builder, builder->CreateVector(kIfOutputTensorShape, 1), + kTensorType, 0, builder->CreateString("if_output_tensor"), 0, + false), + CreateTensor(*builder, builder->CreateVector(kFinalOutputTensorShape, 1), + kTensorType, 0, builder->CreateString("final_output_tensor"), + 0, false), + }; + + // Subgraph 1 is the chosen path if condition tensor in IF is true. 
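+  // Sizing note (illustrative): MicroArenaBufferAlignment() is commonly 16
+  // bytes, which would make kBlockSize 4 int32 elements; the 2-, 4- and
+  // 3-block tensors declared for this subgraph then correspond to A1_1, A2_1
+  // and A3_1 in the memory-plan diagram above.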
+ constexpr size_t kSubgraph1TensorsCount = 3; + const Offset kSubgraph1Tensors[kSubgraph1TensorsCount] = { + CreateTensor(*builder, builder->CreateVector(kIfInput1TensorShape, 1), + kTensorType, 0, + builder->CreateString("subgraph1_input_tensor1"), 0, false), + CreateTensor(*builder, builder->CreateVector(kIfInput2TensorShape, 1), + kTensorType, 0, + builder->CreateString("subgraph1_input_tensor2"), 0, false), + CreateTensor(*builder, builder->CreateVector(kIfOutputTensorShape, 1), + kTensorType, 0, + builder->CreateString("subgraph1_output_tensor"), 0, false), + }; + + // Subgraph 2 is the chosen path if condition tensor in IF is false + constexpr size_t kSubgraph2TensorsCount = 3; + const Offset kSubgraph2Tensors[kSubgraph2TensorsCount] = { + CreateTensor(*builder, builder->CreateVector(kIfInput1TensorShape, 1), + kTensorType, 0, builder->CreateString("if_input_tensor1"), 0, + false), + CreateTensor(*builder, builder->CreateVector(kIfInput2TensorShape, 1), + kTensorType, 0, builder->CreateString("if_input_tensor2"), 0, + false), + CreateTensor(*builder, builder->CreateVector(kIfOutputTensorShape, 1), + kTensorType, 0, builder->CreateString("if_output_tensor"), 0, + false), + }; + + constexpr int kIfOpCodeIndex = 0; + constexpr int kCustomOpCodeIndex = 1; + + constexpr size_t kIfInputsCount = 3; + const int32_t kIfInputs[kIfInputsCount] = {0, 1, 2}; + constexpr size_t kOutputsCount = 1; + const int32_t kIfOutputs[kOutputsCount] = {3}; + constexpr size_t kOpAfterIfInputsCount = 2; + const int32_t kOpAfterIfInputs[kOpAfterIfInputsCount] = {3, 2}; + const int32_t kOpAfterIfOutputs[kOutputsCount] = {4}; + constexpr size_t kOperatorsCount = 2; + const Offset kSubgraph0Operators[kOperatorsCount] = { + CreateOperator(*builder, kIfOpCodeIndex, + builder->CreateVector(kIfInputs, kIfInputsCount), + builder->CreateVector(kIfOutputs, kOutputsCount), + BuiltinOptions_IfOptions, + CreateIfOptions(*builder, 1, 2).Union()), + CreateOperator( + *builder, kCustomOpCodeIndex, + builder->CreateVector(kOpAfterIfInputs, kOpAfterIfInputsCount), + builder->CreateVector(kOpAfterIfOutputs, kOutputsCount)), + }; + + constexpr size_t kSubgraph1InputsCount = 2; + const int32_t kSubgraph1Inputs[kSubgraph1InputsCount] = {0, 1}; + constexpr size_t kSubgraph1OutputsCount = 1; + const int32_t kSubgraph1Outputs[kSubgraph1OutputsCount] = {2}; + constexpr size_t kSubgraph1OperatorsCount = 1; + const Offset kSubgraph1Operators[kSubgraph1OperatorsCount] = { + CreateOperator( + *builder, kCustomOpCodeIndex, + builder->CreateVector(kSubgraph1Inputs, kSubgraph1InputsCount), + builder->CreateVector(kSubgraph1Outputs, kSubgraph1OutputsCount), + BuiltinOptions_NONE), + }; + + constexpr size_t kSubgraph2InputsCount = 2; + const int32_t kSubgraph2Inputs[kSubgraph2InputsCount] = {0, 1}; + constexpr size_t kSubgraph2OutputsCount = 1; + const int32_t kSubgraph2Outputs[kSubgraph2OutputsCount] = {2}; + constexpr size_t kSubgraph2OperatorsCount = 1; + const Offset kSubgraph2Operators[kSubgraph2OperatorsCount] = { + CreateOperator( + *builder, kCustomOpCodeIndex, + builder->CreateVector(kSubgraph2Inputs, kSubgraph2InputsCount), + builder->CreateVector(kSubgraph2Outputs, kSubgraph2OutputsCount), + BuiltinOptions_NONE), + }; + + constexpr size_t kSubgraphsCount = 3; + const Offset kSubgraphs[kSubgraphsCount] = { + CreateSubGraph( + *builder, + builder->CreateVector(kSubgraph0Tensors, kSubgraph0TensorsCount), + builder->CreateVector(kIfInputs, kIfInputsCount), + builder->CreateVector(kOpAfterIfOutputs, kOutputsCount), + 
builder->CreateVector(kSubgraph0Operators, kOperatorsCount), + builder->CreateString("if_subgraph")), + CreateSubGraph( + *builder, + builder->CreateVector(kSubgraph1Tensors, kSubgraph1TensorsCount), + builder->CreateVector(kSubgraph1Inputs, kSubgraph1InputsCount), + builder->CreateVector(kSubgraph1Outputs, kSubgraph1OutputsCount), + builder->CreateVector(kSubgraph1Operators, kSubgraph1OperatorsCount), + builder->CreateString("then_subgraph")), + CreateSubGraph( + *builder, + builder->CreateVector(kSubgraph2Tensors, kSubgraph2TensorsCount), + builder->CreateVector(kSubgraph2Inputs, kSubgraph2InputsCount), + builder->CreateVector(kSubgraph2Outputs, kSubgraph2OutputsCount), + builder->CreateVector(kSubgraph2Operators, kSubgraph2OperatorsCount), + builder->CreateString("else_subgraph")), + }; + + constexpr size_t kOperatorCodesCount = 2; + const Offset kOperatorCodes[kOperatorCodesCount] = { + CreateOperatorCodeDirect(*builder, /*deprecated_builtin_code=*/0, "if", + /*version=*/0, BuiltinOperator_IF), + CreateOperatorCodeDirect(*builder, /*deprecated_builtin_code=*/0, + "custom_packer_op", + /*version=*/0, BuiltinOperator_CUSTOM), + }; + const Offset kModelOffset = CreateModel( + *builder, 0, builder->CreateVector(kOperatorCodes, kOperatorCodesCount), + builder->CreateVector(kSubgraphs, kSubgraphsCount), + builder->CreateString("test_model"), + builder->CreateVector(buffers, kBuffersCount)); + FinishModelBuffer(*builder, kModelOffset); + void* model_pointer = builder->GetBufferPointer(); + const Model* model = flatbuffers::GetRoot(model_pointer); + return model; +} + +// Mock model with one main subgraph containing a single CALL_ONCE op (with null +// inputs and outputs) which invokes a second subgraph which has null inputs and +// outputs. +const Model* BuildSimpleMockModelWithNullInputsOutputs() { + using flatbuffers::Offset; + flatbuffers::FlatBufferBuilder* builder = BuilderInstance(); + + constexpr size_t buffers_size = 1; + const Offset buffers[buffers_size] = { + CreateBuffer(*builder), + }; + constexpr size_t tensor_shape_size = 1; + const int32_t tensor_shape[tensor_shape_size] = {0}; + constexpr size_t tensors_size = 1; + const Offset tensors[tensors_size] = { + CreateTensor(*builder, + builder->CreateVector(tensor_shape, tensor_shape_size), + TensorType_INT32, 0, + builder->CreateString("test_input_tensor1"), 0, false), + }; + constexpr size_t subgraph0_inputs_size = 1; + const int32_t subgraph0_inputs[subgraph0_inputs_size] = {0}; + constexpr size_t subgraph0_outputs_size = 1; + const int32_t subgraph0_outputs[subgraph0_outputs_size] = {0}; + constexpr size_t operators_size = 1; + const Offset subgraph0_operators[operators_size] = { + CreateOperator(*builder, 0, {}, {}, BuiltinOptions_CallOnceOptions, + CreateCallOnceOptions(*builder, 1).Union()), + }; + const Offset subgraph1_operators[operators_size] = { + CreateOperator(*builder, 1, {}, {}, BuiltinOptions_NONE)}; + constexpr size_t subgraphs_size = 2; + const Offset subgraphs[subgraphs_size] = { + CreateSubGraph( + *builder, builder->CreateVector(tensors, tensors_size), + builder->CreateVector(subgraph0_inputs, subgraph0_inputs_size), + builder->CreateVector(subgraph0_outputs, subgraph0_outputs_size), + builder->CreateVector(subgraph0_operators, operators_size), + builder->CreateString("main_subgraph")), + CreateSubGraph(*builder, builder->CreateVector(tensors, tensors_size), {}, + {}, + builder->CreateVector(subgraph1_operators, operators_size), + builder->CreateString("secondary subgraph")), + }; + constexpr size_t 
operator_codes_size = 2; + const Offset operator_codes[operator_codes_size] = { + CreateOperatorCodeDirect(*builder, /*deprecated_builtin_code=*/0, + "call_once_op", + /*version=*/0, BuiltinOperator_CALL_ONCE), + CreateOperatorCodeDirect(*builder, /*deprecated_builtin_code=*/0, "no_op", + /*version=*/0, BuiltinOperator_CUSTOM)}; + const Offset model_offset = CreateModel( + *builder, 0, builder->CreateVector(operator_codes, operator_codes_size), + builder->CreateVector(subgraphs, subgraphs_size), + builder->CreateString("test_model"), + builder->CreateVector(buffers, buffers_size)); + FinishModelBuffer(*builder, model_offset); + void* model_pointer = builder->GetBufferPointer(); + const Model* model = flatbuffers::GetRoot(model_pointer); + return model; +} + +} // namespace + +const TfLiteRegistration* SimpleStatefulOp::getRegistration() { + return GetMutableRegistration(); +} + +TfLiteRegistration* SimpleStatefulOp::GetMutableRegistration() { + static TfLiteRegistration r; + r.init = Init; + r.prepare = Prepare; + r.invoke = Invoke; + return &r; +} + +void* SimpleStatefulOp::Init(TfLiteContext* context, const char* buffer, + size_t length) { + TFLITE_DCHECK(context->AllocateBufferForEval == nullptr); + TFLITE_DCHECK(context->GetScratchBuffer == nullptr); + TFLITE_DCHECK(context->RequestScratchBufferInArena == nullptr); + + void* raw = context->AllocatePersistentBuffer(context, sizeof(OpData)); + OpData* data = reinterpret_cast(raw); + *data = {}; + return raw; +} + +TfLiteStatus SimpleStatefulOp::Prepare(TfLiteContext* context, + TfLiteNode* node) { + OpData* data = reinterpret_cast(node->user_data); + + // Make sure that the input is in uint8_t with at least 1 data entry. + MicroContext* micro_context = GetMicroContext(context); + TfLiteTensor* input = + micro_context->AllocateTempInputTensor(node, kInputTensor); + TF_LITE_ENSURE(context, input != nullptr); + + if (input->type != kTfLiteInt8) return kTfLiteError; + if (NumElements(input->dims) == 0) return kTfLiteError; + + // Allocate a temporary buffer with the same size of input for sorting. + TF_LITE_ENSURE_STATUS(context->RequestScratchBufferInArena( + context, sizeof(uint8_t) * NumElements(input->dims), + &data->sorting_buffer)); + // We can interleave scratch / persistent buffer allocation. + data->invoke_count = reinterpret_cast( + context->AllocatePersistentBuffer(context, sizeof(int))); + *data->invoke_count = 0; + + micro_context->DeallocateTempTfLiteTensor(input); + return kTfLiteOk; +} + +TfLiteStatus SimpleStatefulOp::Invoke(TfLiteContext* context, + TfLiteNode* node) { + OpData* data = reinterpret_cast(node->user_data); + *data->invoke_count += 1; + + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + TF_LITE_ENSURE(context, input != nullptr); + const uint8_t* input_data = input->data.uint8; + int size = NumElements(input->dims); + + uint8_t* sorting_buffer = reinterpret_cast( + context->GetScratchBuffer(context, data->sorting_buffer)); + // Copy inputs data to the sorting buffer. We don't want to mutate the input + // tensor as it might be used by a another node. + for (int i = 0; i < size; i++) { + sorting_buffer[i] = input_data[i]; + } + + // In place insertion sort on `sorting_buffer`. 
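+  // (Insertion sort is O(n^2) but sufficient for the tiny test inputs used
+  // here; once sorted, sorting_buffer[size / 2] is reported as the median,
+  // which for an even element count is the upper of the two middle values.)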
+ for (int i = 1; i < size; i++) { + for (int j = i; j > 0 && sorting_buffer[j] < sorting_buffer[j - 1]; j--) { + std::swap(sorting_buffer[j], sorting_buffer[j - 1]); + } + } + + TfLiteEvalTensor* median = + tflite::micro::GetEvalOutput(context, node, kMedianTensor); + TF_LITE_ENSURE(context, median != nullptr); + uint8_t* median_data = median->data.uint8; + TfLiteEvalTensor* invoke_count = + tflite::micro::GetEvalOutput(context, node, kInvokeCount); + TF_LITE_ENSURE(context, invoke_count != nullptr); + int32_t* invoke_count_data = invoke_count->data.i32; + + median_data[0] = sorting_buffer[size / 2]; + invoke_count_data[0] = *data->invoke_count; + return kTfLiteOk; +} + +const TfLiteRegistration* MockCustom::getRegistration() { + return GetMutableRegistration(); +} + +TfLiteRegistration* MockCustom::GetMutableRegistration() { + static TfLiteRegistration r; + r.init = Init; + r.prepare = Prepare; + r.invoke = Invoke; + r.free = Free; + return &r; +} + +void* MockCustom::Init(TfLiteContext* context, const char* buffer, + size_t length) { + // We don't support delegate in TFL micro. This is a weak check to test if + // context struct being zero-initialized. + TFLITE_DCHECK(context->ReplaceNodeSubsetsWithDelegateKernels == nullptr); + freed_ = false; + // Do nothing. + return nullptr; +} + +void MockCustom::Free(TfLiteContext* context, void* buffer) { freed_ = true; } + +TfLiteStatus MockCustom::Prepare(TfLiteContext* context, TfLiteNode* node) { + return kTfLiteOk; +} + +TfLiteStatus MockCustom::Invoke(TfLiteContext* context, TfLiteNode* node) { + const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0); + TF_LITE_ENSURE(context, input != nullptr); + const int32_t* input_data = input->data.i32; + const TfLiteEvalTensor* weight = + tflite::micro::GetEvalInput(context, node, 1); + TF_LITE_ENSURE(context, weight != nullptr); + const uint8_t* weight_data = weight->data.uint8; + TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0); + TF_LITE_ENSURE(context, output != nullptr); + int32_t* output_data = output->data.i32; + output_data[0] = + 0; // Catch output tensor sharing memory with an input tensor + output_data[0] = input_data[0] + weight_data[0]; + return kTfLiteOk; +} + +bool MockCustom::freed_ = false; + +const TfLiteRegistration* MultipleInputs::getRegistration() { + return GetMutableRegistration(); +} + +TfLiteRegistration* MultipleInputs::GetMutableRegistration() { + static TfLiteRegistration r; + r.init = Init; + r.prepare = Prepare; + r.invoke = Invoke; + r.free = Free; + return &r; +} + +void* MultipleInputs::Init(TfLiteContext* context, const char* buffer, + size_t length) { + // We don't support delegate in TFL micro. This is a weak check to test if + // context struct being zero-initialized. + TFLITE_DCHECK(context->ReplaceNodeSubsetsWithDelegateKernels == nullptr); + freed_ = false; + // Do nothing. 
+ return nullptr; +} + +void MultipleInputs::Free(TfLiteContext* context, void* buffer) { + freed_ = true; +} + +TfLiteStatus MultipleInputs::Prepare(TfLiteContext* context, TfLiteNode* node) { + return kTfLiteOk; +} + +TfLiteStatus MultipleInputs::Invoke(TfLiteContext* context, TfLiteNode* node) { + const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0); + TF_LITE_ENSURE(context, input != nullptr); + const int32_t* input_data = input->data.i32; + const TfLiteEvalTensor* input1 = + tflite::micro::GetEvalInput(context, node, 1); + TF_LITE_ENSURE(context, input1 != nullptr); + const int32_t* input_data1 = input1->data.i32; + const TfLiteEvalTensor* input2 = + tflite::micro::GetEvalInput(context, node, 2); + TF_LITE_ENSURE(context, input2 != nullptr); + const int32_t* input_data2 = input2->data.i32; + + TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0); + TF_LITE_ENSURE(context, output != nullptr); + int32_t* output_data = output->data.i32; + output_data[0] = + 0; // Catch output tensor sharing memory with an input tensor + output_data[0] = input_data[0] + input_data1[0] + input_data2[0]; + return kTfLiteOk; +} + +bool MultipleInputs::freed_ = false; + +const TfLiteRegistration* NoOp::getRegistration() { + return GetMutableRegistration(); +} + +TfLiteRegistration* NoOp::GetMutableRegistration() { + static TfLiteRegistration r; + r.init = Init; + r.prepare = Prepare; + r.invoke = Invoke; + r.free = Free; + return &r; +} + +void* NoOp::Init(TfLiteContext* context, const char* buffer, size_t length) { + // We don't support delegate in TFL micro. This is a weak check to test if + // context struct being zero-initialized. + TFLITE_DCHECK(context->ReplaceNodeSubsetsWithDelegateKernels == nullptr); + freed_ = false; + // Do nothing. 
+ return nullptr; +} + +void NoOp::Free(TfLiteContext* context, void* buffer) { freed_ = true; } + +TfLiteStatus NoOp::Prepare(TfLiteContext* context, TfLiteNode* node) { + return kTfLiteOk; +} + +TfLiteStatus NoOp::Invoke(TfLiteContext* context, TfLiteNode* node) { + return kTfLiteOk; +} + +bool NoOp::freed_ = false; + +AllOpsResolver GetOpResolver() { + AllOpsResolver op_resolver; + op_resolver.AddCustom("mock_custom", MockCustom::GetMutableRegistration()); + op_resolver.AddCustom("simple_stateful_op", + SimpleStatefulOp::GetMutableRegistration()); + op_resolver.AddCustom("multiple_inputs_op", + MultipleInputs::GetMutableRegistration()); + op_resolver.AddCustom("no_op", NoOp::GetMutableRegistration()); + op_resolver.AddCustom("custom_packer_op", PackerOp::GetMutableRegistration()); + return op_resolver; +} + +const Model* GetModelWithUnusedInputs() { + static Model* model = nullptr; + if (!model) { + model = const_cast(BuildModelWithUnusedInputs()); + } + return model; +} + +const Model* GetModelWithUnusedOperatorOutputs() { + static Model* model = nullptr; + if (!model) { + model = const_cast(BuildModelWithUnusedOperatorOutputs()); + } + return model; +} + +const Model* GetModelWith256x256Tensor() { + static const Model* model = BuildModelWith256x256Tensor(); + return model; +} + +const Model* GetSimpleMockModel() { + static Model* model = nullptr; + if (!model) { + model = const_cast(BuildSimpleMockModel()); + } + return model; +} + +const Model* GetSimpleMultipleInputsModel() { + static Model* model = nullptr; + if (!model) { + model = const_cast(BuildSimpleMultipleInputsModel()); + } + return model; +} + +const Model* GetSimpleModelWithSubgraphsAndIf() { + static Model* model = nullptr; + if (!model) { + model = const_cast(BuildSimpleModelWithSubgraphsAndIf()); + } + return model; +} + +const Model* GetSimpleModelWithIfAndEmptySubgraph() { + static Model* model = nullptr; + if (!model) { + model = const_cast(BuildSimpleModelWithIfAndEmptySubgraph()); + } + return model; +} + +const Model* GetSimpleModelWithSubgraphsAndWhile() { + static Model* model = nullptr; + if (!model) { + model = const_cast(BuildSimpleModelWithSubgraphsAndWhile()); + } + return model; +} + +const Model* GetModelWithIfAndSubgraphInputTensorOverlap() { + static Model* model = nullptr; + if (!model) { + model = const_cast(BuildModelWithIfAndSubgraphInputTensorOverlap()); + } + return model; +} + +const Model* GetSimpleModelWithNullInputsAndOutputs() { + static Model* model = nullptr; + if (!model) { + model = const_cast(BuildSimpleMockModelWithNullInputsOutputs()); + } + return model; +} + +const Model* GetComplexMockModel() { + static Model* model = nullptr; + if (!model) { + model = const_cast(BuildComplexMockModel()); + } + return model; +} + +const Model* GetSimpleModelWithBranch() { + static Model* model = nullptr; + if (!model) { + model = const_cast(BuildSimpleModelWithBranch()); + } + return model; +} + +const Model* GetModelWithOfflinePlanning(int num_tensors, + const int32_t* metadata_buffer, + NodeConnection* node_conn, + int num_conns, + int num_subgraph_inputs) { + const Model* model = BuildModelWithOfflinePlanning( + num_tensors, metadata_buffer, node_conn, num_conns, num_subgraph_inputs); + return model; +} + +const Model* GetSimpleStatefulModel() { + static Model* model = nullptr; + if (!model) { + model = const_cast(BuildSimpleStatefulModel()); + } + return model; +} + +const Tensor* Create1dFlatbufferTensor(int size, bool is_variable) { + using flatbuffers::Offset; + 
flatbuffers::FlatBufferBuilder* builder = BuilderInstance(); + constexpr size_t tensor_shape_size = 1; + const int32_t tensor_shape[tensor_shape_size] = {size}; + const Offset tensor_offset = CreateTensor( + *builder, builder->CreateVector(tensor_shape, tensor_shape_size), + TensorType_INT32, 0, builder->CreateString("test_tensor"), 0, + is_variable); + builder->Finish(tensor_offset); + void* tensor_pointer = builder->GetBufferPointer(); + const Tensor* tensor = flatbuffers::GetRoot(tensor_pointer); + return tensor; +} + +const Tensor* CreateQuantizedFlatbufferTensor(int size) { + using flatbuffers::Offset; + flatbuffers::FlatBufferBuilder* builder = BuilderInstance(); + constexpr size_t quant_params_size = 1; + const float min_array[quant_params_size] = {0.1f}; + const float max_array[quant_params_size] = {0.2f}; + const float scale_array[quant_params_size] = {0.3f}; + const int64_t zero_point_array[quant_params_size] = {100ll}; + + const Offset quant_params = + CreateQuantizationParameters( + *builder, + /*min=*/builder->CreateVector(min_array, quant_params_size), + /*max=*/builder->CreateVector(max_array, quant_params_size), + /*scale=*/ + builder->CreateVector(scale_array, quant_params_size), + /*zero_point=*/ + builder->CreateVector(zero_point_array, quant_params_size)); + + constexpr size_t tensor_shape_size = 1; + const int32_t tensor_shape[tensor_shape_size] = {size}; + const Offset tensor_offset = CreateTensor( + *builder, builder->CreateVector(tensor_shape, tensor_shape_size), + TensorType_INT32, 0, builder->CreateString("test_tensor"), quant_params, + false); + builder->Finish(tensor_offset); + void* tensor_pointer = builder->GetBufferPointer(); + const Tensor* tensor = flatbuffers::GetRoot(tensor_pointer); + return tensor; +} + +const Tensor* CreateMissingQuantizationFlatbufferTensor(int size) { + using flatbuffers::Offset; + flatbuffers::FlatBufferBuilder* builder = BuilderInstance(); + const Offset quant_params = + CreateQuantizationParameters(*builder, 0, 0, 0, 0, + QuantizationDetails_NONE, 0, 0); + constexpr size_t tensor_shape_size = 1; + const int32_t tensor_shape[tensor_shape_size] = {size}; + const Offset tensor_offset = CreateTensor( + *builder, builder->CreateVector(tensor_shape, tensor_shape_size), + TensorType_INT32, 0, builder->CreateString("test_tensor"), quant_params, + false); + builder->Finish(tensor_offset); + void* tensor_pointer = builder->GetBufferPointer(); + const Tensor* tensor = flatbuffers::GetRoot(tensor_pointer); + return tensor; +} + +const flatbuffers::Vector>* +CreateFlatbufferBuffers() { + using flatbuffers::Offset; + flatbuffers::FlatBufferBuilder* builder = BuilderInstance(); + constexpr size_t buffers_size = 1; + const Offset buffers[buffers_size] = { + CreateBuffer(*builder), + }; + const flatbuffers::Offset>> + buffers_offset = builder->CreateVector(buffers, buffers_size); + builder->Finish(buffers_offset); + void* buffers_pointer = builder->GetBufferPointer(); + const flatbuffers::Vector>* result = + flatbuffers::GetRoot>>( + buffers_pointer); + return result; +} + +int TestStrcmp(const char* a, const char* b) { + if ((a == nullptr) || (b == nullptr)) { + return -1; + } + while ((*a != 0) && (*a == *b)) { + a++; + b++; + } + return *reinterpret_cast(a) - + *reinterpret_cast(b); +} + +// Create a TfLiteIntArray from an array of ints. The first element in the +// supplied array must be the size of the array expressed as an int. 
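+// Illustrative usage: TfLiteIntArray is laid out as an int size followed by
+// the int data, so the cast below lets a plain C array double as one, e.g.
+//   int dims[] = {2, 3, 4};
+//   TfLiteIntArray* a = IntArrayFromInts(dims);  // a->size == 2, data {3, 4}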
+TfLiteIntArray* IntArrayFromInts(int* int_array) { + return reinterpret_cast(int_array); +} + +// Create a TfLiteFloatArray from an array of floats. The first element in the +// supplied array must be the size of the array expressed as a float. +TfLiteFloatArray* FloatArrayFromFloats(const float* floats) { + static_assert(sizeof(float) == sizeof(int), + "assumes sizeof(float) == sizeof(int) to perform casting"); + int size = static_cast(floats[0]); + *reinterpret_cast(const_cast(floats)) = size; + return reinterpret_cast(const_cast(floats)); +} + +TfLiteTensor CreateQuantizedBiasTensor(const float* data, int16_t* quantized, + TfLiteIntArray* dims, float input_scale, + float weights_scale, bool is_variable) { + float bias_scale = input_scale * weights_scale; + tflite::SymmetricQuantize(data, quantized, ElementCount(*dims), bias_scale); + + // Quantized int16_t tensors always have a zero point of 0, since the range of + // int16_t values is large, and because zero point costs extra cycles during + // processing. + TfLiteTensor result = + CreateQuantizedTensor(quantized, dims, bias_scale, 0, is_variable); + return result; +} + +TfLiteTensor CreateQuantizedBiasTensor(const float* data, int32_t* quantized, + TfLiteIntArray* dims, float input_scale, + float weights_scale, bool is_variable) { + float bias_scale = input_scale * weights_scale; + tflite::SymmetricQuantize(data, quantized, ElementCount(*dims), bias_scale); + + // Quantized int32_t tensors always have a zero point of 0, since the range of + // int32_t values is large, and because zero point costs extra cycles during + // processing. + TfLiteTensor result = + CreateQuantizedTensor(quantized, dims, bias_scale, 0, is_variable); + return result; +} + +TfLiteTensor CreateQuantizedBiasTensor(const float* data, + std::int64_t* quantized, + TfLiteIntArray* dims, float input_scale, + float weights_scale, bool is_variable) { + float bias_scale = input_scale * weights_scale; + tflite::SymmetricQuantize(data, quantized, ElementCount(*dims), bias_scale); + + // Quantized int32_t tensors always have a zero point of 0, since the range of + // int32_t values is large, and because zero point costs extra cycles during + // processing. + TfLiteTensor result = + CreateQuantizedTensor(quantized, dims, bias_scale, 0, is_variable); + return result; +} + +// Quantizes int32_t bias tensor with per-channel weights determined by input +// scale multiplied by weight scale for each channel. 
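+// For illustration: each channel c gets bias_scale[c] = input_scale *
+// weight_scales[c] with a zero point of 0, so input_scale = 0.5f and
+// weight_scales = {0.1f, 0.2f} yield per-channel bias scales {0.05f, 0.1f}.
+// The first element of `scales` and `zero_points` carries the channel count,
+// matching the size-prefixed layout expected by FloatArrayFromFloats and
+// IntArrayFromInts above.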
+template +TfLiteTensor CreatePerChannelQuantizedBiasTensor( + const float* input, T* quantized, TfLiteIntArray* dims, float input_scale, + float* weight_scales, float* scales, int* zero_points, + TfLiteAffineQuantization* affine_quant, int quantized_dimension, + bool is_variable) { + int input_size = ElementCount(*dims); + int num_channels = dims->data[quantized_dimension]; + // First element is reserved for array length + zero_points[0] = num_channels; + scales[0] = static_cast(num_channels); + float* scales_array = &scales[1]; + for (int i = 0; i < num_channels; i++) { + scales_array[i] = input_scale * weight_scales[i]; + zero_points[i + 1] = 0; + } + + SymmetricPerChannelQuantize(input, quantized, input_size, num_channels, + scales_array); + + affine_quant->scale = FloatArrayFromFloats(scales); + affine_quant->zero_point = IntArrayFromInts(zero_points); + affine_quant->quantized_dimension = quantized_dimension; + + TfLiteTensor result = CreateTensor(quantized, dims, is_variable); + result.quantization = {kTfLiteAffineQuantization, affine_quant}; + return result; +} + +TfLiteTensor CreatePerChannelQuantizedBiasTensor( + const float* input, int32_t* quantized, TfLiteIntArray* dims, + float input_scale, float* weight_scales, float* scales, int* zero_points, + TfLiteAffineQuantization* affine_quant, int quantized_dimension, + bool is_variable) { + return CreatePerChannelQuantizedBiasTensor( + input, quantized, dims, input_scale, weight_scales, scales, zero_points, + affine_quant, quantized_dimension, is_variable); +} + +TfLiteTensor CreatePerChannelQuantizedBiasTensor( + const float* input, std::int64_t* quantized, TfLiteIntArray* dims, + float input_scale, float* weight_scales, float* scales, int* zero_points, + TfLiteAffineQuantization* affine_quant, int quantized_dimension, + bool is_variable) { + return CreatePerChannelQuantizedBiasTensor( + input, quantized, dims, input_scale, weight_scales, scales, zero_points, + affine_quant, quantized_dimension, is_variable); +} + +TfLiteTensor CreateSymmetricPerChannelQuantizedTensor( + const float* input, int8_t* quantized, TfLiteIntArray* dims, float* scales, + int* zero_points, TfLiteAffineQuantization* affine_quant, + int quantized_dimension, bool is_variable, TfLiteType tensor_weight_type) { + int channel_count = dims->data[quantized_dimension]; + + scales[0] = static_cast(channel_count); + zero_points[0] = channel_count; + + SignedSymmetricPerChannelQuantize(input, dims, quantized_dimension, quantized, + &scales[1], tensor_weight_type); + + for (int i = 0; i < channel_count; i++) { + zero_points[i + 1] = 0; + } + + affine_quant->scale = FloatArrayFromFloats(scales); + affine_quant->zero_point = IntArrayFromInts(zero_points); + affine_quant->quantized_dimension = quantized_dimension; + TfLiteTensor result = + CreateTensor(quantized, dims, is_variable, tensor_weight_type); + result.quantization = {kTfLiteAffineQuantization, affine_quant}; + return result; +} + +size_t GetModelTensorCount(const Model* model) { + auto* subgraphs = model->subgraphs(); + if (subgraphs) { + return (*subgraphs)[0]->tensors()->size(); + } + return 0; +} + +void PackInt4ValuesDenselyInPlace(uint8_t* src_buffer, int buffer_size) { + for (int i = 0; i < buffer_size; ++i) { + if (i % 2 == 0) { + src_buffer[i / 2] = src_buffer[i] & 0x0F; + } else { + src_buffer[i / 2] |= src_buffer[i] << 4; + } + } + // the rest of the buffer should be empty since half of it is packed with the + // values + memset(src_buffer + (buffer_size + 1) / 2, 0, buffer_size / 2); +} + +} // 
namespace testing +} // namespace tflite diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/test_helpers.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/test_helpers.h new file mode 100644 index 0000000..544181d --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/micro/test_helpers.h @@ -0,0 +1,319 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_MICRO_TEST_HELPERS_H_ +#define TENSORFLOW_LITE_MICRO_TEST_HELPERS_H_ + +#include +#include + +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/flatbuffers.h" // from @flatbuffers +#include "edge-impulse-sdk/tensorflow/lite/c/common.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h" +#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/all_ops_resolver.h" +#include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h" +#include "edge-impulse-sdk/tensorflow/lite/portable_type_to_tflitetype.h" +#include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h" + +namespace tflite { +namespace testing { + +constexpr int kOfflinePlannerHeaderSize = 3; + +struct NodeConnection_ { + std::initializer_list input; + std::initializer_list output; +}; +typedef struct NodeConnection_ NodeConnection; + +// A simple operator that returns the median of the input with the number of +// times the kernel was invoked. The implementation below is deliberately +// complicated, just to demonstrate how kernel memory planning works. +class SimpleStatefulOp { + static constexpr int kBufferNotAllocated = 0; + // Inputs: + static constexpr int kInputTensor = 0; + // Outputs: + static constexpr int kMedianTensor = 0; + static constexpr int kInvokeCount = 1; + struct OpData { + int* invoke_count = nullptr; + int sorting_buffer = kBufferNotAllocated; + }; + + public: + static const TfLiteRegistration* getRegistration(); + static TfLiteRegistration* GetMutableRegistration(); + static void* Init(TfLiteContext* context, const char* buffer, size_t length); + static TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node); + static TfLiteStatus Invoke(TfLiteContext* context, TfLiteNode* node); +}; + +class MockCustom { + public: + static const TfLiteRegistration* getRegistration(); + static TfLiteRegistration* GetMutableRegistration(); + static void* Init(TfLiteContext* context, const char* buffer, size_t length); + static void Free(TfLiteContext* context, void* buffer); + static TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node); + static TfLiteStatus Invoke(TfLiteContext* context, TfLiteNode* node); + + static bool freed_; +}; + +// A simple operator with the purpose of testing multiple inputs. It returns +// the sum of the inputs. 
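+// Usage sketch (mirrors GetOpResolver() in test_helpers.cc): each mock op
+// exposes a static TfLiteRegistration and is registered under a custom name,
+// e.g.
+//   op_resolver.AddCustom("multiple_inputs_op",
+//                         MultipleInputs::GetMutableRegistration());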
+class MultipleInputs { + public: + static const TfLiteRegistration* getRegistration(); + static TfLiteRegistration* GetMutableRegistration(); + static void* Init(TfLiteContext* context, const char* buffer, size_t length); + static void Free(TfLiteContext* context, void* buffer); + static TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node); + static TfLiteStatus Invoke(TfLiteContext* context, TfLiteNode* node); + + static bool freed_; +}; + +// A simple no-op operator. +class NoOp { + public: + static const TfLiteRegistration* getRegistration(); + static TfLiteRegistration* GetMutableRegistration(); + static void* Init(TfLiteContext* context, const char* buffer, size_t length); + static void Free(TfLiteContext* context, void* buffer); + static TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node); + static TfLiteStatus Invoke(TfLiteContext* context, TfLiteNode* node); + + static bool freed_; +}; + +// Returns an Op Resolver that can be used in the testing code. +AllOpsResolver GetOpResolver(); + +// Returns a simple example flatbuffer TensorFlow Lite model. Contains 1 input, +// 1 layer of weights, 1 output Tensor, and 1 operator. +const Model* GetSimpleMockModel(); + +// Returns a flatbuffer TensorFlow Lite model with more inputs, variable +// tensors, and operators. +const Model* GetComplexMockModel(); + +// Returns a simple example flatbuffer TensorFlow Lite model. Contains 1 input, +// 1 layer of weights, 1 output Tensor, and 1 operator. +// The size of all three tensors is 256 x 256, which is larger than what other +// models provide from this test helper. +const Model* GetModelWith256x256Tensor(); + +// Returns a simple flatbuffer model with two branches. +const Model* GetSimpleModelWithBranch(); + +// Returns a simple example flatbuffer TensorFlow Lite model. Contains 3 inputs, +// 1 output Tensor, and 1 operator. +const Model* GetSimpleMultipleInputsModel(); + +// Returns a simple flatbuffer model with offline planned tensors +// @param[in] num_tensors Number of tensors in the model. +// @param[in] metadata_buffer Metadata for offline planner. +// @param[in] node_con List of connections, i.e. operators +// in the model. +// @param[in] num_conns Number of connections. +// @param[in] num_subgraph_inputs How many of the input tensors are in +// the subgraph inputs. The default value +// of 0 means all of the input tensors +// are in the subgraph input list. There +// must be at least 1 input tensor in the +// subgraph input list. +const Model* GetModelWithOfflinePlanning(int num_tensors, + const int32_t* metadata_buffer, + NodeConnection* node_conn, + int num_conns, + int num_subgraph_inputs = 0); + +// Returns a flatbuffer with a single operator, two inputs (one unused) and one +// output. +const Model* GetModelWithUnusedInputs(); + +// Returns a flatbuffer with a single operator, zero inputs and two outputs +// (one unused). +const Model* GetModelWithUnusedOperatorOutputs(); + +// Returns a flatbuffer model with `simple_stateful_op` +const Model* GetSimpleStatefulModel(); + +// Returns a flatbuffer model with "if" and two subgraphs. +const Model* GetSimpleModelWithSubgraphsAndIf(); + +// Returns a flatbuffer model with "if" and two subgraphs one of which is empty. +const Model* GetSimpleModelWithIfAndEmptySubgraph(); + +// Returns a flatbuffer model with "while" and three subgraphs. 
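+// (The three subgraphs are the main graph plus the WHILE op's condition and
+// body subgraphs.)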
+const Model* GetSimpleModelWithSubgraphsAndWhile(); + +// Returns a flatbuffer model with "if" and two subgraphs and the input tensor 1 +// of "if" subgraph overlaps with the input tensor 2 of subgraph 1. +const Model* GetModelWithIfAndSubgraphInputTensorOverlap(); + +// Returns a flatbuffer model with null subgraph/operator inputs and outputs. +const Model* GetSimpleModelWithNullInputsAndOutputs(); + +// Builds a one-dimensional flatbuffer tensor of the given size. +const Tensor* Create1dFlatbufferTensor(int size, bool is_variable = false); + +// Builds a one-dimensional flatbuffer tensor of the given size with +// quantization metadata. +const Tensor* CreateQuantizedFlatbufferTensor(int size); + +// Creates a one-dimensional tensor with no quantization metadata. +const Tensor* CreateMissingQuantizationFlatbufferTensor(int size); + +// Creates a vector of flatbuffer buffers. +const flatbuffers::Vector>* +CreateFlatbufferBuffers(); + +// Performs a simple string comparison without requiring standard C library. +int TestStrcmp(const char* a, const char* b); + +void PopulateContext(TfLiteTensor* tensors, int tensors_size, + TfLiteContext* context); + +// Create a TfLiteIntArray from an array of ints. The first element in the +// supplied array must be the size of the array expressed as an int. +TfLiteIntArray* IntArrayFromInts(int* int_array); + +// Create a TfLiteFloatArray from an array of floats. The first element in the +// supplied array must be the size of the array expressed as a float. +TfLiteFloatArray* FloatArrayFromFloats(const float* floats); + +// Assumes that `src_tensor` is a buffer where each element is a 4-bit value +// stored in 8-bit. +// Returns a new buffer that is packed densely with 2 4-bit values in a byte. +// The packing format is low-bits-first, i.e. the lower nibble of a byte is +// filled first, followed by the upper nibble. +void PackInt4ValuesDenselyInPlace(uint8_t* src_buffer, int buffer_size); + +template +TfLiteTensor CreateTensor(const T* data, TfLiteIntArray* dims, + const bool is_variable = false, + TfLiteType type = kTfLiteNoType) { + TfLiteTensor result; + result.dims = dims; + result.params = {}; + result.quantization = {kTfLiteNoQuantization, nullptr}; + result.is_variable = is_variable; + result.allocation_type = kTfLiteMemNone; + result.data.data = const_cast(data); + result.quantization = {kTfLiteAffineQuantization, nullptr}; + result.bytes = ElementCount(*dims) * sizeof(T); + result.data.data = const_cast(data); + + if (type == kTfLiteInt4) { + result.type = kTfLiteInt4; + PackInt4ValuesDenselyInPlace(tflite::GetTensorData(&result), + ElementCount(*dims)); + result.bytes = ((ElementCount(*dims) + 1) / 2); + } else { + // Const cast is used to allow passing in const and non-const arrays within + // a single CreateTensor method. A Const array should be used for immutable + // input tensors and non-const array should be used for mutable and output + // tensors. 
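+    // For reference: the kTfLiteInt4 branch above packs two 4-bit values per
+    // byte, which is why its size is (ElementCount(*dims) + 1) / 2 bytes; all
+    // other types fall through here and have their TfLiteType deduced from T.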
+ result.type = typeToTfLiteType(); + } + return result; +} + +template +TfLiteTensor CreateQuantizedTensor(const T* data, TfLiteIntArray* dims, + const float scale, const int zero_point = 0, + const bool is_variable = false, + TfLiteType type = kTfLiteNoType) { + TfLiteTensor result = CreateTensor(data, dims, is_variable, type); + result.params = {scale, zero_point}; + result.quantization = {kTfLiteAffineQuantization, nullptr}; + return result; +} + +template +TfLiteTensor CreateQuantizedTensor(const float* input, T* quantized, + TfLiteIntArray* dims, float scale, + int zero_point, bool is_variable = false, + TfLiteType type = kTfLiteNoType) { + int input_size = ElementCount(*dims); + tflite::Quantize(input, quantized, input_size, scale, zero_point); + return CreateQuantizedTensor(quantized, dims, scale, zero_point, is_variable, + type); +} + +TfLiteTensor CreateQuantizedBiasTensor(const float* data, int16_t* quantized, + TfLiteIntArray* dims, float input_scale, + float weights_scale, + bool is_variable = false); + +TfLiteTensor CreateQuantizedBiasTensor(const float* data, int32_t* quantized, + TfLiteIntArray* dims, float input_scale, + float weights_scale, + bool is_variable = false); + +TfLiteTensor CreateQuantizedBiasTensor(const float* data, + std::int64_t* quantized, + TfLiteIntArray* dims, float input_scale, + float weights_scale, + bool is_variable = false); + +// Quantizes int32_t bias tensor with per-channel weights determined by input +// scale multiplied by weight scale for each channel. +TfLiteTensor CreatePerChannelQuantizedBiasTensor( + const float* input, int32_t* quantized, TfLiteIntArray* dims, + float input_scale, float* weight_scales, float* scales, int* zero_points, + TfLiteAffineQuantization* affine_quant, int quantized_dimension, + bool is_variable = false); + +// Quantizes int64_t bias tensor with per-channel weights determined by input +// scale multiplied by weight scale for each channel. +TfLiteTensor CreatePerChannelQuantizedBiasTensor( + const float* input, std::int64_t* quantized, TfLiteIntArray* dims, + float input_scale, float* weight_scales, float* scales, int* zero_points, + TfLiteAffineQuantization* affine_quant, int quantized_dimension, + bool is_variable = false); + +TfLiteTensor CreateSymmetricPerChannelQuantizedTensor( + const float* input, int8_t* quantized, TfLiteIntArray* dims, float* scales, + int* zero_points, TfLiteAffineQuantization* affine_quant, + int quantized_dimension, bool is_variable = false, + TfLiteType tensor_weight_type = kTfLiteNoType); + +// Returns the number of tensors in the default subgraph for a tflite::Model. +size_t GetModelTensorCount(const Model* model); + +// Derives the quantization scaling factor from a min and max range. +template +inline float ScaleFromMinMax(const float min, const float max) { + return (max - min) / + static_cast((std::numeric_limits::max() * 1.0) - + std::numeric_limits::min()); +} + +// Derives the quantization zero point from a min and max range. 
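+// Worked example (illustrative): with T = int8_t, min = -1.0f and max = 1.0f,
+// ScaleFromMinMax gives (1 - (-1)) / (127 - (-128)) = 2 / 255 ~= 0.00784, and
+// the expression below evaluates to -128 + static_cast<int>(127.5 + 0.5f)
+// = -128 + 128 = 0, i.e. a symmetric range maps to a zero point of 0.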
+template +inline int ZeroPointFromMinMax(const float min, const float max) { + return static_cast(std::numeric_limits::min()) + + static_cast(-min / ScaleFromMinMax(min, max) + 0.5f); +} + +} // namespace testing +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_TEST_HELPERS_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/portable_type_to_tflitetype.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/portable_type_to_tflitetype.h new file mode 100644 index 0000000..28d2bf8 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/portable_type_to_tflitetype.h @@ -0,0 +1,75 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_PORTABLE_TYPE_TO_TFLITETYPE_H_ +#define TENSORFLOW_LITE_PORTABLE_TYPE_TO_TFLITETYPE_H_ + +// Most of the definitions have been moved to this subheader so that Micro +// can include it without relying on and , which isn't +// available on all platforms. + +// Arduino build defines abs as a macro here. That is invalid C++, and breaks +// libc++'s header, undefine it. +#ifdef abs +#undef abs +#endif + +#include + +#include "edge-impulse-sdk/tensorflow/lite/core/c/common.h" + +namespace tflite { + +// Map statically from a C++ type to a TfLiteType. Used in interpreter for +// safe casts. +// Example: +// typeToTfLiteType() -> kTfLiteBool +template +constexpr TfLiteType typeToTfLiteType() { + return kTfLiteNoType; +} +// Map from TfLiteType to the corresponding C++ type. +// Example: +// TfLiteTypeToType::Type -> bool +template +struct TfLiteTypeToType {}; // Specializations below + +// Template specialization for both typeToTfLiteType and TfLiteTypeToType. +#define MATCH_TYPE_AND_TFLITE_TYPE(CPP_TYPE, TFLITE_TYPE_ENUM) \ + template <> \ + constexpr TfLiteType typeToTfLiteType() { \ + return TFLITE_TYPE_ENUM; \ + } \ + template <> \ + struct TfLiteTypeToType { \ + using Type = CPP_TYPE; \ + } + +// No string mapping is included here, since the TF Lite packed representation +// doesn't correspond to a C++ type well. 
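+// Illustrative consequence of the specializations below:
+//   typeToTfLiteType<int8_t>() -> kTfLiteInt8
+//   TfLiteTypeToType<kTfLiteInt8>::Type -> int8_t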
+MATCH_TYPE_AND_TFLITE_TYPE(int32_t, kTfLiteInt32); +MATCH_TYPE_AND_TFLITE_TYPE(uint32_t, kTfLiteUInt32); +MATCH_TYPE_AND_TFLITE_TYPE(int16_t, kTfLiteInt16); +MATCH_TYPE_AND_TFLITE_TYPE(uint16_t, kTfLiteUInt16); +MATCH_TYPE_AND_TFLITE_TYPE(int64_t, kTfLiteInt64); +MATCH_TYPE_AND_TFLITE_TYPE(float, kTfLiteFloat32); +MATCH_TYPE_AND_TFLITE_TYPE(unsigned char, kTfLiteUInt8); +MATCH_TYPE_AND_TFLITE_TYPE(int8_t, kTfLiteInt8); +MATCH_TYPE_AND_TFLITE_TYPE(bool, kTfLiteBool); +MATCH_TYPE_AND_TFLITE_TYPE(TfLiteFloat16, kTfLiteFloat16); +MATCH_TYPE_AND_TFLITE_TYPE(double, kTfLiteFloat64); +MATCH_TYPE_AND_TFLITE_TYPE(uint64_t, kTfLiteUInt64); + +} // namespace tflite +#endif // TENSORFLOW_LITE_PORTABLE_TYPE_TO_TFLITETYPE_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h new file mode 100755 index 0000000..416029f --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h @@ -0,0 +1,2587 @@ +#ifndef FLATBUFFERS_GENERATED_SCHEMA_SUPPL_TFLITE_H_ +#define FLATBUFFERS_GENERATED_SCHEMA_SUPPL_TFLITE_H_ + +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/flatbuffers.h" + +// Ensure the included flatbuffers.h is the same version as when this file was +// generated, otherwise it may not be compatible. +static_assert(FLATBUFFERS_VERSION_MAJOR == 2 && + FLATBUFFERS_VERSION_MINOR == 0 && + FLATBUFFERS_VERSION_REVISION == 6, + "Non-compatible flatbuffers version included"); + +namespace tflite { + +struct CustomQuantization; +struct CustomQuantizationBuilder; +struct CustomQuantizationT; + +struct QuantizationParameters; +struct QuantizationParametersBuilder; +struct QuantizationParametersT; + +struct Int32Vector; +struct Int32VectorBuilder; +struct Int32VectorT; + +struct Uint16Vector; +struct Uint16VectorBuilder; +struct Uint16VectorT; + +struct Uint8Vector; +struct Uint8VectorBuilder; +struct Uint8VectorT; + +struct DimensionMetadata; +struct DimensionMetadataBuilder; +struct DimensionMetadataT; + +struct SparsityParameters; +struct SparsityParametersBuilder; +struct SparsityParametersT; + +struct VariantSubType; +struct VariantSubTypeBuilder; +struct VariantSubTypeT; + +struct Tensor; +struct TensorBuilder; +struct TensorT; + +struct Conv2DOptions; +struct Conv2DOptionsBuilder; +struct Conv2DOptionsT; + +struct Conv3DOptions; +struct Conv3DOptionsBuilder; +struct Conv3DOptionsT; + +struct Pool2DOptions; +struct Pool2DOptionsBuilder; +struct Pool2DOptionsT; + +struct DepthwiseConv2DOptions; +struct DepthwiseConv2DOptionsBuilder; +struct DepthwiseConv2DOptionsT; + +struct ConcatEmbeddingsOptions; +struct ConcatEmbeddingsOptionsBuilder; +struct ConcatEmbeddingsOptionsT; + +struct LSHProjectionOptions; +struct LSHProjectionOptionsBuilder; +struct LSHProjectionOptionsT; + +struct SVDFOptions; +struct SVDFOptionsBuilder; +struct SVDFOptionsT; + +struct RNNOptions; +struct RNNOptionsBuilder; +struct RNNOptionsT; + +struct SequenceRNNOptions; +struct SequenceRNNOptionsBuilder; +struct SequenceRNNOptionsT; + +struct BidirectionalSequenceRNNOptions; +struct BidirectionalSequenceRNNOptionsBuilder; +struct BidirectionalSequenceRNNOptionsT; + +struct FullyConnectedOptions; +struct FullyConnectedOptionsBuilder; +struct FullyConnectedOptionsT; + +struct SoftmaxOptions; +struct SoftmaxOptionsBuilder; +struct SoftmaxOptionsT; + +struct ConcatenationOptions; +struct ConcatenationOptionsBuilder; +struct ConcatenationOptionsT; + +struct AddOptions; +struct 
AddOptionsBuilder; +struct AddOptionsT; + +struct MulOptions; +struct MulOptionsBuilder; +struct MulOptionsT; + +struct L2NormOptions; +struct L2NormOptionsBuilder; +struct L2NormOptionsT; + +struct LocalResponseNormalizationOptions; +struct LocalResponseNormalizationOptionsBuilder; +struct LocalResponseNormalizationOptionsT; + +struct LSTMOptions; +struct LSTMOptionsBuilder; +struct LSTMOptionsT; + +struct UnidirectionalSequenceLSTMOptions; +struct UnidirectionalSequenceLSTMOptionsBuilder; +struct UnidirectionalSequenceLSTMOptionsT; + +struct BidirectionalSequenceLSTMOptions; +struct BidirectionalSequenceLSTMOptionsBuilder; +struct BidirectionalSequenceLSTMOptionsT; + +struct ResizeBilinearOptions; +struct ResizeBilinearOptionsBuilder; +struct ResizeBilinearOptionsT; + +struct ResizeNearestNeighborOptions; +struct ResizeNearestNeighborOptionsBuilder; +struct ResizeNearestNeighborOptionsT; + +struct CallOptions; +struct CallOptionsBuilder; +struct CallOptionsT; + +struct PadOptions; +struct PadOptionsBuilder; +struct PadOptionsT; + +struct PadV2Options; +struct PadV2OptionsBuilder; +struct PadV2OptionsT; + +struct ReshapeOptions; +struct ReshapeOptionsBuilder; +struct ReshapeOptionsT; + +struct SpaceToBatchNDOptions; +struct SpaceToBatchNDOptionsBuilder; +struct SpaceToBatchNDOptionsT; + +struct BatchToSpaceNDOptions; +struct BatchToSpaceNDOptionsBuilder; +struct BatchToSpaceNDOptionsT; + +struct SkipGramOptions; +struct SkipGramOptionsBuilder; +struct SkipGramOptionsT; + +struct SpaceToDepthOptions; +struct SpaceToDepthOptionsBuilder; +struct SpaceToDepthOptionsT; + +struct DepthToSpaceOptions; +struct DepthToSpaceOptionsBuilder; +struct DepthToSpaceOptionsT; + +struct SubOptions; +struct SubOptionsBuilder; +struct SubOptionsT; + +struct DivOptions; +struct DivOptionsBuilder; +struct DivOptionsT; + +struct TopKV2Options; +struct TopKV2OptionsBuilder; +struct TopKV2OptionsT; + +struct EmbeddingLookupSparseOptions; +struct EmbeddingLookupSparseOptionsBuilder; +struct EmbeddingLookupSparseOptionsT; + +struct GatherOptions; +struct GatherOptionsBuilder; +struct GatherOptionsT; + +struct TransposeOptions; +struct TransposeOptionsBuilder; +struct TransposeOptionsT; + +struct ExpOptions; +struct ExpOptionsBuilder; +struct ExpOptionsT; + +struct CosOptions; +struct CosOptionsBuilder; +struct CosOptionsT; + +struct ReducerOptions; +struct ReducerOptionsBuilder; +struct ReducerOptionsT; + +struct SqueezeOptions; +struct SqueezeOptionsBuilder; +struct SqueezeOptionsT; + +struct SplitOptions; +struct SplitOptionsBuilder; +struct SplitOptionsT; + +struct SplitVOptions; +struct SplitVOptionsBuilder; +struct SplitVOptionsT; + +struct StridedSliceOptions; +struct StridedSliceOptionsBuilder; +struct StridedSliceOptionsT; + +struct LogSoftmaxOptions; +struct LogSoftmaxOptionsBuilder; +struct LogSoftmaxOptionsT; + +struct CastOptions; +struct CastOptionsBuilder; +struct CastOptionsT; + +struct DequantizeOptions; +struct DequantizeOptionsBuilder; +struct DequantizeOptionsT; + +struct MaximumMinimumOptions; +struct MaximumMinimumOptionsBuilder; +struct MaximumMinimumOptionsT; + +struct TileOptions; +struct TileOptionsBuilder; +struct TileOptionsT; + +struct ArgMaxOptions; +struct ArgMaxOptionsBuilder; +struct ArgMaxOptionsT; + +struct ArgMinOptions; +struct ArgMinOptionsBuilder; +struct ArgMinOptionsT; + +struct GreaterOptions; +struct GreaterOptionsBuilder; +struct GreaterOptionsT; + +struct GreaterEqualOptions; +struct GreaterEqualOptionsBuilder; +struct GreaterEqualOptionsT; + +struct LessOptions; +struct 
LessOptionsBuilder; +struct LessOptionsT; + +struct LessEqualOptions; +struct LessEqualOptionsBuilder; +struct LessEqualOptionsT; + +struct NegOptions; +struct NegOptionsBuilder; +struct NegOptionsT; + +struct SelectOptions; +struct SelectOptionsBuilder; +struct SelectOptionsT; + +struct SliceOptions; +struct SliceOptionsBuilder; +struct SliceOptionsT; + +struct TransposeConvOptions; +struct TransposeConvOptionsBuilder; +struct TransposeConvOptionsT; + +struct ExpandDimsOptions; +struct ExpandDimsOptionsBuilder; +struct ExpandDimsOptionsT; + +struct SparseToDenseOptions; +struct SparseToDenseOptionsBuilder; +struct SparseToDenseOptionsT; + +struct EqualOptions; +struct EqualOptionsBuilder; +struct EqualOptionsT; + +struct NotEqualOptions; +struct NotEqualOptionsBuilder; +struct NotEqualOptionsT; + +struct ShapeOptions; +struct ShapeOptionsBuilder; +struct ShapeOptionsT; + +struct RankOptions; +struct RankOptionsBuilder; +struct RankOptionsT; + +struct PowOptions; +struct PowOptionsBuilder; +struct PowOptionsT; + +struct FakeQuantOptions; +struct FakeQuantOptionsBuilder; +struct FakeQuantOptionsT; + +struct PackOptions; +struct PackOptionsBuilder; +struct PackOptionsT; + +struct LogicalOrOptions; +struct LogicalOrOptionsBuilder; +struct LogicalOrOptionsT; + +struct OneHotOptions; +struct OneHotOptionsBuilder; +struct OneHotOptionsT; + +struct AbsOptions; +struct AbsOptionsBuilder; +struct AbsOptionsT; + +struct HardSwishOptions; +struct HardSwishOptionsBuilder; +struct HardSwishOptionsT; + +struct LogicalAndOptions; +struct LogicalAndOptionsBuilder; +struct LogicalAndOptionsT; + +struct LogicalNotOptions; +struct LogicalNotOptionsBuilder; +struct LogicalNotOptionsT; + +struct UnpackOptions; +struct UnpackOptionsBuilder; +struct UnpackOptionsT; + +struct FloorDivOptions; +struct FloorDivOptionsBuilder; +struct FloorDivOptionsT; + +struct SquareOptions; +struct SquareOptionsBuilder; +struct SquareOptionsT; + +struct ZerosLikeOptions; +struct ZerosLikeOptionsBuilder; +struct ZerosLikeOptionsT; + +struct FillOptions; +struct FillOptionsBuilder; +struct FillOptionsT; + +struct FloorModOptions; +struct FloorModOptionsBuilder; +struct FloorModOptionsT; + +struct RangeOptions; +struct RangeOptionsBuilder; +struct RangeOptionsT; + +struct LeakyReluOptions; +struct LeakyReluOptionsBuilder; +struct LeakyReluOptionsT; + +struct SquaredDifferenceOptions; +struct SquaredDifferenceOptionsBuilder; +struct SquaredDifferenceOptionsT; + +struct MirrorPadOptions; +struct MirrorPadOptionsBuilder; +struct MirrorPadOptionsT; + +struct UniqueOptions; +struct UniqueOptionsBuilder; +struct UniqueOptionsT; + +struct ReverseV2Options; +struct ReverseV2OptionsBuilder; +struct ReverseV2OptionsT; + +struct AddNOptions; +struct AddNOptionsBuilder; +struct AddNOptionsT; + +struct GatherNdOptions; +struct GatherNdOptionsBuilder; +struct GatherNdOptionsT; + +struct WhereOptions; +struct WhereOptionsBuilder; +struct WhereOptionsT; + +struct ReverseSequenceOptions; +struct ReverseSequenceOptionsBuilder; +struct ReverseSequenceOptionsT; + +struct MatrixDiagOptions; +struct MatrixDiagOptionsBuilder; +struct MatrixDiagOptionsT; + +struct QuantizeOptions; +struct QuantizeOptionsBuilder; +struct QuantizeOptionsT; + +struct MatrixSetDiagOptions; +struct MatrixSetDiagOptionsBuilder; +struct MatrixSetDiagOptionsT; + +struct IfOptions; +struct IfOptionsBuilder; +struct IfOptionsT; + +struct CallOnceOptions; +struct CallOnceOptionsBuilder; +struct CallOnceOptionsT; + +struct WhileOptions; +struct WhileOptionsBuilder; +struct 
WhileOptionsT; + +struct NonMaxSuppressionV4Options; +struct NonMaxSuppressionV4OptionsBuilder; +struct NonMaxSuppressionV4OptionsT; + +struct NonMaxSuppressionV5Options; +struct NonMaxSuppressionV5OptionsBuilder; +struct NonMaxSuppressionV5OptionsT; + +struct ScatterNdOptions; +struct ScatterNdOptionsBuilder; +struct ScatterNdOptionsT; + +struct SelectV2Options; +struct SelectV2OptionsBuilder; +struct SelectV2OptionsT; + +struct DensifyOptions; +struct DensifyOptionsBuilder; +struct DensifyOptionsT; + +struct SegmentSumOptions; +struct SegmentSumOptionsBuilder; +struct SegmentSumOptionsT; + +struct BatchMatMulOptions; +struct BatchMatMulOptionsBuilder; +struct BatchMatMulOptionsT; + +struct CumsumOptions; +struct CumsumOptionsBuilder; +struct CumsumOptionsT; + +struct BroadcastToOptions; +struct BroadcastToOptionsBuilder; +struct BroadcastToOptionsT; + +struct Rfft2dOptions; +struct Rfft2dOptionsBuilder; +struct Rfft2dOptionsT; + +struct HashtableOptions; +struct HashtableOptionsBuilder; +struct HashtableOptionsT; + +struct HashtableFindOptions; +struct HashtableFindOptionsBuilder; +struct HashtableFindOptionsT; + +struct HashtableImportOptions; +struct HashtableImportOptionsBuilder; +struct HashtableImportOptionsT; + +struct HashtableSizeOptions; +struct HashtableSizeOptionsBuilder; +struct HashtableSizeOptionsT; + +struct VarHandleOptions; +struct VarHandleOptionsBuilder; +struct VarHandleOptionsT; + +struct ReadVariableOptions; +struct ReadVariableOptionsBuilder; +struct ReadVariableOptionsT; + +struct AssignVariableOptions; +struct AssignVariableOptionsBuilder; +struct AssignVariableOptionsT; + +struct RandomOptions; +struct RandomOptionsBuilder; +struct RandomOptionsT; + +struct BucketizeOptions; +struct BucketizeOptionsBuilder; +struct BucketizeOptionsT; + +struct GeluOptions; +struct GeluOptionsBuilder; +struct GeluOptionsT; + +struct DynamicUpdateSliceOptions; +struct DynamicUpdateSliceOptionsBuilder; +struct DynamicUpdateSliceOptionsT; + +struct UnsortedSegmentProdOptions; +struct UnsortedSegmentProdOptionsBuilder; +struct UnsortedSegmentProdOptionsT; + +struct UnsortedSegmentMaxOptions; +struct UnsortedSegmentMaxOptionsBuilder; +struct UnsortedSegmentMaxOptionsT; + +struct UnsortedSegmentSumOptions; +struct UnsortedSegmentSumOptionsBuilder; +struct UnsortedSegmentSumOptionsT; + +struct ATan2Options; +struct ATan2OptionsBuilder; +struct ATan2OptionsT; + +struct UnsortedSegmentMinOptions; +struct UnsortedSegmentMinOptionsBuilder; +struct UnsortedSegmentMinOptionsT; + +struct SignOptions; +struct SignOptionsBuilder; +struct SignOptionsT; + +struct OperatorCode; +struct OperatorCodeBuilder; +struct OperatorCodeT; + +struct Operator; +struct OperatorBuilder; +struct OperatorT; + +struct SubGraph; +struct SubGraphBuilder; +struct SubGraphT; + +struct Buffer; +struct BufferBuilder; +struct BufferT; + +struct Metadata; +struct MetadataBuilder; +struct MetadataT; + +struct TensorMap; +struct TensorMapBuilder; +struct TensorMapT; + +struct SignatureDef; +struct SignatureDefBuilder; +struct SignatureDefT; + +struct Model; +struct ModelBuilder; +struct ModelT; + +enum TensorType : int8_t { + TensorType_FLOAT32 = 0, + TensorType_FLOAT16 = 1, + TensorType_INT32 = 2, + TensorType_UINT8 = 3, + TensorType_INT64 = 4, + TensorType_STRING = 5, + TensorType_BOOL = 6, + TensorType_INT16 = 7, + TensorType_COMPLEX64 = 8, + TensorType_INT8 = 9, + TensorType_FLOAT64 = 10, + TensorType_COMPLEX128 = 11, + TensorType_UINT64 = 12, + TensorType_RESOURCE = 13, + TensorType_VARIANT = 14, + TensorType_UINT32 = 
15, + TensorType_UINT16 = 16, + TensorType_INT4 = 17, + TensorType_MIN = TensorType_FLOAT32, + TensorType_MAX = TensorType_INT4 +}; + +inline const TensorType (&EnumValuesTensorType())[18] { + static const TensorType values[] = { + TensorType_FLOAT32, + TensorType_FLOAT16, + TensorType_INT32, + TensorType_UINT8, + TensorType_INT64, + TensorType_STRING, + TensorType_BOOL, + TensorType_INT16, + TensorType_COMPLEX64, + TensorType_INT8, + TensorType_FLOAT64, + TensorType_COMPLEX128, + TensorType_UINT64, + TensorType_RESOURCE, + TensorType_VARIANT, + TensorType_UINT32, + TensorType_UINT16, + TensorType_INT4 + }; + return values; +} + +inline const char * const *EnumNamesTensorType() { + static const char * const names[19] = { + "FLOAT32", + "FLOAT16", + "INT32", + "UINT8", + "INT64", + "STRING", + "BOOL", + "INT16", + "COMPLEX64", + "INT8", + "FLOAT64", + "COMPLEX128", + "UINT64", + "RESOURCE", + "VARIANT", + "UINT32", + "UINT16", + "INT4", + nullptr + }; + return names; +} + +inline const char *EnumNameTensorType(TensorType e) { + if (flatbuffers::IsOutRange(e, TensorType_FLOAT32, TensorType_INT4)) return ""; + const size_t index = static_cast(e); + return EnumNamesTensorType()[index]; +} + + +enum BuiltinOperator : int32_t { + BuiltinOperator_ADD = 0, + BuiltinOperator_AVERAGE_POOL_2D = 1, + BuiltinOperator_CONCATENATION = 2, + BuiltinOperator_CONV_2D = 3, + BuiltinOperator_DEPTHWISE_CONV_2D = 4, + BuiltinOperator_DEPTH_TO_SPACE = 5, + BuiltinOperator_DEQUANTIZE = 6, + BuiltinOperator_EMBEDDING_LOOKUP = 7, + BuiltinOperator_FLOOR = 8, + BuiltinOperator_FULLY_CONNECTED = 9, + BuiltinOperator_HASHTABLE_LOOKUP = 10, + BuiltinOperator_L2_NORMALIZATION = 11, + BuiltinOperator_L2_POOL_2D = 12, + BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION = 13, + BuiltinOperator_LOGISTIC = 14, + BuiltinOperator_LSH_PROJECTION = 15, + BuiltinOperator_LSTM = 16, + BuiltinOperator_MAX_POOL_2D = 17, + BuiltinOperator_MUL = 18, + BuiltinOperator_RELU = 19, + BuiltinOperator_RELU_N1_TO_1 = 20, + BuiltinOperator_RELU6 = 21, + BuiltinOperator_RESHAPE = 22, + BuiltinOperator_RESIZE_BILINEAR = 23, + BuiltinOperator_RNN = 24, + BuiltinOperator_SOFTMAX = 25, + BuiltinOperator_SPACE_TO_DEPTH = 26, + BuiltinOperator_SVDF = 27, + BuiltinOperator_TANH = 28, + BuiltinOperator_CONCAT_EMBEDDINGS = 29, + BuiltinOperator_SKIP_GRAM = 30, + BuiltinOperator_CALL = 31, + BuiltinOperator_CUSTOM = 32, + BuiltinOperator_EMBEDDING_LOOKUP_SPARSE = 33, + BuiltinOperator_PAD = 34, + BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN = 35, + BuiltinOperator_GATHER = 36, + BuiltinOperator_BATCH_TO_SPACE_ND = 37, + BuiltinOperator_SPACE_TO_BATCH_ND = 38, + BuiltinOperator_TRANSPOSE = 39, + BuiltinOperator_MEAN = 40, + BuiltinOperator_SUB = 41, + BuiltinOperator_DIV = 42, + BuiltinOperator_SQUEEZE = 43, + BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM = 44, + BuiltinOperator_STRIDED_SLICE = 45, + BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN = 46, + BuiltinOperator_EXP = 47, + BuiltinOperator_TOPK_V2 = 48, + BuiltinOperator_SPLIT = 49, + BuiltinOperator_LOG_SOFTMAX = 50, + BuiltinOperator_DELEGATE = 51, + BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM = 52, + BuiltinOperator_CAST = 53, + BuiltinOperator_PRELU = 54, + BuiltinOperator_MAXIMUM = 55, + BuiltinOperator_ARG_MAX = 56, + BuiltinOperator_MINIMUM = 57, + BuiltinOperator_LESS = 58, + BuiltinOperator_NEG = 59, + BuiltinOperator_PADV2 = 60, + BuiltinOperator_GREATER = 61, + BuiltinOperator_GREATER_EQUAL = 62, + BuiltinOperator_LESS_EQUAL = 63, + BuiltinOperator_SELECT = 64, + BuiltinOperator_SLICE = 65, + 
BuiltinOperator_SIN = 66, + BuiltinOperator_TRANSPOSE_CONV = 67, + BuiltinOperator_SPARSE_TO_DENSE = 68, + BuiltinOperator_TILE = 69, + BuiltinOperator_EXPAND_DIMS = 70, + BuiltinOperator_EQUAL = 71, + BuiltinOperator_NOT_EQUAL = 72, + BuiltinOperator_LOG = 73, + BuiltinOperator_SUM = 74, + BuiltinOperator_SQRT = 75, + BuiltinOperator_RSQRT = 76, + BuiltinOperator_SHAPE = 77, + BuiltinOperator_POW = 78, + BuiltinOperator_ARG_MIN = 79, + BuiltinOperator_FAKE_QUANT = 80, + BuiltinOperator_REDUCE_PROD = 81, + BuiltinOperator_REDUCE_MAX = 82, + BuiltinOperator_PACK = 83, + BuiltinOperator_LOGICAL_OR = 84, + BuiltinOperator_ONE_HOT = 85, + BuiltinOperator_LOGICAL_AND = 86, + BuiltinOperator_LOGICAL_NOT = 87, + BuiltinOperator_UNPACK = 88, + BuiltinOperator_REDUCE_MIN = 89, + BuiltinOperator_FLOOR_DIV = 90, + BuiltinOperator_REDUCE_ANY = 91, + BuiltinOperator_SQUARE = 92, + BuiltinOperator_ZEROS_LIKE = 93, + BuiltinOperator_FILL = 94, + BuiltinOperator_FLOOR_MOD = 95, + BuiltinOperator_RANGE = 96, + BuiltinOperator_RESIZE_NEAREST_NEIGHBOR = 97, + BuiltinOperator_LEAKY_RELU = 98, + BuiltinOperator_SQUARED_DIFFERENCE = 99, + BuiltinOperator_MIRROR_PAD = 100, + BuiltinOperator_ABS = 101, + BuiltinOperator_SPLIT_V = 102, + BuiltinOperator_UNIQUE = 103, + BuiltinOperator_CEIL = 104, + BuiltinOperator_REVERSE_V2 = 105, + BuiltinOperator_ADD_N = 106, + BuiltinOperator_GATHER_ND = 107, + BuiltinOperator_COS = 108, + BuiltinOperator_WHERE = 109, + BuiltinOperator_RANK = 110, + BuiltinOperator_ELU = 111, + BuiltinOperator_REVERSE_SEQUENCE = 112, + BuiltinOperator_MATRIX_DIAG = 113, + BuiltinOperator_QUANTIZE = 114, + BuiltinOperator_MATRIX_SET_DIAG = 115, + BuiltinOperator_ROUND = 116, + BuiltinOperator_HARD_SWISH = 117, + BuiltinOperator_IF = 118, + BuiltinOperator_WHILE = 119, + BuiltinOperator_NON_MAX_SUPPRESSION_V4 = 120, + BuiltinOperator_NON_MAX_SUPPRESSION_V5 = 121, + BuiltinOperator_SCATTER_ND = 122, + BuiltinOperator_SELECT_V2 = 123, + BuiltinOperator_DENSIFY = 124, + BuiltinOperator_SEGMENT_SUM = 125, + BuiltinOperator_BATCH_MATMUL = 126, + BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES = 127, + BuiltinOperator_CUMSUM = 128, + BuiltinOperator_CALL_ONCE = 129, + BuiltinOperator_BROADCAST_TO = 130, + BuiltinOperator_RFFT2D = 131, + BuiltinOperator_CONV_3D = 132, + BuiltinOperator_IMAG = 133, + BuiltinOperator_REAL = 134, + BuiltinOperator_COMPLEX_ABS = 135, + BuiltinOperator_HASHTABLE = 136, + BuiltinOperator_HASHTABLE_FIND = 137, + BuiltinOperator_HASHTABLE_IMPORT = 138, + BuiltinOperator_HASHTABLE_SIZE = 139, + BuiltinOperator_REDUCE_ALL = 140, + BuiltinOperator_CONV_3D_TRANSPOSE = 141, + BuiltinOperator_VAR_HANDLE = 142, + BuiltinOperator_READ_VARIABLE = 143, + BuiltinOperator_ASSIGN_VARIABLE = 144, + BuiltinOperator_BROADCAST_ARGS = 145, + BuiltinOperator_RANDOM_STANDARD_NORMAL = 146, + BuiltinOperator_BUCKETIZE = 147, + BuiltinOperator_RANDOM_UNIFORM = 148, + BuiltinOperator_MULTINOMIAL = 149, + BuiltinOperator_GELU = 150, + BuiltinOperator_DYNAMIC_UPDATE_SLICE = 151, + BuiltinOperator_RELU_0_TO_1 = 152, + BuiltinOperator_UNSORTED_SEGMENT_PROD = 153, + BuiltinOperator_UNSORTED_SEGMENT_MAX = 154, + BuiltinOperator_UNSORTED_SEGMENT_SUM = 155, + BuiltinOperator_ATAN2 = 156, + BuiltinOperator_UNSORTED_SEGMENT_MIN = 157, + BuiltinOperator_SIGN = 158, + BuiltinOperator_MIN = BuiltinOperator_ADD, + BuiltinOperator_MAX = BuiltinOperator_SIGN +}; + +inline const BuiltinOperator (&EnumValuesBuiltinOperator())[159] { + static const BuiltinOperator values[] = { + BuiltinOperator_ADD, + 
BuiltinOperator_AVERAGE_POOL_2D, + BuiltinOperator_CONCATENATION, + BuiltinOperator_CONV_2D, + BuiltinOperator_DEPTHWISE_CONV_2D, + BuiltinOperator_DEPTH_TO_SPACE, + BuiltinOperator_DEQUANTIZE, + BuiltinOperator_EMBEDDING_LOOKUP, + BuiltinOperator_FLOOR, + BuiltinOperator_FULLY_CONNECTED, + BuiltinOperator_HASHTABLE_LOOKUP, + BuiltinOperator_L2_NORMALIZATION, + BuiltinOperator_L2_POOL_2D, + BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION, + BuiltinOperator_LOGISTIC, + BuiltinOperator_LSH_PROJECTION, + BuiltinOperator_LSTM, + BuiltinOperator_MAX_POOL_2D, + BuiltinOperator_MUL, + BuiltinOperator_RELU, + BuiltinOperator_RELU_N1_TO_1, + BuiltinOperator_RELU6, + BuiltinOperator_RESHAPE, + BuiltinOperator_RESIZE_BILINEAR, + BuiltinOperator_RNN, + BuiltinOperator_SOFTMAX, + BuiltinOperator_SPACE_TO_DEPTH, + BuiltinOperator_SVDF, + BuiltinOperator_TANH, + BuiltinOperator_CONCAT_EMBEDDINGS, + BuiltinOperator_SKIP_GRAM, + BuiltinOperator_CALL, + BuiltinOperator_CUSTOM, + BuiltinOperator_EMBEDDING_LOOKUP_SPARSE, + BuiltinOperator_PAD, + BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN, + BuiltinOperator_GATHER, + BuiltinOperator_BATCH_TO_SPACE_ND, + BuiltinOperator_SPACE_TO_BATCH_ND, + BuiltinOperator_TRANSPOSE, + BuiltinOperator_MEAN, + BuiltinOperator_SUB, + BuiltinOperator_DIV, + BuiltinOperator_SQUEEZE, + BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM, + BuiltinOperator_STRIDED_SLICE, + BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN, + BuiltinOperator_EXP, + BuiltinOperator_TOPK_V2, + BuiltinOperator_SPLIT, + BuiltinOperator_LOG_SOFTMAX, + BuiltinOperator_DELEGATE, + BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM, + BuiltinOperator_CAST, + BuiltinOperator_PRELU, + BuiltinOperator_MAXIMUM, + BuiltinOperator_ARG_MAX, + BuiltinOperator_MINIMUM, + BuiltinOperator_LESS, + BuiltinOperator_NEG, + BuiltinOperator_PADV2, + BuiltinOperator_GREATER, + BuiltinOperator_GREATER_EQUAL, + BuiltinOperator_LESS_EQUAL, + BuiltinOperator_SELECT, + BuiltinOperator_SLICE, + BuiltinOperator_SIN, + BuiltinOperator_TRANSPOSE_CONV, + BuiltinOperator_SPARSE_TO_DENSE, + BuiltinOperator_TILE, + BuiltinOperator_EXPAND_DIMS, + BuiltinOperator_EQUAL, + BuiltinOperator_NOT_EQUAL, + BuiltinOperator_LOG, + BuiltinOperator_SUM, + BuiltinOperator_SQRT, + BuiltinOperator_RSQRT, + BuiltinOperator_SHAPE, + BuiltinOperator_POW, + BuiltinOperator_ARG_MIN, + BuiltinOperator_FAKE_QUANT, + BuiltinOperator_REDUCE_PROD, + BuiltinOperator_REDUCE_MAX, + BuiltinOperator_PACK, + BuiltinOperator_LOGICAL_OR, + BuiltinOperator_ONE_HOT, + BuiltinOperator_LOGICAL_AND, + BuiltinOperator_LOGICAL_NOT, + BuiltinOperator_UNPACK, + BuiltinOperator_REDUCE_MIN, + BuiltinOperator_FLOOR_DIV, + BuiltinOperator_REDUCE_ANY, + BuiltinOperator_SQUARE, + BuiltinOperator_ZEROS_LIKE, + BuiltinOperator_FILL, + BuiltinOperator_FLOOR_MOD, + BuiltinOperator_RANGE, + BuiltinOperator_RESIZE_NEAREST_NEIGHBOR, + BuiltinOperator_LEAKY_RELU, + BuiltinOperator_SQUARED_DIFFERENCE, + BuiltinOperator_MIRROR_PAD, + BuiltinOperator_ABS, + BuiltinOperator_SPLIT_V, + BuiltinOperator_UNIQUE, + BuiltinOperator_CEIL, + BuiltinOperator_REVERSE_V2, + BuiltinOperator_ADD_N, + BuiltinOperator_GATHER_ND, + BuiltinOperator_COS, + BuiltinOperator_WHERE, + BuiltinOperator_RANK, + BuiltinOperator_ELU, + BuiltinOperator_REVERSE_SEQUENCE, + BuiltinOperator_MATRIX_DIAG, + BuiltinOperator_QUANTIZE, + BuiltinOperator_MATRIX_SET_DIAG, + BuiltinOperator_ROUND, + BuiltinOperator_HARD_SWISH, + BuiltinOperator_IF, + BuiltinOperator_WHILE, + BuiltinOperator_NON_MAX_SUPPRESSION_V4, + BuiltinOperator_NON_MAX_SUPPRESSION_V5, + 
BuiltinOperator_SCATTER_ND, + BuiltinOperator_SELECT_V2, + BuiltinOperator_DENSIFY, + BuiltinOperator_SEGMENT_SUM, + BuiltinOperator_BATCH_MATMUL, + BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES, + BuiltinOperator_CUMSUM, + BuiltinOperator_CALL_ONCE, + BuiltinOperator_BROADCAST_TO, + BuiltinOperator_RFFT2D, + BuiltinOperator_CONV_3D, + BuiltinOperator_IMAG, + BuiltinOperator_REAL, + BuiltinOperator_COMPLEX_ABS, + BuiltinOperator_HASHTABLE, + BuiltinOperator_HASHTABLE_FIND, + BuiltinOperator_HASHTABLE_IMPORT, + BuiltinOperator_HASHTABLE_SIZE, + BuiltinOperator_REDUCE_ALL, + BuiltinOperator_CONV_3D_TRANSPOSE, + BuiltinOperator_VAR_HANDLE, + BuiltinOperator_READ_VARIABLE, + BuiltinOperator_ASSIGN_VARIABLE, + BuiltinOperator_BROADCAST_ARGS, + BuiltinOperator_RANDOM_STANDARD_NORMAL, + BuiltinOperator_BUCKETIZE, + BuiltinOperator_RANDOM_UNIFORM, + BuiltinOperator_MULTINOMIAL, + BuiltinOperator_GELU, + BuiltinOperator_DYNAMIC_UPDATE_SLICE, + BuiltinOperator_RELU_0_TO_1, + BuiltinOperator_UNSORTED_SEGMENT_PROD, + BuiltinOperator_UNSORTED_SEGMENT_MAX, + BuiltinOperator_UNSORTED_SEGMENT_SUM, + BuiltinOperator_ATAN2, + BuiltinOperator_UNSORTED_SEGMENT_MIN, + BuiltinOperator_SIGN + }; + return values; +} + +inline const char * const *EnumNamesBuiltinOperator() { + static const char * const names[160] = { + "ADD", + "AVERAGE_POOL_2D", + "CONCATENATION", + "CONV_2D", + "DEPTHWISE_CONV_2D", + "DEPTH_TO_SPACE", + "DEQUANTIZE", + "EMBEDDING_LOOKUP", + "FLOOR", + "FULLY_CONNECTED", + "HASHTABLE_LOOKUP", + "L2_NORMALIZATION", + "L2_POOL_2D", + "LOCAL_RESPONSE_NORMALIZATION", + "LOGISTIC", + "LSH_PROJECTION", + "LSTM", + "MAX_POOL_2D", + "MUL", + "RELU", + "RELU_N1_TO_1", + "RELU6", + "RESHAPE", + "RESIZE_BILINEAR", + "RNN", + "SOFTMAX", + "SPACE_TO_DEPTH", + "SVDF", + "TANH", + "CONCAT_EMBEDDINGS", + "SKIP_GRAM", + "CALL", + "CUSTOM", + "EMBEDDING_LOOKUP_SPARSE", + "PAD", + "UNIDIRECTIONAL_SEQUENCE_RNN", + "GATHER", + "BATCH_TO_SPACE_ND", + "SPACE_TO_BATCH_ND", + "TRANSPOSE", + "MEAN", + "SUB", + "DIV", + "SQUEEZE", + "UNIDIRECTIONAL_SEQUENCE_LSTM", + "STRIDED_SLICE", + "BIDIRECTIONAL_SEQUENCE_RNN", + "EXP", + "TOPK_V2", + "SPLIT", + "LOG_SOFTMAX", + "DELEGATE", + "BIDIRECTIONAL_SEQUENCE_LSTM", + "CAST", + "PRELU", + "MAXIMUM", + "ARG_MAX", + "MINIMUM", + "LESS", + "NEG", + "PADV2", + "GREATER", + "GREATER_EQUAL", + "LESS_EQUAL", + "SELECT", + "SLICE", + "SIN", + "TRANSPOSE_CONV", + "SPARSE_TO_DENSE", + "TILE", + "EXPAND_DIMS", + "EQUAL", + "NOT_EQUAL", + "LOG", + "SUM", + "SQRT", + "RSQRT", + "SHAPE", + "POW", + "ARG_MIN", + "FAKE_QUANT", + "REDUCE_PROD", + "REDUCE_MAX", + "PACK", + "LOGICAL_OR", + "ONE_HOT", + "LOGICAL_AND", + "LOGICAL_NOT", + "UNPACK", + "REDUCE_MIN", + "FLOOR_DIV", + "REDUCE_ANY", + "SQUARE", + "ZEROS_LIKE", + "FILL", + "FLOOR_MOD", + "RANGE", + "RESIZE_NEAREST_NEIGHBOR", + "LEAKY_RELU", + "SQUARED_DIFFERENCE", + "MIRROR_PAD", + "ABS", + "SPLIT_V", + "UNIQUE", + "CEIL", + "REVERSE_V2", + "ADD_N", + "GATHER_ND", + "COS", + "WHERE", + "RANK", + "ELU", + "REVERSE_SEQUENCE", + "MATRIX_DIAG", + "QUANTIZE", + "MATRIX_SET_DIAG", + "ROUND", + "HARD_SWISH", + "IF", + "WHILE", + "NON_MAX_SUPPRESSION_V4", + "NON_MAX_SUPPRESSION_V5", + "SCATTER_ND", + "SELECT_V2", + "DENSIFY", + "SEGMENT_SUM", + "BATCH_MATMUL", + "PLACEHOLDER_FOR_GREATER_OP_CODES", + "CUMSUM", + "CALL_ONCE", + "BROADCAST_TO", + "RFFT2D", + "CONV_3D", + "IMAG", + "REAL", + "COMPLEX_ABS", + "HASHTABLE", + "HASHTABLE_FIND", + "HASHTABLE_IMPORT", + "HASHTABLE_SIZE", + "REDUCE_ALL", + "CONV_3D_TRANSPOSE", + "VAR_HANDLE", + 
"READ_VARIABLE", + "ASSIGN_VARIABLE", + "BROADCAST_ARGS", + "RANDOM_STANDARD_NORMAL", + "BUCKETIZE", + "RANDOM_UNIFORM", + "MULTINOMIAL", + "GELU", + "DYNAMIC_UPDATE_SLICE", + "RELU_0_TO_1", + "UNSORTED_SEGMENT_PROD", + "UNSORTED_SEGMENT_MAX", + "UNSORTED_SEGMENT_SUM", + "ATAN2", + "UNSORTED_SEGMENT_MIN", + "SIGN", + nullptr + }; + return names; +} + +inline const char *EnumNameBuiltinOperator(BuiltinOperator e) { + if (flatbuffers::IsOutRange(e, BuiltinOperator_ADD, BuiltinOperator_SIGN)) return ""; + const size_t index = static_cast(e); + return EnumNamesBuiltinOperator()[index]; +} + +enum BuiltinOptions : uint8_t { + BuiltinOptions_NONE = 0, + BuiltinOptions_Conv2DOptions = 1, + BuiltinOptions_DepthwiseConv2DOptions = 2, + BuiltinOptions_ConcatEmbeddingsOptions = 3, + BuiltinOptions_LSHProjectionOptions = 4, + BuiltinOptions_Pool2DOptions = 5, + BuiltinOptions_SVDFOptions = 6, + BuiltinOptions_RNNOptions = 7, + BuiltinOptions_FullyConnectedOptions = 8, + BuiltinOptions_SoftmaxOptions = 9, + BuiltinOptions_ConcatenationOptions = 10, + BuiltinOptions_AddOptions = 11, + BuiltinOptions_L2NormOptions = 12, + BuiltinOptions_LocalResponseNormalizationOptions = 13, + BuiltinOptions_LSTMOptions = 14, + BuiltinOptions_ResizeBilinearOptions = 15, + BuiltinOptions_CallOptions = 16, + BuiltinOptions_ReshapeOptions = 17, + BuiltinOptions_SkipGramOptions = 18, + BuiltinOptions_SpaceToDepthOptions = 19, + BuiltinOptions_EmbeddingLookupSparseOptions = 20, + BuiltinOptions_MulOptions = 21, + BuiltinOptions_PadOptions = 22, + BuiltinOptions_GatherOptions = 23, + BuiltinOptions_BatchToSpaceNDOptions = 24, + BuiltinOptions_SpaceToBatchNDOptions = 25, + BuiltinOptions_TransposeOptions = 26, + BuiltinOptions_ReducerOptions = 27, + BuiltinOptions_SubOptions = 28, + BuiltinOptions_DivOptions = 29, + BuiltinOptions_SqueezeOptions = 30, + BuiltinOptions_SequenceRNNOptions = 31, + BuiltinOptions_StridedSliceOptions = 32, + BuiltinOptions_ExpOptions = 33, + BuiltinOptions_TopKV2Options = 34, + BuiltinOptions_SplitOptions = 35, + BuiltinOptions_LogSoftmaxOptions = 36, + BuiltinOptions_CastOptions = 37, + BuiltinOptions_DequantizeOptions = 38, + BuiltinOptions_MaximumMinimumOptions = 39, + BuiltinOptions_ArgMaxOptions = 40, + BuiltinOptions_LessOptions = 41, + BuiltinOptions_NegOptions = 42, + BuiltinOptions_PadV2Options = 43, + BuiltinOptions_GreaterOptions = 44, + BuiltinOptions_GreaterEqualOptions = 45, + BuiltinOptions_LessEqualOptions = 46, + BuiltinOptions_SelectOptions = 47, + BuiltinOptions_SliceOptions = 48, + BuiltinOptions_TransposeConvOptions = 49, + BuiltinOptions_SparseToDenseOptions = 50, + BuiltinOptions_TileOptions = 51, + BuiltinOptions_ExpandDimsOptions = 52, + BuiltinOptions_EqualOptions = 53, + BuiltinOptions_NotEqualOptions = 54, + BuiltinOptions_ShapeOptions = 55, + BuiltinOptions_PowOptions = 56, + BuiltinOptions_ArgMinOptions = 57, + BuiltinOptions_FakeQuantOptions = 58, + BuiltinOptions_PackOptions = 59, + BuiltinOptions_LogicalOrOptions = 60, + BuiltinOptions_OneHotOptions = 61, + BuiltinOptions_LogicalAndOptions = 62, + BuiltinOptions_LogicalNotOptions = 63, + BuiltinOptions_UnpackOptions = 64, + BuiltinOptions_FloorDivOptions = 65, + BuiltinOptions_SquareOptions = 66, + BuiltinOptions_ZerosLikeOptions = 67, + BuiltinOptions_FillOptions = 68, + BuiltinOptions_BidirectionalSequenceLSTMOptions = 69, + BuiltinOptions_BidirectionalSequenceRNNOptions = 70, + BuiltinOptions_UnidirectionalSequenceLSTMOptions = 71, + BuiltinOptions_FloorModOptions = 72, + BuiltinOptions_RangeOptions = 73, + 
BuiltinOptions_ResizeNearestNeighborOptions = 74, + BuiltinOptions_LeakyReluOptions = 75, + BuiltinOptions_SquaredDifferenceOptions = 76, + BuiltinOptions_MirrorPadOptions = 77, + BuiltinOptions_AbsOptions = 78, + BuiltinOptions_SplitVOptions = 79, + BuiltinOptions_UniqueOptions = 80, + BuiltinOptions_ReverseV2Options = 81, + BuiltinOptions_AddNOptions = 82, + BuiltinOptions_GatherNdOptions = 83, + BuiltinOptions_CosOptions = 84, + BuiltinOptions_WhereOptions = 85, + BuiltinOptions_RankOptions = 86, + BuiltinOptions_ReverseSequenceOptions = 87, + BuiltinOptions_MatrixDiagOptions = 88, + BuiltinOptions_QuantizeOptions = 89, + BuiltinOptions_MatrixSetDiagOptions = 90, + BuiltinOptions_HardSwishOptions = 91, + BuiltinOptions_IfOptions = 92, + BuiltinOptions_WhileOptions = 93, + BuiltinOptions_DepthToSpaceOptions = 94, + BuiltinOptions_NonMaxSuppressionV4Options = 95, + BuiltinOptions_NonMaxSuppressionV5Options = 96, + BuiltinOptions_ScatterNdOptions = 97, + BuiltinOptions_SelectV2Options = 98, + BuiltinOptions_DensifyOptions = 99, + BuiltinOptions_SegmentSumOptions = 100, + BuiltinOptions_BatchMatMulOptions = 101, + BuiltinOptions_CumsumOptions = 102, + BuiltinOptions_CallOnceOptions = 103, + BuiltinOptions_BroadcastToOptions = 104, + BuiltinOptions_Rfft2dOptions = 105, + BuiltinOptions_Conv3DOptions = 106, + BuiltinOptions_HashtableOptions = 107, + BuiltinOptions_HashtableFindOptions = 108, + BuiltinOptions_HashtableImportOptions = 109, + BuiltinOptions_HashtableSizeOptions = 110, + BuiltinOptions_VarHandleOptions = 111, + BuiltinOptions_ReadVariableOptions = 112, + BuiltinOptions_AssignVariableOptions = 113, + BuiltinOptions_RandomOptions = 114, + BuiltinOptions_BucketizeOptions = 115, + BuiltinOptions_GeluOptions = 116, + BuiltinOptions_DynamicUpdateSliceOptions = 117, + BuiltinOptions_UnsortedSegmentProdOptions = 118, + BuiltinOptions_UnsortedSegmentMaxOptions = 119, + BuiltinOptions_UnsortedSegmentMinOptions = 120, + BuiltinOptions_UnsortedSegmentSumOptions = 121, + BuiltinOptions_ATan2Options = 122, + BuiltinOptions_SignOptions = 123, + BuiltinOptions_MIN = BuiltinOptions_NONE, + BuiltinOptions_MAX = BuiltinOptions_SignOptions +}; + +inline const BuiltinOptions (&EnumValuesBuiltinOptions())[124] { + static const BuiltinOptions values[] = { + BuiltinOptions_NONE, + BuiltinOptions_Conv2DOptions, + BuiltinOptions_DepthwiseConv2DOptions, + BuiltinOptions_ConcatEmbeddingsOptions, + BuiltinOptions_LSHProjectionOptions, + BuiltinOptions_Pool2DOptions, + BuiltinOptions_SVDFOptions, + BuiltinOptions_RNNOptions, + BuiltinOptions_FullyConnectedOptions, + BuiltinOptions_SoftmaxOptions, + BuiltinOptions_ConcatenationOptions, + BuiltinOptions_AddOptions, + BuiltinOptions_L2NormOptions, + BuiltinOptions_LocalResponseNormalizationOptions, + BuiltinOptions_LSTMOptions, + BuiltinOptions_ResizeBilinearOptions, + BuiltinOptions_CallOptions, + BuiltinOptions_ReshapeOptions, + BuiltinOptions_SkipGramOptions, + BuiltinOptions_SpaceToDepthOptions, + BuiltinOptions_EmbeddingLookupSparseOptions, + BuiltinOptions_MulOptions, + BuiltinOptions_PadOptions, + BuiltinOptions_GatherOptions, + BuiltinOptions_BatchToSpaceNDOptions, + BuiltinOptions_SpaceToBatchNDOptions, + BuiltinOptions_TransposeOptions, + BuiltinOptions_ReducerOptions, + BuiltinOptions_SubOptions, + BuiltinOptions_DivOptions, + BuiltinOptions_SqueezeOptions, + BuiltinOptions_SequenceRNNOptions, + BuiltinOptions_StridedSliceOptions, + BuiltinOptions_ExpOptions, + BuiltinOptions_TopKV2Options, + BuiltinOptions_SplitOptions, + 
BuiltinOptions_LogSoftmaxOptions, + BuiltinOptions_CastOptions, + BuiltinOptions_DequantizeOptions, + BuiltinOptions_MaximumMinimumOptions, + BuiltinOptions_ArgMaxOptions, + BuiltinOptions_LessOptions, + BuiltinOptions_NegOptions, + BuiltinOptions_PadV2Options, + BuiltinOptions_GreaterOptions, + BuiltinOptions_GreaterEqualOptions, + BuiltinOptions_LessEqualOptions, + BuiltinOptions_SelectOptions, + BuiltinOptions_SliceOptions, + BuiltinOptions_TransposeConvOptions, + BuiltinOptions_SparseToDenseOptions, + BuiltinOptions_TileOptions, + BuiltinOptions_ExpandDimsOptions, + BuiltinOptions_EqualOptions, + BuiltinOptions_NotEqualOptions, + BuiltinOptions_ShapeOptions, + BuiltinOptions_PowOptions, + BuiltinOptions_ArgMinOptions, + BuiltinOptions_FakeQuantOptions, + BuiltinOptions_PackOptions, + BuiltinOptions_LogicalOrOptions, + BuiltinOptions_OneHotOptions, + BuiltinOptions_LogicalAndOptions, + BuiltinOptions_LogicalNotOptions, + BuiltinOptions_UnpackOptions, + BuiltinOptions_FloorDivOptions, + BuiltinOptions_SquareOptions, + BuiltinOptions_ZerosLikeOptions, + BuiltinOptions_FillOptions, + BuiltinOptions_BidirectionalSequenceLSTMOptions, + BuiltinOptions_BidirectionalSequenceRNNOptions, + BuiltinOptions_UnidirectionalSequenceLSTMOptions, + BuiltinOptions_FloorModOptions, + BuiltinOptions_RangeOptions, + BuiltinOptions_ResizeNearestNeighborOptions, + BuiltinOptions_LeakyReluOptions, + BuiltinOptions_SquaredDifferenceOptions, + BuiltinOptions_MirrorPadOptions, + BuiltinOptions_AbsOptions, + BuiltinOptions_SplitVOptions, + BuiltinOptions_UniqueOptions, + BuiltinOptions_ReverseV2Options, + BuiltinOptions_AddNOptions, + BuiltinOptions_GatherNdOptions, + BuiltinOptions_CosOptions, + BuiltinOptions_WhereOptions, + BuiltinOptions_RankOptions, + BuiltinOptions_ReverseSequenceOptions, + BuiltinOptions_MatrixDiagOptions, + BuiltinOptions_QuantizeOptions, + BuiltinOptions_MatrixSetDiagOptions, + BuiltinOptions_HardSwishOptions, + BuiltinOptions_IfOptions, + BuiltinOptions_WhileOptions, + BuiltinOptions_DepthToSpaceOptions, + BuiltinOptions_NonMaxSuppressionV4Options, + BuiltinOptions_NonMaxSuppressionV5Options, + BuiltinOptions_ScatterNdOptions, + BuiltinOptions_SelectV2Options, + BuiltinOptions_DensifyOptions, + BuiltinOptions_SegmentSumOptions, + BuiltinOptions_BatchMatMulOptions, + BuiltinOptions_CumsumOptions, + BuiltinOptions_CallOnceOptions, + BuiltinOptions_BroadcastToOptions, + BuiltinOptions_Rfft2dOptions, + BuiltinOptions_Conv3DOptions, + BuiltinOptions_HashtableOptions, + BuiltinOptions_HashtableFindOptions, + BuiltinOptions_HashtableImportOptions, + BuiltinOptions_HashtableSizeOptions, + BuiltinOptions_VarHandleOptions, + BuiltinOptions_ReadVariableOptions, + BuiltinOptions_AssignVariableOptions, + BuiltinOptions_RandomOptions, + BuiltinOptions_BucketizeOptions, + BuiltinOptions_GeluOptions, + BuiltinOptions_DynamicUpdateSliceOptions, + BuiltinOptions_UnsortedSegmentProdOptions, + BuiltinOptions_UnsortedSegmentMaxOptions, + BuiltinOptions_UnsortedSegmentMinOptions, + BuiltinOptions_UnsortedSegmentSumOptions, + BuiltinOptions_ATan2Options, + BuiltinOptions_SignOptions + }; + return values; +} + +inline const char * const *EnumNamesBuiltinOptions() { + static const char * const names[125] = { + "NONE", + "Conv2DOptions", + "DepthwiseConv2DOptions", + "ConcatEmbeddingsOptions", + "LSHProjectionOptions", + "Pool2DOptions", + "SVDFOptions", + "RNNOptions", + "FullyConnectedOptions", + "SoftmaxOptions", + "ConcatenationOptions", + "AddOptions", + "L2NormOptions", + "LocalResponseNormalizationOptions", 
+ "LSTMOptions", + "ResizeBilinearOptions", + "CallOptions", + "ReshapeOptions", + "SkipGramOptions", + "SpaceToDepthOptions", + "EmbeddingLookupSparseOptions", + "MulOptions", + "PadOptions", + "GatherOptions", + "BatchToSpaceNDOptions", + "SpaceToBatchNDOptions", + "TransposeOptions", + "ReducerOptions", + "SubOptions", + "DivOptions", + "SqueezeOptions", + "SequenceRNNOptions", + "StridedSliceOptions", + "ExpOptions", + "TopKV2Options", + "SplitOptions", + "LogSoftmaxOptions", + "CastOptions", + "DequantizeOptions", + "MaximumMinimumOptions", + "ArgMaxOptions", + "LessOptions", + "NegOptions", + "PadV2Options", + "GreaterOptions", + "GreaterEqualOptions", + "LessEqualOptions", + "SelectOptions", + "SliceOptions", + "TransposeConvOptions", + "SparseToDenseOptions", + "TileOptions", + "ExpandDimsOptions", + "EqualOptions", + "NotEqualOptions", + "ShapeOptions", + "PowOptions", + "ArgMinOptions", + "FakeQuantOptions", + "PackOptions", + "LogicalOrOptions", + "OneHotOptions", + "LogicalAndOptions", + "LogicalNotOptions", + "UnpackOptions", + "FloorDivOptions", + "SquareOptions", + "ZerosLikeOptions", + "FillOptions", + "BidirectionalSequenceLSTMOptions", + "BidirectionalSequenceRNNOptions", + "UnidirectionalSequenceLSTMOptions", + "FloorModOptions", + "RangeOptions", + "ResizeNearestNeighborOptions", + "LeakyReluOptions", + "SquaredDifferenceOptions", + "MirrorPadOptions", + "AbsOptions", + "SplitVOptions", + "UniqueOptions", + "ReverseV2Options", + "AddNOptions", + "GatherNdOptions", + "CosOptions", + "WhereOptions", + "RankOptions", + "ReverseSequenceOptions", + "MatrixDiagOptions", + "QuantizeOptions", + "MatrixSetDiagOptions", + "HardSwishOptions", + "IfOptions", + "WhileOptions", + "DepthToSpaceOptions", + "NonMaxSuppressionV4Options", + "NonMaxSuppressionV5Options", + "ScatterNdOptions", + "SelectV2Options", + "DensifyOptions", + "SegmentSumOptions", + "BatchMatMulOptions", + "CumsumOptions", + "CallOnceOptions", + "BroadcastToOptions", + "Rfft2dOptions", + "Conv3DOptions", + "HashtableOptions", + "HashtableFindOptions", + "HashtableImportOptions", + "HashtableSizeOptions", + "VarHandleOptions", + "ReadVariableOptions", + "AssignVariableOptions", + "RandomOptions", + "BucketizeOptions", + "GeluOptions", + "DynamicUpdateSliceOptions", + "UnsortedSegmentProdOptions", + "UnsortedSegmentMaxOptions", + "UnsortedSegmentMinOptions", + "UnsortedSegmentSumOptions", + "ATan2Options", + "SignOptions", + nullptr + }; + return names; +} + +inline const char *EnumNameBuiltinOptions(BuiltinOptions e) { + if (flatbuffers::IsOutRange(e, BuiltinOptions_NONE, BuiltinOptions_SignOptions)) return ""; + const size_t index = static_cast(e); + return EnumNamesBuiltinOptions()[index]; +} + +template struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_NONE; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_Conv2DOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_DepthwiseConv2DOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_ConcatEmbeddingsOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_LSHProjectionOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_Pool2DOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = 
BuiltinOptions_SVDFOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_RNNOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_FullyConnectedOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_SoftmaxOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_ConcatenationOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_AddOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_L2NormOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_LocalResponseNormalizationOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_LSTMOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_ResizeBilinearOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_CallOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_ReshapeOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_SkipGramOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_SpaceToDepthOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_EmbeddingLookupSparseOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_MulOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_PadOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_GatherOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_BatchToSpaceNDOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_SpaceToBatchNDOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_TransposeOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_ReducerOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_SubOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_DivOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_SqueezeOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_SequenceRNNOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_StridedSliceOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_ExpOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_TopKV2Options; +}; + +template<> struct BuiltinOptionsTraits { + static const 
BuiltinOptions enum_value = BuiltinOptions_SplitOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_LogSoftmaxOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_CastOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_DequantizeOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_MaximumMinimumOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_ArgMaxOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_LessOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_NegOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_PadV2Options; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_GreaterOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_GreaterEqualOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_LessEqualOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_SelectOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_SliceOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_TransposeConvOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_SparseToDenseOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_TileOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_ExpandDimsOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_EqualOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_NotEqualOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_ShapeOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_PowOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_ArgMinOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_FakeQuantOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_PackOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_LogicalOrOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_OneHotOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_LogicalAndOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_LogicalNotOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const 
BuiltinOptions enum_value = BuiltinOptions_UnpackOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_FloorDivOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_SquareOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_ZerosLikeOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_FillOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_BidirectionalSequenceLSTMOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_BidirectionalSequenceRNNOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_UnidirectionalSequenceLSTMOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_FloorModOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_RangeOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_ResizeNearestNeighborOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_LeakyReluOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_SquaredDifferenceOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_MirrorPadOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_AbsOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_SplitVOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_UniqueOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_ReverseV2Options; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_AddNOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_GatherNdOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_CosOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_WhereOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_RankOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_ReverseSequenceOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_MatrixDiagOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_QuantizeOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_MatrixSetDiagOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_HardSwishOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_IfOptions; 
+}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_WhileOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_DepthToSpaceOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_NonMaxSuppressionV4Options; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_NonMaxSuppressionV5Options; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_ScatterNdOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_SelectV2Options; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_DensifyOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_SegmentSumOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_BatchMatMulOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_CumsumOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_CallOnceOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_BroadcastToOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_Rfft2dOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_Conv3DOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_HashtableOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_HashtableFindOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_HashtableImportOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_HashtableSizeOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_VarHandleOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_ReadVariableOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_AssignVariableOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_RandomOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_BucketizeOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_GeluOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_DynamicUpdateSliceOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_UnsortedSegmentProdOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_UnsortedSegmentMaxOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_UnsortedSegmentMinOptions; +}; + +template<> 
struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_UnsortedSegmentSumOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_ATan2Options; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_SignOptions; +}; + +template struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_NONE; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_Conv2DOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_DepthwiseConv2DOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_ConcatEmbeddingsOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_LSHProjectionOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_Pool2DOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_SVDFOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_RNNOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_FullyConnectedOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_SoftmaxOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_ConcatenationOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_AddOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_L2NormOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_LocalResponseNormalizationOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_LSTMOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_ResizeBilinearOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_CallOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_ReshapeOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_SkipGramOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_SpaceToDepthOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_EmbeddingLookupSparseOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_MulOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_PadOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_GatherOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = 
BuiltinOptions_BatchToSpaceNDOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_SpaceToBatchNDOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_TransposeOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_ReducerOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_SubOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_DivOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_SqueezeOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_SequenceRNNOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_StridedSliceOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_ExpOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_TopKV2Options; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_SplitOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_LogSoftmaxOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_CastOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_DequantizeOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_MaximumMinimumOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_ArgMaxOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_LessOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_NegOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_PadV2Options; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_GreaterOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_GreaterEqualOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_LessEqualOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_SelectOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_SliceOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_TransposeConvOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_SparseToDenseOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_TileOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const 
BuiltinOptions enum_value = BuiltinOptions_ExpandDimsOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_EqualOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_NotEqualOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_ShapeOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_PowOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_ArgMinOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_FakeQuantOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_PackOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_LogicalOrOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_OneHotOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_LogicalAndOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_LogicalNotOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_UnpackOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_FloorDivOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_SquareOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_ZerosLikeOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_FillOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_BidirectionalSequenceLSTMOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_BidirectionalSequenceRNNOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_UnidirectionalSequenceLSTMOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_FloorModOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_RangeOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_ResizeNearestNeighborOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_LeakyReluOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_SquaredDifferenceOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_MirrorPadOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_AbsOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = 
BuiltinOptions_SplitVOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_UniqueOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_ReverseV2Options; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_AddNOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_GatherNdOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_CosOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_WhereOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_RankOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_ReverseSequenceOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_MatrixDiagOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_QuantizeOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_MatrixSetDiagOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_HardSwishOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_IfOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_WhileOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_DepthToSpaceOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_NonMaxSuppressionV4Options; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_NonMaxSuppressionV5Options; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_ScatterNdOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_SelectV2Options; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_DensifyOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_SegmentSumOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_BatchMatMulOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_CumsumOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_CallOnceOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_BroadcastToOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_Rfft2dOptions; +}; + +template<> struct BuiltinOptionsUnionTraits { + static const BuiltinOptions enum_value = BuiltinOptions_Conv3DOptions; +}; + +template<> struct BuiltinOptionsUnionTraits 
{ + static const BuiltinOptions enum_value = BuiltinOptions_HashtableOptions; +}; + +template<> struct BuiltinOptionsUnionTraits<tflite::HashtableFindOptionsT> { + static const BuiltinOptions enum_value = BuiltinOptions_HashtableFindOptions; +}; + +template<> struct BuiltinOptionsUnionTraits<tflite::HashtableImportOptionsT> { + static const BuiltinOptions enum_value = BuiltinOptions_HashtableImportOptions; +}; + +template<> struct BuiltinOptionsUnionTraits<tflite::HashtableSizeOptionsT> { + static const BuiltinOptions enum_value = BuiltinOptions_HashtableSizeOptions; +}; + +template<> struct BuiltinOptionsUnionTraits<tflite::VarHandleOptionsT> { + static const BuiltinOptions enum_value = BuiltinOptions_VarHandleOptions; +}; + +template<> struct BuiltinOptionsUnionTraits<tflite::ReadVariableOptionsT> { + static const BuiltinOptions enum_value = BuiltinOptions_ReadVariableOptions; +}; + +template<> struct BuiltinOptionsUnionTraits<tflite::AssignVariableOptionsT> { + static const BuiltinOptions enum_value = BuiltinOptions_AssignVariableOptions; +}; + +template<> struct BuiltinOptionsUnionTraits<tflite::RandomOptionsT> { + static const BuiltinOptions enum_value = BuiltinOptions_RandomOptions; +}; + +template<> struct BuiltinOptionsUnionTraits<tflite::BucketizeOptionsT> { + static const BuiltinOptions enum_value = BuiltinOptions_BucketizeOptions; +}; + +template<> struct BuiltinOptionsUnionTraits<tflite::GeluOptionsT> { + static const BuiltinOptions enum_value = BuiltinOptions_GeluOptions; +}; + +template<> struct BuiltinOptionsUnionTraits<tflite::DynamicUpdateSliceOptionsT> { + static const BuiltinOptions enum_value = BuiltinOptions_DynamicUpdateSliceOptions; +}; + +template<> struct BuiltinOptionsUnionTraits<tflite::UnsortedSegmentProdOptionsT> { + static const BuiltinOptions enum_value = BuiltinOptions_UnsortedSegmentProdOptions; +}; + +template<> struct BuiltinOptionsUnionTraits<tflite::UnsortedSegmentMaxOptionsT> { + static const BuiltinOptions enum_value = BuiltinOptions_UnsortedSegmentMaxOptions; +}; + +template<> struct BuiltinOptionsUnionTraits<tflite::UnsortedSegmentMinOptionsT> { + static const BuiltinOptions enum_value = BuiltinOptions_UnsortedSegmentMinOptions; +}; + +template<> struct BuiltinOptionsUnionTraits<tflite::UnsortedSegmentSumOptionsT> { + static const BuiltinOptions enum_value = BuiltinOptions_UnsortedSegmentSumOptions; +}; + +template<> struct BuiltinOptionsUnionTraits<tflite::ATan2OptionsT> { + static const BuiltinOptions enum_value = BuiltinOptions_ATan2Options; +}; + +template<> struct BuiltinOptionsUnionTraits<tflite::SignOptionsT> { + static const BuiltinOptions enum_value = BuiltinOptions_SignOptions; +}; + +struct OperatorCodeT : public flatbuffers::NativeTable { + typedef OperatorCode TableType; + int8_t deprecated_builtin_code = 0; + std::string custom_code{}; + int32_t version = 1; + tflite::BuiltinOperator builtin_code = tflite::BuiltinOperator_ADD; +}; + +struct OperatorCode FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef OperatorCodeT NativeTableType; + typedef OperatorCodeBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_DEPRECATED_BUILTIN_CODE = 4, + VT_CUSTOM_CODE = 6, + VT_VERSION = 8, + VT_BUILTIN_CODE = 10 + }; + int8_t deprecated_builtin_code() const { + return GetField<int8_t>(VT_DEPRECATED_BUILTIN_CODE, 0); + } + const flatbuffers::String *custom_code() const { + return GetPointer<const flatbuffers::String *>(VT_CUSTOM_CODE); + } + int32_t version() const { + return GetField<int32_t>(VT_VERSION, 1); + } + tflite::BuiltinOperator builtin_code() const { + return static_cast<tflite::BuiltinOperator>(GetField<int32_t>(VT_BUILTIN_CODE, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField<int8_t>(verifier, VT_DEPRECATED_BUILTIN_CODE, 1) && + VerifyOffset(verifier, VT_CUSTOM_CODE) && + verifier.VerifyString(custom_code()) && + VerifyField<int32_t>(verifier, VT_VERSION, 4) && + VerifyField<int32_t>(verifier, VT_BUILTIN_CODE, 4) && + verifier.EndTable(); + } + OperatorCodeT *UnPack(const
flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(OperatorCodeT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset<OperatorCode> Pack(flatbuffers::FlatBufferBuilder &_fbb, const OperatorCodeT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +} +#endif // FLATBUFFERS_GENERATED_SCHEMA_SUPPL_TFLITE_H_ \ No newline at end of file diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/schema/schema_generated_full.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/schema/schema_generated_full.h new file mode 100755 index 0000000..aaa2252 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/schema/schema_generated_full.h @@ -0,0 +1,17601 @@ +// automatically generated by the FlatBuffers compiler, do not modify + + +#ifndef FLATBUFFERS_GENERATED_SCHEMA_TFLITE_H_ +#define FLATBUFFERS_GENERATED_SCHEMA_TFLITE_H_ + +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/flatbuffers.h" + +// Ensure the included flatbuffers.h is the same version as when this file was +// generated, otherwise it may not be compatible. +static_assert(FLATBUFFERS_VERSION_MAJOR == 2 && + FLATBUFFERS_VERSION_MINOR == 0 && + FLATBUFFERS_VERSION_REVISION == 6, + "Non-compatible flatbuffers version included"); + +namespace tflite { + +enum QuantizationDetails : uint8_t { + QuantizationDetails_NONE = 0, + QuantizationDetails_CustomQuantization = 1, + QuantizationDetails_MIN = QuantizationDetails_NONE, + QuantizationDetails_MAX = QuantizationDetails_CustomQuantization +}; + +inline const QuantizationDetails (&EnumValuesQuantizationDetails())[2] { + static const QuantizationDetails values[] = { + QuantizationDetails_NONE, + QuantizationDetails_CustomQuantization + }; + return values; +} + +inline const char * const *EnumNamesQuantizationDetails() { + static const char * const names[3] = { + "NONE", + "CustomQuantization", + nullptr + }; + return names; +} + +inline const char *EnumNameQuantizationDetails(QuantizationDetails e) { + if (flatbuffers::IsOutRange(e, QuantizationDetails_NONE, QuantizationDetails_CustomQuantization)) return ""; + const size_t index = static_cast<size_t>(e); + return EnumNamesQuantizationDetails()[index]; +} + +template<typename T> struct QuantizationDetailsTraits { + static const QuantizationDetails enum_value = QuantizationDetails_NONE; +}; + +template<> struct QuantizationDetailsTraits<tflite::CustomQuantization> { + static const QuantizationDetails enum_value = QuantizationDetails_CustomQuantization; +}; + +template<typename T> struct QuantizationDetailsUnionTraits { + static const QuantizationDetails enum_value = QuantizationDetails_NONE; +}; + +template<> struct QuantizationDetailsUnionTraits<tflite::CustomQuantizationT> { + static const QuantizationDetails enum_value = QuantizationDetails_CustomQuantization; +}; + +struct QuantizationDetailsUnion { + QuantizationDetails type; + void *value; + + QuantizationDetailsUnion() : type(QuantizationDetails_NONE), value(nullptr) {} + QuantizationDetailsUnion(QuantizationDetailsUnion&& u) FLATBUFFERS_NOEXCEPT : + type(QuantizationDetails_NONE), value(nullptr) + { std::swap(type, u.type); std::swap(value, u.value); } + QuantizationDetailsUnion(const QuantizationDetailsUnion &); + QuantizationDetailsUnion &operator=(const QuantizationDetailsUnion &u) + { QuantizationDetailsUnion t(u); std::swap(type, t.type); std::swap(value, t.value); return *this; } + QuantizationDetailsUnion &operator=(QuantizationDetailsUnion &&u) FLATBUFFERS_NOEXCEPT + { std::swap(type, u.type); std::swap(value, u.value); return *this; } +
~QuantizationDetailsUnion() { Reset(); } + + void Reset(); + + template + void Set(T&& val) { + typedef typename std::remove_reference::type RT; + Reset(); + type = QuantizationDetailsUnionTraits::enum_value; + if (type != QuantizationDetails_NONE) { + value = new RT(std::forward(val)); + } + } + + static void *UnPack(const void *obj, QuantizationDetails type, const flatbuffers::resolver_function_t *resolver); + flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const flatbuffers::rehasher_function_t *_rehasher = nullptr) const; + + tflite::CustomQuantizationT *AsCustomQuantization() { + return type == QuantizationDetails_CustomQuantization ? + reinterpret_cast(value) : nullptr; + } + const tflite::CustomQuantizationT *AsCustomQuantization() const { + return type == QuantizationDetails_CustomQuantization ? + reinterpret_cast(value) : nullptr; + } +}; + +bool VerifyQuantizationDetails(flatbuffers::Verifier &verifier, const void *obj, QuantizationDetails type); +bool VerifyQuantizationDetailsVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector> *values, const flatbuffers::Vector *types); + +enum DimensionType : int8_t { + DimensionType_DENSE = 0, + DimensionType_SPARSE_CSR = 1, + DimensionType_MIN = DimensionType_DENSE, + DimensionType_MAX = DimensionType_SPARSE_CSR +}; + +inline const DimensionType (&EnumValuesDimensionType())[2] { + static const DimensionType values[] = { + DimensionType_DENSE, + DimensionType_SPARSE_CSR + }; + return values; +} + +inline const char * const *EnumNamesDimensionType() { + static const char * const names[3] = { + "DENSE", + "SPARSE_CSR", + nullptr + }; + return names; +} + +inline const char *EnumNameDimensionType(DimensionType e) { + if (flatbuffers::IsOutRange(e, DimensionType_DENSE, DimensionType_SPARSE_CSR)) return ""; + const size_t index = static_cast(e); + return EnumNamesDimensionType()[index]; +} + +enum SparseIndexVector : uint8_t { + SparseIndexVector_NONE = 0, + SparseIndexVector_Int32Vector = 1, + SparseIndexVector_Uint16Vector = 2, + SparseIndexVector_Uint8Vector = 3, + SparseIndexVector_MIN = SparseIndexVector_NONE, + SparseIndexVector_MAX = SparseIndexVector_Uint8Vector +}; + +inline const SparseIndexVector (&EnumValuesSparseIndexVector())[4] { + static const SparseIndexVector values[] = { + SparseIndexVector_NONE, + SparseIndexVector_Int32Vector, + SparseIndexVector_Uint16Vector, + SparseIndexVector_Uint8Vector + }; + return values; +} + +inline const char * const *EnumNamesSparseIndexVector() { + static const char * const names[5] = { + "NONE", + "Int32Vector", + "Uint16Vector", + "Uint8Vector", + nullptr + }; + return names; +} + +inline const char *EnumNameSparseIndexVector(SparseIndexVector e) { + if (flatbuffers::IsOutRange(e, SparseIndexVector_NONE, SparseIndexVector_Uint8Vector)) return ""; + const size_t index = static_cast(e); + return EnumNamesSparseIndexVector()[index]; +} + +template struct SparseIndexVectorTraits { + static const SparseIndexVector enum_value = SparseIndexVector_NONE; +}; + +template<> struct SparseIndexVectorTraits { + static const SparseIndexVector enum_value = SparseIndexVector_Int32Vector; +}; + +template<> struct SparseIndexVectorTraits { + static const SparseIndexVector enum_value = SparseIndexVector_Uint16Vector; +}; + +template<> struct SparseIndexVectorTraits { + static const SparseIndexVector enum_value = SparseIndexVector_Uint8Vector; +}; + +template struct SparseIndexVectorUnionTraits { + static const SparseIndexVector enum_value = SparseIndexVector_NONE; +}; + +template<> 
struct SparseIndexVectorUnionTraits { + static const SparseIndexVector enum_value = SparseIndexVector_Int32Vector; +}; + +template<> struct SparseIndexVectorUnionTraits { + static const SparseIndexVector enum_value = SparseIndexVector_Uint16Vector; +}; + +template<> struct SparseIndexVectorUnionTraits { + static const SparseIndexVector enum_value = SparseIndexVector_Uint8Vector; +}; + +struct SparseIndexVectorUnion { + SparseIndexVector type; + void *value; + + SparseIndexVectorUnion() : type(SparseIndexVector_NONE), value(nullptr) {} + SparseIndexVectorUnion(SparseIndexVectorUnion&& u) FLATBUFFERS_NOEXCEPT : + type(SparseIndexVector_NONE), value(nullptr) + { std::swap(type, u.type); std::swap(value, u.value); } + SparseIndexVectorUnion(const SparseIndexVectorUnion &); + SparseIndexVectorUnion &operator=(const SparseIndexVectorUnion &u) + { SparseIndexVectorUnion t(u); std::swap(type, t.type); std::swap(value, t.value); return *this; } + SparseIndexVectorUnion &operator=(SparseIndexVectorUnion &&u) FLATBUFFERS_NOEXCEPT + { std::swap(type, u.type); std::swap(value, u.value); return *this; } + ~SparseIndexVectorUnion() { Reset(); } + + void Reset(); + + template + void Set(T&& val) { + typedef typename std::remove_reference::type RT; + Reset(); + type = SparseIndexVectorUnionTraits::enum_value; + if (type != SparseIndexVector_NONE) { + value = new RT(std::forward(val)); + } + } + + static void *UnPack(const void *obj, SparseIndexVector type, const flatbuffers::resolver_function_t *resolver); + flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const flatbuffers::rehasher_function_t *_rehasher = nullptr) const; + + tflite::Int32VectorT *AsInt32Vector() { + return type == SparseIndexVector_Int32Vector ? + reinterpret_cast(value) : nullptr; + } + const tflite::Int32VectorT *AsInt32Vector() const { + return type == SparseIndexVector_Int32Vector ? + reinterpret_cast(value) : nullptr; + } + tflite::Uint16VectorT *AsUint16Vector() { + return type == SparseIndexVector_Uint16Vector ? + reinterpret_cast(value) : nullptr; + } + const tflite::Uint16VectorT *AsUint16Vector() const { + return type == SparseIndexVector_Uint16Vector ? + reinterpret_cast(value) : nullptr; + } + tflite::Uint8VectorT *AsUint8Vector() { + return type == SparseIndexVector_Uint8Vector ? + reinterpret_cast(value) : nullptr; + } + const tflite::Uint8VectorT *AsUint8Vector() const { + return type == SparseIndexVector_Uint8Vector ? 
+ reinterpret_cast(value) : nullptr; + } +}; + +bool VerifySparseIndexVector(flatbuffers::Verifier &verifier, const void *obj, SparseIndexVector type); +bool VerifySparseIndexVectorVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector> *values, const flatbuffers::Vector *types); + +struct BuiltinOptionsUnion { + BuiltinOptions type; + void *value; + + BuiltinOptionsUnion() : type(BuiltinOptions_NONE), value(nullptr) {} + BuiltinOptionsUnion(BuiltinOptionsUnion&& u) FLATBUFFERS_NOEXCEPT : + type(BuiltinOptions_NONE), value(nullptr) + { std::swap(type, u.type); std::swap(value, u.value); } + BuiltinOptionsUnion(const BuiltinOptionsUnion &); + BuiltinOptionsUnion &operator=(const BuiltinOptionsUnion &u) + { BuiltinOptionsUnion t(u); std::swap(type, t.type); std::swap(value, t.value); return *this; } + BuiltinOptionsUnion &operator=(BuiltinOptionsUnion &&u) FLATBUFFERS_NOEXCEPT + { std::swap(type, u.type); std::swap(value, u.value); return *this; } + ~BuiltinOptionsUnion() { Reset(); } + + void Reset(); + + template + void Set(T&& val) { + typedef typename std::remove_reference::type RT; + Reset(); + type = BuiltinOptionsUnionTraits::enum_value; + if (type != BuiltinOptions_NONE) { + value = new RT(std::forward(val)); + } + } + + static void *UnPack(const void *obj, BuiltinOptions type, const flatbuffers::resolver_function_t *resolver); + flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const flatbuffers::rehasher_function_t *_rehasher = nullptr) const; + + tflite::Conv2DOptionsT *AsConv2DOptions() { + return type == BuiltinOptions_Conv2DOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::Conv2DOptionsT *AsConv2DOptions() const { + return type == BuiltinOptions_Conv2DOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::DepthwiseConv2DOptionsT *AsDepthwiseConv2DOptions() { + return type == BuiltinOptions_DepthwiseConv2DOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::DepthwiseConv2DOptionsT *AsDepthwiseConv2DOptions() const { + return type == BuiltinOptions_DepthwiseConv2DOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::ConcatEmbeddingsOptionsT *AsConcatEmbeddingsOptions() { + return type == BuiltinOptions_ConcatEmbeddingsOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::ConcatEmbeddingsOptionsT *AsConcatEmbeddingsOptions() const { + return type == BuiltinOptions_ConcatEmbeddingsOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::LSHProjectionOptionsT *AsLSHProjectionOptions() { + return type == BuiltinOptions_LSHProjectionOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::LSHProjectionOptionsT *AsLSHProjectionOptions() const { + return type == BuiltinOptions_LSHProjectionOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::Pool2DOptionsT *AsPool2DOptions() { + return type == BuiltinOptions_Pool2DOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::Pool2DOptionsT *AsPool2DOptions() const { + return type == BuiltinOptions_Pool2DOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::SVDFOptionsT *AsSVDFOptions() { + return type == BuiltinOptions_SVDFOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::SVDFOptionsT *AsSVDFOptions() const { + return type == BuiltinOptions_SVDFOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::RNNOptionsT *AsRNNOptions() { + return type == BuiltinOptions_RNNOptions ? 
+ reinterpret_cast(value) : nullptr; + } + const tflite::RNNOptionsT *AsRNNOptions() const { + return type == BuiltinOptions_RNNOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::FullyConnectedOptionsT *AsFullyConnectedOptions() { + return type == BuiltinOptions_FullyConnectedOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::FullyConnectedOptionsT *AsFullyConnectedOptions() const { + return type == BuiltinOptions_FullyConnectedOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::SoftmaxOptionsT *AsSoftmaxOptions() { + return type == BuiltinOptions_SoftmaxOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::SoftmaxOptionsT *AsSoftmaxOptions() const { + return type == BuiltinOptions_SoftmaxOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::ConcatenationOptionsT *AsConcatenationOptions() { + return type == BuiltinOptions_ConcatenationOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::ConcatenationOptionsT *AsConcatenationOptions() const { + return type == BuiltinOptions_ConcatenationOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::AddOptionsT *AsAddOptions() { + return type == BuiltinOptions_AddOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::AddOptionsT *AsAddOptions() const { + return type == BuiltinOptions_AddOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::L2NormOptionsT *AsL2NormOptions() { + return type == BuiltinOptions_L2NormOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::L2NormOptionsT *AsL2NormOptions() const { + return type == BuiltinOptions_L2NormOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::LocalResponseNormalizationOptionsT *AsLocalResponseNormalizationOptions() { + return type == BuiltinOptions_LocalResponseNormalizationOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::LocalResponseNormalizationOptionsT *AsLocalResponseNormalizationOptions() const { + return type == BuiltinOptions_LocalResponseNormalizationOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::LSTMOptionsT *AsLSTMOptions() { + return type == BuiltinOptions_LSTMOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::LSTMOptionsT *AsLSTMOptions() const { + return type == BuiltinOptions_LSTMOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::ResizeBilinearOptionsT *AsResizeBilinearOptions() { + return type == BuiltinOptions_ResizeBilinearOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::ResizeBilinearOptionsT *AsResizeBilinearOptions() const { + return type == BuiltinOptions_ResizeBilinearOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::CallOptionsT *AsCallOptions() { + return type == BuiltinOptions_CallOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::CallOptionsT *AsCallOptions() const { + return type == BuiltinOptions_CallOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::ReshapeOptionsT *AsReshapeOptions() { + return type == BuiltinOptions_ReshapeOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::ReshapeOptionsT *AsReshapeOptions() const { + return type == BuiltinOptions_ReshapeOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::SkipGramOptionsT *AsSkipGramOptions() { + return type == BuiltinOptions_SkipGramOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::SkipGramOptionsT *AsSkipGramOptions() const { + return type == BuiltinOptions_SkipGramOptions ? 
+ reinterpret_cast(value) : nullptr; + } + tflite::SpaceToDepthOptionsT *AsSpaceToDepthOptions() { + return type == BuiltinOptions_SpaceToDepthOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::SpaceToDepthOptionsT *AsSpaceToDepthOptions() const { + return type == BuiltinOptions_SpaceToDepthOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::EmbeddingLookupSparseOptionsT *AsEmbeddingLookupSparseOptions() { + return type == BuiltinOptions_EmbeddingLookupSparseOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::EmbeddingLookupSparseOptionsT *AsEmbeddingLookupSparseOptions() const { + return type == BuiltinOptions_EmbeddingLookupSparseOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::MulOptionsT *AsMulOptions() { + return type == BuiltinOptions_MulOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::MulOptionsT *AsMulOptions() const { + return type == BuiltinOptions_MulOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::PadOptionsT *AsPadOptions() { + return type == BuiltinOptions_PadOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::PadOptionsT *AsPadOptions() const { + return type == BuiltinOptions_PadOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::GatherOptionsT *AsGatherOptions() { + return type == BuiltinOptions_GatherOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::GatherOptionsT *AsGatherOptions() const { + return type == BuiltinOptions_GatherOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::BatchToSpaceNDOptionsT *AsBatchToSpaceNDOptions() { + return type == BuiltinOptions_BatchToSpaceNDOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::BatchToSpaceNDOptionsT *AsBatchToSpaceNDOptions() const { + return type == BuiltinOptions_BatchToSpaceNDOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::SpaceToBatchNDOptionsT *AsSpaceToBatchNDOptions() { + return type == BuiltinOptions_SpaceToBatchNDOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::SpaceToBatchNDOptionsT *AsSpaceToBatchNDOptions() const { + return type == BuiltinOptions_SpaceToBatchNDOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::TransposeOptionsT *AsTransposeOptions() { + return type == BuiltinOptions_TransposeOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::TransposeOptionsT *AsTransposeOptions() const { + return type == BuiltinOptions_TransposeOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::ReducerOptionsT *AsReducerOptions() { + return type == BuiltinOptions_ReducerOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::ReducerOptionsT *AsReducerOptions() const { + return type == BuiltinOptions_ReducerOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::SubOptionsT *AsSubOptions() { + return type == BuiltinOptions_SubOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::SubOptionsT *AsSubOptions() const { + return type == BuiltinOptions_SubOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::DivOptionsT *AsDivOptions() { + return type == BuiltinOptions_DivOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::DivOptionsT *AsDivOptions() const { + return type == BuiltinOptions_DivOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::SqueezeOptionsT *AsSqueezeOptions() { + return type == BuiltinOptions_SqueezeOptions ? 
+ reinterpret_cast(value) : nullptr; + } + const tflite::SqueezeOptionsT *AsSqueezeOptions() const { + return type == BuiltinOptions_SqueezeOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::SequenceRNNOptionsT *AsSequenceRNNOptions() { + return type == BuiltinOptions_SequenceRNNOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::SequenceRNNOptionsT *AsSequenceRNNOptions() const { + return type == BuiltinOptions_SequenceRNNOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::StridedSliceOptionsT *AsStridedSliceOptions() { + return type == BuiltinOptions_StridedSliceOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::StridedSliceOptionsT *AsStridedSliceOptions() const { + return type == BuiltinOptions_StridedSliceOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::ExpOptionsT *AsExpOptions() { + return type == BuiltinOptions_ExpOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::ExpOptionsT *AsExpOptions() const { + return type == BuiltinOptions_ExpOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::TopKV2OptionsT *AsTopKV2Options() { + return type == BuiltinOptions_TopKV2Options ? + reinterpret_cast(value) : nullptr; + } + const tflite::TopKV2OptionsT *AsTopKV2Options() const { + return type == BuiltinOptions_TopKV2Options ? + reinterpret_cast(value) : nullptr; + } + tflite::SplitOptionsT *AsSplitOptions() { + return type == BuiltinOptions_SplitOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::SplitOptionsT *AsSplitOptions() const { + return type == BuiltinOptions_SplitOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::LogSoftmaxOptionsT *AsLogSoftmaxOptions() { + return type == BuiltinOptions_LogSoftmaxOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::LogSoftmaxOptionsT *AsLogSoftmaxOptions() const { + return type == BuiltinOptions_LogSoftmaxOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::CastOptionsT *AsCastOptions() { + return type == BuiltinOptions_CastOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::CastOptionsT *AsCastOptions() const { + return type == BuiltinOptions_CastOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::DequantizeOptionsT *AsDequantizeOptions() { + return type == BuiltinOptions_DequantizeOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::DequantizeOptionsT *AsDequantizeOptions() const { + return type == BuiltinOptions_DequantizeOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::MaximumMinimumOptionsT *AsMaximumMinimumOptions() { + return type == BuiltinOptions_MaximumMinimumOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::MaximumMinimumOptionsT *AsMaximumMinimumOptions() const { + return type == BuiltinOptions_MaximumMinimumOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::ArgMaxOptionsT *AsArgMaxOptions() { + return type == BuiltinOptions_ArgMaxOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::ArgMaxOptionsT *AsArgMaxOptions() const { + return type == BuiltinOptions_ArgMaxOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::LessOptionsT *AsLessOptions() { + return type == BuiltinOptions_LessOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::LessOptionsT *AsLessOptions() const { + return type == BuiltinOptions_LessOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::NegOptionsT *AsNegOptions() { + return type == BuiltinOptions_NegOptions ? 
+ reinterpret_cast(value) : nullptr; + } + const tflite::NegOptionsT *AsNegOptions() const { + return type == BuiltinOptions_NegOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::PadV2OptionsT *AsPadV2Options() { + return type == BuiltinOptions_PadV2Options ? + reinterpret_cast(value) : nullptr; + } + const tflite::PadV2OptionsT *AsPadV2Options() const { + return type == BuiltinOptions_PadV2Options ? + reinterpret_cast(value) : nullptr; + } + tflite::GreaterOptionsT *AsGreaterOptions() { + return type == BuiltinOptions_GreaterOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::GreaterOptionsT *AsGreaterOptions() const { + return type == BuiltinOptions_GreaterOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::GreaterEqualOptionsT *AsGreaterEqualOptions() { + return type == BuiltinOptions_GreaterEqualOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::GreaterEqualOptionsT *AsGreaterEqualOptions() const { + return type == BuiltinOptions_GreaterEqualOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::LessEqualOptionsT *AsLessEqualOptions() { + return type == BuiltinOptions_LessEqualOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::LessEqualOptionsT *AsLessEqualOptions() const { + return type == BuiltinOptions_LessEqualOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::SelectOptionsT *AsSelectOptions() { + return type == BuiltinOptions_SelectOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::SelectOptionsT *AsSelectOptions() const { + return type == BuiltinOptions_SelectOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::SliceOptionsT *AsSliceOptions() { + return type == BuiltinOptions_SliceOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::SliceOptionsT *AsSliceOptions() const { + return type == BuiltinOptions_SliceOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::TransposeConvOptionsT *AsTransposeConvOptions() { + return type == BuiltinOptions_TransposeConvOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::TransposeConvOptionsT *AsTransposeConvOptions() const { + return type == BuiltinOptions_TransposeConvOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::SparseToDenseOptionsT *AsSparseToDenseOptions() { + return type == BuiltinOptions_SparseToDenseOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::SparseToDenseOptionsT *AsSparseToDenseOptions() const { + return type == BuiltinOptions_SparseToDenseOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::TileOptionsT *AsTileOptions() { + return type == BuiltinOptions_TileOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::TileOptionsT *AsTileOptions() const { + return type == BuiltinOptions_TileOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::ExpandDimsOptionsT *AsExpandDimsOptions() { + return type == BuiltinOptions_ExpandDimsOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::ExpandDimsOptionsT *AsExpandDimsOptions() const { + return type == BuiltinOptions_ExpandDimsOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::EqualOptionsT *AsEqualOptions() { + return type == BuiltinOptions_EqualOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::EqualOptionsT *AsEqualOptions() const { + return type == BuiltinOptions_EqualOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::NotEqualOptionsT *AsNotEqualOptions() { + return type == BuiltinOptions_NotEqualOptions ? 
+ reinterpret_cast(value) : nullptr; + } + const tflite::NotEqualOptionsT *AsNotEqualOptions() const { + return type == BuiltinOptions_NotEqualOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::ShapeOptionsT *AsShapeOptions() { + return type == BuiltinOptions_ShapeOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::ShapeOptionsT *AsShapeOptions() const { + return type == BuiltinOptions_ShapeOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::PowOptionsT *AsPowOptions() { + return type == BuiltinOptions_PowOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::PowOptionsT *AsPowOptions() const { + return type == BuiltinOptions_PowOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::ArgMinOptionsT *AsArgMinOptions() { + return type == BuiltinOptions_ArgMinOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::ArgMinOptionsT *AsArgMinOptions() const { + return type == BuiltinOptions_ArgMinOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::FakeQuantOptionsT *AsFakeQuantOptions() { + return type == BuiltinOptions_FakeQuantOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::FakeQuantOptionsT *AsFakeQuantOptions() const { + return type == BuiltinOptions_FakeQuantOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::PackOptionsT *AsPackOptions() { + return type == BuiltinOptions_PackOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::PackOptionsT *AsPackOptions() const { + return type == BuiltinOptions_PackOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::LogicalOrOptionsT *AsLogicalOrOptions() { + return type == BuiltinOptions_LogicalOrOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::LogicalOrOptionsT *AsLogicalOrOptions() const { + return type == BuiltinOptions_LogicalOrOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::OneHotOptionsT *AsOneHotOptions() { + return type == BuiltinOptions_OneHotOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::OneHotOptionsT *AsOneHotOptions() const { + return type == BuiltinOptions_OneHotOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::LogicalAndOptionsT *AsLogicalAndOptions() { + return type == BuiltinOptions_LogicalAndOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::LogicalAndOptionsT *AsLogicalAndOptions() const { + return type == BuiltinOptions_LogicalAndOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::LogicalNotOptionsT *AsLogicalNotOptions() { + return type == BuiltinOptions_LogicalNotOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::LogicalNotOptionsT *AsLogicalNotOptions() const { + return type == BuiltinOptions_LogicalNotOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::UnpackOptionsT *AsUnpackOptions() { + return type == BuiltinOptions_UnpackOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::UnpackOptionsT *AsUnpackOptions() const { + return type == BuiltinOptions_UnpackOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::FloorDivOptionsT *AsFloorDivOptions() { + return type == BuiltinOptions_FloorDivOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::FloorDivOptionsT *AsFloorDivOptions() const { + return type == BuiltinOptions_FloorDivOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::SquareOptionsT *AsSquareOptions() { + return type == BuiltinOptions_SquareOptions ? 
+ reinterpret_cast(value) : nullptr; + } + const tflite::SquareOptionsT *AsSquareOptions() const { + return type == BuiltinOptions_SquareOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::ZerosLikeOptionsT *AsZerosLikeOptions() { + return type == BuiltinOptions_ZerosLikeOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::ZerosLikeOptionsT *AsZerosLikeOptions() const { + return type == BuiltinOptions_ZerosLikeOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::FillOptionsT *AsFillOptions() { + return type == BuiltinOptions_FillOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::FillOptionsT *AsFillOptions() const { + return type == BuiltinOptions_FillOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::BidirectionalSequenceLSTMOptionsT *AsBidirectionalSequenceLSTMOptions() { + return type == BuiltinOptions_BidirectionalSequenceLSTMOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::BidirectionalSequenceLSTMOptionsT *AsBidirectionalSequenceLSTMOptions() const { + return type == BuiltinOptions_BidirectionalSequenceLSTMOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::BidirectionalSequenceRNNOptionsT *AsBidirectionalSequenceRNNOptions() { + return type == BuiltinOptions_BidirectionalSequenceRNNOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::BidirectionalSequenceRNNOptionsT *AsBidirectionalSequenceRNNOptions() const { + return type == BuiltinOptions_BidirectionalSequenceRNNOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::UnidirectionalSequenceLSTMOptionsT *AsUnidirectionalSequenceLSTMOptions() { + return type == BuiltinOptions_UnidirectionalSequenceLSTMOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::UnidirectionalSequenceLSTMOptionsT *AsUnidirectionalSequenceLSTMOptions() const { + return type == BuiltinOptions_UnidirectionalSequenceLSTMOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::FloorModOptionsT *AsFloorModOptions() { + return type == BuiltinOptions_FloorModOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::FloorModOptionsT *AsFloorModOptions() const { + return type == BuiltinOptions_FloorModOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::RangeOptionsT *AsRangeOptions() { + return type == BuiltinOptions_RangeOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::RangeOptionsT *AsRangeOptions() const { + return type == BuiltinOptions_RangeOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::ResizeNearestNeighborOptionsT *AsResizeNearestNeighborOptions() { + return type == BuiltinOptions_ResizeNearestNeighborOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::ResizeNearestNeighborOptionsT *AsResizeNearestNeighborOptions() const { + return type == BuiltinOptions_ResizeNearestNeighborOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::LeakyReluOptionsT *AsLeakyReluOptions() { + return type == BuiltinOptions_LeakyReluOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::LeakyReluOptionsT *AsLeakyReluOptions() const { + return type == BuiltinOptions_LeakyReluOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::SquaredDifferenceOptionsT *AsSquaredDifferenceOptions() { + return type == BuiltinOptions_SquaredDifferenceOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::SquaredDifferenceOptionsT *AsSquaredDifferenceOptions() const { + return type == BuiltinOptions_SquaredDifferenceOptions ? 
+ reinterpret_cast(value) : nullptr; + } + tflite::MirrorPadOptionsT *AsMirrorPadOptions() { + return type == BuiltinOptions_MirrorPadOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::MirrorPadOptionsT *AsMirrorPadOptions() const { + return type == BuiltinOptions_MirrorPadOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::AbsOptionsT *AsAbsOptions() { + return type == BuiltinOptions_AbsOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::AbsOptionsT *AsAbsOptions() const { + return type == BuiltinOptions_AbsOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::SplitVOptionsT *AsSplitVOptions() { + return type == BuiltinOptions_SplitVOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::SplitVOptionsT *AsSplitVOptions() const { + return type == BuiltinOptions_SplitVOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::UniqueOptionsT *AsUniqueOptions() { + return type == BuiltinOptions_UniqueOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::UniqueOptionsT *AsUniqueOptions() const { + return type == BuiltinOptions_UniqueOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::ReverseV2OptionsT *AsReverseV2Options() { + return type == BuiltinOptions_ReverseV2Options ? + reinterpret_cast(value) : nullptr; + } + const tflite::ReverseV2OptionsT *AsReverseV2Options() const { + return type == BuiltinOptions_ReverseV2Options ? + reinterpret_cast(value) : nullptr; + } + tflite::AddNOptionsT *AsAddNOptions() { + return type == BuiltinOptions_AddNOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::AddNOptionsT *AsAddNOptions() const { + return type == BuiltinOptions_AddNOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::GatherNdOptionsT *AsGatherNdOptions() { + return type == BuiltinOptions_GatherNdOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::GatherNdOptionsT *AsGatherNdOptions() const { + return type == BuiltinOptions_GatherNdOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::CosOptionsT *AsCosOptions() { + return type == BuiltinOptions_CosOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::CosOptionsT *AsCosOptions() const { + return type == BuiltinOptions_CosOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::WhereOptionsT *AsWhereOptions() { + return type == BuiltinOptions_WhereOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::WhereOptionsT *AsWhereOptions() const { + return type == BuiltinOptions_WhereOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::RankOptionsT *AsRankOptions() { + return type == BuiltinOptions_RankOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::RankOptionsT *AsRankOptions() const { + return type == BuiltinOptions_RankOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::ReverseSequenceOptionsT *AsReverseSequenceOptions() { + return type == BuiltinOptions_ReverseSequenceOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::ReverseSequenceOptionsT *AsReverseSequenceOptions() const { + return type == BuiltinOptions_ReverseSequenceOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::MatrixDiagOptionsT *AsMatrixDiagOptions() { + return type == BuiltinOptions_MatrixDiagOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::MatrixDiagOptionsT *AsMatrixDiagOptions() const { + return type == BuiltinOptions_MatrixDiagOptions ? 
+ reinterpret_cast(value) : nullptr; + } + tflite::QuantizeOptionsT *AsQuantizeOptions() { + return type == BuiltinOptions_QuantizeOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::QuantizeOptionsT *AsQuantizeOptions() const { + return type == BuiltinOptions_QuantizeOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::MatrixSetDiagOptionsT *AsMatrixSetDiagOptions() { + return type == BuiltinOptions_MatrixSetDiagOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::MatrixSetDiagOptionsT *AsMatrixSetDiagOptions() const { + return type == BuiltinOptions_MatrixSetDiagOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::HardSwishOptionsT *AsHardSwishOptions() { + return type == BuiltinOptions_HardSwishOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::HardSwishOptionsT *AsHardSwishOptions() const { + return type == BuiltinOptions_HardSwishOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::IfOptionsT *AsIfOptions() { + return type == BuiltinOptions_IfOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::IfOptionsT *AsIfOptions() const { + return type == BuiltinOptions_IfOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::WhileOptionsT *AsWhileOptions() { + return type == BuiltinOptions_WhileOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::WhileOptionsT *AsWhileOptions() const { + return type == BuiltinOptions_WhileOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::DepthToSpaceOptionsT *AsDepthToSpaceOptions() { + return type == BuiltinOptions_DepthToSpaceOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::DepthToSpaceOptionsT *AsDepthToSpaceOptions() const { + return type == BuiltinOptions_DepthToSpaceOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::NonMaxSuppressionV4OptionsT *AsNonMaxSuppressionV4Options() { + return type == BuiltinOptions_NonMaxSuppressionV4Options ? + reinterpret_cast(value) : nullptr; + } + const tflite::NonMaxSuppressionV4OptionsT *AsNonMaxSuppressionV4Options() const { + return type == BuiltinOptions_NonMaxSuppressionV4Options ? + reinterpret_cast(value) : nullptr; + } + tflite::NonMaxSuppressionV5OptionsT *AsNonMaxSuppressionV5Options() { + return type == BuiltinOptions_NonMaxSuppressionV5Options ? + reinterpret_cast(value) : nullptr; + } + const tflite::NonMaxSuppressionV5OptionsT *AsNonMaxSuppressionV5Options() const { + return type == BuiltinOptions_NonMaxSuppressionV5Options ? + reinterpret_cast(value) : nullptr; + } + tflite::ScatterNdOptionsT *AsScatterNdOptions() { + return type == BuiltinOptions_ScatterNdOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::ScatterNdOptionsT *AsScatterNdOptions() const { + return type == BuiltinOptions_ScatterNdOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::SelectV2OptionsT *AsSelectV2Options() { + return type == BuiltinOptions_SelectV2Options ? + reinterpret_cast(value) : nullptr; + } + const tflite::SelectV2OptionsT *AsSelectV2Options() const { + return type == BuiltinOptions_SelectV2Options ? + reinterpret_cast(value) : nullptr; + } + tflite::DensifyOptionsT *AsDensifyOptions() { + return type == BuiltinOptions_DensifyOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::DensifyOptionsT *AsDensifyOptions() const { + return type == BuiltinOptions_DensifyOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::SegmentSumOptionsT *AsSegmentSumOptions() { + return type == BuiltinOptions_SegmentSumOptions ? 
+ reinterpret_cast(value) : nullptr; + } + const tflite::SegmentSumOptionsT *AsSegmentSumOptions() const { + return type == BuiltinOptions_SegmentSumOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::BatchMatMulOptionsT *AsBatchMatMulOptions() { + return type == BuiltinOptions_BatchMatMulOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::BatchMatMulOptionsT *AsBatchMatMulOptions() const { + return type == BuiltinOptions_BatchMatMulOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::CumsumOptionsT *AsCumsumOptions() { + return type == BuiltinOptions_CumsumOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::CumsumOptionsT *AsCumsumOptions() const { + return type == BuiltinOptions_CumsumOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::CallOnceOptionsT *AsCallOnceOptions() { + return type == BuiltinOptions_CallOnceOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::CallOnceOptionsT *AsCallOnceOptions() const { + return type == BuiltinOptions_CallOnceOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::BroadcastToOptionsT *AsBroadcastToOptions() { + return type == BuiltinOptions_BroadcastToOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::BroadcastToOptionsT *AsBroadcastToOptions() const { + return type == BuiltinOptions_BroadcastToOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::Rfft2dOptionsT *AsRfft2dOptions() { + return type == BuiltinOptions_Rfft2dOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::Rfft2dOptionsT *AsRfft2dOptions() const { + return type == BuiltinOptions_Rfft2dOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::Conv3DOptionsT *AsConv3DOptions() { + return type == BuiltinOptions_Conv3DOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::Conv3DOptionsT *AsConv3DOptions() const { + return type == BuiltinOptions_Conv3DOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::HashtableOptionsT *AsHashtableOptions() { + return type == BuiltinOptions_HashtableOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::HashtableOptionsT *AsHashtableOptions() const { + return type == BuiltinOptions_HashtableOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::HashtableFindOptionsT *AsHashtableFindOptions() { + return type == BuiltinOptions_HashtableFindOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::HashtableFindOptionsT *AsHashtableFindOptions() const { + return type == BuiltinOptions_HashtableFindOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::HashtableImportOptionsT *AsHashtableImportOptions() { + return type == BuiltinOptions_HashtableImportOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::HashtableImportOptionsT *AsHashtableImportOptions() const { + return type == BuiltinOptions_HashtableImportOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::HashtableSizeOptionsT *AsHashtableSizeOptions() { + return type == BuiltinOptions_HashtableSizeOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::HashtableSizeOptionsT *AsHashtableSizeOptions() const { + return type == BuiltinOptions_HashtableSizeOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::VarHandleOptionsT *AsVarHandleOptions() { + return type == BuiltinOptions_VarHandleOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::VarHandleOptionsT *AsVarHandleOptions() const { + return type == BuiltinOptions_VarHandleOptions ? 
+ reinterpret_cast(value) : nullptr; + } + tflite::ReadVariableOptionsT *AsReadVariableOptions() { + return type == BuiltinOptions_ReadVariableOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::ReadVariableOptionsT *AsReadVariableOptions() const { + return type == BuiltinOptions_ReadVariableOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::AssignVariableOptionsT *AsAssignVariableOptions() { + return type == BuiltinOptions_AssignVariableOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::AssignVariableOptionsT *AsAssignVariableOptions() const { + return type == BuiltinOptions_AssignVariableOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::RandomOptionsT *AsRandomOptions() { + return type == BuiltinOptions_RandomOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::RandomOptionsT *AsRandomOptions() const { + return type == BuiltinOptions_RandomOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::BucketizeOptionsT *AsBucketizeOptions() { + return type == BuiltinOptions_BucketizeOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::BucketizeOptionsT *AsBucketizeOptions() const { + return type == BuiltinOptions_BucketizeOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::GeluOptionsT *AsGeluOptions() { + return type == BuiltinOptions_GeluOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::GeluOptionsT *AsGeluOptions() const { + return type == BuiltinOptions_GeluOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::DynamicUpdateSliceOptionsT *AsDynamicUpdateSliceOptions() { + return type == BuiltinOptions_DynamicUpdateSliceOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::DynamicUpdateSliceOptionsT *AsDynamicUpdateSliceOptions() const { + return type == BuiltinOptions_DynamicUpdateSliceOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::UnsortedSegmentProdOptionsT *AsUnsortedSegmentProdOptions() { + return type == BuiltinOptions_UnsortedSegmentProdOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::UnsortedSegmentProdOptionsT *AsUnsortedSegmentProdOptions() const { + return type == BuiltinOptions_UnsortedSegmentProdOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::UnsortedSegmentMaxOptionsT *AsUnsortedSegmentMaxOptions() { + return type == BuiltinOptions_UnsortedSegmentMaxOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::UnsortedSegmentMaxOptionsT *AsUnsortedSegmentMaxOptions() const { + return type == BuiltinOptions_UnsortedSegmentMaxOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::UnsortedSegmentMinOptionsT *AsUnsortedSegmentMinOptions() { + return type == BuiltinOptions_UnsortedSegmentMinOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::UnsortedSegmentMinOptionsT *AsUnsortedSegmentMinOptions() const { + return type == BuiltinOptions_UnsortedSegmentMinOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::UnsortedSegmentSumOptionsT *AsUnsortedSegmentSumOptions() { + return type == BuiltinOptions_UnsortedSegmentSumOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::UnsortedSegmentSumOptionsT *AsUnsortedSegmentSumOptions() const { + return type == BuiltinOptions_UnsortedSegmentSumOptions ? + reinterpret_cast(value) : nullptr; + } + tflite::ATan2OptionsT *AsATan2Options() { + return type == BuiltinOptions_ATan2Options ? + reinterpret_cast(value) : nullptr; + } + const tflite::ATan2OptionsT *AsATan2Options() const { + return type == BuiltinOptions_ATan2Options ? 
+ reinterpret_cast(value) : nullptr; + } + tflite::SignOptionsT *AsSignOptions() { + return type == BuiltinOptions_SignOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::SignOptionsT *AsSignOptions() const { + return type == BuiltinOptions_SignOptions ? + reinterpret_cast(value) : nullptr; + } +}; + +bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj, BuiltinOptions type); +bool VerifyBuiltinOptionsVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector> *values, const flatbuffers::Vector *types); + +enum Padding : int8_t { + Padding_SAME = 0, + Padding_VALID = 1, + Padding_MIN = Padding_SAME, + Padding_MAX = Padding_VALID +}; + +inline const Padding (&EnumValuesPadding())[2] { + static const Padding values[] = { + Padding_SAME, + Padding_VALID + }; + return values; +} + +inline const char * const *EnumNamesPadding() { + static const char * const names[3] = { + "SAME", + "VALID", + nullptr + }; + return names; +} + +inline const char *EnumNamePadding(Padding e) { + if (flatbuffers::IsOutRange(e, Padding_SAME, Padding_VALID)) return ""; + const size_t index = static_cast(e); + return EnumNamesPadding()[index]; +} + +enum ActivationFunctionType : int8_t { + ActivationFunctionType_NONE = 0, + ActivationFunctionType_RELU = 1, + ActivationFunctionType_RELU_N1_TO_1 = 2, + ActivationFunctionType_RELU6 = 3, + ActivationFunctionType_TANH = 4, + ActivationFunctionType_SIGN_BIT = 5, + ActivationFunctionType_MIN = ActivationFunctionType_NONE, + ActivationFunctionType_MAX = ActivationFunctionType_SIGN_BIT +}; + +inline const ActivationFunctionType (&EnumValuesActivationFunctionType())[6] { + static const ActivationFunctionType values[] = { + ActivationFunctionType_NONE, + ActivationFunctionType_RELU, + ActivationFunctionType_RELU_N1_TO_1, + ActivationFunctionType_RELU6, + ActivationFunctionType_TANH, + ActivationFunctionType_SIGN_BIT + }; + return values; +} + +inline const char * const *EnumNamesActivationFunctionType() { + static const char * const names[7] = { + "NONE", + "RELU", + "RELU_N1_TO_1", + "RELU6", + "TANH", + "SIGN_BIT", + nullptr + }; + return names; +} + +inline const char *EnumNameActivationFunctionType(ActivationFunctionType e) { + if (flatbuffers::IsOutRange(e, ActivationFunctionType_NONE, ActivationFunctionType_SIGN_BIT)) return ""; + const size_t index = static_cast(e); + return EnumNamesActivationFunctionType()[index]; +} + +enum LSHProjectionType : int8_t { + LSHProjectionType_UNKNOWN = 0, + LSHProjectionType_SPARSE = 1, + LSHProjectionType_DENSE = 2, + LSHProjectionType_MIN = LSHProjectionType_UNKNOWN, + LSHProjectionType_MAX = LSHProjectionType_DENSE +}; + +inline const LSHProjectionType (&EnumValuesLSHProjectionType())[3] { + static const LSHProjectionType values[] = { + LSHProjectionType_UNKNOWN, + LSHProjectionType_SPARSE, + LSHProjectionType_DENSE + }; + return values; +} + +inline const char * const *EnumNamesLSHProjectionType() { + static const char * const names[4] = { + "UNKNOWN", + "SPARSE", + "DENSE", + nullptr + }; + return names; +} + +inline const char *EnumNameLSHProjectionType(LSHProjectionType e) { + if (flatbuffers::IsOutRange(e, LSHProjectionType_UNKNOWN, LSHProjectionType_DENSE)) return ""; + const size_t index = static_cast(e); + return EnumNamesLSHProjectionType()[index]; +} + +enum FullyConnectedOptionsWeightsFormat : int8_t { + FullyConnectedOptionsWeightsFormat_DEFAULT = 0, + FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8 = 1, + FullyConnectedOptionsWeightsFormat_MIN = 
FullyConnectedOptionsWeightsFormat_DEFAULT, + FullyConnectedOptionsWeightsFormat_MAX = FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8 +}; + +inline const FullyConnectedOptionsWeightsFormat (&EnumValuesFullyConnectedOptionsWeightsFormat())[2] { + static const FullyConnectedOptionsWeightsFormat values[] = { + FullyConnectedOptionsWeightsFormat_DEFAULT, + FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8 + }; + return values; +} + +inline const char * const *EnumNamesFullyConnectedOptionsWeightsFormat() { + static const char * const names[3] = { + "DEFAULT", + "SHUFFLED4x16INT8", + nullptr + }; + return names; +} + +inline const char *EnumNameFullyConnectedOptionsWeightsFormat(FullyConnectedOptionsWeightsFormat e) { + if (flatbuffers::IsOutRange(e, FullyConnectedOptionsWeightsFormat_DEFAULT, FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8)) return ""; + const size_t index = static_cast(e); + return EnumNamesFullyConnectedOptionsWeightsFormat()[index]; +} + +enum LSTMKernelType : int8_t { + LSTMKernelType_FULL = 0, + LSTMKernelType_BASIC = 1, + LSTMKernelType_MIN = LSTMKernelType_FULL, + LSTMKernelType_MAX = LSTMKernelType_BASIC +}; + +inline const LSTMKernelType (&EnumValuesLSTMKernelType())[2] { + static const LSTMKernelType values[] = { + LSTMKernelType_FULL, + LSTMKernelType_BASIC + }; + return values; +} + +inline const char * const *EnumNamesLSTMKernelType() { + static const char * const names[3] = { + "FULL", + "BASIC", + nullptr + }; + return names; +} + +inline const char *EnumNameLSTMKernelType(LSTMKernelType e) { + if (flatbuffers::IsOutRange(e, LSTMKernelType_FULL, LSTMKernelType_BASIC)) return ""; + const size_t index = static_cast(e); + return EnumNamesLSTMKernelType()[index]; +} + +enum CombinerType : int8_t { + CombinerType_SUM = 0, + CombinerType_MEAN = 1, + CombinerType_SQRTN = 2, + CombinerType_MIN = CombinerType_SUM, + CombinerType_MAX = CombinerType_SQRTN +}; + +inline const CombinerType (&EnumValuesCombinerType())[3] { + static const CombinerType values[] = { + CombinerType_SUM, + CombinerType_MEAN, + CombinerType_SQRTN + }; + return values; +} + +inline const char * const *EnumNamesCombinerType() { + static const char * const names[4] = { + "SUM", + "MEAN", + "SQRTN", + nullptr + }; + return names; +} + +inline const char *EnumNameCombinerType(CombinerType e) { + if (flatbuffers::IsOutRange(e, CombinerType_SUM, CombinerType_SQRTN)) return ""; + const size_t index = static_cast(e); + return EnumNamesCombinerType()[index]; +} + +enum MirrorPadMode : int8_t { + MirrorPadMode_REFLECT = 0, + MirrorPadMode_SYMMETRIC = 1, + MirrorPadMode_MIN = MirrorPadMode_REFLECT, + MirrorPadMode_MAX = MirrorPadMode_SYMMETRIC +}; + +inline const MirrorPadMode (&EnumValuesMirrorPadMode())[2] { + static const MirrorPadMode values[] = { + MirrorPadMode_REFLECT, + MirrorPadMode_SYMMETRIC + }; + return values; +} + +inline const char * const *EnumNamesMirrorPadMode() { + static const char * const names[3] = { + "REFLECT", + "SYMMETRIC", + nullptr + }; + return names; +} + +inline const char *EnumNameMirrorPadMode(MirrorPadMode e) { + if (flatbuffers::IsOutRange(e, MirrorPadMode_REFLECT, MirrorPadMode_SYMMETRIC)) return ""; + const size_t index = static_cast(e); + return EnumNamesMirrorPadMode()[index]; +} + +enum CustomOptionsFormat : int8_t { + CustomOptionsFormat_FLEXBUFFERS = 0, + CustomOptionsFormat_MIN = CustomOptionsFormat_FLEXBUFFERS, + CustomOptionsFormat_MAX = CustomOptionsFormat_FLEXBUFFERS +}; + +inline const CustomOptionsFormat (&EnumValuesCustomOptionsFormat())[1] { + 
  static const CustomOptionsFormat values[] = {
+    CustomOptionsFormat_FLEXBUFFERS
+  };
+  return values;
+}
+
+inline const char * const *EnumNamesCustomOptionsFormat() {
+  static const char * const names[2] = {
+    "FLEXBUFFERS",
+    nullptr
+  };
+  return names;
+}
+
+inline const char *EnumNameCustomOptionsFormat(CustomOptionsFormat e) {
+  if (flatbuffers::IsOutRange(e, CustomOptionsFormat_FLEXBUFFERS, CustomOptionsFormat_FLEXBUFFERS)) return "";
+  const size_t index = static_cast<size_t>(e);
+  return EnumNamesCustomOptionsFormat()[index];
+}
+
+struct CustomQuantizationT : public flatbuffers::NativeTable {
+  typedef CustomQuantization TableType;
+  std::vector<uint8_t> custom{};
+};
+
+struct CustomQuantization FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  typedef CustomQuantizationT NativeTableType;
+  typedef CustomQuantizationBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
+    VT_CUSTOM = 4
+  };
+  const flatbuffers::Vector<uint8_t> *custom() const {
+    return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_CUSTOM);
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) &&
+           VerifyOffset(verifier, VT_CUSTOM) &&
+           verifier.VerifyVector(custom()) &&
+           verifier.EndTable();
+  }
+  CustomQuantizationT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(CustomQuantizationT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<CustomQuantization> Pack(flatbuffers::FlatBufferBuilder &_fbb, const CustomQuantizationT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct CustomQuantizationBuilder {
+  typedef CustomQuantization Table;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_custom(flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom) {
+    fbb_.AddOffset(CustomQuantization::VT_CUSTOM, custom);
+  }
+  explicit CustomQuantizationBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<CustomQuantization> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<CustomQuantization>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<CustomQuantization> CreateCustomQuantization(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom = 0) {
+  CustomQuantizationBuilder builder_(_fbb);
+  builder_.add_custom(custom);
+  return builder_.Finish();
+}
+
+inline flatbuffers::Offset<CustomQuantization> CreateCustomQuantizationDirect(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    const std::vector<uint8_t> *custom = nullptr) {
+  if (custom) { _fbb.ForceVectorAlignment(custom->size(), sizeof(uint8_t), 16); }
+  auto custom__ = custom ?
_fbb.CreateVector(*custom) : 0; + return tflite::CreateCustomQuantization( + _fbb, + custom__); +} + +flatbuffers::Offset CreateCustomQuantization(flatbuffers::FlatBufferBuilder &_fbb, const CustomQuantizationT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct QuantizationParametersT : public flatbuffers::NativeTable { + typedef QuantizationParameters TableType; + std::vector min{}; + std::vector max{}; + std::vector scale{}; + std::vector zero_point{}; + tflite::QuantizationDetailsUnion details{}; + int32_t quantized_dimension = 0; +}; + +struct QuantizationParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef QuantizationParametersT NativeTableType; + typedef QuantizationParametersBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_MIN = 4, + VT_MAX = 6, + VT_SCALE = 8, + VT_ZERO_POINT = 10, + VT_DETAILS_TYPE = 12, + VT_DETAILS = 14, + VT_QUANTIZED_DIMENSION = 16 + }; + const flatbuffers::Vector *min() const { + return GetPointer *>(VT_MIN); + } + const flatbuffers::Vector *max() const { + return GetPointer *>(VT_MAX); + } + const flatbuffers::Vector *scale() const { + return GetPointer *>(VT_SCALE); + } + const flatbuffers::Vector *zero_point() const { + return GetPointer *>(VT_ZERO_POINT); + } + tflite::QuantizationDetails details_type() const { + return static_cast(GetField(VT_DETAILS_TYPE, 0)); + } + const void *details() const { + return GetPointer(VT_DETAILS); + } + template const T *details_as() const; + const tflite::CustomQuantization *details_as_CustomQuantization() const { + return details_type() == tflite::QuantizationDetails_CustomQuantization ? static_cast(details()) : nullptr; + } + int32_t quantized_dimension() const { + return GetField(VT_QUANTIZED_DIMENSION, 0); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_MIN) && + verifier.VerifyVector(min()) && + VerifyOffset(verifier, VT_MAX) && + verifier.VerifyVector(max()) && + VerifyOffset(verifier, VT_SCALE) && + verifier.VerifyVector(scale()) && + VerifyOffset(verifier, VT_ZERO_POINT) && + verifier.VerifyVector(zero_point()) && + VerifyField(verifier, VT_DETAILS_TYPE, 1) && + VerifyOffset(verifier, VT_DETAILS) && + VerifyQuantizationDetails(verifier, details(), details_type()) && + VerifyField(verifier, VT_QUANTIZED_DIMENSION, 4) && + verifier.EndTable(); + } + QuantizationParametersT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(QuantizationParametersT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const QuantizationParametersT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +template<> inline const tflite::CustomQuantization *QuantizationParameters::details_as() const { + return details_as_CustomQuantization(); +} + +struct QuantizationParametersBuilder { + typedef QuantizationParameters Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_min(flatbuffers::Offset> min) { + fbb_.AddOffset(QuantizationParameters::VT_MIN, min); + } + void add_max(flatbuffers::Offset> max) { + fbb_.AddOffset(QuantizationParameters::VT_MAX, max); + } + void add_scale(flatbuffers::Offset> scale) { + fbb_.AddOffset(QuantizationParameters::VT_SCALE, scale); + } + void add_zero_point(flatbuffers::Offset> zero_point) { + fbb_.AddOffset(QuantizationParameters::VT_ZERO_POINT, 
zero_point); + } + void add_details_type(tflite::QuantizationDetails details_type) { + fbb_.AddElement(QuantizationParameters::VT_DETAILS_TYPE, static_cast(details_type), 0); + } + void add_details(flatbuffers::Offset details) { + fbb_.AddOffset(QuantizationParameters::VT_DETAILS, details); + } + void add_quantized_dimension(int32_t quantized_dimension) { + fbb_.AddElement(QuantizationParameters::VT_QUANTIZED_DIMENSION, quantized_dimension, 0); + } + explicit QuantizationParametersBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateQuantizationParameters( + flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset> min = 0, + flatbuffers::Offset> max = 0, + flatbuffers::Offset> scale = 0, + flatbuffers::Offset> zero_point = 0, + tflite::QuantizationDetails details_type = tflite::QuantizationDetails_NONE, + flatbuffers::Offset details = 0, + int32_t quantized_dimension = 0) { + QuantizationParametersBuilder builder_(_fbb); + builder_.add_quantized_dimension(quantized_dimension); + builder_.add_details(details); + builder_.add_zero_point(zero_point); + builder_.add_scale(scale); + builder_.add_max(max); + builder_.add_min(min); + builder_.add_details_type(details_type); + return builder_.Finish(); +} + +inline flatbuffers::Offset CreateQuantizationParametersDirect( + flatbuffers::FlatBufferBuilder &_fbb, + const std::vector *min = nullptr, + const std::vector *max = nullptr, + const std::vector *scale = nullptr, + const std::vector *zero_point = nullptr, + tflite::QuantizationDetails details_type = tflite::QuantizationDetails_NONE, + flatbuffers::Offset details = 0, + int32_t quantized_dimension = 0) { + auto min__ = min ? _fbb.CreateVector(*min) : 0; + auto max__ = max ? _fbb.CreateVector(*max) : 0; + auto scale__ = scale ? _fbb.CreateVector(*scale) : 0; + auto zero_point__ = zero_point ? 
_fbb.CreateVector<int64_t>(*zero_point) : 0;
+  return tflite::CreateQuantizationParameters(
+      _fbb,
+      min__,
+      max__,
+      scale__,
+      zero_point__,
+      details_type,
+      details,
+      quantized_dimension);
+}
+
+flatbuffers::Offset<QuantizationParameters> CreateQuantizationParameters(flatbuffers::FlatBufferBuilder &_fbb, const QuantizationParametersT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct Int32VectorT : public flatbuffers::NativeTable {
+  typedef Int32Vector TableType;
+  std::vector<int32_t> values{};
+};
+
+struct Int32Vector FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  typedef Int32VectorT NativeTableType;
+  typedef Int32VectorBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
+    VT_VALUES = 4
+  };
+  const flatbuffers::Vector<int32_t> *values() const {
+    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_VALUES);
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) &&
+           VerifyOffset(verifier, VT_VALUES) &&
+           verifier.VerifyVector(values()) &&
+           verifier.EndTable();
+  }
+  Int32VectorT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(Int32VectorT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<Int32Vector> Pack(flatbuffers::FlatBufferBuilder &_fbb, const Int32VectorT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct Int32VectorBuilder {
+  typedef Int32Vector Table;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_values(flatbuffers::Offset<flatbuffers::Vector<int32_t>> values) {
+    fbb_.AddOffset(Int32Vector::VT_VALUES, values);
+  }
+  explicit Int32VectorBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<Int32Vector> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<Int32Vector>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<Int32Vector> CreateInt32Vector(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    flatbuffers::Offset<flatbuffers::Vector<int32_t>> values = 0) {
+  Int32VectorBuilder builder_(_fbb);
+  builder_.add_values(values);
+  return builder_.Finish();
+}
+
+inline flatbuffers::Offset<Int32Vector> CreateInt32VectorDirect(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    const std::vector<int32_t> *values = nullptr) {
+  auto values__ = values ?
_fbb.CreateVector(*values) : 0; + return tflite::CreateInt32Vector( + _fbb, + values__); +} + +flatbuffers::Offset CreateInt32Vector(flatbuffers::FlatBufferBuilder &_fbb, const Int32VectorT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct Uint16VectorT : public flatbuffers::NativeTable { + typedef Uint16Vector TableType; + std::vector values{}; +}; + +struct Uint16Vector FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef Uint16VectorT NativeTableType; + typedef Uint16VectorBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_VALUES = 4 + }; + const flatbuffers::Vector *values() const { + return GetPointer *>(VT_VALUES); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_VALUES) && + verifier.VerifyVector(values()) && + verifier.EndTable(); + } + Uint16VectorT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(Uint16VectorT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const Uint16VectorT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct Uint16VectorBuilder { + typedef Uint16Vector Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_values(flatbuffers::Offset> values) { + fbb_.AddOffset(Uint16Vector::VT_VALUES, values); + } + explicit Uint16VectorBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateUint16Vector( + flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset> values = 0) { + Uint16VectorBuilder builder_(_fbb); + builder_.add_values(values); + return builder_.Finish(); +} + +inline flatbuffers::Offset CreateUint16VectorDirect( + flatbuffers::FlatBufferBuilder &_fbb, + const std::vector *values = nullptr) { + if (values) { _fbb.ForceVectorAlignment(values->size(), sizeof(uint16_t), 4); } + auto values__ = values ? 
_fbb.CreateVector(*values) : 0; + return tflite::CreateUint16Vector( + _fbb, + values__); +} + +flatbuffers::Offset CreateUint16Vector(flatbuffers::FlatBufferBuilder &_fbb, const Uint16VectorT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct Uint8VectorT : public flatbuffers::NativeTable { + typedef Uint8Vector TableType; + std::vector values{}; +}; + +struct Uint8Vector FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef Uint8VectorT NativeTableType; + typedef Uint8VectorBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_VALUES = 4 + }; + const flatbuffers::Vector *values() const { + return GetPointer *>(VT_VALUES); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_VALUES) && + verifier.VerifyVector(values()) && + verifier.EndTable(); + } + Uint8VectorT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(Uint8VectorT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const Uint8VectorT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct Uint8VectorBuilder { + typedef Uint8Vector Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_values(flatbuffers::Offset> values) { + fbb_.AddOffset(Uint8Vector::VT_VALUES, values); + } + explicit Uint8VectorBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateUint8Vector( + flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset> values = 0) { + Uint8VectorBuilder builder_(_fbb); + builder_.add_values(values); + return builder_.Finish(); +} + +inline flatbuffers::Offset CreateUint8VectorDirect( + flatbuffers::FlatBufferBuilder &_fbb, + const std::vector *values = nullptr) { + if (values) { _fbb.ForceVectorAlignment(values->size(), sizeof(uint8_t), 4); } + auto values__ = values ? 
_fbb.CreateVector(*values) : 0; + return tflite::CreateUint8Vector( + _fbb, + values__); +} + +flatbuffers::Offset CreateUint8Vector(flatbuffers::FlatBufferBuilder &_fbb, const Uint8VectorT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct DimensionMetadataT : public flatbuffers::NativeTable { + typedef DimensionMetadata TableType; + tflite::DimensionType format = tflite::DimensionType_DENSE; + int32_t dense_size = 0; + tflite::SparseIndexVectorUnion array_segments{}; + tflite::SparseIndexVectorUnion array_indices{}; +}; + +struct DimensionMetadata FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef DimensionMetadataT NativeTableType; + typedef DimensionMetadataBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_FORMAT = 4, + VT_DENSE_SIZE = 6, + VT_ARRAY_SEGMENTS_TYPE = 8, + VT_ARRAY_SEGMENTS = 10, + VT_ARRAY_INDICES_TYPE = 12, + VT_ARRAY_INDICES = 14 + }; + tflite::DimensionType format() const { + return static_cast(GetField(VT_FORMAT, 0)); + } + int32_t dense_size() const { + return GetField(VT_DENSE_SIZE, 0); + } + tflite::SparseIndexVector array_segments_type() const { + return static_cast(GetField(VT_ARRAY_SEGMENTS_TYPE, 0)); + } + const void *array_segments() const { + return GetPointer(VT_ARRAY_SEGMENTS); + } + template const T *array_segments_as() const; + const tflite::Int32Vector *array_segments_as_Int32Vector() const { + return array_segments_type() == tflite::SparseIndexVector_Int32Vector ? static_cast(array_segments()) : nullptr; + } + const tflite::Uint16Vector *array_segments_as_Uint16Vector() const { + return array_segments_type() == tflite::SparseIndexVector_Uint16Vector ? static_cast(array_segments()) : nullptr; + } + const tflite::Uint8Vector *array_segments_as_Uint8Vector() const { + return array_segments_type() == tflite::SparseIndexVector_Uint8Vector ? static_cast(array_segments()) : nullptr; + } + tflite::SparseIndexVector array_indices_type() const { + return static_cast(GetField(VT_ARRAY_INDICES_TYPE, 0)); + } + const void *array_indices() const { + return GetPointer(VT_ARRAY_INDICES); + } + template const T *array_indices_as() const; + const tflite::Int32Vector *array_indices_as_Int32Vector() const { + return array_indices_type() == tflite::SparseIndexVector_Int32Vector ? static_cast(array_indices()) : nullptr; + } + const tflite::Uint16Vector *array_indices_as_Uint16Vector() const { + return array_indices_type() == tflite::SparseIndexVector_Uint16Vector ? static_cast(array_indices()) : nullptr; + } + const tflite::Uint8Vector *array_indices_as_Uint8Vector() const { + return array_indices_type() == tflite::SparseIndexVector_Uint8Vector ? 
static_cast(array_indices()) : nullptr; + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_FORMAT, 1) && + VerifyField(verifier, VT_DENSE_SIZE, 4) && + VerifyField(verifier, VT_ARRAY_SEGMENTS_TYPE, 1) && + VerifyOffset(verifier, VT_ARRAY_SEGMENTS) && + VerifySparseIndexVector(verifier, array_segments(), array_segments_type()) && + VerifyField(verifier, VT_ARRAY_INDICES_TYPE, 1) && + VerifyOffset(verifier, VT_ARRAY_INDICES) && + VerifySparseIndexVector(verifier, array_indices(), array_indices_type()) && + verifier.EndTable(); + } + DimensionMetadataT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(DimensionMetadataT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const DimensionMetadataT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +template<> inline const tflite::Int32Vector *DimensionMetadata::array_segments_as() const { + return array_segments_as_Int32Vector(); +} + +template<> inline const tflite::Uint16Vector *DimensionMetadata::array_segments_as() const { + return array_segments_as_Uint16Vector(); +} + +template<> inline const tflite::Uint8Vector *DimensionMetadata::array_segments_as() const { + return array_segments_as_Uint8Vector(); +} + +template<> inline const tflite::Int32Vector *DimensionMetadata::array_indices_as() const { + return array_indices_as_Int32Vector(); +} + +template<> inline const tflite::Uint16Vector *DimensionMetadata::array_indices_as() const { + return array_indices_as_Uint16Vector(); +} + +template<> inline const tflite::Uint8Vector *DimensionMetadata::array_indices_as() const { + return array_indices_as_Uint8Vector(); +} + +struct DimensionMetadataBuilder { + typedef DimensionMetadata Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_format(tflite::DimensionType format) { + fbb_.AddElement(DimensionMetadata::VT_FORMAT, static_cast(format), 0); + } + void add_dense_size(int32_t dense_size) { + fbb_.AddElement(DimensionMetadata::VT_DENSE_SIZE, dense_size, 0); + } + void add_array_segments_type(tflite::SparseIndexVector array_segments_type) { + fbb_.AddElement(DimensionMetadata::VT_ARRAY_SEGMENTS_TYPE, static_cast(array_segments_type), 0); + } + void add_array_segments(flatbuffers::Offset array_segments) { + fbb_.AddOffset(DimensionMetadata::VT_ARRAY_SEGMENTS, array_segments); + } + void add_array_indices_type(tflite::SparseIndexVector array_indices_type) { + fbb_.AddElement(DimensionMetadata::VT_ARRAY_INDICES_TYPE, static_cast(array_indices_type), 0); + } + void add_array_indices(flatbuffers::Offset array_indices) { + fbb_.AddOffset(DimensionMetadata::VT_ARRAY_INDICES, array_indices); + } + explicit DimensionMetadataBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateDimensionMetadata( + flatbuffers::FlatBufferBuilder &_fbb, + tflite::DimensionType format = tflite::DimensionType_DENSE, + int32_t dense_size = 0, + tflite::SparseIndexVector array_segments_type = tflite::SparseIndexVector_NONE, + flatbuffers::Offset array_segments = 0, + tflite::SparseIndexVector array_indices_type = tflite::SparseIndexVector_NONE, + flatbuffers::Offset array_indices = 0) { + 
DimensionMetadataBuilder builder_(_fbb); + builder_.add_array_indices(array_indices); + builder_.add_array_segments(array_segments); + builder_.add_dense_size(dense_size); + builder_.add_array_indices_type(array_indices_type); + builder_.add_array_segments_type(array_segments_type); + builder_.add_format(format); + return builder_.Finish(); +} + +flatbuffers::Offset CreateDimensionMetadata(flatbuffers::FlatBufferBuilder &_fbb, const DimensionMetadataT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct SparsityParametersT : public flatbuffers::NativeTable { + typedef SparsityParameters TableType; + std::vector traversal_order{}; + std::vector block_map{}; + std::vector> dim_metadata{}; + SparsityParametersT() = default; + SparsityParametersT(const SparsityParametersT &o); + SparsityParametersT(SparsityParametersT&&) FLATBUFFERS_NOEXCEPT = default; + SparsityParametersT &operator=(SparsityParametersT o) FLATBUFFERS_NOEXCEPT; +}; + +struct SparsityParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef SparsityParametersT NativeTableType; + typedef SparsityParametersBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_TRAVERSAL_ORDER = 4, + VT_BLOCK_MAP = 6, + VT_DIM_METADATA = 8 + }; + const flatbuffers::Vector *traversal_order() const { + return GetPointer *>(VT_TRAVERSAL_ORDER); + } + const flatbuffers::Vector *block_map() const { + return GetPointer *>(VT_BLOCK_MAP); + } + const flatbuffers::Vector> *dim_metadata() const { + return GetPointer> *>(VT_DIM_METADATA); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_TRAVERSAL_ORDER) && + verifier.VerifyVector(traversal_order()) && + VerifyOffset(verifier, VT_BLOCK_MAP) && + verifier.VerifyVector(block_map()) && + VerifyOffset(verifier, VT_DIM_METADATA) && + verifier.VerifyVector(dim_metadata()) && + verifier.VerifyVectorOfTables(dim_metadata()) && + verifier.EndTable(); + } + SparsityParametersT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(SparsityParametersT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const SparsityParametersT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct SparsityParametersBuilder { + typedef SparsityParameters Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_traversal_order(flatbuffers::Offset> traversal_order) { + fbb_.AddOffset(SparsityParameters::VT_TRAVERSAL_ORDER, traversal_order); + } + void add_block_map(flatbuffers::Offset> block_map) { + fbb_.AddOffset(SparsityParameters::VT_BLOCK_MAP, block_map); + } + void add_dim_metadata(flatbuffers::Offset>> dim_metadata) { + fbb_.AddOffset(SparsityParameters::VT_DIM_METADATA, dim_metadata); + } + explicit SparsityParametersBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateSparsityParameters( + flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset> traversal_order = 0, + flatbuffers::Offset> block_map = 0, + flatbuffers::Offset>> dim_metadata = 0) { + SparsityParametersBuilder builder_(_fbb); + builder_.add_dim_metadata(dim_metadata); + builder_.add_block_map(block_map); + 
builder_.add_traversal_order(traversal_order); + return builder_.Finish(); +} + +inline flatbuffers::Offset CreateSparsityParametersDirect( + flatbuffers::FlatBufferBuilder &_fbb, + const std::vector *traversal_order = nullptr, + const std::vector *block_map = nullptr, + const std::vector> *dim_metadata = nullptr) { + auto traversal_order__ = traversal_order ? _fbb.CreateVector(*traversal_order) : 0; + auto block_map__ = block_map ? _fbb.CreateVector(*block_map) : 0; + auto dim_metadata__ = dim_metadata ? _fbb.CreateVector>(*dim_metadata) : 0; + return tflite::CreateSparsityParameters( + _fbb, + traversal_order__, + block_map__, + dim_metadata__); +} + +flatbuffers::Offset CreateSparsityParameters(flatbuffers::FlatBufferBuilder &_fbb, const SparsityParametersT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct VariantSubTypeT : public flatbuffers::NativeTable { + typedef VariantSubType TableType; + std::vector shape{}; + tflite::TensorType type = tflite::TensorType_FLOAT32; + bool has_rank = false; +}; + +struct VariantSubType FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef VariantSubTypeT NativeTableType; + typedef VariantSubTypeBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_SHAPE = 4, + VT_TYPE = 6, + VT_HAS_RANK = 8 + }; + const flatbuffers::Vector *shape() const { + return GetPointer *>(VT_SHAPE); + } + tflite::TensorType type() const { + return static_cast(GetField(VT_TYPE, 0)); + } + bool has_rank() const { + return GetField(VT_HAS_RANK, 0) != 0; + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_SHAPE) && + verifier.VerifyVector(shape()) && + VerifyField(verifier, VT_TYPE, 1) && + VerifyField(verifier, VT_HAS_RANK, 1) && + verifier.EndTable(); + } + VariantSubTypeT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(VariantSubTypeT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const VariantSubTypeT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct VariantSubTypeBuilder { + typedef VariantSubType Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_shape(flatbuffers::Offset> shape) { + fbb_.AddOffset(VariantSubType::VT_SHAPE, shape); + } + void add_type(tflite::TensorType type) { + fbb_.AddElement(VariantSubType::VT_TYPE, static_cast(type), 0); + } + void add_has_rank(bool has_rank) { + fbb_.AddElement(VariantSubType::VT_HAS_RANK, static_cast(has_rank), 0); + } + explicit VariantSubTypeBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateVariantSubType( + flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset> shape = 0, + tflite::TensorType type = tflite::TensorType_FLOAT32, + bool has_rank = false) { + VariantSubTypeBuilder builder_(_fbb); + builder_.add_shape(shape); + builder_.add_has_rank(has_rank); + builder_.add_type(type); + return builder_.Finish(); +} + +inline flatbuffers::Offset CreateVariantSubTypeDirect( + flatbuffers::FlatBufferBuilder &_fbb, + const std::vector *shape = nullptr, + tflite::TensorType type = tflite::TensorType_FLOAT32, + bool has_rank = false) { + auto shape__ = shape ? 
_fbb.CreateVector(*shape) : 0; + return tflite::CreateVariantSubType( + _fbb, + shape__, + type, + has_rank); +} + +flatbuffers::Offset CreateVariantSubType(flatbuffers::FlatBufferBuilder &_fbb, const VariantSubTypeT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct TensorT : public flatbuffers::NativeTable { + typedef Tensor TableType; + std::vector shape{}; + tflite::TensorType type = tflite::TensorType_FLOAT32; + uint32_t buffer = 0; + std::string name{}; + std::unique_ptr quantization{}; + bool is_variable = false; + std::unique_ptr sparsity{}; + std::vector shape_signature{}; + bool has_rank = false; + std::vector> variant_tensors{}; + TensorT() = default; + TensorT(const TensorT &o); + TensorT(TensorT&&) FLATBUFFERS_NOEXCEPT = default; + TensorT &operator=(TensorT o) FLATBUFFERS_NOEXCEPT; +}; + +struct Tensor FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef TensorT NativeTableType; + typedef TensorBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_SHAPE = 4, + VT_TYPE = 6, + VT_BUFFER = 8, + VT_NAME = 10, + VT_QUANTIZATION = 12, + VT_IS_VARIABLE = 14, + VT_SPARSITY = 16, + VT_SHAPE_SIGNATURE = 18, + VT_HAS_RANK = 20, + VT_VARIANT_TENSORS = 22 + }; + const flatbuffers::Vector *shape() const { + return GetPointer *>(VT_SHAPE); + } + tflite::TensorType type() const { + return static_cast(GetField(VT_TYPE, 0)); + } + uint32_t buffer() const { + return GetField(VT_BUFFER, 0); + } + const flatbuffers::String *name() const { + return GetPointer(VT_NAME); + } + const tflite::QuantizationParameters *quantization() const { + return GetPointer(VT_QUANTIZATION); + } + bool is_variable() const { + return GetField(VT_IS_VARIABLE, 0) != 0; + } + const tflite::SparsityParameters *sparsity() const { + return GetPointer(VT_SPARSITY); + } + const flatbuffers::Vector *shape_signature() const { + return GetPointer *>(VT_SHAPE_SIGNATURE); + } + bool has_rank() const { + return GetField(VT_HAS_RANK, 0) != 0; + } + const flatbuffers::Vector> *variant_tensors() const { + return GetPointer> *>(VT_VARIANT_TENSORS); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_SHAPE) && + verifier.VerifyVector(shape()) && + VerifyField(verifier, VT_TYPE, 1) && + VerifyField(verifier, VT_BUFFER, 4) && + VerifyOffset(verifier, VT_NAME) && + verifier.VerifyString(name()) && + VerifyOffset(verifier, VT_QUANTIZATION) && + verifier.VerifyTable(quantization()) && + VerifyField(verifier, VT_IS_VARIABLE, 1) && + VerifyOffset(verifier, VT_SPARSITY) && + verifier.VerifyTable(sparsity()) && + VerifyOffset(verifier, VT_SHAPE_SIGNATURE) && + verifier.VerifyVector(shape_signature()) && + VerifyField(verifier, VT_HAS_RANK, 1) && + VerifyOffset(verifier, VT_VARIANT_TENSORS) && + verifier.VerifyVector(variant_tensors()) && + verifier.VerifyVectorOfTables(variant_tensors()) && + verifier.EndTable(); + } + TensorT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(TensorT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const TensorT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct TensorBuilder { + typedef Tensor Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_shape(flatbuffers::Offset> shape) { + fbb_.AddOffset(Tensor::VT_SHAPE, shape); + } + void add_type(tflite::TensorType 
type) { + fbb_.AddElement(Tensor::VT_TYPE, static_cast(type), 0); + } + void add_buffer(uint32_t buffer) { + fbb_.AddElement(Tensor::VT_BUFFER, buffer, 0); + } + void add_name(flatbuffers::Offset name) { + fbb_.AddOffset(Tensor::VT_NAME, name); + } + void add_quantization(flatbuffers::Offset quantization) { + fbb_.AddOffset(Tensor::VT_QUANTIZATION, quantization); + } + void add_is_variable(bool is_variable) { + fbb_.AddElement(Tensor::VT_IS_VARIABLE, static_cast(is_variable), 0); + } + void add_sparsity(flatbuffers::Offset sparsity) { + fbb_.AddOffset(Tensor::VT_SPARSITY, sparsity); + } + void add_shape_signature(flatbuffers::Offset> shape_signature) { + fbb_.AddOffset(Tensor::VT_SHAPE_SIGNATURE, shape_signature); + } + void add_has_rank(bool has_rank) { + fbb_.AddElement(Tensor::VT_HAS_RANK, static_cast(has_rank), 0); + } + void add_variant_tensors(flatbuffers::Offset>> variant_tensors) { + fbb_.AddOffset(Tensor::VT_VARIANT_TENSORS, variant_tensors); + } + explicit TensorBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateTensor( + flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset> shape = 0, + tflite::TensorType type = tflite::TensorType_FLOAT32, + uint32_t buffer = 0, + flatbuffers::Offset name = 0, + flatbuffers::Offset quantization = 0, + bool is_variable = false, + flatbuffers::Offset sparsity = 0, + flatbuffers::Offset> shape_signature = 0, + bool has_rank = false, + flatbuffers::Offset>> variant_tensors = 0) { + TensorBuilder builder_(_fbb); + builder_.add_variant_tensors(variant_tensors); + builder_.add_shape_signature(shape_signature); + builder_.add_sparsity(sparsity); + builder_.add_quantization(quantization); + builder_.add_name(name); + builder_.add_buffer(buffer); + builder_.add_shape(shape); + builder_.add_has_rank(has_rank); + builder_.add_is_variable(is_variable); + builder_.add_type(type); + return builder_.Finish(); +} + +inline flatbuffers::Offset CreateTensorDirect( + flatbuffers::FlatBufferBuilder &_fbb, + const std::vector *shape = nullptr, + tflite::TensorType type = tflite::TensorType_FLOAT32, + uint32_t buffer = 0, + const char *name = nullptr, + flatbuffers::Offset quantization = 0, + bool is_variable = false, + flatbuffers::Offset sparsity = 0, + const std::vector *shape_signature = nullptr, + bool has_rank = false, + const std::vector> *variant_tensors = nullptr) { + auto shape__ = shape ? _fbb.CreateVector(*shape) : 0; + auto name__ = name ? _fbb.CreateString(name) : 0; + auto shape_signature__ = shape_signature ? _fbb.CreateVector(*shape_signature) : 0; + auto variant_tensors__ = variant_tensors ? 
_fbb.CreateVector>(*variant_tensors) : 0; + return tflite::CreateTensor( + _fbb, + shape__, + type, + buffer, + name__, + quantization, + is_variable, + sparsity, + shape_signature__, + has_rank, + variant_tensors__); +} + +flatbuffers::Offset CreateTensor(flatbuffers::FlatBufferBuilder &_fbb, const TensorT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct Conv2DOptionsT : public flatbuffers::NativeTable { + typedef Conv2DOptions TableType; + tflite::Padding padding = tflite::Padding_SAME; + int32_t stride_w = 0; + int32_t stride_h = 0; + tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE; + int32_t dilation_w_factor = 1; + int32_t dilation_h_factor = 1; +}; + +struct Conv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef Conv2DOptionsT NativeTableType; + typedef Conv2DOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_PADDING = 4, + VT_STRIDE_W = 6, + VT_STRIDE_H = 8, + VT_FUSED_ACTIVATION_FUNCTION = 10, + VT_DILATION_W_FACTOR = 12, + VT_DILATION_H_FACTOR = 14 + }; + tflite::Padding padding() const { + return static_cast(GetField(VT_PADDING, 0)); + } + int32_t stride_w() const { + return GetField(VT_STRIDE_W, 0); + } + int32_t stride_h() const { + return GetField(VT_STRIDE_H, 0); + } + tflite::ActivationFunctionType fused_activation_function() const { + return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + int32_t dilation_w_factor() const { + return GetField(VT_DILATION_W_FACTOR, 1); + } + int32_t dilation_h_factor() const { + return GetField(VT_DILATION_H_FACTOR, 1); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_PADDING, 1) && + VerifyField(verifier, VT_STRIDE_W, 4) && + VerifyField(verifier, VT_STRIDE_H, 4) && + VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION, 1) && + VerifyField(verifier, VT_DILATION_W_FACTOR, 4) && + VerifyField(verifier, VT_DILATION_H_FACTOR, 4) && + verifier.EndTable(); + } + Conv2DOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(Conv2DOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const Conv2DOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct Conv2DOptionsBuilder { + typedef Conv2DOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_padding(tflite::Padding padding) { + fbb_.AddElement(Conv2DOptions::VT_PADDING, static_cast(padding), 0); + } + void add_stride_w(int32_t stride_w) { + fbb_.AddElement(Conv2DOptions::VT_STRIDE_W, stride_w, 0); + } + void add_stride_h(int32_t stride_h) { + fbb_.AddElement(Conv2DOptions::VT_STRIDE_H, stride_h, 0); + } + void add_fused_activation_function(tflite::ActivationFunctionType fused_activation_function) { + fbb_.AddElement(Conv2DOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0); + } + void add_dilation_w_factor(int32_t dilation_w_factor) { + fbb_.AddElement(Conv2DOptions::VT_DILATION_W_FACTOR, dilation_w_factor, 1); + } + void add_dilation_h_factor(int32_t dilation_h_factor) { + fbb_.AddElement(Conv2DOptions::VT_DILATION_H_FACTOR, dilation_h_factor, 1); + } + explicit Conv2DOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end 
= fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateConv2DOptions( + flatbuffers::FlatBufferBuilder &_fbb, + tflite::Padding padding = tflite::Padding_SAME, + int32_t stride_w = 0, + int32_t stride_h = 0, + tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE, + int32_t dilation_w_factor = 1, + int32_t dilation_h_factor = 1) { + Conv2DOptionsBuilder builder_(_fbb); + builder_.add_dilation_h_factor(dilation_h_factor); + builder_.add_dilation_w_factor(dilation_w_factor); + builder_.add_stride_h(stride_h); + builder_.add_stride_w(stride_w); + builder_.add_fused_activation_function(fused_activation_function); + builder_.add_padding(padding); + return builder_.Finish(); +} + +flatbuffers::Offset CreateConv2DOptions(flatbuffers::FlatBufferBuilder &_fbb, const Conv2DOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct Conv3DOptionsT : public flatbuffers::NativeTable { + typedef Conv3DOptions TableType; + tflite::Padding padding = tflite::Padding_SAME; + int32_t stride_d = 0; + int32_t stride_w = 0; + int32_t stride_h = 0; + tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE; + int32_t dilation_d_factor = 1; + int32_t dilation_w_factor = 1; + int32_t dilation_h_factor = 1; +}; + +struct Conv3DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef Conv3DOptionsT NativeTableType; + typedef Conv3DOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_PADDING = 4, + VT_STRIDE_D = 6, + VT_STRIDE_W = 8, + VT_STRIDE_H = 10, + VT_FUSED_ACTIVATION_FUNCTION = 12, + VT_DILATION_D_FACTOR = 14, + VT_DILATION_W_FACTOR = 16, + VT_DILATION_H_FACTOR = 18 + }; + tflite::Padding padding() const { + return static_cast(GetField(VT_PADDING, 0)); + } + int32_t stride_d() const { + return GetField(VT_STRIDE_D, 0); + } + int32_t stride_w() const { + return GetField(VT_STRIDE_W, 0); + } + int32_t stride_h() const { + return GetField(VT_STRIDE_H, 0); + } + tflite::ActivationFunctionType fused_activation_function() const { + return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + int32_t dilation_d_factor() const { + return GetField(VT_DILATION_D_FACTOR, 1); + } + int32_t dilation_w_factor() const { + return GetField(VT_DILATION_W_FACTOR, 1); + } + int32_t dilation_h_factor() const { + return GetField(VT_DILATION_H_FACTOR, 1); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_PADDING, 1) && + VerifyField(verifier, VT_STRIDE_D, 4) && + VerifyField(verifier, VT_STRIDE_W, 4) && + VerifyField(verifier, VT_STRIDE_H, 4) && + VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION, 1) && + VerifyField(verifier, VT_DILATION_D_FACTOR, 4) && + VerifyField(verifier, VT_DILATION_W_FACTOR, 4) && + VerifyField(verifier, VT_DILATION_H_FACTOR, 4) && + verifier.EndTable(); + } + Conv3DOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(Conv3DOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const Conv3DOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct Conv3DOptionsBuilder { + typedef Conv3DOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_padding(tflite::Padding padding) { + 
fbb_.AddElement(Conv3DOptions::VT_PADDING, static_cast(padding), 0); + } + void add_stride_d(int32_t stride_d) { + fbb_.AddElement(Conv3DOptions::VT_STRIDE_D, stride_d, 0); + } + void add_stride_w(int32_t stride_w) { + fbb_.AddElement(Conv3DOptions::VT_STRIDE_W, stride_w, 0); + } + void add_stride_h(int32_t stride_h) { + fbb_.AddElement(Conv3DOptions::VT_STRIDE_H, stride_h, 0); + } + void add_fused_activation_function(tflite::ActivationFunctionType fused_activation_function) { + fbb_.AddElement(Conv3DOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0); + } + void add_dilation_d_factor(int32_t dilation_d_factor) { + fbb_.AddElement(Conv3DOptions::VT_DILATION_D_FACTOR, dilation_d_factor, 1); + } + void add_dilation_w_factor(int32_t dilation_w_factor) { + fbb_.AddElement(Conv3DOptions::VT_DILATION_W_FACTOR, dilation_w_factor, 1); + } + void add_dilation_h_factor(int32_t dilation_h_factor) { + fbb_.AddElement(Conv3DOptions::VT_DILATION_H_FACTOR, dilation_h_factor, 1); + } + explicit Conv3DOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateConv3DOptions( + flatbuffers::FlatBufferBuilder &_fbb, + tflite::Padding padding = tflite::Padding_SAME, + int32_t stride_d = 0, + int32_t stride_w = 0, + int32_t stride_h = 0, + tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE, + int32_t dilation_d_factor = 1, + int32_t dilation_w_factor = 1, + int32_t dilation_h_factor = 1) { + Conv3DOptionsBuilder builder_(_fbb); + builder_.add_dilation_h_factor(dilation_h_factor); + builder_.add_dilation_w_factor(dilation_w_factor); + builder_.add_dilation_d_factor(dilation_d_factor); + builder_.add_stride_h(stride_h); + builder_.add_stride_w(stride_w); + builder_.add_stride_d(stride_d); + builder_.add_fused_activation_function(fused_activation_function); + builder_.add_padding(padding); + return builder_.Finish(); +} + +flatbuffers::Offset CreateConv3DOptions(flatbuffers::FlatBufferBuilder &_fbb, const Conv3DOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct Pool2DOptionsT : public flatbuffers::NativeTable { + typedef Pool2DOptions TableType; + tflite::Padding padding = tflite::Padding_SAME; + int32_t stride_w = 0; + int32_t stride_h = 0; + int32_t filter_width = 0; + int32_t filter_height = 0; + tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE; +}; + +struct Pool2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef Pool2DOptionsT NativeTableType; + typedef Pool2DOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_PADDING = 4, + VT_STRIDE_W = 6, + VT_STRIDE_H = 8, + VT_FILTER_WIDTH = 10, + VT_FILTER_HEIGHT = 12, + VT_FUSED_ACTIVATION_FUNCTION = 14 + }; + tflite::Padding padding() const { + return static_cast(GetField(VT_PADDING, 0)); + } + int32_t stride_w() const { + return GetField(VT_STRIDE_W, 0); + } + int32_t stride_h() const { + return GetField(VT_STRIDE_H, 0); + } + int32_t filter_width() const { + return GetField(VT_FILTER_WIDTH, 0); + } + int32_t filter_height() const { + return GetField(VT_FILTER_HEIGHT, 0); + } + tflite::ActivationFunctionType fused_activation_function() const { + return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + bool 
Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_PADDING, 1) && + VerifyField(verifier, VT_STRIDE_W, 4) && + VerifyField(verifier, VT_STRIDE_H, 4) && + VerifyField(verifier, VT_FILTER_WIDTH, 4) && + VerifyField(verifier, VT_FILTER_HEIGHT, 4) && + VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION, 1) && + verifier.EndTable(); + } + Pool2DOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(Pool2DOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const Pool2DOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct Pool2DOptionsBuilder { + typedef Pool2DOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_padding(tflite::Padding padding) { + fbb_.AddElement(Pool2DOptions::VT_PADDING, static_cast(padding), 0); + } + void add_stride_w(int32_t stride_w) { + fbb_.AddElement(Pool2DOptions::VT_STRIDE_W, stride_w, 0); + } + void add_stride_h(int32_t stride_h) { + fbb_.AddElement(Pool2DOptions::VT_STRIDE_H, stride_h, 0); + } + void add_filter_width(int32_t filter_width) { + fbb_.AddElement(Pool2DOptions::VT_FILTER_WIDTH, filter_width, 0); + } + void add_filter_height(int32_t filter_height) { + fbb_.AddElement(Pool2DOptions::VT_FILTER_HEIGHT, filter_height, 0); + } + void add_fused_activation_function(tflite::ActivationFunctionType fused_activation_function) { + fbb_.AddElement(Pool2DOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0); + } + explicit Pool2DOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreatePool2DOptions( + flatbuffers::FlatBufferBuilder &_fbb, + tflite::Padding padding = tflite::Padding_SAME, + int32_t stride_w = 0, + int32_t stride_h = 0, + int32_t filter_width = 0, + int32_t filter_height = 0, + tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE) { + Pool2DOptionsBuilder builder_(_fbb); + builder_.add_filter_height(filter_height); + builder_.add_filter_width(filter_width); + builder_.add_stride_h(stride_h); + builder_.add_stride_w(stride_w); + builder_.add_fused_activation_function(fused_activation_function); + builder_.add_padding(padding); + return builder_.Finish(); +} + +flatbuffers::Offset CreatePool2DOptions(flatbuffers::FlatBufferBuilder &_fbb, const Pool2DOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct DepthwiseConv2DOptionsT : public flatbuffers::NativeTable { + typedef DepthwiseConv2DOptions TableType; + tflite::Padding padding = tflite::Padding_SAME; + int32_t stride_w = 0; + int32_t stride_h = 0; + int32_t depth_multiplier = 0; + tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE; + int32_t dilation_w_factor = 1; + int32_t dilation_h_factor = 1; +}; + +struct DepthwiseConv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef DepthwiseConv2DOptionsT NativeTableType; + typedef DepthwiseConv2DOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_PADDING = 4, + VT_STRIDE_W = 6, + VT_STRIDE_H = 8, + VT_DEPTH_MULTIPLIER = 10, + 
VT_FUSED_ACTIVATION_FUNCTION = 12, + VT_DILATION_W_FACTOR = 14, + VT_DILATION_H_FACTOR = 16 + }; + tflite::Padding padding() const { + return static_cast(GetField(VT_PADDING, 0)); + } + int32_t stride_w() const { + return GetField(VT_STRIDE_W, 0); + } + int32_t stride_h() const { + return GetField(VT_STRIDE_H, 0); + } + int32_t depth_multiplier() const { + return GetField(VT_DEPTH_MULTIPLIER, 0); + } + tflite::ActivationFunctionType fused_activation_function() const { + return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + int32_t dilation_w_factor() const { + return GetField(VT_DILATION_W_FACTOR, 1); + } + int32_t dilation_h_factor() const { + return GetField(VT_DILATION_H_FACTOR, 1); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_PADDING, 1) && + VerifyField(verifier, VT_STRIDE_W, 4) && + VerifyField(verifier, VT_STRIDE_H, 4) && + VerifyField(verifier, VT_DEPTH_MULTIPLIER, 4) && + VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION, 1) && + VerifyField(verifier, VT_DILATION_W_FACTOR, 4) && + VerifyField(verifier, VT_DILATION_H_FACTOR, 4) && + verifier.EndTable(); + } + DepthwiseConv2DOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(DepthwiseConv2DOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const DepthwiseConv2DOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct DepthwiseConv2DOptionsBuilder { + typedef DepthwiseConv2DOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_padding(tflite::Padding padding) { + fbb_.AddElement(DepthwiseConv2DOptions::VT_PADDING, static_cast(padding), 0); + } + void add_stride_w(int32_t stride_w) { + fbb_.AddElement(DepthwiseConv2DOptions::VT_STRIDE_W, stride_w, 0); + } + void add_stride_h(int32_t stride_h) { + fbb_.AddElement(DepthwiseConv2DOptions::VT_STRIDE_H, stride_h, 0); + } + void add_depth_multiplier(int32_t depth_multiplier) { + fbb_.AddElement(DepthwiseConv2DOptions::VT_DEPTH_MULTIPLIER, depth_multiplier, 0); + } + void add_fused_activation_function(tflite::ActivationFunctionType fused_activation_function) { + fbb_.AddElement(DepthwiseConv2DOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0); + } + void add_dilation_w_factor(int32_t dilation_w_factor) { + fbb_.AddElement(DepthwiseConv2DOptions::VT_DILATION_W_FACTOR, dilation_w_factor, 1); + } + void add_dilation_h_factor(int32_t dilation_h_factor) { + fbb_.AddElement(DepthwiseConv2DOptions::VT_DILATION_H_FACTOR, dilation_h_factor, 1); + } + explicit DepthwiseConv2DOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateDepthwiseConv2DOptions( + flatbuffers::FlatBufferBuilder &_fbb, + tflite::Padding padding = tflite::Padding_SAME, + int32_t stride_w = 0, + int32_t stride_h = 0, + int32_t depth_multiplier = 0, + tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE, + int32_t dilation_w_factor = 1, + int32_t dilation_h_factor = 1) { + DepthwiseConv2DOptionsBuilder builder_(_fbb); + builder_.add_dilation_h_factor(dilation_h_factor); + builder_.add_dilation_w_factor(dilation_w_factor); + 
builder_.add_depth_multiplier(depth_multiplier); + builder_.add_stride_h(stride_h); + builder_.add_stride_w(stride_w); + builder_.add_fused_activation_function(fused_activation_function); + builder_.add_padding(padding); + return builder_.Finish(); +} + +flatbuffers::Offset CreateDepthwiseConv2DOptions(flatbuffers::FlatBufferBuilder &_fbb, const DepthwiseConv2DOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct ConcatEmbeddingsOptionsT : public flatbuffers::NativeTable { + typedef ConcatEmbeddingsOptions TableType; + int32_t num_channels = 0; + std::vector num_columns_per_channel{}; + std::vector embedding_dim_per_channel{}; +}; + +struct ConcatEmbeddingsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef ConcatEmbeddingsOptionsT NativeTableType; + typedef ConcatEmbeddingsOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_NUM_CHANNELS = 4, + VT_NUM_COLUMNS_PER_CHANNEL = 6, + VT_EMBEDDING_DIM_PER_CHANNEL = 8 + }; + int32_t num_channels() const { + return GetField(VT_NUM_CHANNELS, 0); + } + const flatbuffers::Vector *num_columns_per_channel() const { + return GetPointer *>(VT_NUM_COLUMNS_PER_CHANNEL); + } + const flatbuffers::Vector *embedding_dim_per_channel() const { + return GetPointer *>(VT_EMBEDDING_DIM_PER_CHANNEL); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_NUM_CHANNELS, 4) && + VerifyOffset(verifier, VT_NUM_COLUMNS_PER_CHANNEL) && + verifier.VerifyVector(num_columns_per_channel()) && + VerifyOffset(verifier, VT_EMBEDDING_DIM_PER_CHANNEL) && + verifier.VerifyVector(embedding_dim_per_channel()) && + verifier.EndTable(); + } + ConcatEmbeddingsOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(ConcatEmbeddingsOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const ConcatEmbeddingsOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct ConcatEmbeddingsOptionsBuilder { + typedef ConcatEmbeddingsOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_num_channels(int32_t num_channels) { + fbb_.AddElement(ConcatEmbeddingsOptions::VT_NUM_CHANNELS, num_channels, 0); + } + void add_num_columns_per_channel(flatbuffers::Offset> num_columns_per_channel) { + fbb_.AddOffset(ConcatEmbeddingsOptions::VT_NUM_COLUMNS_PER_CHANNEL, num_columns_per_channel); + } + void add_embedding_dim_per_channel(flatbuffers::Offset> embedding_dim_per_channel) { + fbb_.AddOffset(ConcatEmbeddingsOptions::VT_EMBEDDING_DIM_PER_CHANNEL, embedding_dim_per_channel); + } + explicit ConcatEmbeddingsOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateConcatEmbeddingsOptions( + flatbuffers::FlatBufferBuilder &_fbb, + int32_t num_channels = 0, + flatbuffers::Offset> num_columns_per_channel = 0, + flatbuffers::Offset> embedding_dim_per_channel = 0) { + ConcatEmbeddingsOptionsBuilder builder_(_fbb); + builder_.add_embedding_dim_per_channel(embedding_dim_per_channel); + builder_.add_num_columns_per_channel(num_columns_per_channel); + builder_.add_num_channels(num_channels); + return builder_.Finish(); +} + 
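Everything in this hunk is unmodified flatc output: each *T struct is the object-API mirror of a schema table, each *Builder writes that table's fields into a FlatBufferBuilder, and each Create* helper wraps the builder calls in schema-declaration order. As a quick orientation (an editorial aside, not part of the generated header), the sketch below shows how these helpers are typically driven; the include path and the concrete Conv2DOptions values are illustrative assumptions.

// Illustrative sketch only -- not part of schema_generated.h.
// The include path below is an assumption; use whatever path this
// generated header has in your tree.
#include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h"

// Build a standalone Conv2DOptions table and read one field back.
inline int32_t RoundTripConv2DOptions() {
  flatbuffers::FlatBufferBuilder fbb;
  auto opts = tflite::CreateConv2DOptions(
      fbb,
      tflite::Padding_SAME,
      /*stride_w=*/2, /*stride_h=*/2,
      tflite::ActivationFunctionType_RELU,
      /*dilation_w_factor=*/1, /*dilation_h_factor=*/1);
  fbb.Finish(opts);  // make this table the buffer root
  auto *read_back =
      flatbuffers::GetRoot<tflite::Conv2DOptions>(fbb.GetBufferPointer());
  return read_back->stride_w();  // 2
}

Note that inside the generated Create* helpers the builder_.add_* calls run largest-field-first (32-bit scalars before byte-wide enums and bools), a flatc layout choice that keeps alignment padding in the serialized table down; the Create* parameter list itself stays in schema order, so callers are unaffected.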
+inline flatbuffers::Offset<ConcatEmbeddingsOptions> CreateConcatEmbeddingsOptionsDirect(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    int32_t num_channels = 0,
+    const std::vector<int32_t> *num_columns_per_channel = nullptr,
+    const std::vector<int32_t> *embedding_dim_per_channel = nullptr) {
+  auto num_columns_per_channel__ = num_columns_per_channel ? _fbb.CreateVector<int32_t>(*num_columns_per_channel) : 0;
+  auto embedding_dim_per_channel__ = embedding_dim_per_channel ? _fbb.CreateVector<int32_t>(*embedding_dim_per_channel) : 0;
+  return tflite::CreateConcatEmbeddingsOptions(
+      _fbb,
+      num_channels,
+      num_columns_per_channel__,
+      embedding_dim_per_channel__);
+}
+
+flatbuffers::Offset<ConcatEmbeddingsOptions> CreateConcatEmbeddingsOptions(flatbuffers::FlatBufferBuilder &_fbb, const ConcatEmbeddingsOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct LSHProjectionOptionsT : public flatbuffers::NativeTable {
+  typedef LSHProjectionOptions TableType;
+  tflite::LSHProjectionType type = tflite::LSHProjectionType_UNKNOWN;
+};
+
+struct LSHProjectionOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  typedef LSHProjectionOptionsT NativeTableType;
+  typedef LSHProjectionOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
+    VT_TYPE = 4
+  };
+  tflite::LSHProjectionType type() const {
+    return static_cast<tflite::LSHProjectionType>(GetField<int8_t>(VT_TYPE, 0));
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) &&
+           VerifyField<int8_t>(verifier, VT_TYPE, 1) &&
+           verifier.EndTable();
+  }
+  LSHProjectionOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(LSHProjectionOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<LSHProjectionOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const LSHProjectionOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct LSHProjectionOptionsBuilder {
+  typedef LSHProjectionOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_type(tflite::LSHProjectionType type) {
+    fbb_.AddElement<int8_t>(LSHProjectionOptions::VT_TYPE, static_cast<int8_t>(type), 0);
+  }
+  explicit LSHProjectionOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<LSHProjectionOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<LSHProjectionOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<LSHProjectionOptions> CreateLSHProjectionOptions(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    tflite::LSHProjectionType type = tflite::LSHProjectionType_UNKNOWN) {
+  LSHProjectionOptionsBuilder builder_(_fbb);
+  builder_.add_type(type);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<LSHProjectionOptions> CreateLSHProjectionOptions(flatbuffers::FlatBufferBuilder &_fbb, const LSHProjectionOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct SVDFOptionsT : public flatbuffers::NativeTable {
+  typedef SVDFOptions TableType;
+  int32_t rank = 0;
+  tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE;
+  bool asymmetric_quantize_inputs = false;
+};
+
+struct SVDFOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  typedef SVDFOptionsT NativeTableType;
+  typedef SVDFOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
+    VT_RANK = 4,
+    VT_FUSED_ACTIVATION_FUNCTION = 6,
+    VT_ASYMMETRIC_QUANTIZE_INPUTS = 8
+  };
+  int32_t rank() const {
+    return GetField<int32_t>(VT_RANK, 0);
+  }
+  tflite::ActivationFunctionType
fused_activation_function() const { + return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + bool asymmetric_quantize_inputs() const { + return GetField(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0; + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_RANK, 4) && + VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION, 1) && + VerifyField(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS, 1) && + verifier.EndTable(); + } + SVDFOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(SVDFOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const SVDFOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct SVDFOptionsBuilder { + typedef SVDFOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_rank(int32_t rank) { + fbb_.AddElement(SVDFOptions::VT_RANK, rank, 0); + } + void add_fused_activation_function(tflite::ActivationFunctionType fused_activation_function) { + fbb_.AddElement(SVDFOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0); + } + void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs) { + fbb_.AddElement(SVDFOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS, static_cast(asymmetric_quantize_inputs), 0); + } + explicit SVDFOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateSVDFOptions( + flatbuffers::FlatBufferBuilder &_fbb, + int32_t rank = 0, + tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE, + bool asymmetric_quantize_inputs = false) { + SVDFOptionsBuilder builder_(_fbb); + builder_.add_rank(rank); + builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs); + builder_.add_fused_activation_function(fused_activation_function); + return builder_.Finish(); +} + +flatbuffers::Offset CreateSVDFOptions(flatbuffers::FlatBufferBuilder &_fbb, const SVDFOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct RNNOptionsT : public flatbuffers::NativeTable { + typedef RNNOptions TableType; + tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE; + bool asymmetric_quantize_inputs = false; +}; + +struct RNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef RNNOptionsT NativeTableType; + typedef RNNOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_FUSED_ACTIVATION_FUNCTION = 4, + VT_ASYMMETRIC_QUANTIZE_INPUTS = 6 + }; + tflite::ActivationFunctionType fused_activation_function() const { + return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + bool asymmetric_quantize_inputs() const { + return GetField(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0; + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION, 1) && + VerifyField(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS, 1) && + verifier.EndTable(); + } + RNNOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(RNNOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = 
nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const RNNOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct RNNOptionsBuilder { + typedef RNNOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_fused_activation_function(tflite::ActivationFunctionType fused_activation_function) { + fbb_.AddElement(RNNOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0); + } + void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs) { + fbb_.AddElement(RNNOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS, static_cast(asymmetric_quantize_inputs), 0); + } + explicit RNNOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateRNNOptions( + flatbuffers::FlatBufferBuilder &_fbb, + tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE, + bool asymmetric_quantize_inputs = false) { + RNNOptionsBuilder builder_(_fbb); + builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs); + builder_.add_fused_activation_function(fused_activation_function); + return builder_.Finish(); +} + +flatbuffers::Offset CreateRNNOptions(flatbuffers::FlatBufferBuilder &_fbb, const RNNOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct SequenceRNNOptionsT : public flatbuffers::NativeTable { + typedef SequenceRNNOptions TableType; + bool time_major = false; + tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE; + bool asymmetric_quantize_inputs = false; +}; + +struct SequenceRNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef SequenceRNNOptionsT NativeTableType; + typedef SequenceRNNOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_TIME_MAJOR = 4, + VT_FUSED_ACTIVATION_FUNCTION = 6, + VT_ASYMMETRIC_QUANTIZE_INPUTS = 8 + }; + bool time_major() const { + return GetField(VT_TIME_MAJOR, 0) != 0; + } + tflite::ActivationFunctionType fused_activation_function() const { + return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + bool asymmetric_quantize_inputs() const { + return GetField(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0; + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_TIME_MAJOR, 1) && + VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION, 1) && + VerifyField(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS, 1) && + verifier.EndTable(); + } + SequenceRNNOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(SequenceRNNOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const SequenceRNNOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct SequenceRNNOptionsBuilder { + typedef SequenceRNNOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_time_major(bool time_major) { + fbb_.AddElement(SequenceRNNOptions::VT_TIME_MAJOR, static_cast(time_major), 0); + } + void add_fused_activation_function(tflite::ActivationFunctionType fused_activation_function) { + 
fbb_.AddElement(SequenceRNNOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0); + } + void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs) { + fbb_.AddElement(SequenceRNNOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS, static_cast(asymmetric_quantize_inputs), 0); + } + explicit SequenceRNNOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateSequenceRNNOptions( + flatbuffers::FlatBufferBuilder &_fbb, + bool time_major = false, + tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE, + bool asymmetric_quantize_inputs = false) { + SequenceRNNOptionsBuilder builder_(_fbb); + builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs); + builder_.add_fused_activation_function(fused_activation_function); + builder_.add_time_major(time_major); + return builder_.Finish(); +} + +flatbuffers::Offset CreateSequenceRNNOptions(flatbuffers::FlatBufferBuilder &_fbb, const SequenceRNNOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct BidirectionalSequenceRNNOptionsT : public flatbuffers::NativeTable { + typedef BidirectionalSequenceRNNOptions TableType; + bool time_major = false; + tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE; + bool merge_outputs = false; + bool asymmetric_quantize_inputs = false; +}; + +struct BidirectionalSequenceRNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef BidirectionalSequenceRNNOptionsT NativeTableType; + typedef BidirectionalSequenceRNNOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_TIME_MAJOR = 4, + VT_FUSED_ACTIVATION_FUNCTION = 6, + VT_MERGE_OUTPUTS = 8, + VT_ASYMMETRIC_QUANTIZE_INPUTS = 10 + }; + bool time_major() const { + return GetField(VT_TIME_MAJOR, 0) != 0; + } + tflite::ActivationFunctionType fused_activation_function() const { + return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + bool merge_outputs() const { + return GetField(VT_MERGE_OUTPUTS, 0) != 0; + } + bool asymmetric_quantize_inputs() const { + return GetField(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0; + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_TIME_MAJOR, 1) && + VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION, 1) && + VerifyField(verifier, VT_MERGE_OUTPUTS, 1) && + VerifyField(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS, 1) && + verifier.EndTable(); + } + BidirectionalSequenceRNNOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(BidirectionalSequenceRNNOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const BidirectionalSequenceRNNOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct BidirectionalSequenceRNNOptionsBuilder { + typedef BidirectionalSequenceRNNOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_time_major(bool time_major) { + fbb_.AddElement(BidirectionalSequenceRNNOptions::VT_TIME_MAJOR, static_cast(time_major), 0); + } + void add_fused_activation_function(tflite::ActivationFunctionType 
fused_activation_function) { + fbb_.AddElement(BidirectionalSequenceRNNOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0); + } + void add_merge_outputs(bool merge_outputs) { + fbb_.AddElement(BidirectionalSequenceRNNOptions::VT_MERGE_OUTPUTS, static_cast(merge_outputs), 0); + } + void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs) { + fbb_.AddElement(BidirectionalSequenceRNNOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS, static_cast(asymmetric_quantize_inputs), 0); + } + explicit BidirectionalSequenceRNNOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateBidirectionalSequenceRNNOptions( + flatbuffers::FlatBufferBuilder &_fbb, + bool time_major = false, + tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE, + bool merge_outputs = false, + bool asymmetric_quantize_inputs = false) { + BidirectionalSequenceRNNOptionsBuilder builder_(_fbb); + builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs); + builder_.add_merge_outputs(merge_outputs); + builder_.add_fused_activation_function(fused_activation_function); + builder_.add_time_major(time_major); + return builder_.Finish(); +} + +flatbuffers::Offset CreateBidirectionalSequenceRNNOptions(flatbuffers::FlatBufferBuilder &_fbb, const BidirectionalSequenceRNNOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct FullyConnectedOptionsT : public flatbuffers::NativeTable { + typedef FullyConnectedOptions TableType; + tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE; + tflite::FullyConnectedOptionsWeightsFormat weights_format = tflite::FullyConnectedOptionsWeightsFormat_DEFAULT; + bool keep_num_dims = false; + bool asymmetric_quantize_inputs = false; +}; + +struct FullyConnectedOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef FullyConnectedOptionsT NativeTableType; + typedef FullyConnectedOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_FUSED_ACTIVATION_FUNCTION = 4, + VT_WEIGHTS_FORMAT = 6, + VT_KEEP_NUM_DIMS = 8, + VT_ASYMMETRIC_QUANTIZE_INPUTS = 10 + }; + tflite::ActivationFunctionType fused_activation_function() const { + return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + tflite::FullyConnectedOptionsWeightsFormat weights_format() const { + return static_cast(GetField(VT_WEIGHTS_FORMAT, 0)); + } + bool keep_num_dims() const { + return GetField(VT_KEEP_NUM_DIMS, 0) != 0; + } + bool asymmetric_quantize_inputs() const { + return GetField(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0; + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION, 1) && + VerifyField(verifier, VT_WEIGHTS_FORMAT, 1) && + VerifyField(verifier, VT_KEEP_NUM_DIMS, 1) && + VerifyField(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS, 1) && + verifier.EndTable(); + } + FullyConnectedOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(FullyConnectedOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const FullyConnectedOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = 
nullptr); +}; + +struct FullyConnectedOptionsBuilder { + typedef FullyConnectedOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_fused_activation_function(tflite::ActivationFunctionType fused_activation_function) { + fbb_.AddElement(FullyConnectedOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0); + } + void add_weights_format(tflite::FullyConnectedOptionsWeightsFormat weights_format) { + fbb_.AddElement(FullyConnectedOptions::VT_WEIGHTS_FORMAT, static_cast(weights_format), 0); + } + void add_keep_num_dims(bool keep_num_dims) { + fbb_.AddElement(FullyConnectedOptions::VT_KEEP_NUM_DIMS, static_cast(keep_num_dims), 0); + } + void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs) { + fbb_.AddElement(FullyConnectedOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS, static_cast(asymmetric_quantize_inputs), 0); + } + explicit FullyConnectedOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateFullyConnectedOptions( + flatbuffers::FlatBufferBuilder &_fbb, + tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE, + tflite::FullyConnectedOptionsWeightsFormat weights_format = tflite::FullyConnectedOptionsWeightsFormat_DEFAULT, + bool keep_num_dims = false, + bool asymmetric_quantize_inputs = false) { + FullyConnectedOptionsBuilder builder_(_fbb); + builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs); + builder_.add_keep_num_dims(keep_num_dims); + builder_.add_weights_format(weights_format); + builder_.add_fused_activation_function(fused_activation_function); + return builder_.Finish(); +} + +flatbuffers::Offset CreateFullyConnectedOptions(flatbuffers::FlatBufferBuilder &_fbb, const FullyConnectedOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct SoftmaxOptionsT : public flatbuffers::NativeTable { + typedef SoftmaxOptions TableType; + float beta = 0.0f; +}; + +struct SoftmaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef SoftmaxOptionsT NativeTableType; + typedef SoftmaxOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_BETA = 4 + }; + float beta() const { + return GetField(VT_BETA, 0.0f); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_BETA, 4) && + verifier.EndTable(); + } + SoftmaxOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(SoftmaxOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const SoftmaxOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct SoftmaxOptionsBuilder { + typedef SoftmaxOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_beta(float beta) { + fbb_.AddElement(SoftmaxOptions::VT_BETA, beta, 0.0f); + } + explicit SoftmaxOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateSoftmaxOptions( + 
flatbuffers::FlatBufferBuilder &_fbb, + float beta = 0.0f) { + SoftmaxOptionsBuilder builder_(_fbb); + builder_.add_beta(beta); + return builder_.Finish(); +} + +flatbuffers::Offset CreateSoftmaxOptions(flatbuffers::FlatBufferBuilder &_fbb, const SoftmaxOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct ConcatenationOptionsT : public flatbuffers::NativeTable { + typedef ConcatenationOptions TableType; + int32_t axis = 0; + tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE; +}; + +struct ConcatenationOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef ConcatenationOptionsT NativeTableType; + typedef ConcatenationOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_AXIS = 4, + VT_FUSED_ACTIVATION_FUNCTION = 6 + }; + int32_t axis() const { + return GetField(VT_AXIS, 0); + } + tflite::ActivationFunctionType fused_activation_function() const { + return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_AXIS, 4) && + VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION, 1) && + verifier.EndTable(); + } + ConcatenationOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(ConcatenationOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const ConcatenationOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct ConcatenationOptionsBuilder { + typedef ConcatenationOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_axis(int32_t axis) { + fbb_.AddElement(ConcatenationOptions::VT_AXIS, axis, 0); + } + void add_fused_activation_function(tflite::ActivationFunctionType fused_activation_function) { + fbb_.AddElement(ConcatenationOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0); + } + explicit ConcatenationOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateConcatenationOptions( + flatbuffers::FlatBufferBuilder &_fbb, + int32_t axis = 0, + tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE) { + ConcatenationOptionsBuilder builder_(_fbb); + builder_.add_axis(axis); + builder_.add_fused_activation_function(fused_activation_function); + return builder_.Finish(); +} + +flatbuffers::Offset CreateConcatenationOptions(flatbuffers::FlatBufferBuilder &_fbb, const ConcatenationOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct AddOptionsT : public flatbuffers::NativeTable { + typedef AddOptions TableType; + tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE; + bool pot_scale_int16 = true; +}; + +struct AddOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef AddOptionsT NativeTableType; + typedef AddOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_FUSED_ACTIVATION_FUNCTION = 4, + VT_POT_SCALE_INT16 = 6 + }; + tflite::ActivationFunctionType fused_activation_function() const { + return 
static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + bool pot_scale_int16() const { + return GetField(VT_POT_SCALE_INT16, 1) != 0; + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION, 1) && + VerifyField(verifier, VT_POT_SCALE_INT16, 1) && + verifier.EndTable(); + } + AddOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(AddOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const AddOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct AddOptionsBuilder { + typedef AddOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_fused_activation_function(tflite::ActivationFunctionType fused_activation_function) { + fbb_.AddElement(AddOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0); + } + void add_pot_scale_int16(bool pot_scale_int16) { + fbb_.AddElement(AddOptions::VT_POT_SCALE_INT16, static_cast(pot_scale_int16), 1); + } + explicit AddOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateAddOptions( + flatbuffers::FlatBufferBuilder &_fbb, + tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE, + bool pot_scale_int16 = true) { + AddOptionsBuilder builder_(_fbb); + builder_.add_pot_scale_int16(pot_scale_int16); + builder_.add_fused_activation_function(fused_activation_function); + return builder_.Finish(); +} + +flatbuffers::Offset CreateAddOptions(flatbuffers::FlatBufferBuilder &_fbb, const AddOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct MulOptionsT : public flatbuffers::NativeTable { + typedef MulOptions TableType; + tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE; +}; + +struct MulOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef MulOptionsT NativeTableType; + typedef MulOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_FUSED_ACTIVATION_FUNCTION = 4 + }; + tflite::ActivationFunctionType fused_activation_function() const { + return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION, 1) && + verifier.EndTable(); + } + MulOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(MulOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const MulOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct MulOptionsBuilder { + typedef MulOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_fused_activation_function(tflite::ActivationFunctionType fused_activation_function) { + fbb_.AddElement(MulOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0); + } + explicit MulOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : 
fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateMulOptions( + flatbuffers::FlatBufferBuilder &_fbb, + tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE) { + MulOptionsBuilder builder_(_fbb); + builder_.add_fused_activation_function(fused_activation_function); + return builder_.Finish(); +} + +flatbuffers::Offset CreateMulOptions(flatbuffers::FlatBufferBuilder &_fbb, const MulOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct L2NormOptionsT : public flatbuffers::NativeTable { + typedef L2NormOptions TableType; + tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE; +}; + +struct L2NormOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef L2NormOptionsT NativeTableType; + typedef L2NormOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_FUSED_ACTIVATION_FUNCTION = 4 + }; + tflite::ActivationFunctionType fused_activation_function() const { + return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION, 1) && + verifier.EndTable(); + } + L2NormOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(L2NormOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const L2NormOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct L2NormOptionsBuilder { + typedef L2NormOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_fused_activation_function(tflite::ActivationFunctionType fused_activation_function) { + fbb_.AddElement(L2NormOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0); + } + explicit L2NormOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateL2NormOptions( + flatbuffers::FlatBufferBuilder &_fbb, + tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE) { + L2NormOptionsBuilder builder_(_fbb); + builder_.add_fused_activation_function(fused_activation_function); + return builder_.Finish(); +} + +flatbuffers::Offset CreateL2NormOptions(flatbuffers::FlatBufferBuilder &_fbb, const L2NormOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct LocalResponseNormalizationOptionsT : public flatbuffers::NativeTable { + typedef LocalResponseNormalizationOptions TableType; + int32_t radius = 0; + float bias = 0.0f; + float alpha = 0.0f; + float beta = 0.0f; +}; + +struct LocalResponseNormalizationOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef LocalResponseNormalizationOptionsT NativeTableType; + typedef LocalResponseNormalizationOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_RADIUS = 4, + VT_BIAS = 6, + VT_ALPHA = 8, + VT_BETA = 10 + }; + int32_t radius() const { + return GetField(VT_RADIUS, 0); + 
} + float bias() const { + return GetField(VT_BIAS, 0.0f); + } + float alpha() const { + return GetField(VT_ALPHA, 0.0f); + } + float beta() const { + return GetField(VT_BETA, 0.0f); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_RADIUS, 4) && + VerifyField(verifier, VT_BIAS, 4) && + VerifyField(verifier, VT_ALPHA, 4) && + VerifyField(verifier, VT_BETA, 4) && + verifier.EndTable(); + } + LocalResponseNormalizationOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(LocalResponseNormalizationOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const LocalResponseNormalizationOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct LocalResponseNormalizationOptionsBuilder { + typedef LocalResponseNormalizationOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_radius(int32_t radius) { + fbb_.AddElement(LocalResponseNormalizationOptions::VT_RADIUS, radius, 0); + } + void add_bias(float bias) { + fbb_.AddElement(LocalResponseNormalizationOptions::VT_BIAS, bias, 0.0f); + } + void add_alpha(float alpha) { + fbb_.AddElement(LocalResponseNormalizationOptions::VT_ALPHA, alpha, 0.0f); + } + void add_beta(float beta) { + fbb_.AddElement(LocalResponseNormalizationOptions::VT_BETA, beta, 0.0f); + } + explicit LocalResponseNormalizationOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateLocalResponseNormalizationOptions( + flatbuffers::FlatBufferBuilder &_fbb, + int32_t radius = 0, + float bias = 0.0f, + float alpha = 0.0f, + float beta = 0.0f) { + LocalResponseNormalizationOptionsBuilder builder_(_fbb); + builder_.add_beta(beta); + builder_.add_alpha(alpha); + builder_.add_bias(bias); + builder_.add_radius(radius); + return builder_.Finish(); +} + +flatbuffers::Offset CreateLocalResponseNormalizationOptions(flatbuffers::FlatBufferBuilder &_fbb, const LocalResponseNormalizationOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct LSTMOptionsT : public flatbuffers::NativeTable { + typedef LSTMOptions TableType; + tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE; + float cell_clip = 0.0f; + float proj_clip = 0.0f; + tflite::LSTMKernelType kernel_type = tflite::LSTMKernelType_FULL; + bool asymmetric_quantize_inputs = false; +}; + +struct LSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef LSTMOptionsT NativeTableType; + typedef LSTMOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_FUSED_ACTIVATION_FUNCTION = 4, + VT_CELL_CLIP = 6, + VT_PROJ_CLIP = 8, + VT_KERNEL_TYPE = 10, + VT_ASYMMETRIC_QUANTIZE_INPUTS = 12 + }; + tflite::ActivationFunctionType fused_activation_function() const { + return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + float cell_clip() const { + return GetField(VT_CELL_CLIP, 0.0f); + } + float proj_clip() const { + return GetField(VT_PROJ_CLIP, 0.0f); + } + tflite::LSTMKernelType kernel_type() const { + return static_cast(GetField(VT_KERNEL_TYPE, 0)); + } + bool asymmetric_quantize_inputs() const { + return 
GetField(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0; + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION, 1) && + VerifyField(verifier, VT_CELL_CLIP, 4) && + VerifyField(verifier, VT_PROJ_CLIP, 4) && + VerifyField(verifier, VT_KERNEL_TYPE, 1) && + VerifyField(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS, 1) && + verifier.EndTable(); + } + LSTMOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(LSTMOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const LSTMOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct LSTMOptionsBuilder { + typedef LSTMOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_fused_activation_function(tflite::ActivationFunctionType fused_activation_function) { + fbb_.AddElement(LSTMOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0); + } + void add_cell_clip(float cell_clip) { + fbb_.AddElement(LSTMOptions::VT_CELL_CLIP, cell_clip, 0.0f); + } + void add_proj_clip(float proj_clip) { + fbb_.AddElement(LSTMOptions::VT_PROJ_CLIP, proj_clip, 0.0f); + } + void add_kernel_type(tflite::LSTMKernelType kernel_type) { + fbb_.AddElement(LSTMOptions::VT_KERNEL_TYPE, static_cast(kernel_type), 0); + } + void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs) { + fbb_.AddElement(LSTMOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS, static_cast(asymmetric_quantize_inputs), 0); + } + explicit LSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateLSTMOptions( + flatbuffers::FlatBufferBuilder &_fbb, + tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE, + float cell_clip = 0.0f, + float proj_clip = 0.0f, + tflite::LSTMKernelType kernel_type = tflite::LSTMKernelType_FULL, + bool asymmetric_quantize_inputs = false) { + LSTMOptionsBuilder builder_(_fbb); + builder_.add_proj_clip(proj_clip); + builder_.add_cell_clip(cell_clip); + builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs); + builder_.add_kernel_type(kernel_type); + builder_.add_fused_activation_function(fused_activation_function); + return builder_.Finish(); +} + +flatbuffers::Offset CreateLSTMOptions(flatbuffers::FlatBufferBuilder &_fbb, const LSTMOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct UnidirectionalSequenceLSTMOptionsT : public flatbuffers::NativeTable { + typedef UnidirectionalSequenceLSTMOptions TableType; + tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE; + float cell_clip = 0.0f; + float proj_clip = 0.0f; + bool time_major = false; + bool asymmetric_quantize_inputs = false; + bool diagonal_recurrent_tensors = false; +}; + +struct UnidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef UnidirectionalSequenceLSTMOptionsT NativeTableType; + typedef UnidirectionalSequenceLSTMOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_FUSED_ACTIVATION_FUNCTION = 4, + VT_CELL_CLIP = 6, + VT_PROJ_CLIP = 8, + VT_TIME_MAJOR = 10, + 
VT_ASYMMETRIC_QUANTIZE_INPUTS = 12, + VT_DIAGONAL_RECURRENT_TENSORS = 14 + }; + tflite::ActivationFunctionType fused_activation_function() const { + return static_cast<tflite::ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + float cell_clip() const { + return GetField<float>(VT_CELL_CLIP, 0.0f); + } + float proj_clip() const { + return GetField<float>(VT_PROJ_CLIP, 0.0f); + } + bool time_major() const { + return GetField<uint8_t>(VT_TIME_MAJOR, 0) != 0; + } + bool asymmetric_quantize_inputs() const { + return GetField<uint8_t>(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0; + } + bool diagonal_recurrent_tensors() const { + return GetField<uint8_t>(VT_DIAGONAL_RECURRENT_TENSORS, 0) != 0; + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION, 1) && + VerifyField<float>(verifier, VT_CELL_CLIP, 4) && + VerifyField<float>(verifier, VT_PROJ_CLIP, 4) && + VerifyField<uint8_t>(verifier, VT_TIME_MAJOR, 1) && + VerifyField<uint8_t>(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS, 1) && + VerifyField<uint8_t>(verifier, VT_DIAGONAL_RECURRENT_TENSORS, 1) && + verifier.EndTable(); + } + UnidirectionalSequenceLSTMOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(UnidirectionalSequenceLSTMOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset<UnidirectionalSequenceLSTMOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const UnidirectionalSequenceLSTMOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct UnidirectionalSequenceLSTMOptionsBuilder { + typedef UnidirectionalSequenceLSTMOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_fused_activation_function(tflite::ActivationFunctionType fused_activation_function) { + fbb_.AddElement<int8_t>(UnidirectionalSequenceLSTMOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0); + } + void add_cell_clip(float cell_clip) { + fbb_.AddElement<float>(UnidirectionalSequenceLSTMOptions::VT_CELL_CLIP, cell_clip, 0.0f); + } + void add_proj_clip(float proj_clip) { + fbb_.AddElement<float>(UnidirectionalSequenceLSTMOptions::VT_PROJ_CLIP, proj_clip, 0.0f); + } + void add_time_major(bool time_major) { + fbb_.AddElement<uint8_t>(UnidirectionalSequenceLSTMOptions::VT_TIME_MAJOR, static_cast<uint8_t>(time_major), 0); + } + void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs) { + fbb_.AddElement<uint8_t>(UnidirectionalSequenceLSTMOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS, static_cast<uint8_t>(asymmetric_quantize_inputs), 0); + } + void add_diagonal_recurrent_tensors(bool diagonal_recurrent_tensors) { + fbb_.AddElement<uint8_t>(UnidirectionalSequenceLSTMOptions::VT_DIAGONAL_RECURRENT_TENSORS, static_cast<uint8_t>(diagonal_recurrent_tensors), 0); + } + explicit UnidirectionalSequenceLSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset<UnidirectionalSequenceLSTMOptions> Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<UnidirectionalSequenceLSTMOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<UnidirectionalSequenceLSTMOptions> CreateUnidirectionalSequenceLSTMOptions( + flatbuffers::FlatBufferBuilder &_fbb, + tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE, + float cell_clip = 0.0f, + float proj_clip = 0.0f, + bool time_major = false, + bool asymmetric_quantize_inputs = false, + bool diagonal_recurrent_tensors = false) { + UnidirectionalSequenceLSTMOptionsBuilder builder_(_fbb); + builder_.add_proj_clip(proj_clip); + builder_.add_cell_clip(cell_clip); + 
builder_.add_diagonal_recurrent_tensors(diagonal_recurrent_tensors); + builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs); + builder_.add_time_major(time_major); + builder_.add_fused_activation_function(fused_activation_function); + return builder_.Finish(); +} + +flatbuffers::Offset CreateUnidirectionalSequenceLSTMOptions(flatbuffers::FlatBufferBuilder &_fbb, const UnidirectionalSequenceLSTMOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct BidirectionalSequenceLSTMOptionsT : public flatbuffers::NativeTable { + typedef BidirectionalSequenceLSTMOptions TableType; + tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE; + float cell_clip = 0.0f; + float proj_clip = 0.0f; + bool merge_outputs = false; + bool time_major = true; + bool asymmetric_quantize_inputs = false; +}; + +struct BidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef BidirectionalSequenceLSTMOptionsT NativeTableType; + typedef BidirectionalSequenceLSTMOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_FUSED_ACTIVATION_FUNCTION = 4, + VT_CELL_CLIP = 6, + VT_PROJ_CLIP = 8, + VT_MERGE_OUTPUTS = 10, + VT_TIME_MAJOR = 12, + VT_ASYMMETRIC_QUANTIZE_INPUTS = 14 + }; + tflite::ActivationFunctionType fused_activation_function() const { + return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + float cell_clip() const { + return GetField(VT_CELL_CLIP, 0.0f); + } + float proj_clip() const { + return GetField(VT_PROJ_CLIP, 0.0f); + } + bool merge_outputs() const { + return GetField(VT_MERGE_OUTPUTS, 0) != 0; + } + bool time_major() const { + return GetField(VT_TIME_MAJOR, 1) != 0; + } + bool asymmetric_quantize_inputs() const { + return GetField(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0; + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION, 1) && + VerifyField(verifier, VT_CELL_CLIP, 4) && + VerifyField(verifier, VT_PROJ_CLIP, 4) && + VerifyField(verifier, VT_MERGE_OUTPUTS, 1) && + VerifyField(verifier, VT_TIME_MAJOR, 1) && + VerifyField(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS, 1) && + verifier.EndTable(); + } + BidirectionalSequenceLSTMOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(BidirectionalSequenceLSTMOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const BidirectionalSequenceLSTMOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct BidirectionalSequenceLSTMOptionsBuilder { + typedef BidirectionalSequenceLSTMOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_fused_activation_function(tflite::ActivationFunctionType fused_activation_function) { + fbb_.AddElement(BidirectionalSequenceLSTMOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0); + } + void add_cell_clip(float cell_clip) { + fbb_.AddElement(BidirectionalSequenceLSTMOptions::VT_CELL_CLIP, cell_clip, 0.0f); + } + void add_proj_clip(float proj_clip) { + fbb_.AddElement(BidirectionalSequenceLSTMOptions::VT_PROJ_CLIP, proj_clip, 0.0f); + } + void add_merge_outputs(bool merge_outputs) { + fbb_.AddElement(BidirectionalSequenceLSTMOptions::VT_MERGE_OUTPUTS, static_cast(merge_outputs), 0); + } + void add_time_major(bool 
time_major) { + fbb_.AddElement(BidirectionalSequenceLSTMOptions::VT_TIME_MAJOR, static_cast(time_major), 1); + } + void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs) { + fbb_.AddElement(BidirectionalSequenceLSTMOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS, static_cast(asymmetric_quantize_inputs), 0); + } + explicit BidirectionalSequenceLSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateBidirectionalSequenceLSTMOptions( + flatbuffers::FlatBufferBuilder &_fbb, + tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE, + float cell_clip = 0.0f, + float proj_clip = 0.0f, + bool merge_outputs = false, + bool time_major = true, + bool asymmetric_quantize_inputs = false) { + BidirectionalSequenceLSTMOptionsBuilder builder_(_fbb); + builder_.add_proj_clip(proj_clip); + builder_.add_cell_clip(cell_clip); + builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs); + builder_.add_time_major(time_major); + builder_.add_merge_outputs(merge_outputs); + builder_.add_fused_activation_function(fused_activation_function); + return builder_.Finish(); +} + +flatbuffers::Offset CreateBidirectionalSequenceLSTMOptions(flatbuffers::FlatBufferBuilder &_fbb, const BidirectionalSequenceLSTMOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct ResizeBilinearOptionsT : public flatbuffers::NativeTable { + typedef ResizeBilinearOptions TableType; + bool align_corners = false; + bool half_pixel_centers = false; +}; + +struct ResizeBilinearOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef ResizeBilinearOptionsT NativeTableType; + typedef ResizeBilinearOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_ALIGN_CORNERS = 8, + VT_HALF_PIXEL_CENTERS = 10 + }; + bool align_corners() const { + return GetField(VT_ALIGN_CORNERS, 0) != 0; + } + bool half_pixel_centers() const { + return GetField(VT_HALF_PIXEL_CENTERS, 0) != 0; + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_ALIGN_CORNERS, 1) && + VerifyField(verifier, VT_HALF_PIXEL_CENTERS, 1) && + verifier.EndTable(); + } + ResizeBilinearOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(ResizeBilinearOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const ResizeBilinearOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct ResizeBilinearOptionsBuilder { + typedef ResizeBilinearOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_align_corners(bool align_corners) { + fbb_.AddElement(ResizeBilinearOptions::VT_ALIGN_CORNERS, static_cast(align_corners), 0); + } + void add_half_pixel_centers(bool half_pixel_centers) { + fbb_.AddElement(ResizeBilinearOptions::VT_HALF_PIXEL_CENTERS, static_cast(half_pixel_centers), 0); + } + explicit ResizeBilinearOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline 
flatbuffers::Offset CreateResizeBilinearOptions( + flatbuffers::FlatBufferBuilder &_fbb, + bool align_corners = false, + bool half_pixel_centers = false) { + ResizeBilinearOptionsBuilder builder_(_fbb); + builder_.add_half_pixel_centers(half_pixel_centers); + builder_.add_align_corners(align_corners); + return builder_.Finish(); +} + +flatbuffers::Offset CreateResizeBilinearOptions(flatbuffers::FlatBufferBuilder &_fbb, const ResizeBilinearOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct ResizeNearestNeighborOptionsT : public flatbuffers::NativeTable { + typedef ResizeNearestNeighborOptions TableType; + bool align_corners = false; + bool half_pixel_centers = false; +}; + +struct ResizeNearestNeighborOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef ResizeNearestNeighborOptionsT NativeTableType; + typedef ResizeNearestNeighborOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_ALIGN_CORNERS = 4, + VT_HALF_PIXEL_CENTERS = 6 + }; + bool align_corners() const { + return GetField(VT_ALIGN_CORNERS, 0) != 0; + } + bool half_pixel_centers() const { + return GetField(VT_HALF_PIXEL_CENTERS, 0) != 0; + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_ALIGN_CORNERS, 1) && + VerifyField(verifier, VT_HALF_PIXEL_CENTERS, 1) && + verifier.EndTable(); + } + ResizeNearestNeighborOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(ResizeNearestNeighborOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const ResizeNearestNeighborOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct ResizeNearestNeighborOptionsBuilder { + typedef ResizeNearestNeighborOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_align_corners(bool align_corners) { + fbb_.AddElement(ResizeNearestNeighborOptions::VT_ALIGN_CORNERS, static_cast(align_corners), 0); + } + void add_half_pixel_centers(bool half_pixel_centers) { + fbb_.AddElement(ResizeNearestNeighborOptions::VT_HALF_PIXEL_CENTERS, static_cast(half_pixel_centers), 0); + } + explicit ResizeNearestNeighborOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateResizeNearestNeighborOptions( + flatbuffers::FlatBufferBuilder &_fbb, + bool align_corners = false, + bool half_pixel_centers = false) { + ResizeNearestNeighborOptionsBuilder builder_(_fbb); + builder_.add_half_pixel_centers(half_pixel_centers); + builder_.add_align_corners(align_corners); + return builder_.Finish(); +} + +flatbuffers::Offset CreateResizeNearestNeighborOptions(flatbuffers::FlatBufferBuilder &_fbb, const ResizeNearestNeighborOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct CallOptionsT : public flatbuffers::NativeTable { + typedef CallOptions TableType; + uint32_t subgraph = 0; +}; + +struct CallOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef CallOptionsT NativeTableType; + typedef CallOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_SUBGRAPH = 4 + }; + uint32_t subgraph() const { + 
return GetField<uint32_t>(VT_SUBGRAPH, 0); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField<uint32_t>(verifier, VT_SUBGRAPH, 4) && + verifier.EndTable(); + } + CallOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(CallOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset<CallOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const CallOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct CallOptionsBuilder { + typedef CallOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_subgraph(uint32_t subgraph) { + fbb_.AddElement<uint32_t>(CallOptions::VT_SUBGRAPH, subgraph, 0); + } + explicit CallOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset<CallOptions> Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<CallOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<CallOptions> CreateCallOptions( + flatbuffers::FlatBufferBuilder &_fbb, + uint32_t subgraph = 0) { + CallOptionsBuilder builder_(_fbb); + builder_.add_subgraph(subgraph); + return builder_.Finish(); +} + +flatbuffers::Offset<CallOptions> CreateCallOptions(flatbuffers::FlatBufferBuilder &_fbb, const CallOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct PadOptionsT : public flatbuffers::NativeTable { + typedef PadOptions TableType; +}; + +struct PadOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef PadOptionsT NativeTableType; + typedef PadOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + PadOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(PadOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset<PadOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const PadOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct PadOptionsBuilder { + typedef PadOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit PadOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset<PadOptions> Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<PadOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<PadOptions> CreatePadOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + PadOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset<PadOptions> CreatePadOptions(flatbuffers::FlatBufferBuilder &_fbb, const PadOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct PadV2OptionsT : public flatbuffers::NativeTable { + typedef PadV2Options TableType; +}; + +struct PadV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef PadV2OptionsT NativeTableType; + typedef PadV2OptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + PadV2OptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(PadV2OptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset<PadV2Options> Pack(flatbuffers::FlatBufferBuilder &_fbb, const PadV2OptionsT* _o, const flatbuffers::rehasher_function_t 
*_rehasher = nullptr); +}; + +struct PadV2OptionsBuilder { + typedef PadV2Options Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit PadV2OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreatePadV2Options( + flatbuffers::FlatBufferBuilder &_fbb) { + PadV2OptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreatePadV2Options(flatbuffers::FlatBufferBuilder &_fbb, const PadV2OptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct ReshapeOptionsT : public flatbuffers::NativeTable { + typedef ReshapeOptions TableType; + std::vector new_shape{}; +}; + +struct ReshapeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef ReshapeOptionsT NativeTableType; + typedef ReshapeOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_NEW_SHAPE = 4 + }; + const flatbuffers::Vector *new_shape() const { + return GetPointer *>(VT_NEW_SHAPE); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_NEW_SHAPE) && + verifier.VerifyVector(new_shape()) && + verifier.EndTable(); + } + ReshapeOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(ReshapeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const ReshapeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct ReshapeOptionsBuilder { + typedef ReshapeOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_new_shape(flatbuffers::Offset> new_shape) { + fbb_.AddOffset(ReshapeOptions::VT_NEW_SHAPE, new_shape); + } + explicit ReshapeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateReshapeOptions( + flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset> new_shape = 0) { + ReshapeOptionsBuilder builder_(_fbb); + builder_.add_new_shape(new_shape); + return builder_.Finish(); +} + +inline flatbuffers::Offset CreateReshapeOptionsDirect( + flatbuffers::FlatBufferBuilder &_fbb, + const std::vector *new_shape = nullptr) { + auto new_shape__ = new_shape ? 
_fbb.CreateVector(*new_shape) : 0; + return tflite::CreateReshapeOptions( + _fbb, + new_shape__); +} + +flatbuffers::Offset CreateReshapeOptions(flatbuffers::FlatBufferBuilder &_fbb, const ReshapeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct SpaceToBatchNDOptionsT : public flatbuffers::NativeTable { + typedef SpaceToBatchNDOptions TableType; +}; + +struct SpaceToBatchNDOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef SpaceToBatchNDOptionsT NativeTableType; + typedef SpaceToBatchNDOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + SpaceToBatchNDOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(SpaceToBatchNDOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const SpaceToBatchNDOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct SpaceToBatchNDOptionsBuilder { + typedef SpaceToBatchNDOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit SpaceToBatchNDOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateSpaceToBatchNDOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + SpaceToBatchNDOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateSpaceToBatchNDOptions(flatbuffers::FlatBufferBuilder &_fbb, const SpaceToBatchNDOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct BatchToSpaceNDOptionsT : public flatbuffers::NativeTable { + typedef BatchToSpaceNDOptions TableType; +}; + +struct BatchToSpaceNDOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef BatchToSpaceNDOptionsT NativeTableType; + typedef BatchToSpaceNDOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + BatchToSpaceNDOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(BatchToSpaceNDOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const BatchToSpaceNDOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct BatchToSpaceNDOptionsBuilder { + typedef BatchToSpaceNDOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit BatchToSpaceNDOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateBatchToSpaceNDOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + BatchToSpaceNDOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateBatchToSpaceNDOptions(flatbuffers::FlatBufferBuilder &_fbb, const BatchToSpaceNDOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct SkipGramOptionsT : public flatbuffers::NativeTable { + typedef SkipGramOptions TableType; + int32_t 
ngram_size = 0; + int32_t max_skip_size = 0; + bool include_all_ngrams = false; +}; + +struct SkipGramOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef SkipGramOptionsT NativeTableType; + typedef SkipGramOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_NGRAM_SIZE = 4, + VT_MAX_SKIP_SIZE = 6, + VT_INCLUDE_ALL_NGRAMS = 8 + }; + int32_t ngram_size() const { + return GetField(VT_NGRAM_SIZE, 0); + } + int32_t max_skip_size() const { + return GetField(VT_MAX_SKIP_SIZE, 0); + } + bool include_all_ngrams() const { + return GetField(VT_INCLUDE_ALL_NGRAMS, 0) != 0; + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_NGRAM_SIZE, 4) && + VerifyField(verifier, VT_MAX_SKIP_SIZE, 4) && + VerifyField(verifier, VT_INCLUDE_ALL_NGRAMS, 1) && + verifier.EndTable(); + } + SkipGramOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(SkipGramOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const SkipGramOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct SkipGramOptionsBuilder { + typedef SkipGramOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_ngram_size(int32_t ngram_size) { + fbb_.AddElement(SkipGramOptions::VT_NGRAM_SIZE, ngram_size, 0); + } + void add_max_skip_size(int32_t max_skip_size) { + fbb_.AddElement(SkipGramOptions::VT_MAX_SKIP_SIZE, max_skip_size, 0); + } + void add_include_all_ngrams(bool include_all_ngrams) { + fbb_.AddElement(SkipGramOptions::VT_INCLUDE_ALL_NGRAMS, static_cast(include_all_ngrams), 0); + } + explicit SkipGramOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateSkipGramOptions( + flatbuffers::FlatBufferBuilder &_fbb, + int32_t ngram_size = 0, + int32_t max_skip_size = 0, + bool include_all_ngrams = false) { + SkipGramOptionsBuilder builder_(_fbb); + builder_.add_max_skip_size(max_skip_size); + builder_.add_ngram_size(ngram_size); + builder_.add_include_all_ngrams(include_all_ngrams); + return builder_.Finish(); +} + +flatbuffers::Offset CreateSkipGramOptions(flatbuffers::FlatBufferBuilder &_fbb, const SkipGramOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct SpaceToDepthOptionsT : public flatbuffers::NativeTable { + typedef SpaceToDepthOptions TableType; + int32_t block_size = 0; +}; + +struct SpaceToDepthOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef SpaceToDepthOptionsT NativeTableType; + typedef SpaceToDepthOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_BLOCK_SIZE = 4 + }; + int32_t block_size() const { + return GetField(VT_BLOCK_SIZE, 0); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_BLOCK_SIZE, 4) && + verifier.EndTable(); + } + SpaceToDepthOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(SpaceToDepthOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder 
&_fbb, const SpaceToDepthOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct SpaceToDepthOptionsBuilder { + typedef SpaceToDepthOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_block_size(int32_t block_size) { + fbb_.AddElement(SpaceToDepthOptions::VT_BLOCK_SIZE, block_size, 0); + } + explicit SpaceToDepthOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateSpaceToDepthOptions( + flatbuffers::FlatBufferBuilder &_fbb, + int32_t block_size = 0) { + SpaceToDepthOptionsBuilder builder_(_fbb); + builder_.add_block_size(block_size); + return builder_.Finish(); +} + +flatbuffers::Offset CreateSpaceToDepthOptions(flatbuffers::FlatBufferBuilder &_fbb, const SpaceToDepthOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct DepthToSpaceOptionsT : public flatbuffers::NativeTable { + typedef DepthToSpaceOptions TableType; + int32_t block_size = 0; +}; + +struct DepthToSpaceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef DepthToSpaceOptionsT NativeTableType; + typedef DepthToSpaceOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_BLOCK_SIZE = 4 + }; + int32_t block_size() const { + return GetField(VT_BLOCK_SIZE, 0); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_BLOCK_SIZE, 4) && + verifier.EndTable(); + } + DepthToSpaceOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(DepthToSpaceOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const DepthToSpaceOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct DepthToSpaceOptionsBuilder { + typedef DepthToSpaceOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_block_size(int32_t block_size) { + fbb_.AddElement(DepthToSpaceOptions::VT_BLOCK_SIZE, block_size, 0); + } + explicit DepthToSpaceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateDepthToSpaceOptions( + flatbuffers::FlatBufferBuilder &_fbb, + int32_t block_size = 0) { + DepthToSpaceOptionsBuilder builder_(_fbb); + builder_.add_block_size(block_size); + return builder_.Finish(); +} + +flatbuffers::Offset CreateDepthToSpaceOptions(flatbuffers::FlatBufferBuilder &_fbb, const DepthToSpaceOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct SubOptionsT : public flatbuffers::NativeTable { + typedef SubOptions TableType; + tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE; + bool pot_scale_int16 = true; +}; + +struct SubOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef SubOptionsT NativeTableType; + typedef SubOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_FUSED_ACTIVATION_FUNCTION = 4, + VT_POT_SCALE_INT16 = 6 + }; + 
tflite::ActivationFunctionType fused_activation_function() const { + return static_cast<tflite::ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + bool pot_scale_int16() const { + return GetField<uint8_t>(VT_POT_SCALE_INT16, 1) != 0; + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION, 1) && + VerifyField<uint8_t>(verifier, VT_POT_SCALE_INT16, 1) && + verifier.EndTable(); + } + SubOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(SubOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset<SubOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const SubOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct SubOptionsBuilder { + typedef SubOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_fused_activation_function(tflite::ActivationFunctionType fused_activation_function) { + fbb_.AddElement<int8_t>(SubOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0); + } + void add_pot_scale_int16(bool pot_scale_int16) { + fbb_.AddElement<uint8_t>(SubOptions::VT_POT_SCALE_INT16, static_cast<uint8_t>(pot_scale_int16), 1); + } + explicit SubOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset<SubOptions> Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset<SubOptions>(end); + return o; + } +}; + +inline flatbuffers::Offset<SubOptions> CreateSubOptions( + flatbuffers::FlatBufferBuilder &_fbb, + tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE, + bool pot_scale_int16 = true) { + SubOptionsBuilder builder_(_fbb); + builder_.add_pot_scale_int16(pot_scale_int16); + builder_.add_fused_activation_function(fused_activation_function); + return builder_.Finish(); +} + +flatbuffers::Offset<SubOptions> CreateSubOptions(flatbuffers::FlatBufferBuilder &_fbb, const SubOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct DivOptionsT : public flatbuffers::NativeTable { + typedef DivOptions TableType; + tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE; +}; + +struct DivOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef DivOptionsT NativeTableType; + typedef DivOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_FUSED_ACTIVATION_FUNCTION = 4 + }; + tflite::ActivationFunctionType fused_activation_function() const { + return static_cast<tflite::ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION, 1) && + verifier.EndTable(); + } + DivOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(DivOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset<DivOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const DivOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct DivOptionsBuilder { + typedef DivOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_fused_activation_function(tflite::ActivationFunctionType fused_activation_function) { + fbb_.AddElement<int8_t>(DivOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0); + } 
+ explicit DivOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateDivOptions( + flatbuffers::FlatBufferBuilder &_fbb, + tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE) { + DivOptionsBuilder builder_(_fbb); + builder_.add_fused_activation_function(fused_activation_function); + return builder_.Finish(); +} + +flatbuffers::Offset CreateDivOptions(flatbuffers::FlatBufferBuilder &_fbb, const DivOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct TopKV2OptionsT : public flatbuffers::NativeTable { + typedef TopKV2Options TableType; +}; + +struct TopKV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef TopKV2OptionsT NativeTableType; + typedef TopKV2OptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + TopKV2OptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(TopKV2OptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const TopKV2OptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct TopKV2OptionsBuilder { + typedef TopKV2Options Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit TopKV2OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateTopKV2Options( + flatbuffers::FlatBufferBuilder &_fbb) { + TopKV2OptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateTopKV2Options(flatbuffers::FlatBufferBuilder &_fbb, const TopKV2OptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct EmbeddingLookupSparseOptionsT : public flatbuffers::NativeTable { + typedef EmbeddingLookupSparseOptions TableType; + tflite::CombinerType combiner = tflite::CombinerType_SUM; +}; + +struct EmbeddingLookupSparseOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef EmbeddingLookupSparseOptionsT NativeTableType; + typedef EmbeddingLookupSparseOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_COMBINER = 4 + }; + tflite::CombinerType combiner() const { + return static_cast(GetField(VT_COMBINER, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_COMBINER, 1) && + verifier.EndTable(); + } + EmbeddingLookupSparseOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(EmbeddingLookupSparseOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const EmbeddingLookupSparseOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct EmbeddingLookupSparseOptionsBuilder { + typedef EmbeddingLookupSparseOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void 
add_combiner(tflite::CombinerType combiner) { + fbb_.AddElement(EmbeddingLookupSparseOptions::VT_COMBINER, static_cast(combiner), 0); + } + explicit EmbeddingLookupSparseOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateEmbeddingLookupSparseOptions( + flatbuffers::FlatBufferBuilder &_fbb, + tflite::CombinerType combiner = tflite::CombinerType_SUM) { + EmbeddingLookupSparseOptionsBuilder builder_(_fbb); + builder_.add_combiner(combiner); + return builder_.Finish(); +} + +flatbuffers::Offset CreateEmbeddingLookupSparseOptions(flatbuffers::FlatBufferBuilder &_fbb, const EmbeddingLookupSparseOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct GatherOptionsT : public flatbuffers::NativeTable { + typedef GatherOptions TableType; + int32_t axis = 0; + int32_t batch_dims = 0; +}; + +struct GatherOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef GatherOptionsT NativeTableType; + typedef GatherOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_AXIS = 4, + VT_BATCH_DIMS = 6 + }; + int32_t axis() const { + return GetField(VT_AXIS, 0); + } + int32_t batch_dims() const { + return GetField(VT_BATCH_DIMS, 0); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_AXIS, 4) && + VerifyField(verifier, VT_BATCH_DIMS, 4) && + verifier.EndTable(); + } + GatherOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(GatherOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const GatherOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct GatherOptionsBuilder { + typedef GatherOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_axis(int32_t axis) { + fbb_.AddElement(GatherOptions::VT_AXIS, axis, 0); + } + void add_batch_dims(int32_t batch_dims) { + fbb_.AddElement(GatherOptions::VT_BATCH_DIMS, batch_dims, 0); + } + explicit GatherOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateGatherOptions( + flatbuffers::FlatBufferBuilder &_fbb, + int32_t axis = 0, + int32_t batch_dims = 0) { + GatherOptionsBuilder builder_(_fbb); + builder_.add_batch_dims(batch_dims); + builder_.add_axis(axis); + return builder_.Finish(); +} + +flatbuffers::Offset CreateGatherOptions(flatbuffers::FlatBufferBuilder &_fbb, const GatherOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct TransposeOptionsT : public flatbuffers::NativeTable { + typedef TransposeOptions TableType; +}; + +struct TransposeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef TransposeOptionsT NativeTableType; + typedef TransposeOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + TransposeOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void 
+
+struct TransposeOptionsT : public flatbuffers::NativeTable {
+  typedef TransposeOptions TableType;
+};
+
+struct TransposeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  typedef TransposeOptionsT NativeTableType;
+  typedef TransposeOptionsBuilder Builder;
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) &&
+           verifier.EndTable();
+  }
+  TransposeOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(TransposeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<TransposeOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const TransposeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct TransposeOptionsBuilder {
+  typedef TransposeOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit TransposeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<TransposeOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<TransposeOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<TransposeOptions> CreateTransposeOptions(
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  TransposeOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<TransposeOptions> CreateTransposeOptions(flatbuffers::FlatBufferBuilder &_fbb, const TransposeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct ExpOptionsT : public flatbuffers::NativeTable {
+  typedef ExpOptions TableType;
+};
+
+struct ExpOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  typedef ExpOptionsT NativeTableType;
+  typedef ExpOptionsBuilder Builder;
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) &&
+           verifier.EndTable();
+  }
+  ExpOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(ExpOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<ExpOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const ExpOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct ExpOptionsBuilder {
+  typedef ExpOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit ExpOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<ExpOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<ExpOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<ExpOptions> CreateExpOptions(
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  ExpOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<ExpOptions> CreateExpOptions(flatbuffers::FlatBufferBuilder &_fbb, const ExpOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct CosOptionsT : public flatbuffers::NativeTable {
+  typedef CosOptions TableType;
+};
+
+struct CosOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  typedef CosOptionsT NativeTableType;
+  typedef CosOptionsBuilder Builder;
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) &&
+           verifier.EndTable();
+  }
+  CosOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(CosOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<CosOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const CosOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct CosOptionsBuilder {
+  typedef CosOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit CosOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<CosOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<CosOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<CosOptions> CreateCosOptions(
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  CosOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<CosOptions> CreateCosOptions(flatbuffers::FlatBufferBuilder &_fbb, const CosOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct ReducerOptionsT : public flatbuffers::NativeTable {
+  typedef ReducerOptions TableType;
+  bool keep_dims = false;
+};
+
+struct ReducerOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  typedef ReducerOptionsT NativeTableType;
+  typedef ReducerOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
+    VT_KEEP_DIMS = 4
+  };
+  bool keep_dims() const {
+    return GetField<uint8_t>(VT_KEEP_DIMS, 0) != 0;
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) &&
+           VerifyField<uint8_t>(verifier, VT_KEEP_DIMS, 1) &&
+           verifier.EndTable();
+  }
+  ReducerOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(ReducerOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<ReducerOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const ReducerOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct ReducerOptionsBuilder {
+  typedef ReducerOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_keep_dims(bool keep_dims) {
+    fbb_.AddElement<uint8_t>(ReducerOptions::VT_KEEP_DIMS, static_cast<uint8_t>(keep_dims), 0);
+  }
+  explicit ReducerOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<ReducerOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<ReducerOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<ReducerOptions> CreateReducerOptions(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    bool keep_dims = false) {
+  ReducerOptionsBuilder builder_(_fbb);
+  builder_.add_keep_dims(keep_dims);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<ReducerOptions> CreateReducerOptions(flatbuffers::FlatBufferBuilder &_fbb, const ReducerOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
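+
+// ---------------------------------------------------------------------------
+// Illustrative sketch, not flatc output: the object ("native") API for
+// ReducerOptions. ExamplePackReducerOptions is a hypothetical name; Pack() is
+// the generated declaration above and is defined elsewhere in the SDK, so this
+// only shows the intended call shape.
+// ---------------------------------------------------------------------------
+inline flatbuffers::Offset<ReducerOptions> ExamplePackReducerOptions(
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  // Mutate the plain C++ mirror struct, then Pack it into the builder.
+  ReducerOptionsT native;
+  native.keep_dims = true;
+  return ReducerOptions::Pack(_fbb, &native);
+}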
+
+struct SqueezeOptionsT : public flatbuffers::NativeTable {
+  typedef SqueezeOptions TableType;
+  std::vector<int32_t> squeeze_dims{};
+};
+
+struct SqueezeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  typedef SqueezeOptionsT NativeTableType;
+  typedef SqueezeOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
+    VT_SQUEEZE_DIMS = 4
+  };
+  const flatbuffers::Vector<int32_t> *squeeze_dims() const {
+    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_SQUEEZE_DIMS);
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) &&
+           VerifyOffset(verifier, VT_SQUEEZE_DIMS) &&
+           verifier.VerifyVector(squeeze_dims()) &&
+           verifier.EndTable();
+  }
+  SqueezeOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(SqueezeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<SqueezeOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const SqueezeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct SqueezeOptionsBuilder {
+  typedef SqueezeOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_squeeze_dims(flatbuffers::Offset<flatbuffers::Vector<int32_t>> squeeze_dims) {
+    fbb_.AddOffset(SqueezeOptions::VT_SQUEEZE_DIMS, squeeze_dims);
+  }
+  explicit
SqueezeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateSqueezeOptions( + flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset> squeeze_dims = 0) { + SqueezeOptionsBuilder builder_(_fbb); + builder_.add_squeeze_dims(squeeze_dims); + return builder_.Finish(); +} + +inline flatbuffers::Offset CreateSqueezeOptionsDirect( + flatbuffers::FlatBufferBuilder &_fbb, + const std::vector *squeeze_dims = nullptr) { + auto squeeze_dims__ = squeeze_dims ? _fbb.CreateVector(*squeeze_dims) : 0; + return tflite::CreateSqueezeOptions( + _fbb, + squeeze_dims__); +} + +flatbuffers::Offset CreateSqueezeOptions(flatbuffers::FlatBufferBuilder &_fbb, const SqueezeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct SplitOptionsT : public flatbuffers::NativeTable { + typedef SplitOptions TableType; + int32_t num_splits = 0; +}; + +struct SplitOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef SplitOptionsT NativeTableType; + typedef SplitOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_NUM_SPLITS = 4 + }; + int32_t num_splits() const { + return GetField(VT_NUM_SPLITS, 0); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_NUM_SPLITS, 4) && + verifier.EndTable(); + } + SplitOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(SplitOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const SplitOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct SplitOptionsBuilder { + typedef SplitOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_num_splits(int32_t num_splits) { + fbb_.AddElement(SplitOptions::VT_NUM_SPLITS, num_splits, 0); + } + explicit SplitOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateSplitOptions( + flatbuffers::FlatBufferBuilder &_fbb, + int32_t num_splits = 0) { + SplitOptionsBuilder builder_(_fbb); + builder_.add_num_splits(num_splits); + return builder_.Finish(); +} + +flatbuffers::Offset CreateSplitOptions(flatbuffers::FlatBufferBuilder &_fbb, const SplitOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct SplitVOptionsT : public flatbuffers::NativeTable { + typedef SplitVOptions TableType; + int32_t num_splits = 0; +}; + +struct SplitVOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef SplitVOptionsT NativeTableType; + typedef SplitVOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_NUM_SPLITS = 4 + }; + int32_t num_splits() const { + return GetField(VT_NUM_SPLITS, 0); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_NUM_SPLITS, 4) && + verifier.EndTable(); + } + SplitVOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void 
UnPackTo(SplitVOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const SplitVOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct SplitVOptionsBuilder { + typedef SplitVOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_num_splits(int32_t num_splits) { + fbb_.AddElement(SplitVOptions::VT_NUM_SPLITS, num_splits, 0); + } + explicit SplitVOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateSplitVOptions( + flatbuffers::FlatBufferBuilder &_fbb, + int32_t num_splits = 0) { + SplitVOptionsBuilder builder_(_fbb); + builder_.add_num_splits(num_splits); + return builder_.Finish(); +} + +flatbuffers::Offset CreateSplitVOptions(flatbuffers::FlatBufferBuilder &_fbb, const SplitVOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct StridedSliceOptionsT : public flatbuffers::NativeTable { + typedef StridedSliceOptions TableType; + int32_t begin_mask = 0; + int32_t end_mask = 0; + int32_t ellipsis_mask = 0; + int32_t new_axis_mask = 0; + int32_t shrink_axis_mask = 0; +}; + +struct StridedSliceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef StridedSliceOptionsT NativeTableType; + typedef StridedSliceOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_BEGIN_MASK = 4, + VT_END_MASK = 6, + VT_ELLIPSIS_MASK = 8, + VT_NEW_AXIS_MASK = 10, + VT_SHRINK_AXIS_MASK = 12 + }; + int32_t begin_mask() const { + return GetField(VT_BEGIN_MASK, 0); + } + int32_t end_mask() const { + return GetField(VT_END_MASK, 0); + } + int32_t ellipsis_mask() const { + return GetField(VT_ELLIPSIS_MASK, 0); + } + int32_t new_axis_mask() const { + return GetField(VT_NEW_AXIS_MASK, 0); + } + int32_t shrink_axis_mask() const { + return GetField(VT_SHRINK_AXIS_MASK, 0); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_BEGIN_MASK, 4) && + VerifyField(verifier, VT_END_MASK, 4) && + VerifyField(verifier, VT_ELLIPSIS_MASK, 4) && + VerifyField(verifier, VT_NEW_AXIS_MASK, 4) && + VerifyField(verifier, VT_SHRINK_AXIS_MASK, 4) && + verifier.EndTable(); + } + StridedSliceOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(StridedSliceOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const StridedSliceOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct StridedSliceOptionsBuilder { + typedef StridedSliceOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_begin_mask(int32_t begin_mask) { + fbb_.AddElement(StridedSliceOptions::VT_BEGIN_MASK, begin_mask, 0); + } + void add_end_mask(int32_t end_mask) { + fbb_.AddElement(StridedSliceOptions::VT_END_MASK, end_mask, 0); + } + void add_ellipsis_mask(int32_t ellipsis_mask) { + fbb_.AddElement(StridedSliceOptions::VT_ELLIPSIS_MASK, ellipsis_mask, 0); + } + void add_new_axis_mask(int32_t new_axis_mask) { + fbb_.AddElement(StridedSliceOptions::VT_NEW_AXIS_MASK, new_axis_mask, 0); + } + void 
add_shrink_axis_mask(int32_t shrink_axis_mask) { + fbb_.AddElement(StridedSliceOptions::VT_SHRINK_AXIS_MASK, shrink_axis_mask, 0); + } + explicit StridedSliceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateStridedSliceOptions( + flatbuffers::FlatBufferBuilder &_fbb, + int32_t begin_mask = 0, + int32_t end_mask = 0, + int32_t ellipsis_mask = 0, + int32_t new_axis_mask = 0, + int32_t shrink_axis_mask = 0) { + StridedSliceOptionsBuilder builder_(_fbb); + builder_.add_shrink_axis_mask(shrink_axis_mask); + builder_.add_new_axis_mask(new_axis_mask); + builder_.add_ellipsis_mask(ellipsis_mask); + builder_.add_end_mask(end_mask); + builder_.add_begin_mask(begin_mask); + return builder_.Finish(); +} + +flatbuffers::Offset CreateStridedSliceOptions(flatbuffers::FlatBufferBuilder &_fbb, const StridedSliceOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct LogSoftmaxOptionsT : public flatbuffers::NativeTable { + typedef LogSoftmaxOptions TableType; +}; + +struct LogSoftmaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef LogSoftmaxOptionsT NativeTableType; + typedef LogSoftmaxOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + LogSoftmaxOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(LogSoftmaxOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const LogSoftmaxOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct LogSoftmaxOptionsBuilder { + typedef LogSoftmaxOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit LogSoftmaxOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateLogSoftmaxOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + LogSoftmaxOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateLogSoftmaxOptions(flatbuffers::FlatBufferBuilder &_fbb, const LogSoftmaxOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct CastOptionsT : public flatbuffers::NativeTable { + typedef CastOptions TableType; + tflite::TensorType in_data_type = tflite::TensorType_FLOAT32; + tflite::TensorType out_data_type = tflite::TensorType_FLOAT32; +}; + +struct CastOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef CastOptionsT NativeTableType; + typedef CastOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_IN_DATA_TYPE = 4, + VT_OUT_DATA_TYPE = 6 + }; + tflite::TensorType in_data_type() const { + return static_cast(GetField(VT_IN_DATA_TYPE, 0)); + } + tflite::TensorType out_data_type() const { + return static_cast(GetField(VT_OUT_DATA_TYPE, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_IN_DATA_TYPE, 1) && + VerifyField(verifier, VT_OUT_DATA_TYPE, 1) && + verifier.EndTable(); + } + 
CastOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(CastOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const CastOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct CastOptionsBuilder { + typedef CastOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_in_data_type(tflite::TensorType in_data_type) { + fbb_.AddElement(CastOptions::VT_IN_DATA_TYPE, static_cast(in_data_type), 0); + } + void add_out_data_type(tflite::TensorType out_data_type) { + fbb_.AddElement(CastOptions::VT_OUT_DATA_TYPE, static_cast(out_data_type), 0); + } + explicit CastOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateCastOptions( + flatbuffers::FlatBufferBuilder &_fbb, + tflite::TensorType in_data_type = tflite::TensorType_FLOAT32, + tflite::TensorType out_data_type = tflite::TensorType_FLOAT32) { + CastOptionsBuilder builder_(_fbb); + builder_.add_out_data_type(out_data_type); + builder_.add_in_data_type(in_data_type); + return builder_.Finish(); +} + +flatbuffers::Offset CreateCastOptions(flatbuffers::FlatBufferBuilder &_fbb, const CastOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct DequantizeOptionsT : public flatbuffers::NativeTable { + typedef DequantizeOptions TableType; +}; + +struct DequantizeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef DequantizeOptionsT NativeTableType; + typedef DequantizeOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + DequantizeOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(DequantizeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const DequantizeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct DequantizeOptionsBuilder { + typedef DequantizeOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit DequantizeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateDequantizeOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + DequantizeOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateDequantizeOptions(flatbuffers::FlatBufferBuilder &_fbb, const DequantizeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct MaximumMinimumOptionsT : public flatbuffers::NativeTable { + typedef MaximumMinimumOptions TableType; +}; + +struct MaximumMinimumOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef MaximumMinimumOptionsT NativeTableType; + typedef MaximumMinimumOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + MaximumMinimumOptionsT *UnPack(const 
flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(MaximumMinimumOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const MaximumMinimumOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct MaximumMinimumOptionsBuilder { + typedef MaximumMinimumOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit MaximumMinimumOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateMaximumMinimumOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + MaximumMinimumOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateMaximumMinimumOptions(flatbuffers::FlatBufferBuilder &_fbb, const MaximumMinimumOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct TileOptionsT : public flatbuffers::NativeTable { + typedef TileOptions TableType; +}; + +struct TileOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef TileOptionsT NativeTableType; + typedef TileOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + TileOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(TileOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const TileOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct TileOptionsBuilder { + typedef TileOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit TileOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateTileOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + TileOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateTileOptions(flatbuffers::FlatBufferBuilder &_fbb, const TileOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct ArgMaxOptionsT : public flatbuffers::NativeTable { + typedef ArgMaxOptions TableType; + tflite::TensorType output_type = tflite::TensorType_FLOAT32; +}; + +struct ArgMaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef ArgMaxOptionsT NativeTableType; + typedef ArgMaxOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_OUTPUT_TYPE = 4 + }; + tflite::TensorType output_type() const { + return static_cast(GetField(VT_OUTPUT_TYPE, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_OUTPUT_TYPE, 1) && + verifier.EndTable(); + } + ArgMaxOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(ArgMaxOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const ArgMaxOptionsT* _o, const 
flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct ArgMaxOptionsBuilder { + typedef ArgMaxOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_output_type(tflite::TensorType output_type) { + fbb_.AddElement(ArgMaxOptions::VT_OUTPUT_TYPE, static_cast(output_type), 0); + } + explicit ArgMaxOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateArgMaxOptions( + flatbuffers::FlatBufferBuilder &_fbb, + tflite::TensorType output_type = tflite::TensorType_FLOAT32) { + ArgMaxOptionsBuilder builder_(_fbb); + builder_.add_output_type(output_type); + return builder_.Finish(); +} + +flatbuffers::Offset CreateArgMaxOptions(flatbuffers::FlatBufferBuilder &_fbb, const ArgMaxOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct ArgMinOptionsT : public flatbuffers::NativeTable { + typedef ArgMinOptions TableType; + tflite::TensorType output_type = tflite::TensorType_FLOAT32; +}; + +struct ArgMinOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef ArgMinOptionsT NativeTableType; + typedef ArgMinOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_OUTPUT_TYPE = 4 + }; + tflite::TensorType output_type() const { + return static_cast(GetField(VT_OUTPUT_TYPE, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_OUTPUT_TYPE, 1) && + verifier.EndTable(); + } + ArgMinOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(ArgMinOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const ArgMinOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct ArgMinOptionsBuilder { + typedef ArgMinOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_output_type(tflite::TensorType output_type) { + fbb_.AddElement(ArgMinOptions::VT_OUTPUT_TYPE, static_cast(output_type), 0); + } + explicit ArgMinOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateArgMinOptions( + flatbuffers::FlatBufferBuilder &_fbb, + tflite::TensorType output_type = tflite::TensorType_FLOAT32) { + ArgMinOptionsBuilder builder_(_fbb); + builder_.add_output_type(output_type); + return builder_.Finish(); +} + +flatbuffers::Offset CreateArgMinOptions(flatbuffers::FlatBufferBuilder &_fbb, const ArgMinOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct GreaterOptionsT : public flatbuffers::NativeTable { + typedef GreaterOptions TableType; +}; + +struct GreaterOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef GreaterOptionsT NativeTableType; + typedef GreaterOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + GreaterOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(GreaterOptionsT *_o, 
const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const GreaterOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct GreaterOptionsBuilder { + typedef GreaterOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit GreaterOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateGreaterOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + GreaterOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateGreaterOptions(flatbuffers::FlatBufferBuilder &_fbb, const GreaterOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct GreaterEqualOptionsT : public flatbuffers::NativeTable { + typedef GreaterEqualOptions TableType; +}; + +struct GreaterEqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef GreaterEqualOptionsT NativeTableType; + typedef GreaterEqualOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + GreaterEqualOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(GreaterEqualOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const GreaterEqualOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct GreaterEqualOptionsBuilder { + typedef GreaterEqualOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit GreaterEqualOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateGreaterEqualOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + GreaterEqualOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateGreaterEqualOptions(flatbuffers::FlatBufferBuilder &_fbb, const GreaterEqualOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct LessOptionsT : public flatbuffers::NativeTable { + typedef LessOptions TableType; +}; + +struct LessOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef LessOptionsT NativeTableType; + typedef LessOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + LessOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(LessOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const LessOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct LessOptionsBuilder { + typedef LessOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit LessOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = 
fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateLessOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + LessOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateLessOptions(flatbuffers::FlatBufferBuilder &_fbb, const LessOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct LessEqualOptionsT : public flatbuffers::NativeTable { + typedef LessEqualOptions TableType; +}; + +struct LessEqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef LessEqualOptionsT NativeTableType; + typedef LessEqualOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + LessEqualOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(LessEqualOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const LessEqualOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct LessEqualOptionsBuilder { + typedef LessEqualOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit LessEqualOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateLessEqualOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + LessEqualOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateLessEqualOptions(flatbuffers::FlatBufferBuilder &_fbb, const LessEqualOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct NegOptionsT : public flatbuffers::NativeTable { + typedef NegOptions TableType; +}; + +struct NegOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef NegOptionsT NativeTableType; + typedef NegOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + NegOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(NegOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const NegOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct NegOptionsBuilder { + typedef NegOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit NegOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateNegOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + NegOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateNegOptions(flatbuffers::FlatBufferBuilder &_fbb, const NegOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct SelectOptionsT : public flatbuffers::NativeTable { + typedef SelectOptions TableType; +}; + +struct SelectOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef SelectOptionsT NativeTableType; 
+ typedef SelectOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + SelectOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(SelectOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const SelectOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct SelectOptionsBuilder { + typedef SelectOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit SelectOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateSelectOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + SelectOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateSelectOptions(flatbuffers::FlatBufferBuilder &_fbb, const SelectOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct SliceOptionsT : public flatbuffers::NativeTable { + typedef SliceOptions TableType; +}; + +struct SliceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef SliceOptionsT NativeTableType; + typedef SliceOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + SliceOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(SliceOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const SliceOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct SliceOptionsBuilder { + typedef SliceOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit SliceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateSliceOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + SliceOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateSliceOptions(flatbuffers::FlatBufferBuilder &_fbb, const SliceOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct TransposeConvOptionsT : public flatbuffers::NativeTable { + typedef TransposeConvOptions TableType; + tflite::Padding padding = tflite::Padding_SAME; + int32_t stride_w = 0; + int32_t stride_h = 0; + tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE; +}; + +struct TransposeConvOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef TransposeConvOptionsT NativeTableType; + typedef TransposeConvOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_PADDING = 4, + VT_STRIDE_W = 6, + VT_STRIDE_H = 8, + VT_FUSED_ACTIVATION_FUNCTION = 10 + }; + tflite::Padding padding() const { + return static_cast(GetField(VT_PADDING, 0)); + } + int32_t stride_w() const { + return GetField(VT_STRIDE_W, 0); + } + int32_t stride_h() const { + return 
GetField(VT_STRIDE_H, 0); + } + tflite::ActivationFunctionType fused_activation_function() const { + return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_PADDING, 1) && + VerifyField(verifier, VT_STRIDE_W, 4) && + VerifyField(verifier, VT_STRIDE_H, 4) && + VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION, 1) && + verifier.EndTable(); + } + TransposeConvOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(TransposeConvOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const TransposeConvOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct TransposeConvOptionsBuilder { + typedef TransposeConvOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_padding(tflite::Padding padding) { + fbb_.AddElement(TransposeConvOptions::VT_PADDING, static_cast(padding), 0); + } + void add_stride_w(int32_t stride_w) { + fbb_.AddElement(TransposeConvOptions::VT_STRIDE_W, stride_w, 0); + } + void add_stride_h(int32_t stride_h) { + fbb_.AddElement(TransposeConvOptions::VT_STRIDE_H, stride_h, 0); + } + void add_fused_activation_function(tflite::ActivationFunctionType fused_activation_function) { + fbb_.AddElement(TransposeConvOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0); + } + explicit TransposeConvOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateTransposeConvOptions( + flatbuffers::FlatBufferBuilder &_fbb, + tflite::Padding padding = tflite::Padding_SAME, + int32_t stride_w = 0, + int32_t stride_h = 0, + tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE) { + TransposeConvOptionsBuilder builder_(_fbb); + builder_.add_stride_h(stride_h); + builder_.add_stride_w(stride_w); + builder_.add_fused_activation_function(fused_activation_function); + builder_.add_padding(padding); + return builder_.Finish(); +} + +flatbuffers::Offset CreateTransposeConvOptions(flatbuffers::FlatBufferBuilder &_fbb, const TransposeConvOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct ExpandDimsOptionsT : public flatbuffers::NativeTable { + typedef ExpandDimsOptions TableType; +}; + +struct ExpandDimsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef ExpandDimsOptionsT NativeTableType; + typedef ExpandDimsOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + ExpandDimsOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(ExpandDimsOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const ExpandDimsOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct ExpandDimsOptionsBuilder { + typedef ExpandDimsOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit ExpandDimsOptionsBuilder(flatbuffers::FlatBufferBuilder 
&_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateExpandDimsOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + ExpandDimsOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateExpandDimsOptions(flatbuffers::FlatBufferBuilder &_fbb, const ExpandDimsOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct SparseToDenseOptionsT : public flatbuffers::NativeTable { + typedef SparseToDenseOptions TableType; + bool validate_indices = false; +}; + +struct SparseToDenseOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef SparseToDenseOptionsT NativeTableType; + typedef SparseToDenseOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_VALIDATE_INDICES = 4 + }; + bool validate_indices() const { + return GetField(VT_VALIDATE_INDICES, 0) != 0; + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_VALIDATE_INDICES, 1) && + verifier.EndTable(); + } + SparseToDenseOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(SparseToDenseOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const SparseToDenseOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct SparseToDenseOptionsBuilder { + typedef SparseToDenseOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_validate_indices(bool validate_indices) { + fbb_.AddElement(SparseToDenseOptions::VT_VALIDATE_INDICES, static_cast(validate_indices), 0); + } + explicit SparseToDenseOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateSparseToDenseOptions( + flatbuffers::FlatBufferBuilder &_fbb, + bool validate_indices = false) { + SparseToDenseOptionsBuilder builder_(_fbb); + builder_.add_validate_indices(validate_indices); + return builder_.Finish(); +} + +flatbuffers::Offset CreateSparseToDenseOptions(flatbuffers::FlatBufferBuilder &_fbb, const SparseToDenseOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct EqualOptionsT : public flatbuffers::NativeTable { + typedef EqualOptions TableType; +}; + +struct EqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef EqualOptionsT NativeTableType; + typedef EqualOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + EqualOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(EqualOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const EqualOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct EqualOptionsBuilder { + typedef EqualOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit EqualOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + 
: fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateEqualOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + EqualOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateEqualOptions(flatbuffers::FlatBufferBuilder &_fbb, const EqualOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct NotEqualOptionsT : public flatbuffers::NativeTable { + typedef NotEqualOptions TableType; +}; + +struct NotEqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef NotEqualOptionsT NativeTableType; + typedef NotEqualOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + NotEqualOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(NotEqualOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const NotEqualOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct NotEqualOptionsBuilder { + typedef NotEqualOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit NotEqualOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateNotEqualOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + NotEqualOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateNotEqualOptions(flatbuffers::FlatBufferBuilder &_fbb, const NotEqualOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct ShapeOptionsT : public flatbuffers::NativeTable { + typedef ShapeOptions TableType; + tflite::TensorType out_type = tflite::TensorType_FLOAT32; +}; + +struct ShapeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef ShapeOptionsT NativeTableType; + typedef ShapeOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_OUT_TYPE = 4 + }; + tflite::TensorType out_type() const { + return static_cast(GetField(VT_OUT_TYPE, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_OUT_TYPE, 1) && + verifier.EndTable(); + } + ShapeOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(ShapeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const ShapeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct ShapeOptionsBuilder { + typedef ShapeOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_out_type(tflite::TensorType out_type) { + fbb_.AddElement(ShapeOptions::VT_OUT_TYPE, static_cast(out_type), 0); + } + explicit ShapeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline 
flatbuffers::Offset CreateShapeOptions( + flatbuffers::FlatBufferBuilder &_fbb, + tflite::TensorType out_type = tflite::TensorType_FLOAT32) { + ShapeOptionsBuilder builder_(_fbb); + builder_.add_out_type(out_type); + return builder_.Finish(); +} + +flatbuffers::Offset CreateShapeOptions(flatbuffers::FlatBufferBuilder &_fbb, const ShapeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct RankOptionsT : public flatbuffers::NativeTable { + typedef RankOptions TableType; +}; + +struct RankOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef RankOptionsT NativeTableType; + typedef RankOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + RankOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(RankOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const RankOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct RankOptionsBuilder { + typedef RankOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit RankOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateRankOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + RankOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateRankOptions(flatbuffers::FlatBufferBuilder &_fbb, const RankOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct PowOptionsT : public flatbuffers::NativeTable { + typedef PowOptions TableType; +}; + +struct PowOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef PowOptionsT NativeTableType; + typedef PowOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + PowOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(PowOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const PowOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct PowOptionsBuilder { + typedef PowOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit PowOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreatePowOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + PowOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreatePowOptions(flatbuffers::FlatBufferBuilder &_fbb, const PowOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct FakeQuantOptionsT : public flatbuffers::NativeTable { + typedef FakeQuantOptions TableType; + float min = 0.0f; + float max = 0.0f; + int32_t num_bits = 0; + bool narrow_range = false; +}; + +struct FakeQuantOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + 
typedef FakeQuantOptionsT NativeTableType; + typedef FakeQuantOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_MIN = 4, + VT_MAX = 6, + VT_NUM_BITS = 8, + VT_NARROW_RANGE = 10 + }; + float min() const { + return GetField(VT_MIN, 0.0f); + } + float max() const { + return GetField(VT_MAX, 0.0f); + } + int32_t num_bits() const { + return GetField(VT_NUM_BITS, 0); + } + bool narrow_range() const { + return GetField(VT_NARROW_RANGE, 0) != 0; + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_MIN, 4) && + VerifyField(verifier, VT_MAX, 4) && + VerifyField(verifier, VT_NUM_BITS, 4) && + VerifyField(verifier, VT_NARROW_RANGE, 1) && + verifier.EndTable(); + } + FakeQuantOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(FakeQuantOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const FakeQuantOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct FakeQuantOptionsBuilder { + typedef FakeQuantOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_min(float min) { + fbb_.AddElement(FakeQuantOptions::VT_MIN, min, 0.0f); + } + void add_max(float max) { + fbb_.AddElement(FakeQuantOptions::VT_MAX, max, 0.0f); + } + void add_num_bits(int32_t num_bits) { + fbb_.AddElement(FakeQuantOptions::VT_NUM_BITS, num_bits, 0); + } + void add_narrow_range(bool narrow_range) { + fbb_.AddElement(FakeQuantOptions::VT_NARROW_RANGE, static_cast(narrow_range), 0); + } + explicit FakeQuantOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateFakeQuantOptions( + flatbuffers::FlatBufferBuilder &_fbb, + float min = 0.0f, + float max = 0.0f, + int32_t num_bits = 0, + bool narrow_range = false) { + FakeQuantOptionsBuilder builder_(_fbb); + builder_.add_num_bits(num_bits); + builder_.add_max(max); + builder_.add_min(min); + builder_.add_narrow_range(narrow_range); + return builder_.Finish(); +} + +flatbuffers::Offset CreateFakeQuantOptions(flatbuffers::FlatBufferBuilder &_fbb, const FakeQuantOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct PackOptionsT : public flatbuffers::NativeTable { + typedef PackOptions TableType; + int32_t values_count = 0; + int32_t axis = 0; +}; + +struct PackOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef PackOptionsT NativeTableType; + typedef PackOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_VALUES_COUNT = 4, + VT_AXIS = 6 + }; + int32_t values_count() const { + return GetField(VT_VALUES_COUNT, 0); + } + int32_t axis() const { + return GetField(VT_AXIS, 0); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_VALUES_COUNT, 4) && + VerifyField(verifier, VT_AXIS, 4) && + verifier.EndTable(); + } + PackOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(PackOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const 
PackOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct PackOptionsBuilder { + typedef PackOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_values_count(int32_t values_count) { + fbb_.AddElement(PackOptions::VT_VALUES_COUNT, values_count, 0); + } + void add_axis(int32_t axis) { + fbb_.AddElement(PackOptions::VT_AXIS, axis, 0); + } + explicit PackOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreatePackOptions( + flatbuffers::FlatBufferBuilder &_fbb, + int32_t values_count = 0, + int32_t axis = 0) { + PackOptionsBuilder builder_(_fbb); + builder_.add_axis(axis); + builder_.add_values_count(values_count); + return builder_.Finish(); +} + +flatbuffers::Offset CreatePackOptions(flatbuffers::FlatBufferBuilder &_fbb, const PackOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct LogicalOrOptionsT : public flatbuffers::NativeTable { + typedef LogicalOrOptions TableType; +}; + +struct LogicalOrOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef LogicalOrOptionsT NativeTableType; + typedef LogicalOrOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + LogicalOrOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(LogicalOrOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const LogicalOrOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct LogicalOrOptionsBuilder { + typedef LogicalOrOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit LogicalOrOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateLogicalOrOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + LogicalOrOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateLogicalOrOptions(flatbuffers::FlatBufferBuilder &_fbb, const LogicalOrOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct OneHotOptionsT : public flatbuffers::NativeTable { + typedef OneHotOptions TableType; + int32_t axis = 0; +}; + +struct OneHotOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef OneHotOptionsT NativeTableType; + typedef OneHotOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_AXIS = 4 + }; + int32_t axis() const { + return GetField(VT_AXIS, 0); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_AXIS, 4) && + verifier.EndTable(); + } + OneHotOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(OneHotOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const OneHotOptionsT* _o, const flatbuffers::rehasher_function_t 
*_rehasher = nullptr); +}; + +struct OneHotOptionsBuilder { + typedef OneHotOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_axis(int32_t axis) { + fbb_.AddElement(OneHotOptions::VT_AXIS, axis, 0); + } + explicit OneHotOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateOneHotOptions( + flatbuffers::FlatBufferBuilder &_fbb, + int32_t axis = 0) { + OneHotOptionsBuilder builder_(_fbb); + builder_.add_axis(axis); + return builder_.Finish(); +} + +flatbuffers::Offset CreateOneHotOptions(flatbuffers::FlatBufferBuilder &_fbb, const OneHotOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct AbsOptionsT : public flatbuffers::NativeTable { + typedef AbsOptions TableType; +}; + +struct AbsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef AbsOptionsT NativeTableType; + typedef AbsOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + AbsOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(AbsOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const AbsOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct AbsOptionsBuilder { + typedef AbsOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit AbsOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateAbsOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + AbsOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateAbsOptions(flatbuffers::FlatBufferBuilder &_fbb, const AbsOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct HardSwishOptionsT : public flatbuffers::NativeTable { + typedef HardSwishOptions TableType; +}; + +struct HardSwishOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef HardSwishOptionsT NativeTableType; + typedef HardSwishOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + HardSwishOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(HardSwishOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const HardSwishOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct HardSwishOptionsBuilder { + typedef HardSwishOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit HardSwishOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateHardSwishOptions( + 
flatbuffers::FlatBufferBuilder &_fbb) { + HardSwishOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateHardSwishOptions(flatbuffers::FlatBufferBuilder &_fbb, const HardSwishOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct LogicalAndOptionsT : public flatbuffers::NativeTable { + typedef LogicalAndOptions TableType; +}; + +struct LogicalAndOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef LogicalAndOptionsT NativeTableType; + typedef LogicalAndOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + LogicalAndOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(LogicalAndOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const LogicalAndOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct LogicalAndOptionsBuilder { + typedef LogicalAndOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit LogicalAndOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateLogicalAndOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + LogicalAndOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateLogicalAndOptions(flatbuffers::FlatBufferBuilder &_fbb, const LogicalAndOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct LogicalNotOptionsT : public flatbuffers::NativeTable { + typedef LogicalNotOptions TableType; +}; + +struct LogicalNotOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef LogicalNotOptionsT NativeTableType; + typedef LogicalNotOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + LogicalNotOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(LogicalNotOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const LogicalNotOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct LogicalNotOptionsBuilder { + typedef LogicalNotOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit LogicalNotOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateLogicalNotOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + LogicalNotOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateLogicalNotOptions(flatbuffers::FlatBufferBuilder &_fbb, const LogicalNotOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct UnpackOptionsT : public flatbuffers::NativeTable { + typedef UnpackOptions TableType; + int32_t num = 0; + int32_t axis = 0; +}; + +struct UnpackOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table 
{ + typedef UnpackOptionsT NativeTableType; + typedef UnpackOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_NUM = 4, + VT_AXIS = 6 + }; + int32_t num() const { + return GetField(VT_NUM, 0); + } + int32_t axis() const { + return GetField(VT_AXIS, 0); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_NUM, 4) && + VerifyField(verifier, VT_AXIS, 4) && + verifier.EndTable(); + } + UnpackOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(UnpackOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const UnpackOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct UnpackOptionsBuilder { + typedef UnpackOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_num(int32_t num) { + fbb_.AddElement(UnpackOptions::VT_NUM, num, 0); + } + void add_axis(int32_t axis) { + fbb_.AddElement(UnpackOptions::VT_AXIS, axis, 0); + } + explicit UnpackOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateUnpackOptions( + flatbuffers::FlatBufferBuilder &_fbb, + int32_t num = 0, + int32_t axis = 0) { + UnpackOptionsBuilder builder_(_fbb); + builder_.add_axis(axis); + builder_.add_num(num); + return builder_.Finish(); +} + +flatbuffers::Offset CreateUnpackOptions(flatbuffers::FlatBufferBuilder &_fbb, const UnpackOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct FloorDivOptionsT : public flatbuffers::NativeTable { + typedef FloorDivOptions TableType; +}; + +struct FloorDivOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef FloorDivOptionsT NativeTableType; + typedef FloorDivOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + FloorDivOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(FloorDivOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const FloorDivOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct FloorDivOptionsBuilder { + typedef FloorDivOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit FloorDivOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateFloorDivOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + FloorDivOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateFloorDivOptions(flatbuffers::FlatBufferBuilder &_fbb, const FloorDivOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct SquareOptionsT : public flatbuffers::NativeTable { + typedef SquareOptions TableType; +}; + +struct SquareOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef SquareOptionsT 
NativeTableType; + typedef SquareOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + SquareOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(SquareOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const SquareOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct SquareOptionsBuilder { + typedef SquareOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit SquareOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateSquareOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + SquareOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateSquareOptions(flatbuffers::FlatBufferBuilder &_fbb, const SquareOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct ZerosLikeOptionsT : public flatbuffers::NativeTable { + typedef ZerosLikeOptions TableType; +}; + +struct ZerosLikeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef ZerosLikeOptionsT NativeTableType; + typedef ZerosLikeOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + ZerosLikeOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(ZerosLikeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const ZerosLikeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct ZerosLikeOptionsBuilder { + typedef ZerosLikeOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit ZerosLikeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateZerosLikeOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + ZerosLikeOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateZerosLikeOptions(flatbuffers::FlatBufferBuilder &_fbb, const ZerosLikeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct FillOptionsT : public flatbuffers::NativeTable { + typedef FillOptions TableType; +}; + +struct FillOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef FillOptionsT NativeTableType; + typedef FillOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + FillOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(FillOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const FillOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct FillOptionsBuilder { + typedef FillOptions 
Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit FillOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateFillOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + FillOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateFillOptions(flatbuffers::FlatBufferBuilder &_fbb, const FillOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct FloorModOptionsT : public flatbuffers::NativeTable { + typedef FloorModOptions TableType; +}; + +struct FloorModOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef FloorModOptionsT NativeTableType; + typedef FloorModOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + FloorModOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(FloorModOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const FloorModOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct FloorModOptionsBuilder { + typedef FloorModOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit FloorModOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateFloorModOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + FloorModOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateFloorModOptions(flatbuffers::FlatBufferBuilder &_fbb, const FloorModOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct RangeOptionsT : public flatbuffers::NativeTable { + typedef RangeOptions TableType; +}; + +struct RangeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef RangeOptionsT NativeTableType; + typedef RangeOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + RangeOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(RangeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const RangeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct RangeOptionsBuilder { + typedef RangeOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit RangeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateRangeOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + RangeOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateRangeOptions(flatbuffers::FlatBufferBuilder &_fbb, const RangeOptionsT *_o, const 
flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct LeakyReluOptionsT : public flatbuffers::NativeTable { + typedef LeakyReluOptions TableType; + float alpha = 0.0f; +}; + +struct LeakyReluOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef LeakyReluOptionsT NativeTableType; + typedef LeakyReluOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_ALPHA = 4 + }; + float alpha() const { + return GetField(VT_ALPHA, 0.0f); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_ALPHA, 4) && + verifier.EndTable(); + } + LeakyReluOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(LeakyReluOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const LeakyReluOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct LeakyReluOptionsBuilder { + typedef LeakyReluOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_alpha(float alpha) { + fbb_.AddElement(LeakyReluOptions::VT_ALPHA, alpha, 0.0f); + } + explicit LeakyReluOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateLeakyReluOptions( + flatbuffers::FlatBufferBuilder &_fbb, + float alpha = 0.0f) { + LeakyReluOptionsBuilder builder_(_fbb); + builder_.add_alpha(alpha); + return builder_.Finish(); +} + +flatbuffers::Offset CreateLeakyReluOptions(flatbuffers::FlatBufferBuilder &_fbb, const LeakyReluOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct SquaredDifferenceOptionsT : public flatbuffers::NativeTable { + typedef SquaredDifferenceOptions TableType; +}; + +struct SquaredDifferenceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef SquaredDifferenceOptionsT NativeTableType; + typedef SquaredDifferenceOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + SquaredDifferenceOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(SquaredDifferenceOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const SquaredDifferenceOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct SquaredDifferenceOptionsBuilder { + typedef SquaredDifferenceOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit SquaredDifferenceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateSquaredDifferenceOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + SquaredDifferenceOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateSquaredDifferenceOptions(flatbuffers::FlatBufferBuilder &_fbb, const SquaredDifferenceOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = 
nullptr); + +struct MirrorPadOptionsT : public flatbuffers::NativeTable { + typedef MirrorPadOptions TableType; + tflite::MirrorPadMode mode = tflite::MirrorPadMode_REFLECT; +}; + +struct MirrorPadOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef MirrorPadOptionsT NativeTableType; + typedef MirrorPadOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_MODE = 4 + }; + tflite::MirrorPadMode mode() const { + return static_cast(GetField(VT_MODE, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_MODE, 1) && + verifier.EndTable(); + } + MirrorPadOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(MirrorPadOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const MirrorPadOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct MirrorPadOptionsBuilder { + typedef MirrorPadOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_mode(tflite::MirrorPadMode mode) { + fbb_.AddElement(MirrorPadOptions::VT_MODE, static_cast(mode), 0); + } + explicit MirrorPadOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateMirrorPadOptions( + flatbuffers::FlatBufferBuilder &_fbb, + tflite::MirrorPadMode mode = tflite::MirrorPadMode_REFLECT) { + MirrorPadOptionsBuilder builder_(_fbb); + builder_.add_mode(mode); + return builder_.Finish(); +} + +flatbuffers::Offset CreateMirrorPadOptions(flatbuffers::FlatBufferBuilder &_fbb, const MirrorPadOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct UniqueOptionsT : public flatbuffers::NativeTable { + typedef UniqueOptions TableType; + tflite::TensorType idx_out_type = tflite::TensorType_INT32; +}; + +struct UniqueOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef UniqueOptionsT NativeTableType; + typedef UniqueOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_IDX_OUT_TYPE = 4 + }; + tflite::TensorType idx_out_type() const { + return static_cast(GetField(VT_IDX_OUT_TYPE, 2)); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_IDX_OUT_TYPE, 1) && + verifier.EndTable(); + } + UniqueOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(UniqueOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const UniqueOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct UniqueOptionsBuilder { + typedef UniqueOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_idx_out_type(tflite::TensorType idx_out_type) { + fbb_.AddElement(UniqueOptions::VT_IDX_OUT_TYPE, static_cast(idx_out_type), 2); + } + explicit UniqueOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = 
flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateUniqueOptions( + flatbuffers::FlatBufferBuilder &_fbb, + tflite::TensorType idx_out_type = tflite::TensorType_INT32) { + UniqueOptionsBuilder builder_(_fbb); + builder_.add_idx_out_type(idx_out_type); + return builder_.Finish(); +} + +flatbuffers::Offset CreateUniqueOptions(flatbuffers::FlatBufferBuilder &_fbb, const UniqueOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct ReverseV2OptionsT : public flatbuffers::NativeTable { + typedef ReverseV2Options TableType; +}; + +struct ReverseV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef ReverseV2OptionsT NativeTableType; + typedef ReverseV2OptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + ReverseV2OptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(ReverseV2OptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const ReverseV2OptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct ReverseV2OptionsBuilder { + typedef ReverseV2Options Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit ReverseV2OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateReverseV2Options( + flatbuffers::FlatBufferBuilder &_fbb) { + ReverseV2OptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateReverseV2Options(flatbuffers::FlatBufferBuilder &_fbb, const ReverseV2OptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct AddNOptionsT : public flatbuffers::NativeTable { + typedef AddNOptions TableType; +}; + +struct AddNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef AddNOptionsT NativeTableType; + typedef AddNOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + AddNOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(AddNOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const AddNOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct AddNOptionsBuilder { + typedef AddNOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit AddNOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateAddNOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + AddNOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateAddNOptions(flatbuffers::FlatBufferBuilder &_fbb, const AddNOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct GatherNdOptionsT : public flatbuffers::NativeTable { + typedef GatherNdOptions TableType; +}; + +struct 
GatherNdOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef GatherNdOptionsT NativeTableType; + typedef GatherNdOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + GatherNdOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(GatherNdOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const GatherNdOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct GatherNdOptionsBuilder { + typedef GatherNdOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit GatherNdOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateGatherNdOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + GatherNdOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateGatherNdOptions(flatbuffers::FlatBufferBuilder &_fbb, const GatherNdOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct WhereOptionsT : public flatbuffers::NativeTable { + typedef WhereOptions TableType; +}; + +struct WhereOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef WhereOptionsT NativeTableType; + typedef WhereOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + WhereOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(WhereOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const WhereOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct WhereOptionsBuilder { + typedef WhereOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit WhereOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateWhereOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + WhereOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateWhereOptions(flatbuffers::FlatBufferBuilder &_fbb, const WhereOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct ReverseSequenceOptionsT : public flatbuffers::NativeTable { + typedef ReverseSequenceOptions TableType; + int32_t seq_dim = 0; + int32_t batch_dim = 0; +}; + +struct ReverseSequenceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef ReverseSequenceOptionsT NativeTableType; + typedef ReverseSequenceOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_SEQ_DIM = 4, + VT_BATCH_DIM = 6 + }; + int32_t seq_dim() const { + return GetField(VT_SEQ_DIM, 0); + } + int32_t batch_dim() const { + return GetField(VT_BATCH_DIM, 0); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, 
VT_SEQ_DIM, 4) && + VerifyField(verifier, VT_BATCH_DIM, 4) && + verifier.EndTable(); + } + ReverseSequenceOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(ReverseSequenceOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const ReverseSequenceOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct ReverseSequenceOptionsBuilder { + typedef ReverseSequenceOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_seq_dim(int32_t seq_dim) { + fbb_.AddElement(ReverseSequenceOptions::VT_SEQ_DIM, seq_dim, 0); + } + void add_batch_dim(int32_t batch_dim) { + fbb_.AddElement(ReverseSequenceOptions::VT_BATCH_DIM, batch_dim, 0); + } + explicit ReverseSequenceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateReverseSequenceOptions( + flatbuffers::FlatBufferBuilder &_fbb, + int32_t seq_dim = 0, + int32_t batch_dim = 0) { + ReverseSequenceOptionsBuilder builder_(_fbb); + builder_.add_batch_dim(batch_dim); + builder_.add_seq_dim(seq_dim); + return builder_.Finish(); +} + +flatbuffers::Offset CreateReverseSequenceOptions(flatbuffers::FlatBufferBuilder &_fbb, const ReverseSequenceOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct MatrixDiagOptionsT : public flatbuffers::NativeTable { + typedef MatrixDiagOptions TableType; +}; + +struct MatrixDiagOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef MatrixDiagOptionsT NativeTableType; + typedef MatrixDiagOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + MatrixDiagOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(MatrixDiagOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const MatrixDiagOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct MatrixDiagOptionsBuilder { + typedef MatrixDiagOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit MatrixDiagOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateMatrixDiagOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + MatrixDiagOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateMatrixDiagOptions(flatbuffers::FlatBufferBuilder &_fbb, const MatrixDiagOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct QuantizeOptionsT : public flatbuffers::NativeTable { + typedef QuantizeOptions TableType; +}; + +struct QuantizeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef QuantizeOptionsT NativeTableType; + typedef QuantizeOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + QuantizeOptionsT 
*UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(QuantizeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const QuantizeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct QuantizeOptionsBuilder { + typedef QuantizeOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit QuantizeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateQuantizeOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + QuantizeOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateQuantizeOptions(flatbuffers::FlatBufferBuilder &_fbb, const QuantizeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct MatrixSetDiagOptionsT : public flatbuffers::NativeTable { + typedef MatrixSetDiagOptions TableType; +}; + +struct MatrixSetDiagOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef MatrixSetDiagOptionsT NativeTableType; + typedef MatrixSetDiagOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + MatrixSetDiagOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(MatrixSetDiagOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const MatrixSetDiagOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct MatrixSetDiagOptionsBuilder { + typedef MatrixSetDiagOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit MatrixSetDiagOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateMatrixSetDiagOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + MatrixSetDiagOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateMatrixSetDiagOptions(flatbuffers::FlatBufferBuilder &_fbb, const MatrixSetDiagOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct IfOptionsT : public flatbuffers::NativeTable { + typedef IfOptions TableType; + int32_t then_subgraph_index = 0; + int32_t else_subgraph_index = 0; +}; + +struct IfOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef IfOptionsT NativeTableType; + typedef IfOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_THEN_SUBGRAPH_INDEX = 4, + VT_ELSE_SUBGRAPH_INDEX = 6 + }; + int32_t then_subgraph_index() const { + return GetField(VT_THEN_SUBGRAPH_INDEX, 0); + } + int32_t else_subgraph_index() const { + return GetField(VT_ELSE_SUBGRAPH_INDEX, 0); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_THEN_SUBGRAPH_INDEX, 4) && + VerifyField(verifier, VT_ELSE_SUBGRAPH_INDEX, 4) && + verifier.EndTable(); + } + IfOptionsT *UnPack(const 
flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(IfOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const IfOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct IfOptionsBuilder { + typedef IfOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_then_subgraph_index(int32_t then_subgraph_index) { + fbb_.AddElement(IfOptions::VT_THEN_SUBGRAPH_INDEX, then_subgraph_index, 0); + } + void add_else_subgraph_index(int32_t else_subgraph_index) { + fbb_.AddElement(IfOptions::VT_ELSE_SUBGRAPH_INDEX, else_subgraph_index, 0); + } + explicit IfOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateIfOptions( + flatbuffers::FlatBufferBuilder &_fbb, + int32_t then_subgraph_index = 0, + int32_t else_subgraph_index = 0) { + IfOptionsBuilder builder_(_fbb); + builder_.add_else_subgraph_index(else_subgraph_index); + builder_.add_then_subgraph_index(then_subgraph_index); + return builder_.Finish(); +} + +flatbuffers::Offset CreateIfOptions(flatbuffers::FlatBufferBuilder &_fbb, const IfOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct CallOnceOptionsT : public flatbuffers::NativeTable { + typedef CallOnceOptions TableType; + int32_t init_subgraph_index = 0; +}; + +struct CallOnceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef CallOnceOptionsT NativeTableType; + typedef CallOnceOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_INIT_SUBGRAPH_INDEX = 4 + }; + int32_t init_subgraph_index() const { + return GetField(VT_INIT_SUBGRAPH_INDEX, 0); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_INIT_SUBGRAPH_INDEX, 4) && + verifier.EndTable(); + } + CallOnceOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(CallOnceOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const CallOnceOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct CallOnceOptionsBuilder { + typedef CallOnceOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_init_subgraph_index(int32_t init_subgraph_index) { + fbb_.AddElement(CallOnceOptions::VT_INIT_SUBGRAPH_INDEX, init_subgraph_index, 0); + } + explicit CallOnceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateCallOnceOptions( + flatbuffers::FlatBufferBuilder &_fbb, + int32_t init_subgraph_index = 0) { + CallOnceOptionsBuilder builder_(_fbb); + builder_.add_init_subgraph_index(init_subgraph_index); + return builder_.Finish(); +} + +flatbuffers::Offset CreateCallOnceOptions(flatbuffers::FlatBufferBuilder &_fbb, const CallOnceOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct WhileOptionsT : public 
flatbuffers::NativeTable { + typedef WhileOptions TableType; + int32_t cond_subgraph_index = 0; + int32_t body_subgraph_index = 0; +}; + +struct WhileOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef WhileOptionsT NativeTableType; + typedef WhileOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_COND_SUBGRAPH_INDEX = 4, + VT_BODY_SUBGRAPH_INDEX = 6 + }; + int32_t cond_subgraph_index() const { + return GetField(VT_COND_SUBGRAPH_INDEX, 0); + } + int32_t body_subgraph_index() const { + return GetField(VT_BODY_SUBGRAPH_INDEX, 0); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_COND_SUBGRAPH_INDEX, 4) && + VerifyField(verifier, VT_BODY_SUBGRAPH_INDEX, 4) && + verifier.EndTable(); + } + WhileOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(WhileOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const WhileOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct WhileOptionsBuilder { + typedef WhileOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_cond_subgraph_index(int32_t cond_subgraph_index) { + fbb_.AddElement(WhileOptions::VT_COND_SUBGRAPH_INDEX, cond_subgraph_index, 0); + } + void add_body_subgraph_index(int32_t body_subgraph_index) { + fbb_.AddElement(WhileOptions::VT_BODY_SUBGRAPH_INDEX, body_subgraph_index, 0); + } + explicit WhileOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateWhileOptions( + flatbuffers::FlatBufferBuilder &_fbb, + int32_t cond_subgraph_index = 0, + int32_t body_subgraph_index = 0) { + WhileOptionsBuilder builder_(_fbb); + builder_.add_body_subgraph_index(body_subgraph_index); + builder_.add_cond_subgraph_index(cond_subgraph_index); + return builder_.Finish(); +} + +flatbuffers::Offset CreateWhileOptions(flatbuffers::FlatBufferBuilder &_fbb, const WhileOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct NonMaxSuppressionV4OptionsT : public flatbuffers::NativeTable { + typedef NonMaxSuppressionV4Options TableType; +}; + +struct NonMaxSuppressionV4Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef NonMaxSuppressionV4OptionsT NativeTableType; + typedef NonMaxSuppressionV4OptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + NonMaxSuppressionV4OptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(NonMaxSuppressionV4OptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const NonMaxSuppressionV4OptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct NonMaxSuppressionV4OptionsBuilder { + typedef NonMaxSuppressionV4Options Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit NonMaxSuppressionV4OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset 
Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateNonMaxSuppressionV4Options( + flatbuffers::FlatBufferBuilder &_fbb) { + NonMaxSuppressionV4OptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateNonMaxSuppressionV4Options(flatbuffers::FlatBufferBuilder &_fbb, const NonMaxSuppressionV4OptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct NonMaxSuppressionV5OptionsT : public flatbuffers::NativeTable { + typedef NonMaxSuppressionV5Options TableType; +}; + +struct NonMaxSuppressionV5Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef NonMaxSuppressionV5OptionsT NativeTableType; + typedef NonMaxSuppressionV5OptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + NonMaxSuppressionV5OptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(NonMaxSuppressionV5OptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const NonMaxSuppressionV5OptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct NonMaxSuppressionV5OptionsBuilder { + typedef NonMaxSuppressionV5Options Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit NonMaxSuppressionV5OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateNonMaxSuppressionV5Options( + flatbuffers::FlatBufferBuilder &_fbb) { + NonMaxSuppressionV5OptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateNonMaxSuppressionV5Options(flatbuffers::FlatBufferBuilder &_fbb, const NonMaxSuppressionV5OptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct ScatterNdOptionsT : public flatbuffers::NativeTable { + typedef ScatterNdOptions TableType; +}; + +struct ScatterNdOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef ScatterNdOptionsT NativeTableType; + typedef ScatterNdOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + ScatterNdOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(ScatterNdOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const ScatterNdOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct ScatterNdOptionsBuilder { + typedef ScatterNdOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit ScatterNdOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateScatterNdOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + ScatterNdOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset 
CreateScatterNdOptions(flatbuffers::FlatBufferBuilder &_fbb, const ScatterNdOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct SelectV2OptionsT : public flatbuffers::NativeTable { + typedef SelectV2Options TableType; +}; + +struct SelectV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef SelectV2OptionsT NativeTableType; + typedef SelectV2OptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + SelectV2OptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(SelectV2OptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const SelectV2OptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct SelectV2OptionsBuilder { + typedef SelectV2Options Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit SelectV2OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateSelectV2Options( + flatbuffers::FlatBufferBuilder &_fbb) { + SelectV2OptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateSelectV2Options(flatbuffers::FlatBufferBuilder &_fbb, const SelectV2OptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct DensifyOptionsT : public flatbuffers::NativeTable { + typedef DensifyOptions TableType; +}; + +struct DensifyOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef DensifyOptionsT NativeTableType; + typedef DensifyOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + DensifyOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(DensifyOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const DensifyOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct DensifyOptionsBuilder { + typedef DensifyOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit DensifyOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateDensifyOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + DensifyOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateDensifyOptions(flatbuffers::FlatBufferBuilder &_fbb, const DensifyOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct SegmentSumOptionsT : public flatbuffers::NativeTable { + typedef SegmentSumOptions TableType; +}; + +struct SegmentSumOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef SegmentSumOptionsT NativeTableType; + typedef SegmentSumOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + SegmentSumOptionsT 
*UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(SegmentSumOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const SegmentSumOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct SegmentSumOptionsBuilder { + typedef SegmentSumOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit SegmentSumOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateSegmentSumOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + SegmentSumOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateSegmentSumOptions(flatbuffers::FlatBufferBuilder &_fbb, const SegmentSumOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct BatchMatMulOptionsT : public flatbuffers::NativeTable { + typedef BatchMatMulOptions TableType; + bool adj_x = false; + bool adj_y = false; + bool asymmetric_quantize_inputs = false; +}; + +struct BatchMatMulOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef BatchMatMulOptionsT NativeTableType; + typedef BatchMatMulOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_ADJ_X = 4, + VT_ADJ_Y = 6, + VT_ASYMMETRIC_QUANTIZE_INPUTS = 8 + }; + bool adj_x() const { + return GetField(VT_ADJ_X, 0) != 0; + } + bool adj_y() const { + return GetField(VT_ADJ_Y, 0) != 0; + } + bool asymmetric_quantize_inputs() const { + return GetField(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0; + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_ADJ_X, 1) && + VerifyField(verifier, VT_ADJ_Y, 1) && + VerifyField(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS, 1) && + verifier.EndTable(); + } + BatchMatMulOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(BatchMatMulOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const BatchMatMulOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct BatchMatMulOptionsBuilder { + typedef BatchMatMulOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_adj_x(bool adj_x) { + fbb_.AddElement(BatchMatMulOptions::VT_ADJ_X, static_cast(adj_x), 0); + } + void add_adj_y(bool adj_y) { + fbb_.AddElement(BatchMatMulOptions::VT_ADJ_Y, static_cast(adj_y), 0); + } + void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs) { + fbb_.AddElement(BatchMatMulOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS, static_cast(asymmetric_quantize_inputs), 0); + } + explicit BatchMatMulOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateBatchMatMulOptions( + flatbuffers::FlatBufferBuilder &_fbb, + bool adj_x = false, + bool adj_y = false, + bool asymmetric_quantize_inputs = false) { + BatchMatMulOptionsBuilder builder_(_fbb); + 
builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs); + builder_.add_adj_y(adj_y); + builder_.add_adj_x(adj_x); + return builder_.Finish(); +} + +flatbuffers::Offset CreateBatchMatMulOptions(flatbuffers::FlatBufferBuilder &_fbb, const BatchMatMulOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct CumsumOptionsT : public flatbuffers::NativeTable { + typedef CumsumOptions TableType; + bool exclusive = false; + bool reverse = false; +}; + +struct CumsumOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef CumsumOptionsT NativeTableType; + typedef CumsumOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_EXCLUSIVE = 4, + VT_REVERSE = 6 + }; + bool exclusive() const { + return GetField(VT_EXCLUSIVE, 0) != 0; + } + bool reverse() const { + return GetField(VT_REVERSE, 0) != 0; + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_EXCLUSIVE, 1) && + VerifyField(verifier, VT_REVERSE, 1) && + verifier.EndTable(); + } + CumsumOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(CumsumOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const CumsumOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct CumsumOptionsBuilder { + typedef CumsumOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_exclusive(bool exclusive) { + fbb_.AddElement(CumsumOptions::VT_EXCLUSIVE, static_cast(exclusive), 0); + } + void add_reverse(bool reverse) { + fbb_.AddElement(CumsumOptions::VT_REVERSE, static_cast(reverse), 0); + } + explicit CumsumOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateCumsumOptions( + flatbuffers::FlatBufferBuilder &_fbb, + bool exclusive = false, + bool reverse = false) { + CumsumOptionsBuilder builder_(_fbb); + builder_.add_reverse(reverse); + builder_.add_exclusive(exclusive); + return builder_.Finish(); +} + +flatbuffers::Offset CreateCumsumOptions(flatbuffers::FlatBufferBuilder &_fbb, const CumsumOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct BroadcastToOptionsT : public flatbuffers::NativeTable { + typedef BroadcastToOptions TableType; +}; + +struct BroadcastToOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef BroadcastToOptionsT NativeTableType; + typedef BroadcastToOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + BroadcastToOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(BroadcastToOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const BroadcastToOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct BroadcastToOptionsBuilder { + typedef BroadcastToOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit BroadcastToOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : 
fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateBroadcastToOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + BroadcastToOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateBroadcastToOptions(flatbuffers::FlatBufferBuilder &_fbb, const BroadcastToOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct Rfft2dOptionsT : public flatbuffers::NativeTable { + typedef Rfft2dOptions TableType; +}; + +struct Rfft2dOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef Rfft2dOptionsT NativeTableType; + typedef Rfft2dOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + Rfft2dOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(Rfft2dOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const Rfft2dOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct Rfft2dOptionsBuilder { + typedef Rfft2dOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit Rfft2dOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateRfft2dOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + Rfft2dOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateRfft2dOptions(flatbuffers::FlatBufferBuilder &_fbb, const Rfft2dOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct HashtableOptionsT : public flatbuffers::NativeTable { + typedef HashtableOptions TableType; + int32_t table_id = 0; + tflite::TensorType key_dtype = tflite::TensorType_FLOAT32; + tflite::TensorType value_dtype = tflite::TensorType_FLOAT32; +}; + +struct HashtableOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef HashtableOptionsT NativeTableType; + typedef HashtableOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_TABLE_ID = 4, + VT_KEY_DTYPE = 6, + VT_VALUE_DTYPE = 8 + }; + int32_t table_id() const { + return GetField(VT_TABLE_ID, 0); + } + tflite::TensorType key_dtype() const { + return static_cast(GetField(VT_KEY_DTYPE, 0)); + } + tflite::TensorType value_dtype() const { + return static_cast(GetField(VT_VALUE_DTYPE, 0)); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_TABLE_ID, 4) && + VerifyField(verifier, VT_KEY_DTYPE, 1) && + VerifyField(verifier, VT_VALUE_DTYPE, 1) && + verifier.EndTable(); + } + HashtableOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(HashtableOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const HashtableOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct HashtableOptionsBuilder { + typedef HashtableOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + 
flatbuffers::uoffset_t start_; + void add_table_id(int32_t table_id) { + fbb_.AddElement(HashtableOptions::VT_TABLE_ID, table_id, 0); + } + void add_key_dtype(tflite::TensorType key_dtype) { + fbb_.AddElement(HashtableOptions::VT_KEY_DTYPE, static_cast(key_dtype), 0); + } + void add_value_dtype(tflite::TensorType value_dtype) { + fbb_.AddElement(HashtableOptions::VT_VALUE_DTYPE, static_cast(value_dtype), 0); + } + explicit HashtableOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateHashtableOptions( + flatbuffers::FlatBufferBuilder &_fbb, + int32_t table_id = 0, + tflite::TensorType key_dtype = tflite::TensorType_FLOAT32, + tflite::TensorType value_dtype = tflite::TensorType_FLOAT32) { + HashtableOptionsBuilder builder_(_fbb); + builder_.add_table_id(table_id); + builder_.add_value_dtype(value_dtype); + builder_.add_key_dtype(key_dtype); + return builder_.Finish(); +} + +flatbuffers::Offset CreateHashtableOptions(flatbuffers::FlatBufferBuilder &_fbb, const HashtableOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct HashtableFindOptionsT : public flatbuffers::NativeTable { + typedef HashtableFindOptions TableType; +}; + +struct HashtableFindOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef HashtableFindOptionsT NativeTableType; + typedef HashtableFindOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + HashtableFindOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(HashtableFindOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const HashtableFindOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct HashtableFindOptionsBuilder { + typedef HashtableFindOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit HashtableFindOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateHashtableFindOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + HashtableFindOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateHashtableFindOptions(flatbuffers::FlatBufferBuilder &_fbb, const HashtableFindOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct HashtableImportOptionsT : public flatbuffers::NativeTable { + typedef HashtableImportOptions TableType; +}; + +struct HashtableImportOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef HashtableImportOptionsT NativeTableType; + typedef HashtableImportOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + HashtableImportOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(HashtableImportOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder 
&_fbb, const HashtableImportOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct HashtableImportOptionsBuilder { + typedef HashtableImportOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit HashtableImportOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateHashtableImportOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + HashtableImportOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateHashtableImportOptions(flatbuffers::FlatBufferBuilder &_fbb, const HashtableImportOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct HashtableSizeOptionsT : public flatbuffers::NativeTable { + typedef HashtableSizeOptions TableType; +}; + +struct HashtableSizeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef HashtableSizeOptionsT NativeTableType; + typedef HashtableSizeOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + HashtableSizeOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(HashtableSizeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const HashtableSizeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct HashtableSizeOptionsBuilder { + typedef HashtableSizeOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit HashtableSizeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateHashtableSizeOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + HashtableSizeOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateHashtableSizeOptions(flatbuffers::FlatBufferBuilder &_fbb, const HashtableSizeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct VarHandleOptionsT : public flatbuffers::NativeTable { + typedef VarHandleOptions TableType; + std::string container{}; + std::string shared_name{}; +}; + +struct VarHandleOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef VarHandleOptionsT NativeTableType; + typedef VarHandleOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_CONTAINER = 4, + VT_SHARED_NAME = 6 + }; + const flatbuffers::String *container() const { + return GetPointer(VT_CONTAINER); + } + const flatbuffers::String *shared_name() const { + return GetPointer(VT_SHARED_NAME); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_CONTAINER) && + verifier.VerifyString(container()) && + VerifyOffset(verifier, VT_SHARED_NAME) && + verifier.VerifyString(shared_name()) && + verifier.EndTable(); + } + VarHandleOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(VarHandleOptionsT *_o, const flatbuffers::resolver_function_t 
*_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const VarHandleOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct VarHandleOptionsBuilder { + typedef VarHandleOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_container(flatbuffers::Offset container) { + fbb_.AddOffset(VarHandleOptions::VT_CONTAINER, container); + } + void add_shared_name(flatbuffers::Offset shared_name) { + fbb_.AddOffset(VarHandleOptions::VT_SHARED_NAME, shared_name); + } + explicit VarHandleOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateVarHandleOptions( + flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset container = 0, + flatbuffers::Offset shared_name = 0) { + VarHandleOptionsBuilder builder_(_fbb); + builder_.add_shared_name(shared_name); + builder_.add_container(container); + return builder_.Finish(); +} + +inline flatbuffers::Offset CreateVarHandleOptionsDirect( + flatbuffers::FlatBufferBuilder &_fbb, + const char *container = nullptr, + const char *shared_name = nullptr) { + auto container__ = container ? _fbb.CreateString(container) : 0; + auto shared_name__ = shared_name ? _fbb.CreateString(shared_name) : 0; + return tflite::CreateVarHandleOptions( + _fbb, + container__, + shared_name__); +} + +flatbuffers::Offset CreateVarHandleOptions(flatbuffers::FlatBufferBuilder &_fbb, const VarHandleOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct ReadVariableOptionsT : public flatbuffers::NativeTable { + typedef ReadVariableOptions TableType; +}; + +struct ReadVariableOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef ReadVariableOptionsT NativeTableType; + typedef ReadVariableOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + ReadVariableOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(ReadVariableOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const ReadVariableOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct ReadVariableOptionsBuilder { + typedef ReadVariableOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit ReadVariableOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateReadVariableOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + ReadVariableOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateReadVariableOptions(flatbuffers::FlatBufferBuilder &_fbb, const ReadVariableOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct AssignVariableOptionsT : public flatbuffers::NativeTable { + typedef AssignVariableOptions TableType; +}; + +struct AssignVariableOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef AssignVariableOptionsT NativeTableType; + 
typedef AssignVariableOptionsBuilder Builder;
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) &&
+           verifier.EndTable();
+  }
+  AssignVariableOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(AssignVariableOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<AssignVariableOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const AssignVariableOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct AssignVariableOptionsBuilder {
+  typedef AssignVariableOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit AssignVariableOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<AssignVariableOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<AssignVariableOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<AssignVariableOptions> CreateAssignVariableOptions(
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  AssignVariableOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<AssignVariableOptions> CreateAssignVariableOptions(flatbuffers::FlatBufferBuilder &_fbb, const AssignVariableOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct RandomOptionsT : public flatbuffers::NativeTable {
+  typedef RandomOptions TableType;
+  int64_t seed = 0;
+  int64_t seed2 = 0;
+};
+
+struct RandomOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  typedef RandomOptionsT NativeTableType;
+  typedef RandomOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
+    VT_SEED = 4,
+    VT_SEED2 = 6
+  };
+  int64_t seed() const {
+    return GetField<int64_t>(VT_SEED, 0);
+  }
+  int64_t seed2() const {
+    return GetField<int64_t>(VT_SEED2, 0);
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) &&
+           VerifyField<int64_t>(verifier, VT_SEED, 8) &&
+           VerifyField<int64_t>(verifier, VT_SEED2, 8) &&
+           verifier.EndTable();
+  }
+  RandomOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(RandomOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<RandomOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const RandomOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct RandomOptionsBuilder {
+  typedef RandomOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_seed(int64_t seed) {
+    fbb_.AddElement<int64_t>(RandomOptions::VT_SEED, seed, 0);
+  }
+  void add_seed2(int64_t seed2) {
+    fbb_.AddElement<int64_t>(RandomOptions::VT_SEED2, seed2, 0);
+  }
+  explicit RandomOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<RandomOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<RandomOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<RandomOptions> CreateRandomOptions(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    int64_t seed = 0,
+    int64_t seed2 = 0) {
+  RandomOptionsBuilder builder_(_fbb);
+  builder_.add_seed2(seed2);
+  builder_.add_seed(seed);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<RandomOptions> CreateRandomOptions(flatbuffers::FlatBufferBuilder &_fbb, const RandomOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct BucketizeOptionsT : public flatbuffers::NativeTable {
+  typedef BucketizeOptions TableType;
+  std::vector<float> boundaries{};
+};
+
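+// A minimal usage sketch for the generated option tables above, assuming only
+// the declarations earlier in this header; the function name below is an
+// illustrative placeholder and is not part of the flatc-generated schema.
+inline flatbuffers::Offset<RandomOptions> ExampleBuildRandomOptionsSketch(
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  // Field-less tables such as AssignVariableOptions take only the builder;
+  // the returned offset is what later gets stored in an Operator's
+  // builtin_options union.
+  auto assign_options = CreateAssignVariableOptions(_fbb);
+  (void)assign_options;
+  // Tables with scalar fields take them as defaulted arguments; omitted
+  // arguments keep the defaults recorded in the corresponding *T structs.
+  return CreateRandomOptions(_fbb, /*seed=*/42, /*seed2=*/7);
+}
+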
+struct BucketizeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef BucketizeOptionsT NativeTableType; + typedef BucketizeOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_BOUNDARIES = 4 + }; + const flatbuffers::Vector *boundaries() const { + return GetPointer *>(VT_BOUNDARIES); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_BOUNDARIES) && + verifier.VerifyVector(boundaries()) && + verifier.EndTable(); + } + BucketizeOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(BucketizeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const BucketizeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct BucketizeOptionsBuilder { + typedef BucketizeOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_boundaries(flatbuffers::Offset> boundaries) { + fbb_.AddOffset(BucketizeOptions::VT_BOUNDARIES, boundaries); + } + explicit BucketizeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateBucketizeOptions( + flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset> boundaries = 0) { + BucketizeOptionsBuilder builder_(_fbb); + builder_.add_boundaries(boundaries); + return builder_.Finish(); +} + +inline flatbuffers::Offset CreateBucketizeOptionsDirect( + flatbuffers::FlatBufferBuilder &_fbb, + const std::vector *boundaries = nullptr) { + auto boundaries__ = boundaries ? 
_fbb.CreateVector(*boundaries) : 0; + return tflite::CreateBucketizeOptions( + _fbb, + boundaries__); +} + +flatbuffers::Offset CreateBucketizeOptions(flatbuffers::FlatBufferBuilder &_fbb, const BucketizeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct GeluOptionsT : public flatbuffers::NativeTable { + typedef GeluOptions TableType; + bool approximate = false; +}; + +struct GeluOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef GeluOptionsT NativeTableType; + typedef GeluOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_APPROXIMATE = 4 + }; + bool approximate() const { + return GetField(VT_APPROXIMATE, 0) != 0; + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_APPROXIMATE, 1) && + verifier.EndTable(); + } + GeluOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(GeluOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const GeluOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct GeluOptionsBuilder { + typedef GeluOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_approximate(bool approximate) { + fbb_.AddElement(GeluOptions::VT_APPROXIMATE, static_cast(approximate), 0); + } + explicit GeluOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateGeluOptions( + flatbuffers::FlatBufferBuilder &_fbb, + bool approximate = false) { + GeluOptionsBuilder builder_(_fbb); + builder_.add_approximate(approximate); + return builder_.Finish(); +} + +flatbuffers::Offset CreateGeluOptions(flatbuffers::FlatBufferBuilder &_fbb, const GeluOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct DynamicUpdateSliceOptionsT : public flatbuffers::NativeTable { + typedef DynamicUpdateSliceOptions TableType; +}; + +struct DynamicUpdateSliceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef DynamicUpdateSliceOptionsT NativeTableType; + typedef DynamicUpdateSliceOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + DynamicUpdateSliceOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(DynamicUpdateSliceOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const DynamicUpdateSliceOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct DynamicUpdateSliceOptionsBuilder { + typedef DynamicUpdateSliceOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit DynamicUpdateSliceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateDynamicUpdateSliceOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + 
DynamicUpdateSliceOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateDynamicUpdateSliceOptions(flatbuffers::FlatBufferBuilder &_fbb, const DynamicUpdateSliceOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct UnsortedSegmentProdOptionsT : public flatbuffers::NativeTable { + typedef UnsortedSegmentProdOptions TableType; +}; + +struct UnsortedSegmentProdOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef UnsortedSegmentProdOptionsT NativeTableType; + typedef UnsortedSegmentProdOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + UnsortedSegmentProdOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(UnsortedSegmentProdOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const UnsortedSegmentProdOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct UnsortedSegmentProdOptionsBuilder { + typedef UnsortedSegmentProdOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit UnsortedSegmentProdOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateUnsortedSegmentProdOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + UnsortedSegmentProdOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateUnsortedSegmentProdOptions(flatbuffers::FlatBufferBuilder &_fbb, const UnsortedSegmentProdOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct UnsortedSegmentMaxOptionsT : public flatbuffers::NativeTable { + typedef UnsortedSegmentMaxOptions TableType; +}; + +struct UnsortedSegmentMaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef UnsortedSegmentMaxOptionsT NativeTableType; + typedef UnsortedSegmentMaxOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + UnsortedSegmentMaxOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(UnsortedSegmentMaxOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const UnsortedSegmentMaxOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct UnsortedSegmentMaxOptionsBuilder { + typedef UnsortedSegmentMaxOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit UnsortedSegmentMaxOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateUnsortedSegmentMaxOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + UnsortedSegmentMaxOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateUnsortedSegmentMaxOptions(flatbuffers::FlatBufferBuilder &_fbb, const UnsortedSegmentMaxOptionsT *_o, const flatbuffers::rehasher_function_t 
*_rehasher = nullptr); + +struct UnsortedSegmentSumOptionsT : public flatbuffers::NativeTable { + typedef UnsortedSegmentSumOptions TableType; +}; + +struct UnsortedSegmentSumOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef UnsortedSegmentSumOptionsT NativeTableType; + typedef UnsortedSegmentSumOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + UnsortedSegmentSumOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(UnsortedSegmentSumOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const UnsortedSegmentSumOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct UnsortedSegmentSumOptionsBuilder { + typedef UnsortedSegmentSumOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit UnsortedSegmentSumOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateUnsortedSegmentSumOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + UnsortedSegmentSumOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateUnsortedSegmentSumOptions(flatbuffers::FlatBufferBuilder &_fbb, const UnsortedSegmentSumOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct ATan2OptionsT : public flatbuffers::NativeTable { + typedef ATan2Options TableType; +}; + +struct ATan2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef ATan2OptionsT NativeTableType; + typedef ATan2OptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + ATan2OptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(ATan2OptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const ATan2OptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct ATan2OptionsBuilder { + typedef ATan2Options Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit ATan2OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateATan2Options( + flatbuffers::FlatBufferBuilder &_fbb) { + ATan2OptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateATan2Options(flatbuffers::FlatBufferBuilder &_fbb, const ATan2OptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct UnsortedSegmentMinOptionsT : public flatbuffers::NativeTable { + typedef UnsortedSegmentMinOptions TableType; +}; + +struct UnsortedSegmentMinOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef UnsortedSegmentMinOptionsT NativeTableType; + typedef UnsortedSegmentMinOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + 
verifier.EndTable(); + } + UnsortedSegmentMinOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(UnsortedSegmentMinOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const UnsortedSegmentMinOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct UnsortedSegmentMinOptionsBuilder { + typedef UnsortedSegmentMinOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit UnsortedSegmentMinOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateUnsortedSegmentMinOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + UnsortedSegmentMinOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateUnsortedSegmentMinOptions(flatbuffers::FlatBufferBuilder &_fbb, const UnsortedSegmentMinOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct SignOptionsT : public flatbuffers::NativeTable { + typedef SignOptions TableType; +}; + +struct SignOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef SignOptionsT NativeTableType; + typedef SignOptionsBuilder Builder; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + SignOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(SignOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const SignOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct SignOptionsBuilder { + typedef SignOptions Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit SignOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateSignOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + SignOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateSignOptions(flatbuffers::FlatBufferBuilder &_fbb, const SignOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + + + + + +struct OperatorCodeBuilder { + typedef OperatorCode Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_deprecated_builtin_code(int8_t deprecated_builtin_code) { + fbb_.AddElement(OperatorCode::VT_DEPRECATED_BUILTIN_CODE, deprecated_builtin_code, 0); + } + void add_custom_code(flatbuffers::Offset custom_code) { + fbb_.AddOffset(OperatorCode::VT_CUSTOM_CODE, custom_code); + } + void add_version(int32_t version) { + fbb_.AddElement(OperatorCode::VT_VERSION, version, 1); + } + void add_builtin_code(tflite::BuiltinOperator builtin_code) { + fbb_.AddElement(OperatorCode::VT_BUILTIN_CODE, static_cast(builtin_code), 0); + } + explicit OperatorCodeBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = 
flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateOperatorCode( + flatbuffers::FlatBufferBuilder &_fbb, + int8_t deprecated_builtin_code = 0, + flatbuffers::Offset custom_code = 0, + int32_t version = 1, + tflite::BuiltinOperator builtin_code = tflite::BuiltinOperator_ADD) { + OperatorCodeBuilder builder_(_fbb); + builder_.add_builtin_code(builtin_code); + builder_.add_version(version); + builder_.add_custom_code(custom_code); + builder_.add_deprecated_builtin_code(deprecated_builtin_code); + return builder_.Finish(); +} + +inline flatbuffers::Offset CreateOperatorCodeDirect( + flatbuffers::FlatBufferBuilder &_fbb, + int8_t deprecated_builtin_code = 0, + const char *custom_code = nullptr, + int32_t version = 1, + tflite::BuiltinOperator builtin_code = tflite::BuiltinOperator_ADD) { + auto custom_code__ = custom_code ? _fbb.CreateString(custom_code) : 0; + return tflite::CreateOperatorCode( + _fbb, + deprecated_builtin_code, + custom_code__, + version, + builtin_code); +} + +flatbuffers::Offset CreateOperatorCode(flatbuffers::FlatBufferBuilder &_fbb, const OperatorCodeT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct OperatorT : public flatbuffers::NativeTable { + typedef Operator TableType; + uint32_t opcode_index = 0; + std::vector inputs{}; + std::vector outputs{}; + tflite::BuiltinOptionsUnion builtin_options{}; + std::vector custom_options{}; + tflite::CustomOptionsFormat custom_options_format = tflite::CustomOptionsFormat_FLEXBUFFERS; + std::vector mutating_variable_inputs{}; + std::vector intermediates{}; +}; + +struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef OperatorT NativeTableType; + typedef OperatorBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_OPCODE_INDEX = 4, + VT_INPUTS = 6, + VT_OUTPUTS = 8, + VT_BUILTIN_OPTIONS_TYPE = 10, + VT_BUILTIN_OPTIONS = 12, + VT_CUSTOM_OPTIONS = 14, + VT_CUSTOM_OPTIONS_FORMAT = 16, + VT_MUTATING_VARIABLE_INPUTS = 18, + VT_INTERMEDIATES = 20 + }; + uint32_t opcode_index() const { + return GetField(VT_OPCODE_INDEX, 0); + } + const flatbuffers::Vector *inputs() const { + return GetPointer *>(VT_INPUTS); + } + const flatbuffers::Vector *outputs() const { + return GetPointer *>(VT_OUTPUTS); + } + tflite::BuiltinOptions builtin_options_type() const { + return static_cast(GetField(VT_BUILTIN_OPTIONS_TYPE, 0)); + } + const void *builtin_options() const { + return GetPointer(VT_BUILTIN_OPTIONS); + } + template const T *builtin_options_as() const; + const tflite::Conv2DOptions *builtin_options_as_Conv2DOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_Conv2DOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::DepthwiseConv2DOptions *builtin_options_as_DepthwiseConv2DOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_DepthwiseConv2DOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::ConcatEmbeddingsOptions *builtin_options_as_ConcatEmbeddingsOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_ConcatEmbeddingsOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::LSHProjectionOptions *builtin_options_as_LSHProjectionOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_LSHProjectionOptions ? 
static_cast(builtin_options()) : nullptr; + } + const tflite::Pool2DOptions *builtin_options_as_Pool2DOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_Pool2DOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::SVDFOptions *builtin_options_as_SVDFOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_SVDFOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::RNNOptions *builtin_options_as_RNNOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_RNNOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::FullyConnectedOptions *builtin_options_as_FullyConnectedOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_FullyConnectedOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::SoftmaxOptions *builtin_options_as_SoftmaxOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_SoftmaxOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::ConcatenationOptions *builtin_options_as_ConcatenationOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_ConcatenationOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::AddOptions *builtin_options_as_AddOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_AddOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::L2NormOptions *builtin_options_as_L2NormOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_L2NormOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::LocalResponseNormalizationOptions *builtin_options_as_LocalResponseNormalizationOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_LocalResponseNormalizationOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::LSTMOptions *builtin_options_as_LSTMOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_LSTMOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::ResizeBilinearOptions *builtin_options_as_ResizeBilinearOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_ResizeBilinearOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::CallOptions *builtin_options_as_CallOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_CallOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::ReshapeOptions *builtin_options_as_ReshapeOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_ReshapeOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::SkipGramOptions *builtin_options_as_SkipGramOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_SkipGramOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::SpaceToDepthOptions *builtin_options_as_SpaceToDepthOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_SpaceToDepthOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::EmbeddingLookupSparseOptions *builtin_options_as_EmbeddingLookupSparseOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_EmbeddingLookupSparseOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::MulOptions *builtin_options_as_MulOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_MulOptions ? 
static_cast(builtin_options()) : nullptr; + } + const tflite::PadOptions *builtin_options_as_PadOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_PadOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::GatherOptions *builtin_options_as_GatherOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_GatherOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::BatchToSpaceNDOptions *builtin_options_as_BatchToSpaceNDOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_BatchToSpaceNDOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::SpaceToBatchNDOptions *builtin_options_as_SpaceToBatchNDOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_SpaceToBatchNDOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::TransposeOptions *builtin_options_as_TransposeOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_TransposeOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::ReducerOptions *builtin_options_as_ReducerOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_ReducerOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::SubOptions *builtin_options_as_SubOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_SubOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::DivOptions *builtin_options_as_DivOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_DivOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::SqueezeOptions *builtin_options_as_SqueezeOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_SqueezeOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::SequenceRNNOptions *builtin_options_as_SequenceRNNOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_SequenceRNNOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::StridedSliceOptions *builtin_options_as_StridedSliceOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_StridedSliceOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::ExpOptions *builtin_options_as_ExpOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_ExpOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::TopKV2Options *builtin_options_as_TopKV2Options() const { + return builtin_options_type() == tflite::BuiltinOptions_TopKV2Options ? static_cast(builtin_options()) : nullptr; + } + const tflite::SplitOptions *builtin_options_as_SplitOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_SplitOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::LogSoftmaxOptions *builtin_options_as_LogSoftmaxOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_LogSoftmaxOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::CastOptions *builtin_options_as_CastOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_CastOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::DequantizeOptions *builtin_options_as_DequantizeOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_DequantizeOptions ? 
static_cast(builtin_options()) : nullptr; + } + const tflite::MaximumMinimumOptions *builtin_options_as_MaximumMinimumOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_MaximumMinimumOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::ArgMaxOptions *builtin_options_as_ArgMaxOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_ArgMaxOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::LessOptions *builtin_options_as_LessOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_LessOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::NegOptions *builtin_options_as_NegOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_NegOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::PadV2Options *builtin_options_as_PadV2Options() const { + return builtin_options_type() == tflite::BuiltinOptions_PadV2Options ? static_cast(builtin_options()) : nullptr; + } + const tflite::GreaterOptions *builtin_options_as_GreaterOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_GreaterOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::GreaterEqualOptions *builtin_options_as_GreaterEqualOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_GreaterEqualOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::LessEqualOptions *builtin_options_as_LessEqualOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_LessEqualOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::SelectOptions *builtin_options_as_SelectOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_SelectOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::SliceOptions *builtin_options_as_SliceOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_SliceOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::TransposeConvOptions *builtin_options_as_TransposeConvOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_TransposeConvOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::SparseToDenseOptions *builtin_options_as_SparseToDenseOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_SparseToDenseOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::TileOptions *builtin_options_as_TileOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_TileOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::ExpandDimsOptions *builtin_options_as_ExpandDimsOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_ExpandDimsOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::EqualOptions *builtin_options_as_EqualOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_EqualOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::NotEqualOptions *builtin_options_as_NotEqualOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_NotEqualOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::ShapeOptions *builtin_options_as_ShapeOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_ShapeOptions ? 
static_cast(builtin_options()) : nullptr; + } + const tflite::PowOptions *builtin_options_as_PowOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_PowOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::ArgMinOptions *builtin_options_as_ArgMinOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_ArgMinOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::FakeQuantOptions *builtin_options_as_FakeQuantOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_FakeQuantOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::PackOptions *builtin_options_as_PackOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_PackOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::LogicalOrOptions *builtin_options_as_LogicalOrOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_LogicalOrOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::OneHotOptions *builtin_options_as_OneHotOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_OneHotOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::LogicalAndOptions *builtin_options_as_LogicalAndOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_LogicalAndOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::LogicalNotOptions *builtin_options_as_LogicalNotOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_LogicalNotOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::UnpackOptions *builtin_options_as_UnpackOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_UnpackOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::FloorDivOptions *builtin_options_as_FloorDivOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_FloorDivOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::SquareOptions *builtin_options_as_SquareOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_SquareOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::ZerosLikeOptions *builtin_options_as_ZerosLikeOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_ZerosLikeOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::FillOptions *builtin_options_as_FillOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_FillOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::BidirectionalSequenceLSTMOptions *builtin_options_as_BidirectionalSequenceLSTMOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_BidirectionalSequenceLSTMOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::BidirectionalSequenceRNNOptions *builtin_options_as_BidirectionalSequenceRNNOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_BidirectionalSequenceRNNOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::UnidirectionalSequenceLSTMOptions *builtin_options_as_UnidirectionalSequenceLSTMOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_UnidirectionalSequenceLSTMOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::FloorModOptions *builtin_options_as_FloorModOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_FloorModOptions ? 
static_cast(builtin_options()) : nullptr; + } + const tflite::RangeOptions *builtin_options_as_RangeOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_RangeOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::ResizeNearestNeighborOptions *builtin_options_as_ResizeNearestNeighborOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_ResizeNearestNeighborOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::LeakyReluOptions *builtin_options_as_LeakyReluOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_LeakyReluOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::SquaredDifferenceOptions *builtin_options_as_SquaredDifferenceOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_SquaredDifferenceOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::MirrorPadOptions *builtin_options_as_MirrorPadOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_MirrorPadOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::AbsOptions *builtin_options_as_AbsOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_AbsOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::SplitVOptions *builtin_options_as_SplitVOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_SplitVOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::UniqueOptions *builtin_options_as_UniqueOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_UniqueOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::ReverseV2Options *builtin_options_as_ReverseV2Options() const { + return builtin_options_type() == tflite::BuiltinOptions_ReverseV2Options ? static_cast(builtin_options()) : nullptr; + } + const tflite::AddNOptions *builtin_options_as_AddNOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_AddNOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::GatherNdOptions *builtin_options_as_GatherNdOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_GatherNdOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::CosOptions *builtin_options_as_CosOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_CosOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::WhereOptions *builtin_options_as_WhereOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_WhereOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::RankOptions *builtin_options_as_RankOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_RankOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::ReverseSequenceOptions *builtin_options_as_ReverseSequenceOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_ReverseSequenceOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::MatrixDiagOptions *builtin_options_as_MatrixDiagOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_MatrixDiagOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::QuantizeOptions *builtin_options_as_QuantizeOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_QuantizeOptions ? 
static_cast(builtin_options()) : nullptr; + } + const tflite::MatrixSetDiagOptions *builtin_options_as_MatrixSetDiagOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_MatrixSetDiagOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::HardSwishOptions *builtin_options_as_HardSwishOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_HardSwishOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::IfOptions *builtin_options_as_IfOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_IfOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::WhileOptions *builtin_options_as_WhileOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_WhileOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::DepthToSpaceOptions *builtin_options_as_DepthToSpaceOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_DepthToSpaceOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::NonMaxSuppressionV4Options *builtin_options_as_NonMaxSuppressionV4Options() const { + return builtin_options_type() == tflite::BuiltinOptions_NonMaxSuppressionV4Options ? static_cast(builtin_options()) : nullptr; + } + const tflite::NonMaxSuppressionV5Options *builtin_options_as_NonMaxSuppressionV5Options() const { + return builtin_options_type() == tflite::BuiltinOptions_NonMaxSuppressionV5Options ? static_cast(builtin_options()) : nullptr; + } + const tflite::ScatterNdOptions *builtin_options_as_ScatterNdOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_ScatterNdOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::SelectV2Options *builtin_options_as_SelectV2Options() const { + return builtin_options_type() == tflite::BuiltinOptions_SelectV2Options ? static_cast(builtin_options()) : nullptr; + } + const tflite::DensifyOptions *builtin_options_as_DensifyOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_DensifyOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::SegmentSumOptions *builtin_options_as_SegmentSumOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_SegmentSumOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::BatchMatMulOptions *builtin_options_as_BatchMatMulOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_BatchMatMulOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::CumsumOptions *builtin_options_as_CumsumOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_CumsumOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::CallOnceOptions *builtin_options_as_CallOnceOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_CallOnceOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::BroadcastToOptions *builtin_options_as_BroadcastToOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_BroadcastToOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::Rfft2dOptions *builtin_options_as_Rfft2dOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_Rfft2dOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::Conv3DOptions *builtin_options_as_Conv3DOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_Conv3DOptions ? 
static_cast(builtin_options()) : nullptr; + } + const tflite::HashtableOptions *builtin_options_as_HashtableOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_HashtableOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::HashtableFindOptions *builtin_options_as_HashtableFindOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_HashtableFindOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::HashtableImportOptions *builtin_options_as_HashtableImportOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_HashtableImportOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::HashtableSizeOptions *builtin_options_as_HashtableSizeOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_HashtableSizeOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::VarHandleOptions *builtin_options_as_VarHandleOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_VarHandleOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::ReadVariableOptions *builtin_options_as_ReadVariableOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_ReadVariableOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::AssignVariableOptions *builtin_options_as_AssignVariableOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_AssignVariableOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::RandomOptions *builtin_options_as_RandomOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_RandomOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::BucketizeOptions *builtin_options_as_BucketizeOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_BucketizeOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::GeluOptions *builtin_options_as_GeluOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_GeluOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::DynamicUpdateSliceOptions *builtin_options_as_DynamicUpdateSliceOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_DynamicUpdateSliceOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::UnsortedSegmentProdOptions *builtin_options_as_UnsortedSegmentProdOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_UnsortedSegmentProdOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::UnsortedSegmentMaxOptions *builtin_options_as_UnsortedSegmentMaxOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_UnsortedSegmentMaxOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::UnsortedSegmentMinOptions *builtin_options_as_UnsortedSegmentMinOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_UnsortedSegmentMinOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::UnsortedSegmentSumOptions *builtin_options_as_UnsortedSegmentSumOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_UnsortedSegmentSumOptions ? static_cast(builtin_options()) : nullptr; + } + const tflite::ATan2Options *builtin_options_as_ATan2Options() const { + return builtin_options_type() == tflite::BuiltinOptions_ATan2Options ? 
static_cast(builtin_options()) : nullptr; + } + const tflite::SignOptions *builtin_options_as_SignOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_SignOptions ? static_cast(builtin_options()) : nullptr; + } + const flatbuffers::Vector *custom_options() const { + return GetPointer *>(VT_CUSTOM_OPTIONS); + } + tflite::CustomOptionsFormat custom_options_format() const { + return static_cast(GetField(VT_CUSTOM_OPTIONS_FORMAT, 0)); + } + const flatbuffers::Vector *mutating_variable_inputs() const { + return GetPointer *>(VT_MUTATING_VARIABLE_INPUTS); + } + const flatbuffers::Vector *intermediates() const { + return GetPointer *>(VT_INTERMEDIATES); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_OPCODE_INDEX, 4) && + VerifyOffset(verifier, VT_INPUTS) && + verifier.VerifyVector(inputs()) && + VerifyOffset(verifier, VT_OUTPUTS) && + verifier.VerifyVector(outputs()) && + VerifyField(verifier, VT_BUILTIN_OPTIONS_TYPE, 1) && + VerifyOffset(verifier, VT_BUILTIN_OPTIONS) && + VerifyBuiltinOptions(verifier, builtin_options(), builtin_options_type()) && + VerifyOffset(verifier, VT_CUSTOM_OPTIONS) && + verifier.VerifyVector(custom_options()) && + VerifyField(verifier, VT_CUSTOM_OPTIONS_FORMAT, 1) && + VerifyOffset(verifier, VT_MUTATING_VARIABLE_INPUTS) && + verifier.VerifyVector(mutating_variable_inputs()) && + VerifyOffset(verifier, VT_INTERMEDIATES) && + verifier.VerifyVector(intermediates()) && + verifier.EndTable(); + } + OperatorT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(OperatorT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const OperatorT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +template<> inline const tflite::Conv2DOptions *Operator::builtin_options_as() const { + return builtin_options_as_Conv2DOptions(); +} + +template<> inline const tflite::DepthwiseConv2DOptions *Operator::builtin_options_as() const { + return builtin_options_as_DepthwiseConv2DOptions(); +} + +template<> inline const tflite::ConcatEmbeddingsOptions *Operator::builtin_options_as() const { + return builtin_options_as_ConcatEmbeddingsOptions(); +} + +template<> inline const tflite::LSHProjectionOptions *Operator::builtin_options_as() const { + return builtin_options_as_LSHProjectionOptions(); +} + +template<> inline const tflite::Pool2DOptions *Operator::builtin_options_as() const { + return builtin_options_as_Pool2DOptions(); +} + +template<> inline const tflite::SVDFOptions *Operator::builtin_options_as() const { + return builtin_options_as_SVDFOptions(); +} + +template<> inline const tflite::RNNOptions *Operator::builtin_options_as() const { + return builtin_options_as_RNNOptions(); +} + +template<> inline const tflite::FullyConnectedOptions *Operator::builtin_options_as() const { + return builtin_options_as_FullyConnectedOptions(); +} + +template<> inline const tflite::SoftmaxOptions *Operator::builtin_options_as() const { + return builtin_options_as_SoftmaxOptions(); +} + +template<> inline const tflite::ConcatenationOptions *Operator::builtin_options_as() const { + return builtin_options_as_ConcatenationOptions(); +} + +template<> inline const tflite::AddOptions *Operator::builtin_options_as() const { + return builtin_options_as_AddOptions(); +} + +template<> inline const tflite::L2NormOptions *Operator::builtin_options_as() const { + return 
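+  // Usage sketch for the typed accessors above (assumes `op` points at a
+  // tflite::Operator that has already passed Verify()):
+  //
+  //   if (const tflite::Conv2DOptions *conv = op->builtin_options_as_Conv2DOptions()) {
+  //     auto stride_w = conv->stride_w();
+  //     auto stride_h = conv->stride_h();
+  //   }
+  //
+  // Each builtin_options_as_*() accessor returns nullptr unless
+  // builtin_options_type() names that union member, so the check above is safe
+  // without a separate switch on the type tag.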
builtin_options_as_L2NormOptions(); +} + +template<> inline const tflite::LocalResponseNormalizationOptions *Operator::builtin_options_as() const { + return builtin_options_as_LocalResponseNormalizationOptions(); +} + +template<> inline const tflite::LSTMOptions *Operator::builtin_options_as() const { + return builtin_options_as_LSTMOptions(); +} + +template<> inline const tflite::ResizeBilinearOptions *Operator::builtin_options_as() const { + return builtin_options_as_ResizeBilinearOptions(); +} + +template<> inline const tflite::CallOptions *Operator::builtin_options_as() const { + return builtin_options_as_CallOptions(); +} + +template<> inline const tflite::ReshapeOptions *Operator::builtin_options_as() const { + return builtin_options_as_ReshapeOptions(); +} + +template<> inline const tflite::SkipGramOptions *Operator::builtin_options_as() const { + return builtin_options_as_SkipGramOptions(); +} + +template<> inline const tflite::SpaceToDepthOptions *Operator::builtin_options_as() const { + return builtin_options_as_SpaceToDepthOptions(); +} + +template<> inline const tflite::EmbeddingLookupSparseOptions *Operator::builtin_options_as() const { + return builtin_options_as_EmbeddingLookupSparseOptions(); +} + +template<> inline const tflite::MulOptions *Operator::builtin_options_as() const { + return builtin_options_as_MulOptions(); +} + +template<> inline const tflite::PadOptions *Operator::builtin_options_as() const { + return builtin_options_as_PadOptions(); +} + +template<> inline const tflite::GatherOptions *Operator::builtin_options_as() const { + return builtin_options_as_GatherOptions(); +} + +template<> inline const tflite::BatchToSpaceNDOptions *Operator::builtin_options_as() const { + return builtin_options_as_BatchToSpaceNDOptions(); +} + +template<> inline const tflite::SpaceToBatchNDOptions *Operator::builtin_options_as() const { + return builtin_options_as_SpaceToBatchNDOptions(); +} + +template<> inline const tflite::TransposeOptions *Operator::builtin_options_as() const { + return builtin_options_as_TransposeOptions(); +} + +template<> inline const tflite::ReducerOptions *Operator::builtin_options_as() const { + return builtin_options_as_ReducerOptions(); +} + +template<> inline const tflite::SubOptions *Operator::builtin_options_as() const { + return builtin_options_as_SubOptions(); +} + +template<> inline const tflite::DivOptions *Operator::builtin_options_as() const { + return builtin_options_as_DivOptions(); +} + +template<> inline const tflite::SqueezeOptions *Operator::builtin_options_as() const { + return builtin_options_as_SqueezeOptions(); +} + +template<> inline const tflite::SequenceRNNOptions *Operator::builtin_options_as() const { + return builtin_options_as_SequenceRNNOptions(); +} + +template<> inline const tflite::StridedSliceOptions *Operator::builtin_options_as() const { + return builtin_options_as_StridedSliceOptions(); +} + +template<> inline const tflite::ExpOptions *Operator::builtin_options_as() const { + return builtin_options_as_ExpOptions(); +} + +template<> inline const tflite::TopKV2Options *Operator::builtin_options_as() const { + return builtin_options_as_TopKV2Options(); +} + +template<> inline const tflite::SplitOptions *Operator::builtin_options_as() const { + return builtin_options_as_SplitOptions(); +} + +template<> inline const tflite::LogSoftmaxOptions *Operator::builtin_options_as() const { + return builtin_options_as_LogSoftmaxOptions(); +} + +template<> inline const tflite::CastOptions *Operator::builtin_options_as() const { + 
return builtin_options_as_CastOptions(); +} + +template<> inline const tflite::DequantizeOptions *Operator::builtin_options_as() const { + return builtin_options_as_DequantizeOptions(); +} + +template<> inline const tflite::MaximumMinimumOptions *Operator::builtin_options_as() const { + return builtin_options_as_MaximumMinimumOptions(); +} + +template<> inline const tflite::ArgMaxOptions *Operator::builtin_options_as() const { + return builtin_options_as_ArgMaxOptions(); +} + +template<> inline const tflite::LessOptions *Operator::builtin_options_as() const { + return builtin_options_as_LessOptions(); +} + +template<> inline const tflite::NegOptions *Operator::builtin_options_as() const { + return builtin_options_as_NegOptions(); +} + +template<> inline const tflite::PadV2Options *Operator::builtin_options_as() const { + return builtin_options_as_PadV2Options(); +} + +template<> inline const tflite::GreaterOptions *Operator::builtin_options_as() const { + return builtin_options_as_GreaterOptions(); +} + +template<> inline const tflite::GreaterEqualOptions *Operator::builtin_options_as() const { + return builtin_options_as_GreaterEqualOptions(); +} + +template<> inline const tflite::LessEqualOptions *Operator::builtin_options_as() const { + return builtin_options_as_LessEqualOptions(); +} + +template<> inline const tflite::SelectOptions *Operator::builtin_options_as() const { + return builtin_options_as_SelectOptions(); +} + +template<> inline const tflite::SliceOptions *Operator::builtin_options_as() const { + return builtin_options_as_SliceOptions(); +} + +template<> inline const tflite::TransposeConvOptions *Operator::builtin_options_as() const { + return builtin_options_as_TransposeConvOptions(); +} + +template<> inline const tflite::SparseToDenseOptions *Operator::builtin_options_as() const { + return builtin_options_as_SparseToDenseOptions(); +} + +template<> inline const tflite::TileOptions *Operator::builtin_options_as() const { + return builtin_options_as_TileOptions(); +} + +template<> inline const tflite::ExpandDimsOptions *Operator::builtin_options_as() const { + return builtin_options_as_ExpandDimsOptions(); +} + +template<> inline const tflite::EqualOptions *Operator::builtin_options_as() const { + return builtin_options_as_EqualOptions(); +} + +template<> inline const tflite::NotEqualOptions *Operator::builtin_options_as() const { + return builtin_options_as_NotEqualOptions(); +} + +template<> inline const tflite::ShapeOptions *Operator::builtin_options_as() const { + return builtin_options_as_ShapeOptions(); +} + +template<> inline const tflite::PowOptions *Operator::builtin_options_as() const { + return builtin_options_as_PowOptions(); +} + +template<> inline const tflite::ArgMinOptions *Operator::builtin_options_as() const { + return builtin_options_as_ArgMinOptions(); +} + +template<> inline const tflite::FakeQuantOptions *Operator::builtin_options_as() const { + return builtin_options_as_FakeQuantOptions(); +} + +template<> inline const tflite::PackOptions *Operator::builtin_options_as() const { + return builtin_options_as_PackOptions(); +} + +template<> inline const tflite::LogicalOrOptions *Operator::builtin_options_as() const { + return builtin_options_as_LogicalOrOptions(); +} + +template<> inline const tflite::OneHotOptions *Operator::builtin_options_as() const { + return builtin_options_as_OneHotOptions(); +} + +template<> inline const tflite::LogicalAndOptions *Operator::builtin_options_as() const { + return builtin_options_as_LogicalAndOptions(); +} + +template<> 
inline const tflite::LogicalNotOptions *Operator::builtin_options_as() const { + return builtin_options_as_LogicalNotOptions(); +} + +template<> inline const tflite::UnpackOptions *Operator::builtin_options_as() const { + return builtin_options_as_UnpackOptions(); +} + +template<> inline const tflite::FloorDivOptions *Operator::builtin_options_as() const { + return builtin_options_as_FloorDivOptions(); +} + +template<> inline const tflite::SquareOptions *Operator::builtin_options_as() const { + return builtin_options_as_SquareOptions(); +} + +template<> inline const tflite::ZerosLikeOptions *Operator::builtin_options_as() const { + return builtin_options_as_ZerosLikeOptions(); +} + +template<> inline const tflite::FillOptions *Operator::builtin_options_as() const { + return builtin_options_as_FillOptions(); +} + +template<> inline const tflite::BidirectionalSequenceLSTMOptions *Operator::builtin_options_as() const { + return builtin_options_as_BidirectionalSequenceLSTMOptions(); +} + +template<> inline const tflite::BidirectionalSequenceRNNOptions *Operator::builtin_options_as() const { + return builtin_options_as_BidirectionalSequenceRNNOptions(); +} + +template<> inline const tflite::UnidirectionalSequenceLSTMOptions *Operator::builtin_options_as() const { + return builtin_options_as_UnidirectionalSequenceLSTMOptions(); +} + +template<> inline const tflite::FloorModOptions *Operator::builtin_options_as() const { + return builtin_options_as_FloorModOptions(); +} + +template<> inline const tflite::RangeOptions *Operator::builtin_options_as() const { + return builtin_options_as_RangeOptions(); +} + +template<> inline const tflite::ResizeNearestNeighborOptions *Operator::builtin_options_as() const { + return builtin_options_as_ResizeNearestNeighborOptions(); +} + +template<> inline const tflite::LeakyReluOptions *Operator::builtin_options_as() const { + return builtin_options_as_LeakyReluOptions(); +} + +template<> inline const tflite::SquaredDifferenceOptions *Operator::builtin_options_as() const { + return builtin_options_as_SquaredDifferenceOptions(); +} + +template<> inline const tflite::MirrorPadOptions *Operator::builtin_options_as() const { + return builtin_options_as_MirrorPadOptions(); +} + +template<> inline const tflite::AbsOptions *Operator::builtin_options_as() const { + return builtin_options_as_AbsOptions(); +} + +template<> inline const tflite::SplitVOptions *Operator::builtin_options_as() const { + return builtin_options_as_SplitVOptions(); +} + +template<> inline const tflite::UniqueOptions *Operator::builtin_options_as() const { + return builtin_options_as_UniqueOptions(); +} + +template<> inline const tflite::ReverseV2Options *Operator::builtin_options_as() const { + return builtin_options_as_ReverseV2Options(); +} + +template<> inline const tflite::AddNOptions *Operator::builtin_options_as() const { + return builtin_options_as_AddNOptions(); +} + +template<> inline const tflite::GatherNdOptions *Operator::builtin_options_as() const { + return builtin_options_as_GatherNdOptions(); +} + +template<> inline const tflite::CosOptions *Operator::builtin_options_as() const { + return builtin_options_as_CosOptions(); +} + +template<> inline const tflite::WhereOptions *Operator::builtin_options_as() const { + return builtin_options_as_WhereOptions(); +} + +template<> inline const tflite::RankOptions *Operator::builtin_options_as() const { + return builtin_options_as_RankOptions(); +} + +template<> inline const tflite::ReverseSequenceOptions *Operator::builtin_options_as() const { 
+ return builtin_options_as_ReverseSequenceOptions(); +} + +template<> inline const tflite::MatrixDiagOptions *Operator::builtin_options_as() const { + return builtin_options_as_MatrixDiagOptions(); +} + +template<> inline const tflite::QuantizeOptions *Operator::builtin_options_as() const { + return builtin_options_as_QuantizeOptions(); +} + +template<> inline const tflite::MatrixSetDiagOptions *Operator::builtin_options_as() const { + return builtin_options_as_MatrixSetDiagOptions(); +} + +template<> inline const tflite::HardSwishOptions *Operator::builtin_options_as() const { + return builtin_options_as_HardSwishOptions(); +} + +template<> inline const tflite::IfOptions *Operator::builtin_options_as() const { + return builtin_options_as_IfOptions(); +} + +template<> inline const tflite::WhileOptions *Operator::builtin_options_as() const { + return builtin_options_as_WhileOptions(); +} + +template<> inline const tflite::DepthToSpaceOptions *Operator::builtin_options_as() const { + return builtin_options_as_DepthToSpaceOptions(); +} + +template<> inline const tflite::NonMaxSuppressionV4Options *Operator::builtin_options_as() const { + return builtin_options_as_NonMaxSuppressionV4Options(); +} + +template<> inline const tflite::NonMaxSuppressionV5Options *Operator::builtin_options_as() const { + return builtin_options_as_NonMaxSuppressionV5Options(); +} + +template<> inline const tflite::ScatterNdOptions *Operator::builtin_options_as() const { + return builtin_options_as_ScatterNdOptions(); +} + +template<> inline const tflite::SelectV2Options *Operator::builtin_options_as() const { + return builtin_options_as_SelectV2Options(); +} + +template<> inline const tflite::DensifyOptions *Operator::builtin_options_as() const { + return builtin_options_as_DensifyOptions(); +} + +template<> inline const tflite::SegmentSumOptions *Operator::builtin_options_as() const { + return builtin_options_as_SegmentSumOptions(); +} + +template<> inline const tflite::BatchMatMulOptions *Operator::builtin_options_as() const { + return builtin_options_as_BatchMatMulOptions(); +} + +template<> inline const tflite::CumsumOptions *Operator::builtin_options_as() const { + return builtin_options_as_CumsumOptions(); +} + +template<> inline const tflite::CallOnceOptions *Operator::builtin_options_as() const { + return builtin_options_as_CallOnceOptions(); +} + +template<> inline const tflite::BroadcastToOptions *Operator::builtin_options_as() const { + return builtin_options_as_BroadcastToOptions(); +} + +template<> inline const tflite::Rfft2dOptions *Operator::builtin_options_as() const { + return builtin_options_as_Rfft2dOptions(); +} + +template<> inline const tflite::Conv3DOptions *Operator::builtin_options_as() const { + return builtin_options_as_Conv3DOptions(); +} + +template<> inline const tflite::HashtableOptions *Operator::builtin_options_as() const { + return builtin_options_as_HashtableOptions(); +} + +template<> inline const tflite::HashtableFindOptions *Operator::builtin_options_as() const { + return builtin_options_as_HashtableFindOptions(); +} + +template<> inline const tflite::HashtableImportOptions *Operator::builtin_options_as() const { + return builtin_options_as_HashtableImportOptions(); +} + +template<> inline const tflite::HashtableSizeOptions *Operator::builtin_options_as() const { + return builtin_options_as_HashtableSizeOptions(); +} + +template<> inline const tflite::VarHandleOptions *Operator::builtin_options_as() const { + return builtin_options_as_VarHandleOptions(); +} + +template<> inline 
const tflite::ReadVariableOptions *Operator::builtin_options_as() const { + return builtin_options_as_ReadVariableOptions(); +} + +template<> inline const tflite::AssignVariableOptions *Operator::builtin_options_as() const { + return builtin_options_as_AssignVariableOptions(); +} + +template<> inline const tflite::RandomOptions *Operator::builtin_options_as() const { + return builtin_options_as_RandomOptions(); +} + +template<> inline const tflite::BucketizeOptions *Operator::builtin_options_as() const { + return builtin_options_as_BucketizeOptions(); +} + +template<> inline const tflite::GeluOptions *Operator::builtin_options_as() const { + return builtin_options_as_GeluOptions(); +} + +template<> inline const tflite::DynamicUpdateSliceOptions *Operator::builtin_options_as() const { + return builtin_options_as_DynamicUpdateSliceOptions(); +} + +template<> inline const tflite::UnsortedSegmentProdOptions *Operator::builtin_options_as() const { + return builtin_options_as_UnsortedSegmentProdOptions(); +} + +template<> inline const tflite::UnsortedSegmentMaxOptions *Operator::builtin_options_as() const { + return builtin_options_as_UnsortedSegmentMaxOptions(); +} + +template<> inline const tflite::UnsortedSegmentMinOptions *Operator::builtin_options_as() const { + return builtin_options_as_UnsortedSegmentMinOptions(); +} + +template<> inline const tflite::UnsortedSegmentSumOptions *Operator::builtin_options_as() const { + return builtin_options_as_UnsortedSegmentSumOptions(); +} + +template<> inline const tflite::ATan2Options *Operator::builtin_options_as() const { + return builtin_options_as_ATan2Options(); +} + +template<> inline const tflite::SignOptions *Operator::builtin_options_as() const { + return builtin_options_as_SignOptions(); +} + +struct OperatorBuilder { + typedef Operator Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_opcode_index(uint32_t opcode_index) { + fbb_.AddElement(Operator::VT_OPCODE_INDEX, opcode_index, 0); + } + void add_inputs(flatbuffers::Offset> inputs) { + fbb_.AddOffset(Operator::VT_INPUTS, inputs); + } + void add_outputs(flatbuffers::Offset> outputs) { + fbb_.AddOffset(Operator::VT_OUTPUTS, outputs); + } + void add_builtin_options_type(tflite::BuiltinOptions builtin_options_type) { + fbb_.AddElement(Operator::VT_BUILTIN_OPTIONS_TYPE, static_cast(builtin_options_type), 0); + } + void add_builtin_options(flatbuffers::Offset builtin_options) { + fbb_.AddOffset(Operator::VT_BUILTIN_OPTIONS, builtin_options); + } + void add_custom_options(flatbuffers::Offset> custom_options) { + fbb_.AddOffset(Operator::VT_CUSTOM_OPTIONS, custom_options); + } + void add_custom_options_format(tflite::CustomOptionsFormat custom_options_format) { + fbb_.AddElement(Operator::VT_CUSTOM_OPTIONS_FORMAT, static_cast(custom_options_format), 0); + } + void add_mutating_variable_inputs(flatbuffers::Offset> mutating_variable_inputs) { + fbb_.AddOffset(Operator::VT_MUTATING_VARIABLE_INPUTS, mutating_variable_inputs); + } + void add_intermediates(flatbuffers::Offset> intermediates) { + fbb_.AddOffset(Operator::VT_INTERMEDIATES, intermediates); + } + explicit OperatorBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateOperator( + flatbuffers::FlatBufferBuilder &_fbb, + uint32_t opcode_index = 0, + flatbuffers::Offset> inputs = 0, + 
flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0,
+    tflite::BuiltinOptions builtin_options_type = tflite::BuiltinOptions_NONE,
+    flatbuffers::Offset<void> builtin_options = 0,
+    flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom_options = 0,
+    tflite::CustomOptionsFormat custom_options_format = tflite::CustomOptionsFormat_FLEXBUFFERS,
+    flatbuffers::Offset<flatbuffers::Vector<uint8_t>> mutating_variable_inputs = 0,
+    flatbuffers::Offset<flatbuffers::Vector<int32_t>> intermediates = 0) {
+  OperatorBuilder builder_(_fbb);
+  builder_.add_intermediates(intermediates);
+  builder_.add_mutating_variable_inputs(mutating_variable_inputs);
+  builder_.add_custom_options(custom_options);
+  builder_.add_builtin_options(builtin_options);
+  builder_.add_outputs(outputs);
+  builder_.add_inputs(inputs);
+  builder_.add_opcode_index(opcode_index);
+  builder_.add_custom_options_format(custom_options_format);
+  builder_.add_builtin_options_type(builtin_options_type);
+  return builder_.Finish();
+}
+
+inline flatbuffers::Offset<Operator> CreateOperatorDirect(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    uint32_t opcode_index = 0,
+    const std::vector<int32_t> *inputs = nullptr,
+    const std::vector<int32_t> *outputs = nullptr,
+    tflite::BuiltinOptions builtin_options_type = tflite::BuiltinOptions_NONE,
+    flatbuffers::Offset<void> builtin_options = 0,
+    const std::vector<uint8_t> *custom_options = nullptr,
+    tflite::CustomOptionsFormat custom_options_format = tflite::CustomOptionsFormat_FLEXBUFFERS,
+    const std::vector<uint8_t> *mutating_variable_inputs = nullptr,
+    const std::vector<int32_t> *intermediates = nullptr) {
+  auto inputs__ = inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0;
+  auto outputs__ = outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0;
+  auto custom_options__ = custom_options ? _fbb.CreateVector<uint8_t>(*custom_options) : 0;
+  auto mutating_variable_inputs__ = mutating_variable_inputs ? _fbb.CreateVector<uint8_t>(*mutating_variable_inputs) : 0;
+  auto intermediates__ = intermediates ?
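+  // Sketch: creating an Operator entry by hand (illustrative values; assumes `fbb`
+  // is a flatbuffers::FlatBufferBuilder and the tensor indices already exist in
+  // the enclosing SubGraph):
+  //
+  //   std::vector<int32_t> op_inputs{0, 1};
+  //   std::vector<int32_t> op_outputs{2};
+  //   auto add_opts = tflite::CreateAddOptions(fbb).Union();
+  //   auto op = tflite::CreateOperatorDirect(
+  //       fbb, /*opcode_index=*/0, &op_inputs, &op_outputs,
+  //       tflite::BuiltinOptions_AddOptions, add_opts);
+  //
+  // The remaining parameters keep their defaults: no custom options, FLEXBUFFERS
+  // format, no mutating inputs, no intermediates.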
_fbb.CreateVector(*intermediates) : 0; + return tflite::CreateOperator( + _fbb, + opcode_index, + inputs__, + outputs__, + builtin_options_type, + builtin_options, + custom_options__, + custom_options_format, + mutating_variable_inputs__, + intermediates__); +} + +flatbuffers::Offset CreateOperator(flatbuffers::FlatBufferBuilder &_fbb, const OperatorT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct SubGraphT : public flatbuffers::NativeTable { + typedef SubGraph TableType; + std::vector> tensors{}; + std::vector inputs{}; + std::vector outputs{}; + std::vector> operators{}; + std::string name{}; + SubGraphT() = default; + SubGraphT(const SubGraphT &o); + SubGraphT(SubGraphT&&) FLATBUFFERS_NOEXCEPT = default; + SubGraphT &operator=(SubGraphT o) FLATBUFFERS_NOEXCEPT; +}; + +struct SubGraph FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef SubGraphT NativeTableType; + typedef SubGraphBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_TENSORS = 4, + VT_INPUTS = 6, + VT_OUTPUTS = 8, + VT_OPERATORS = 10, + VT_NAME = 12 + }; + const flatbuffers::Vector> *tensors() const { + return GetPointer> *>(VT_TENSORS); + } + const flatbuffers::Vector *inputs() const { + return GetPointer *>(VT_INPUTS); + } + const flatbuffers::Vector *outputs() const { + return GetPointer *>(VT_OUTPUTS); + } + const flatbuffers::Vector> *operators() const { + return GetPointer> *>(VT_OPERATORS); + } + const flatbuffers::String *name() const { + return GetPointer(VT_NAME); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_TENSORS) && + verifier.VerifyVector(tensors()) && + verifier.VerifyVectorOfTables(tensors()) && + VerifyOffset(verifier, VT_INPUTS) && + verifier.VerifyVector(inputs()) && + VerifyOffset(verifier, VT_OUTPUTS) && + verifier.VerifyVector(outputs()) && + VerifyOffset(verifier, VT_OPERATORS) && + verifier.VerifyVector(operators()) && + verifier.VerifyVectorOfTables(operators()) && + VerifyOffset(verifier, VT_NAME) && + verifier.VerifyString(name()) && + verifier.EndTable(); + } + SubGraphT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(SubGraphT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const SubGraphT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct SubGraphBuilder { + typedef SubGraph Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_tensors(flatbuffers::Offset>> tensors) { + fbb_.AddOffset(SubGraph::VT_TENSORS, tensors); + } + void add_inputs(flatbuffers::Offset> inputs) { + fbb_.AddOffset(SubGraph::VT_INPUTS, inputs); + } + void add_outputs(flatbuffers::Offset> outputs) { + fbb_.AddOffset(SubGraph::VT_OUTPUTS, outputs); + } + void add_operators(flatbuffers::Offset>> operators) { + fbb_.AddOffset(SubGraph::VT_OPERATORS, operators); + } + void add_name(flatbuffers::Offset name) { + fbb_.AddOffset(SubGraph::VT_NAME, name); + } + explicit SubGraphBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateSubGraph( + flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset>> tensors = 0, + flatbuffers::Offset> inputs = 0, + 
flatbuffers::Offset> outputs = 0, + flatbuffers::Offset>> operators = 0, + flatbuffers::Offset name = 0) { + SubGraphBuilder builder_(_fbb); + builder_.add_name(name); + builder_.add_operators(operators); + builder_.add_outputs(outputs); + builder_.add_inputs(inputs); + builder_.add_tensors(tensors); + return builder_.Finish(); +} + +inline flatbuffers::Offset CreateSubGraphDirect( + flatbuffers::FlatBufferBuilder &_fbb, + const std::vector> *tensors = nullptr, + const std::vector *inputs = nullptr, + const std::vector *outputs = nullptr, + const std::vector> *operators = nullptr, + const char *name = nullptr) { + auto tensors__ = tensors ? _fbb.CreateVector>(*tensors) : 0; + auto inputs__ = inputs ? _fbb.CreateVector(*inputs) : 0; + auto outputs__ = outputs ? _fbb.CreateVector(*outputs) : 0; + auto operators__ = operators ? _fbb.CreateVector>(*operators) : 0; + auto name__ = name ? _fbb.CreateString(name) : 0; + return tflite::CreateSubGraph( + _fbb, + tensors__, + inputs__, + outputs__, + operators__, + name__); +} + +flatbuffers::Offset CreateSubGraph(flatbuffers::FlatBufferBuilder &_fbb, const SubGraphT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct BufferT : public flatbuffers::NativeTable { + typedef Buffer TableType; + std::vector data{}; +}; + +struct Buffer FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef BufferT NativeTableType; + typedef BufferBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_DATA = 4 + }; + const flatbuffers::Vector *data() const { + return GetPointer *>(VT_DATA); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_DATA) && + verifier.VerifyVector(data()) && + verifier.EndTable(); + } + BufferT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(BufferT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const BufferT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct BufferBuilder { + typedef Buffer Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_data(flatbuffers::Offset> data) { + fbb_.AddOffset(Buffer::VT_DATA, data); + } + explicit BufferBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateBuffer( + flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset> data = 0) { + BufferBuilder builder_(_fbb); + builder_.add_data(data); + return builder_.Finish(); +} + +inline flatbuffers::Offset CreateBufferDirect( + flatbuffers::FlatBufferBuilder &_fbb, + const std::vector *data = nullptr) { + if (data) { _fbb.ForceVectorAlignment(data->size(), sizeof(uint8_t), 16); } + auto data__ = data ? 
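+  // Note: CreateBufferDirect force-aligns the byte vector to 16 bytes
+  // (ForceVectorAlignment(..., 16) above), the alignment the schema requests for
+  // tensor data. Minimal sketch (assumes `fbb` is the builder in use):
+  //
+  //   std::vector<uint8_t> bytes{1, 2, 3, 4};
+  //   auto buf = tflite::CreateBufferDirect(fbb, &bytes);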
_fbb.CreateVector(*data) : 0; + return tflite::CreateBuffer( + _fbb, + data__); +} + +flatbuffers::Offset CreateBuffer(flatbuffers::FlatBufferBuilder &_fbb, const BufferT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct MetadataT : public flatbuffers::NativeTable { + typedef Metadata TableType; + std::string name{}; + uint32_t buffer = 0; +}; + +struct Metadata FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef MetadataT NativeTableType; + typedef MetadataBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_NAME = 4, + VT_BUFFER = 6 + }; + const flatbuffers::String *name() const { + return GetPointer(VT_NAME); + } + uint32_t buffer() const { + return GetField(VT_BUFFER, 0); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_NAME) && + verifier.VerifyString(name()) && + VerifyField(verifier, VT_BUFFER, 4) && + verifier.EndTable(); + } + MetadataT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(MetadataT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const MetadataT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct MetadataBuilder { + typedef Metadata Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_name(flatbuffers::Offset name) { + fbb_.AddOffset(Metadata::VT_NAME, name); + } + void add_buffer(uint32_t buffer) { + fbb_.AddElement(Metadata::VT_BUFFER, buffer, 0); + } + explicit MetadataBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateMetadata( + flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset name = 0, + uint32_t buffer = 0) { + MetadataBuilder builder_(_fbb); + builder_.add_buffer(buffer); + builder_.add_name(name); + return builder_.Finish(); +} + +inline flatbuffers::Offset CreateMetadataDirect( + flatbuffers::FlatBufferBuilder &_fbb, + const char *name = nullptr, + uint32_t buffer = 0) { + auto name__ = name ? 
_fbb.CreateString(name) : 0; + return tflite::CreateMetadata( + _fbb, + name__, + buffer); +} + +flatbuffers::Offset CreateMetadata(flatbuffers::FlatBufferBuilder &_fbb, const MetadataT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct TensorMapT : public flatbuffers::NativeTable { + typedef TensorMap TableType; + std::string name{}; + uint32_t tensor_index = 0; +}; + +struct TensorMap FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef TensorMapT NativeTableType; + typedef TensorMapBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_NAME = 4, + VT_TENSOR_INDEX = 6 + }; + const flatbuffers::String *name() const { + return GetPointer(VT_NAME); + } + uint32_t tensor_index() const { + return GetField(VT_TENSOR_INDEX, 0); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_NAME) && + verifier.VerifyString(name()) && + VerifyField(verifier, VT_TENSOR_INDEX, 4) && + verifier.EndTable(); + } + TensorMapT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(TensorMapT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const TensorMapT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct TensorMapBuilder { + typedef TensorMap Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_name(flatbuffers::Offset name) { + fbb_.AddOffset(TensorMap::VT_NAME, name); + } + void add_tensor_index(uint32_t tensor_index) { + fbb_.AddElement(TensorMap::VT_TENSOR_INDEX, tensor_index, 0); + } + explicit TensorMapBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateTensorMap( + flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset name = 0, + uint32_t tensor_index = 0) { + TensorMapBuilder builder_(_fbb); + builder_.add_tensor_index(tensor_index); + builder_.add_name(name); + return builder_.Finish(); +} + +inline flatbuffers::Offset CreateTensorMapDirect( + flatbuffers::FlatBufferBuilder &_fbb, + const char *name = nullptr, + uint32_t tensor_index = 0) { + auto name__ = name ? 
_fbb.CreateString(name) : 0; + return tflite::CreateTensorMap( + _fbb, + name__, + tensor_index); +} + +flatbuffers::Offset CreateTensorMap(flatbuffers::FlatBufferBuilder &_fbb, const TensorMapT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct SignatureDefT : public flatbuffers::NativeTable { + typedef SignatureDef TableType; + std::vector> inputs{}; + std::vector> outputs{}; + std::string signature_key{}; + uint32_t subgraph_index = 0; + SignatureDefT() = default; + SignatureDefT(const SignatureDefT &o); + SignatureDefT(SignatureDefT&&) FLATBUFFERS_NOEXCEPT = default; + SignatureDefT &operator=(SignatureDefT o) FLATBUFFERS_NOEXCEPT; +}; + +struct SignatureDef FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef SignatureDefT NativeTableType; + typedef SignatureDefBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_INPUTS = 4, + VT_OUTPUTS = 6, + VT_SIGNATURE_KEY = 8, + VT_SUBGRAPH_INDEX = 12 + }; + const flatbuffers::Vector> *inputs() const { + return GetPointer> *>(VT_INPUTS); + } + const flatbuffers::Vector> *outputs() const { + return GetPointer> *>(VT_OUTPUTS); + } + const flatbuffers::String *signature_key() const { + return GetPointer(VT_SIGNATURE_KEY); + } + uint32_t subgraph_index() const { + return GetField(VT_SUBGRAPH_INDEX, 0); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_INPUTS) && + verifier.VerifyVector(inputs()) && + verifier.VerifyVectorOfTables(inputs()) && + VerifyOffset(verifier, VT_OUTPUTS) && + verifier.VerifyVector(outputs()) && + verifier.VerifyVectorOfTables(outputs()) && + VerifyOffset(verifier, VT_SIGNATURE_KEY) && + verifier.VerifyString(signature_key()) && + VerifyField(verifier, VT_SUBGRAPH_INDEX, 4) && + verifier.EndTable(); + } + SignatureDefT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(SignatureDefT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const SignatureDefT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct SignatureDefBuilder { + typedef SignatureDef Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_inputs(flatbuffers::Offset>> inputs) { + fbb_.AddOffset(SignatureDef::VT_INPUTS, inputs); + } + void add_outputs(flatbuffers::Offset>> outputs) { + fbb_.AddOffset(SignatureDef::VT_OUTPUTS, outputs); + } + void add_signature_key(flatbuffers::Offset signature_key) { + fbb_.AddOffset(SignatureDef::VT_SIGNATURE_KEY, signature_key); + } + void add_subgraph_index(uint32_t subgraph_index) { + fbb_.AddElement(SignatureDef::VT_SUBGRAPH_INDEX, subgraph_index, 0); + } + explicit SignatureDefBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateSignatureDef( + flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset>> inputs = 0, + flatbuffers::Offset>> outputs = 0, + flatbuffers::Offset signature_key = 0, + uint32_t subgraph_index = 0) { + SignatureDefBuilder builder_(_fbb); + builder_.add_subgraph_index(subgraph_index); + builder_.add_signature_key(signature_key); + builder_.add_outputs(outputs); + builder_.add_inputs(inputs); + return builder_.Finish(); +} + +inline 
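+// Sketch: a SignatureDef maps caller-facing names to tensor indices of one
+// subgraph. Illustrative indices and names, assuming `fbb` as above:
+//
+//   std::vector<flatbuffers::Offset<tflite::TensorMap>> sig_in{
+//       tflite::CreateTensorMapDirect(fbb, "x", 0)};
+//   std::vector<flatbuffers::Offset<tflite::TensorMap>> sig_out{
+//       tflite::CreateTensorMapDirect(fbb, "y", 3)};
+//   auto sig = tflite::CreateSignatureDefDirect(
+//       fbb, &sig_in, &sig_out, "serving_default", /*subgraph_index=*/0);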
flatbuffers::Offset CreateSignatureDefDirect( + flatbuffers::FlatBufferBuilder &_fbb, + const std::vector> *inputs = nullptr, + const std::vector> *outputs = nullptr, + const char *signature_key = nullptr, + uint32_t subgraph_index = 0) { + auto inputs__ = inputs ? _fbb.CreateVector>(*inputs) : 0; + auto outputs__ = outputs ? _fbb.CreateVector>(*outputs) : 0; + auto signature_key__ = signature_key ? _fbb.CreateString(signature_key) : 0; + return tflite::CreateSignatureDef( + _fbb, + inputs__, + outputs__, + signature_key__, + subgraph_index); +} + +flatbuffers::Offset CreateSignatureDef(flatbuffers::FlatBufferBuilder &_fbb, const SignatureDefT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct ModelT : public flatbuffers::NativeTable { + typedef Model TableType; + uint32_t version = 0; + std::vector> operator_codes{}; + std::vector> subgraphs{}; + std::string description{}; + std::vector> buffers{}; + std::vector metadata_buffer{}; + std::vector> metadata{}; + std::vector> signature_defs{}; + ModelT() = default; + ModelT(const ModelT &o); + ModelT(ModelT&&) FLATBUFFERS_NOEXCEPT = default; + ModelT &operator=(ModelT o) FLATBUFFERS_NOEXCEPT; +}; + +struct Model FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef ModelT NativeTableType; + typedef ModelBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_VERSION = 4, + VT_OPERATOR_CODES = 6, + VT_SUBGRAPHS = 8, + VT_DESCRIPTION = 10, + VT_BUFFERS = 12, + VT_METADATA_BUFFER = 14, + VT_METADATA = 16, + VT_SIGNATURE_DEFS = 18 + }; + uint32_t version() const { + return GetField(VT_VERSION, 0); + } + const flatbuffers::Vector> *operator_codes() const { + return GetPointer> *>(VT_OPERATOR_CODES); + } + const flatbuffers::Vector> *subgraphs() const { + return GetPointer> *>(VT_SUBGRAPHS); + } + const flatbuffers::String *description() const { + return GetPointer(VT_DESCRIPTION); + } + const flatbuffers::Vector> *buffers() const { + return GetPointer> *>(VT_BUFFERS); + } + const flatbuffers::Vector *metadata_buffer() const { + return GetPointer *>(VT_METADATA_BUFFER); + } + const flatbuffers::Vector> *metadata() const { + return GetPointer> *>(VT_METADATA); + } + const flatbuffers::Vector> *signature_defs() const { + return GetPointer> *>(VT_SIGNATURE_DEFS); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_VERSION, 4) && + VerifyOffset(verifier, VT_OPERATOR_CODES) && + verifier.VerifyVector(operator_codes()) && + verifier.VerifyVectorOfTables(operator_codes()) && + VerifyOffset(verifier, VT_SUBGRAPHS) && + verifier.VerifyVector(subgraphs()) && + verifier.VerifyVectorOfTables(subgraphs()) && + VerifyOffset(verifier, VT_DESCRIPTION) && + verifier.VerifyString(description()) && + VerifyOffset(verifier, VT_BUFFERS) && + verifier.VerifyVector(buffers()) && + verifier.VerifyVectorOfTables(buffers()) && + VerifyOffset(verifier, VT_METADATA_BUFFER) && + verifier.VerifyVector(metadata_buffer()) && + VerifyOffset(verifier, VT_METADATA) && + verifier.VerifyVector(metadata()) && + verifier.VerifyVectorOfTables(metadata()) && + VerifyOffset(verifier, VT_SIGNATURE_DEFS) && + verifier.VerifyVector(signature_defs()) && + verifier.VerifyVectorOfTables(signature_defs()) && + verifier.EndTable(); + } + ModelT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(ModelT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset 
Pack(flatbuffers::FlatBufferBuilder &_fbb, const ModelT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct ModelBuilder { + typedef Model Table; + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_version(uint32_t version) { + fbb_.AddElement(Model::VT_VERSION, version, 0); + } + void add_operator_codes(flatbuffers::Offset>> operator_codes) { + fbb_.AddOffset(Model::VT_OPERATOR_CODES, operator_codes); + } + void add_subgraphs(flatbuffers::Offset>> subgraphs) { + fbb_.AddOffset(Model::VT_SUBGRAPHS, subgraphs); + } + void add_description(flatbuffers::Offset description) { + fbb_.AddOffset(Model::VT_DESCRIPTION, description); + } + void add_buffers(flatbuffers::Offset>> buffers) { + fbb_.AddOffset(Model::VT_BUFFERS, buffers); + } + void add_metadata_buffer(flatbuffers::Offset> metadata_buffer) { + fbb_.AddOffset(Model::VT_METADATA_BUFFER, metadata_buffer); + } + void add_metadata(flatbuffers::Offset>> metadata) { + fbb_.AddOffset(Model::VT_METADATA, metadata); + } + void add_signature_defs(flatbuffers::Offset>> signature_defs) { + fbb_.AddOffset(Model::VT_SIGNATURE_DEFS, signature_defs); + } + explicit ModelBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateModel( + flatbuffers::FlatBufferBuilder &_fbb, + uint32_t version = 0, + flatbuffers::Offset>> operator_codes = 0, + flatbuffers::Offset>> subgraphs = 0, + flatbuffers::Offset description = 0, + flatbuffers::Offset>> buffers = 0, + flatbuffers::Offset> metadata_buffer = 0, + flatbuffers::Offset>> metadata = 0, + flatbuffers::Offset>> signature_defs = 0) { + ModelBuilder builder_(_fbb); + builder_.add_signature_defs(signature_defs); + builder_.add_metadata(metadata); + builder_.add_metadata_buffer(metadata_buffer); + builder_.add_buffers(buffers); + builder_.add_description(description); + builder_.add_subgraphs(subgraphs); + builder_.add_operator_codes(operator_codes); + builder_.add_version(version); + return builder_.Finish(); +} + +inline flatbuffers::Offset CreateModelDirect( + flatbuffers::FlatBufferBuilder &_fbb, + uint32_t version = 0, + const std::vector> *operator_codes = nullptr, + const std::vector> *subgraphs = nullptr, + const char *description = nullptr, + const std::vector> *buffers = nullptr, + const std::vector *metadata_buffer = nullptr, + const std::vector> *metadata = nullptr, + const std::vector> *signature_defs = nullptr) { + auto operator_codes__ = operator_codes ? _fbb.CreateVector>(*operator_codes) : 0; + auto subgraphs__ = subgraphs ? _fbb.CreateVector>(*subgraphs) : 0; + auto description__ = description ? _fbb.CreateString(description) : 0; + auto buffers__ = buffers ? _fbb.CreateVector>(*buffers) : 0; + auto metadata_buffer__ = metadata_buffer ? _fbb.CreateVector(*metadata_buffer) : 0; + auto metadata__ = metadata ? _fbb.CreateVector>(*metadata) : 0; + auto signature_defs__ = signature_defs ? 
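+  // Sketch: assembling a complete Model from the pieces built above (illustrative;
+  // `op_codes`, `subgraphs` and `buffers` are assumed to be std::vectors of the
+  // corresponding offsets, and 3 is the current TFLite schema version):
+  //
+  //   auto model = tflite::CreateModelDirect(
+  //       fbb, /*version=*/3, &op_codes, &subgraphs,
+  //       "built by example", &buffers);
+  //   fbb.Finish(model, "TFL3");   // "TFL3" is the TFLite file identifier
+  //   const uint8_t *data = fbb.GetBufferPointer();
+  //   size_t size = fbb.GetSize();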
_fbb.CreateVector>(*signature_defs) : 0; + return tflite::CreateModel( + _fbb, + version, + operator_codes__, + subgraphs__, + description__, + buffers__, + metadata_buffer__, + metadata__, + signature_defs__); +} + +flatbuffers::Offset CreateModel(flatbuffers::FlatBufferBuilder &_fbb, const ModelT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +inline CustomQuantizationT *CustomQuantization::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new CustomQuantizationT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void CustomQuantization::UnPackTo(CustomQuantizationT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = custom(); if (_e) { _o->custom.resize(_e->size()); std::copy(_e->begin(), _e->end(), _o->custom.begin()); } } +} + +inline flatbuffers::Offset CustomQuantization::Pack(flatbuffers::FlatBufferBuilder &_fbb, const CustomQuantizationT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateCustomQuantization(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateCustomQuantization(flatbuffers::FlatBufferBuilder &_fbb, const CustomQuantizationT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const CustomQuantizationT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + _fbb.ForceVectorAlignment(_o->custom.size(), sizeof(uint8_t), 16); + auto _custom = _o->custom.size() ? _fbb.CreateVector(_o->custom) : 0; + return tflite::CreateCustomQuantization( + _fbb, + _custom); +} + +inline QuantizationParametersT *QuantizationParameters::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new QuantizationParametersT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void QuantizationParameters::UnPackTo(QuantizationParametersT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = min(); if (_e) { _o->min.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->min[_i] = _e->Get(_i); } } } + { auto _e = max(); if (_e) { _o->max.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->max[_i] = _e->Get(_i); } } } + { auto _e = scale(); if (_e) { _o->scale.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->scale[_i] = _e->Get(_i); } } } + { auto _e = zero_point(); if (_e) { _o->zero_point.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->zero_point[_i] = _e->Get(_i); } } } + { auto _e = details_type(); _o->details.type = _e; } + { auto _e = details(); if (_e) _o->details.value = tflite::QuantizationDetailsUnion::UnPack(_e, details_type(), _resolver); } + { auto _e = quantized_dimension(); _o->quantized_dimension = _e; } +} + +inline flatbuffers::Offset QuantizationParameters::Pack(flatbuffers::FlatBufferBuilder &_fbb, const QuantizationParametersT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateQuantizationParameters(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateQuantizationParameters(flatbuffers::FlatBufferBuilder &_fbb, const QuantizationParametersT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder 
*__fbb; const QuantizationParametersT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _min = _o->min.size() ? _fbb.CreateVector(_o->min) : 0; + auto _max = _o->max.size() ? _fbb.CreateVector(_o->max) : 0; + auto _scale = _o->scale.size() ? _fbb.CreateVector(_o->scale) : 0; + auto _zero_point = _o->zero_point.size() ? _fbb.CreateVector(_o->zero_point) : 0; + auto _details_type = _o->details.type; + auto _details = _o->details.Pack(_fbb); + auto _quantized_dimension = _o->quantized_dimension; + return tflite::CreateQuantizationParameters( + _fbb, + _min, + _max, + _scale, + _zero_point, + _details_type, + _details, + _quantized_dimension); +} + +inline Int32VectorT *Int32Vector::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new Int32VectorT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void Int32Vector::UnPackTo(Int32VectorT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = values(); if (_e) { _o->values.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->values[_i] = _e->Get(_i); } } } +} + +inline flatbuffers::Offset Int32Vector::Pack(flatbuffers::FlatBufferBuilder &_fbb, const Int32VectorT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateInt32Vector(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateInt32Vector(flatbuffers::FlatBufferBuilder &_fbb, const Int32VectorT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const Int32VectorT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _values = _o->values.size() ? _fbb.CreateVector(_o->values) : 0; + return tflite::CreateInt32Vector( + _fbb, + _values); +} + +inline Uint16VectorT *Uint16Vector::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new Uint16VectorT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void Uint16Vector::UnPackTo(Uint16VectorT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = values(); if (_e) { _o->values.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->values[_i] = _e->Get(_i); } } } +} + +inline flatbuffers::Offset Uint16Vector::Pack(flatbuffers::FlatBufferBuilder &_fbb, const Uint16VectorT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateUint16Vector(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateUint16Vector(flatbuffers::FlatBufferBuilder &_fbb, const Uint16VectorT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const Uint16VectorT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + _fbb.ForceVectorAlignment(_o->values.size(), sizeof(uint16_t), 4); + auto _values = _o->values.size() ? 
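+  // Sketch of the object ("T") API implemented by these UnPack/Pack pairs: unpack a
+  // table into a mutable native object, edit it, then re-serialize. Assumes `params`
+  // points at a verified tflite::QuantizationParameters table:
+  //
+  //   std::unique_ptr<tflite::QuantizationParametersT> obj(params->UnPack());
+  //   obj->quantized_dimension = 0;   // mutate the native object
+  //   flatbuffers::FlatBufferBuilder fbb;
+  //   auto repacked = tflite::QuantizationParameters::Pack(fbb, obj.get());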
_fbb.CreateVector(_o->values) : 0; + return tflite::CreateUint16Vector( + _fbb, + _values); +} + +inline Uint8VectorT *Uint8Vector::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new Uint8VectorT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void Uint8Vector::UnPackTo(Uint8VectorT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = values(); if (_e) { _o->values.resize(_e->size()); std::copy(_e->begin(), _e->end(), _o->values.begin()); } } +} + +inline flatbuffers::Offset Uint8Vector::Pack(flatbuffers::FlatBufferBuilder &_fbb, const Uint8VectorT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateUint8Vector(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateUint8Vector(flatbuffers::FlatBufferBuilder &_fbb, const Uint8VectorT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const Uint8VectorT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + _fbb.ForceVectorAlignment(_o->values.size(), sizeof(uint8_t), 4); + auto _values = _o->values.size() ? _fbb.CreateVector(_o->values) : 0; + return tflite::CreateUint8Vector( + _fbb, + _values); +} + +inline DimensionMetadataT *DimensionMetadata::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new DimensionMetadataT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void DimensionMetadata::UnPackTo(DimensionMetadataT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = format(); _o->format = _e; } + { auto _e = dense_size(); _o->dense_size = _e; } + { auto _e = array_segments_type(); _o->array_segments.type = _e; } + { auto _e = array_segments(); if (_e) _o->array_segments.value = tflite::SparseIndexVectorUnion::UnPack(_e, array_segments_type(), _resolver); } + { auto _e = array_indices_type(); _o->array_indices.type = _e; } + { auto _e = array_indices(); if (_e) _o->array_indices.value = tflite::SparseIndexVectorUnion::UnPack(_e, array_indices_type(), _resolver); } +} + +inline flatbuffers::Offset DimensionMetadata::Pack(flatbuffers::FlatBufferBuilder &_fbb, const DimensionMetadataT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateDimensionMetadata(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateDimensionMetadata(flatbuffers::FlatBufferBuilder &_fbb, const DimensionMetadataT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const DimensionMetadataT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _format = _o->format; + auto _dense_size = _o->dense_size; + auto _array_segments_type = _o->array_segments.type; + auto _array_segments = _o->array_segments.Pack(_fbb); + auto _array_indices_type = _o->array_indices.type; + auto _array_indices = _o->array_indices.Pack(_fbb); + return tflite::CreateDimensionMetadata( + _fbb, + _format, + _dense_size, + _array_segments_type, + _array_segments, + _array_indices_type, + _array_indices); +} + +inline SparsityParametersT::SparsityParametersT(const SparsityParametersT &o) + : traversal_order(o.traversal_order), + block_map(o.block_map) { + dim_metadata.reserve(o.dim_metadata.size()); + 
for (const auto &dim_metadata_ : o.dim_metadata) { dim_metadata.emplace_back((dim_metadata_) ? new tflite::DimensionMetadataT(*dim_metadata_) : nullptr); }
+}
+
+inline SparsityParametersT &SparsityParametersT::operator=(SparsityParametersT o) FLATBUFFERS_NOEXCEPT {
+  std::swap(traversal_order, o.traversal_order);
+  std::swap(block_map, o.block_map);
+  std::swap(dim_metadata, o.dim_metadata);
+  return *this;
+}
+
+inline SparsityParametersT *SparsityParameters::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = std::unique_ptr<SparsityParametersT>(new SparsityParametersT());
+  UnPackTo(_o.get(), _resolver);
+  return _o.release();
+}
+
+inline void SparsityParameters::UnPackTo(SparsityParametersT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;
+  (void)_resolver;
+  { auto _e = traversal_order(); if (_e) { _o->traversal_order.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->traversal_order[_i] = _e->Get(_i); } } }
+  { auto _e = block_map(); if (_e) { _o->block_map.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->block_map[_i] = _e->Get(_i); } } }
+  { auto _e = dim_metadata(); if (_e) { _o->dim_metadata.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { if(_o->dim_metadata[_i]) { _e->Get(_i)->UnPackTo(_o->dim_metadata[_i].get(), _resolver); } else { _o->dim_metadata[_i] = std::unique_ptr<tflite::DimensionMetadataT>(_e->Get(_i)->UnPack(_resolver)); }; } } }
+}
+
+inline flatbuffers::Offset<SparsityParameters> SparsityParameters::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SparsityParametersT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateSparsityParameters(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<SparsityParameters> CreateSparsityParameters(flatbuffers::FlatBufferBuilder &_fbb, const SparsityParametersT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;
+  (void)_o;
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SparsityParametersT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
+  auto _traversal_order = _o->traversal_order.size() ? _fbb.CreateVector(_o->traversal_order) : 0;
+  auto _block_map = _o->block_map.size() ? _fbb.CreateVector(_o->block_map) : 0;
+  auto _dim_metadata = _o->dim_metadata.size() ? _fbb.CreateVector<flatbuffers::Offset<tflite::DimensionMetadata>> (_o->dim_metadata.size(), [](size_t i, _VectorArgs *__va) { return CreateDimensionMetadata(*__va->__fbb, __va->__o->dim_metadata[i].get(), __va->__rehasher); }, &_va ) : 0;
+  return tflite::CreateSparsityParameters(
+      _fbb,
+      _traversal_order,
+      _block_map,
+      _dim_metadata);
+}
+
+inline VariantSubTypeT *VariantSubType::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = std::unique_ptr<VariantSubTypeT>(new VariantSubTypeT());
+  UnPackTo(_o.get(), _resolver);
+  return _o.release();
+}
+
+inline void VariantSubType::UnPackTo(VariantSubTypeT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;
+  (void)_resolver;
+  { auto _e = shape(); if (_e) { _o->shape.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->shape[_i] = _e->Get(_i); } } }
+  { auto _e = type(); _o->type = _e; }
+  { auto _e = has_rank(); _o->has_rank = _e; }
+}
+
+inline flatbuffers::Offset<VariantSubType> VariantSubType::Pack(flatbuffers::FlatBufferBuilder &_fbb, const VariantSubTypeT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateVariantSubType(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<VariantSubType> CreateVariantSubType(flatbuffers::FlatBufferBuilder &_fbb, const VariantSubTypeT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;
+  (void)_o;
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const VariantSubTypeT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
+  auto _shape = _o->shape.size() ? _fbb.CreateVector(_o->shape) : 0;
+  auto _type = _o->type;
+  auto _has_rank = _o->has_rank;
+  return tflite::CreateVariantSubType(
+      _fbb,
+      _shape,
+      _type,
+      _has_rank);
+}
+
+inline TensorT::TensorT(const TensorT &o)
+      : shape(o.shape),
+        type(o.type),
+        buffer(o.buffer),
+        name(o.name),
+        quantization((o.quantization) ? new tflite::QuantizationParametersT(*o.quantization) : nullptr),
+        is_variable(o.is_variable),
+        sparsity((o.sparsity) ? new tflite::SparsityParametersT(*o.sparsity) : nullptr),
+        shape_signature(o.shape_signature),
+        has_rank(o.has_rank) {
+  variant_tensors.reserve(o.variant_tensors.size());
+  for (const auto &variant_tensors_ : o.variant_tensors) { variant_tensors.emplace_back((variant_tensors_) ? new tflite::VariantSubTypeT(*variant_tensors_) : nullptr); }
+}
+
+inline TensorT &TensorT::operator=(TensorT o) FLATBUFFERS_NOEXCEPT {
+  std::swap(shape, o.shape);
+  std::swap(type, o.type);
+  std::swap(buffer, o.buffer);
+  std::swap(name, o.name);
+  std::swap(quantization, o.quantization);
+  std::swap(is_variable, o.is_variable);
+  std::swap(sparsity, o.sparsity);
+  std::swap(shape_signature, o.shape_signature);
+  std::swap(has_rank, o.has_rank);
+  std::swap(variant_tensors, o.variant_tensors);
+  return *this;
+}
+
+inline TensorT *Tensor::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = std::unique_ptr<TensorT>(new TensorT());
+  UnPackTo(_o.get(), _resolver);
+  return _o.release();
+}
+
+inline void Tensor::UnPackTo(TensorT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;
+  (void)_resolver;
+  { auto _e = shape(); if (_e) { _o->shape.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->shape[_i] = _e->Get(_i); } } }
+  { auto _e = type(); _o->type = _e; }
+  { auto _e = buffer(); _o->buffer = _e; }
+  { auto _e = name(); if (_e) _o->name = _e->str(); }
+  { auto _e = quantization(); if (_e) { if(_o->quantization) { _e->UnPackTo(_o->quantization.get(), _resolver); } else { _o->quantization = std::unique_ptr<tflite::QuantizationParametersT>(_e->UnPack(_resolver)); } } }
+  { auto _e = is_variable(); _o->is_variable = _e; }
+  { auto _e = sparsity(); if (_e) { if(_o->sparsity) { _e->UnPackTo(_o->sparsity.get(), _resolver); } else { _o->sparsity = std::unique_ptr<tflite::SparsityParametersT>(_e->UnPack(_resolver)); } } }
+  { auto _e = shape_signature(); if (_e) { _o->shape_signature.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->shape_signature[_i] = _e->Get(_i); } } }
+  { auto _e = has_rank(); _o->has_rank = _e; }
+  { auto _e = variant_tensors(); if (_e) { _o->variant_tensors.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { if(_o->variant_tensors[_i]) { _e->Get(_i)->UnPackTo(_o->variant_tensors[_i].get(), _resolver); } else { _o->variant_tensors[_i] = std::unique_ptr<tflite::VariantSubTypeT>(_e->Get(_i)->UnPack(_resolver)); }; } } }
+}
+
+inline flatbuffers::Offset<Tensor> Tensor::Pack(flatbuffers::FlatBufferBuilder &_fbb, const TensorT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateTensor(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<Tensor> CreateTensor(flatbuffers::FlatBufferBuilder &_fbb, const TensorT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;
+  (void)_o;
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const TensorT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
+  auto _shape = _o->shape.size() ? _fbb.CreateVector(_o->shape) : 0;
+  auto _type = _o->type;
+  auto _buffer = _o->buffer;
+  auto _name = _o->name.empty() ? 0 : _fbb.CreateString(_o->name);
+  auto _quantization = _o->quantization ? CreateQuantizationParameters(_fbb, _o->quantization.get(), _rehasher) : 0;
+  auto _is_variable = _o->is_variable;
+  auto _sparsity = _o->sparsity ? CreateSparsityParameters(_fbb, _o->sparsity.get(), _rehasher) : 0;
+  auto _shape_signature = _o->shape_signature.size() ? _fbb.CreateVector(_o->shape_signature) : 0;
+  auto _has_rank = _o->has_rank;
+  auto _variant_tensors = _o->variant_tensors.size() ? 
_fbb.CreateVector<flatbuffers::Offset<tflite::VariantSubType>> (_o->variant_tensors.size(), [](size_t i, _VectorArgs *__va) { return CreateVariantSubType(*__va->__fbb, __va->__o->variant_tensors[i].get(), __va->__rehasher); }, &_va ) : 0;
+  return tflite::CreateTensor(
+      _fbb,
+      _shape,
+      _type,
+      _buffer,
+      _name,
+      _quantization,
+      _is_variable,
+      _sparsity,
+      _shape_signature,
+      _has_rank,
+      _variant_tensors);
+}
+
+inline Conv2DOptionsT *Conv2DOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = std::unique_ptr<Conv2DOptionsT>(new Conv2DOptionsT());
+  UnPackTo(_o.get(), _resolver);
+  return _o.release();
+}
+
+inline void Conv2DOptions::UnPackTo(Conv2DOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;
+  (void)_resolver;
+  { auto _e = padding(); _o->padding = _e; }
+  { auto _e = stride_w(); _o->stride_w = _e; }
+  { auto _e = stride_h(); _o->stride_h = _e; }
+  { auto _e = fused_activation_function(); _o->fused_activation_function = _e; }
+  { auto _e = dilation_w_factor(); _o->dilation_w_factor = _e; }
+  { auto _e = dilation_h_factor(); _o->dilation_h_factor = _e; }
+}
+
+inline flatbuffers::Offset<Conv2DOptions> Conv2DOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const Conv2DOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateConv2DOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<Conv2DOptions> CreateConv2DOptions(flatbuffers::FlatBufferBuilder &_fbb, const Conv2DOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;
+  (void)_o;
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const Conv2DOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
+  auto _padding = _o->padding;
+  auto _stride_w = _o->stride_w;
+  auto _stride_h = _o->stride_h;
+  auto _fused_activation_function = _o->fused_activation_function;
+  auto _dilation_w_factor = _o->dilation_w_factor;
+  auto _dilation_h_factor = _o->dilation_h_factor;
+  return tflite::CreateConv2DOptions(
+      _fbb,
+      _padding,
+      _stride_w,
+      _stride_h,
+      _fused_activation_function,
+      _dilation_w_factor,
+      _dilation_h_factor);
+}
+
+inline Conv3DOptionsT *Conv3DOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = std::unique_ptr<Conv3DOptionsT>(new Conv3DOptionsT());
+  UnPackTo(_o.get(), _resolver);
+  return _o.release();
+}
+
+inline void Conv3DOptions::UnPackTo(Conv3DOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;
+  (void)_resolver;
+  { auto _e = padding(); _o->padding = _e; }
+  { auto _e = stride_d(); _o->stride_d = _e; }
+  { auto _e = stride_w(); _o->stride_w = _e; }
+  { auto _e = stride_h(); _o->stride_h = _e; }
+  { auto _e = fused_activation_function(); _o->fused_activation_function = _e; }
+  { auto _e = dilation_d_factor(); _o->dilation_d_factor = _e; }
+  { auto _e = dilation_w_factor(); _o->dilation_w_factor = _e; }
+  { auto _e = dilation_h_factor(); _o->dilation_h_factor = _e; }
+}
+
+inline flatbuffers::Offset<Conv3DOptions> Conv3DOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const Conv3DOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateConv3DOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<Conv3DOptions> CreateConv3DOptions(flatbuffers::FlatBufferBuilder &_fbb, const Conv3DOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;
+  (void)_o;
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const Conv3DOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher};
(void)_va; + auto _padding = _o->padding; + auto _stride_d = _o->stride_d; + auto _stride_w = _o->stride_w; + auto _stride_h = _o->stride_h; + auto _fused_activation_function = _o->fused_activation_function; + auto _dilation_d_factor = _o->dilation_d_factor; + auto _dilation_w_factor = _o->dilation_w_factor; + auto _dilation_h_factor = _o->dilation_h_factor; + return tflite::CreateConv3DOptions( + _fbb, + _padding, + _stride_d, + _stride_w, + _stride_h, + _fused_activation_function, + _dilation_d_factor, + _dilation_w_factor, + _dilation_h_factor); +} + +inline Pool2DOptionsT *Pool2DOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new Pool2DOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void Pool2DOptions::UnPackTo(Pool2DOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = padding(); _o->padding = _e; } + { auto _e = stride_w(); _o->stride_w = _e; } + { auto _e = stride_h(); _o->stride_h = _e; } + { auto _e = filter_width(); _o->filter_width = _e; } + { auto _e = filter_height(); _o->filter_height = _e; } + { auto _e = fused_activation_function(); _o->fused_activation_function = _e; } +} + +inline flatbuffers::Offset Pool2DOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const Pool2DOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreatePool2DOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreatePool2DOptions(flatbuffers::FlatBufferBuilder &_fbb, const Pool2DOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const Pool2DOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _padding = _o->padding; + auto _stride_w = _o->stride_w; + auto _stride_h = _o->stride_h; + auto _filter_width = _o->filter_width; + auto _filter_height = _o->filter_height; + auto _fused_activation_function = _o->fused_activation_function; + return tflite::CreatePool2DOptions( + _fbb, + _padding, + _stride_w, + _stride_h, + _filter_width, + _filter_height, + _fused_activation_function); +} + +inline DepthwiseConv2DOptionsT *DepthwiseConv2DOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new DepthwiseConv2DOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void DepthwiseConv2DOptions::UnPackTo(DepthwiseConv2DOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = padding(); _o->padding = _e; } + { auto _e = stride_w(); _o->stride_w = _e; } + { auto _e = stride_h(); _o->stride_h = _e; } + { auto _e = depth_multiplier(); _o->depth_multiplier = _e; } + { auto _e = fused_activation_function(); _o->fused_activation_function = _e; } + { auto _e = dilation_w_factor(); _o->dilation_w_factor = _e; } + { auto _e = dilation_h_factor(); _o->dilation_h_factor = _e; } +} + +inline flatbuffers::Offset DepthwiseConv2DOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const DepthwiseConv2DOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateDepthwiseConv2DOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateDepthwiseConv2DOptions(flatbuffers::FlatBufferBuilder &_fbb, const DepthwiseConv2DOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; 
+ struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const DepthwiseConv2DOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _padding = _o->padding; + auto _stride_w = _o->stride_w; + auto _stride_h = _o->stride_h; + auto _depth_multiplier = _o->depth_multiplier; + auto _fused_activation_function = _o->fused_activation_function; + auto _dilation_w_factor = _o->dilation_w_factor; + auto _dilation_h_factor = _o->dilation_h_factor; + return tflite::CreateDepthwiseConv2DOptions( + _fbb, + _padding, + _stride_w, + _stride_h, + _depth_multiplier, + _fused_activation_function, + _dilation_w_factor, + _dilation_h_factor); +} + +inline ConcatEmbeddingsOptionsT *ConcatEmbeddingsOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new ConcatEmbeddingsOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void ConcatEmbeddingsOptions::UnPackTo(ConcatEmbeddingsOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = num_channels(); _o->num_channels = _e; } + { auto _e = num_columns_per_channel(); if (_e) { _o->num_columns_per_channel.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->num_columns_per_channel[_i] = _e->Get(_i); } } } + { auto _e = embedding_dim_per_channel(); if (_e) { _o->embedding_dim_per_channel.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->embedding_dim_per_channel[_i] = _e->Get(_i); } } } +} + +inline flatbuffers::Offset ConcatEmbeddingsOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ConcatEmbeddingsOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateConcatEmbeddingsOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateConcatEmbeddingsOptions(flatbuffers::FlatBufferBuilder &_fbb, const ConcatEmbeddingsOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ConcatEmbeddingsOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _num_channels = _o->num_channels; + auto _num_columns_per_channel = _o->num_columns_per_channel.size() ? _fbb.CreateVector(_o->num_columns_per_channel) : 0; + auto _embedding_dim_per_channel = _o->embedding_dim_per_channel.size() ? 
_fbb.CreateVector(_o->embedding_dim_per_channel) : 0; + return tflite::CreateConcatEmbeddingsOptions( + _fbb, + _num_channels, + _num_columns_per_channel, + _embedding_dim_per_channel); +} + +inline LSHProjectionOptionsT *LSHProjectionOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new LSHProjectionOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void LSHProjectionOptions::UnPackTo(LSHProjectionOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = type(); _o->type = _e; } +} + +inline flatbuffers::Offset LSHProjectionOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const LSHProjectionOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateLSHProjectionOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateLSHProjectionOptions(flatbuffers::FlatBufferBuilder &_fbb, const LSHProjectionOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const LSHProjectionOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _type = _o->type; + return tflite::CreateLSHProjectionOptions( + _fbb, + _type); +} + +inline SVDFOptionsT *SVDFOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new SVDFOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void SVDFOptions::UnPackTo(SVDFOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = rank(); _o->rank = _e; } + { auto _e = fused_activation_function(); _o->fused_activation_function = _e; } + { auto _e = asymmetric_quantize_inputs(); _o->asymmetric_quantize_inputs = _e; } +} + +inline flatbuffers::Offset SVDFOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SVDFOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateSVDFOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateSVDFOptions(flatbuffers::FlatBufferBuilder &_fbb, const SVDFOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SVDFOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _rank = _o->rank; + auto _fused_activation_function = _o->fused_activation_function; + auto _asymmetric_quantize_inputs = _o->asymmetric_quantize_inputs; + return tflite::CreateSVDFOptions( + _fbb, + _rank, + _fused_activation_function, + _asymmetric_quantize_inputs); +} + +inline RNNOptionsT *RNNOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new RNNOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void RNNOptions::UnPackTo(RNNOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = fused_activation_function(); _o->fused_activation_function = _e; } + { auto _e = asymmetric_quantize_inputs(); _o->asymmetric_quantize_inputs = _e; } +} + +inline flatbuffers::Offset RNNOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const RNNOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateRNNOptions(_fbb, _o, _rehasher); +} + +inline 
flatbuffers::Offset CreateRNNOptions(flatbuffers::FlatBufferBuilder &_fbb, const RNNOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const RNNOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _fused_activation_function = _o->fused_activation_function; + auto _asymmetric_quantize_inputs = _o->asymmetric_quantize_inputs; + return tflite::CreateRNNOptions( + _fbb, + _fused_activation_function, + _asymmetric_quantize_inputs); +} + +inline SequenceRNNOptionsT *SequenceRNNOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new SequenceRNNOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void SequenceRNNOptions::UnPackTo(SequenceRNNOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = time_major(); _o->time_major = _e; } + { auto _e = fused_activation_function(); _o->fused_activation_function = _e; } + { auto _e = asymmetric_quantize_inputs(); _o->asymmetric_quantize_inputs = _e; } +} + +inline flatbuffers::Offset SequenceRNNOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SequenceRNNOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateSequenceRNNOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateSequenceRNNOptions(flatbuffers::FlatBufferBuilder &_fbb, const SequenceRNNOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SequenceRNNOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _time_major = _o->time_major; + auto _fused_activation_function = _o->fused_activation_function; + auto _asymmetric_quantize_inputs = _o->asymmetric_quantize_inputs; + return tflite::CreateSequenceRNNOptions( + _fbb, + _time_major, + _fused_activation_function, + _asymmetric_quantize_inputs); +} + +inline BidirectionalSequenceRNNOptionsT *BidirectionalSequenceRNNOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new BidirectionalSequenceRNNOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void BidirectionalSequenceRNNOptions::UnPackTo(BidirectionalSequenceRNNOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = time_major(); _o->time_major = _e; } + { auto _e = fused_activation_function(); _o->fused_activation_function = _e; } + { auto _e = merge_outputs(); _o->merge_outputs = _e; } + { auto _e = asymmetric_quantize_inputs(); _o->asymmetric_quantize_inputs = _e; } +} + +inline flatbuffers::Offset BidirectionalSequenceRNNOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const BidirectionalSequenceRNNOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateBidirectionalSequenceRNNOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateBidirectionalSequenceRNNOptions(flatbuffers::FlatBufferBuilder &_fbb, const BidirectionalSequenceRNNOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const BidirectionalSequenceRNNOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; 
} _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _time_major = _o->time_major; + auto _fused_activation_function = _o->fused_activation_function; + auto _merge_outputs = _o->merge_outputs; + auto _asymmetric_quantize_inputs = _o->asymmetric_quantize_inputs; + return tflite::CreateBidirectionalSequenceRNNOptions( + _fbb, + _time_major, + _fused_activation_function, + _merge_outputs, + _asymmetric_quantize_inputs); +} + +inline FullyConnectedOptionsT *FullyConnectedOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new FullyConnectedOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void FullyConnectedOptions::UnPackTo(FullyConnectedOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = fused_activation_function(); _o->fused_activation_function = _e; } + { auto _e = weights_format(); _o->weights_format = _e; } + { auto _e = keep_num_dims(); _o->keep_num_dims = _e; } + { auto _e = asymmetric_quantize_inputs(); _o->asymmetric_quantize_inputs = _e; } +} + +inline flatbuffers::Offset FullyConnectedOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const FullyConnectedOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateFullyConnectedOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateFullyConnectedOptions(flatbuffers::FlatBufferBuilder &_fbb, const FullyConnectedOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const FullyConnectedOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _fused_activation_function = _o->fused_activation_function; + auto _weights_format = _o->weights_format; + auto _keep_num_dims = _o->keep_num_dims; + auto _asymmetric_quantize_inputs = _o->asymmetric_quantize_inputs; + return tflite::CreateFullyConnectedOptions( + _fbb, + _fused_activation_function, + _weights_format, + _keep_num_dims, + _asymmetric_quantize_inputs); +} + +inline SoftmaxOptionsT *SoftmaxOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new SoftmaxOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void SoftmaxOptions::UnPackTo(SoftmaxOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = beta(); _o->beta = _e; } +} + +inline flatbuffers::Offset SoftmaxOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SoftmaxOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateSoftmaxOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateSoftmaxOptions(flatbuffers::FlatBufferBuilder &_fbb, const SoftmaxOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SoftmaxOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _beta = _o->beta; + return tflite::CreateSoftmaxOptions( + _fbb, + _beta); +} + +inline ConcatenationOptionsT *ConcatenationOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new ConcatenationOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void 
ConcatenationOptions::UnPackTo(ConcatenationOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = axis(); _o->axis = _e; } + { auto _e = fused_activation_function(); _o->fused_activation_function = _e; } +} + +inline flatbuffers::Offset ConcatenationOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ConcatenationOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateConcatenationOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateConcatenationOptions(flatbuffers::FlatBufferBuilder &_fbb, const ConcatenationOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ConcatenationOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _axis = _o->axis; + auto _fused_activation_function = _o->fused_activation_function; + return tflite::CreateConcatenationOptions( + _fbb, + _axis, + _fused_activation_function); +} + +inline AddOptionsT *AddOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new AddOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void AddOptions::UnPackTo(AddOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = fused_activation_function(); _o->fused_activation_function = _e; } + { auto _e = pot_scale_int16(); _o->pot_scale_int16 = _e; } +} + +inline flatbuffers::Offset AddOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const AddOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateAddOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateAddOptions(flatbuffers::FlatBufferBuilder &_fbb, const AddOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const AddOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _fused_activation_function = _o->fused_activation_function; + auto _pot_scale_int16 = _o->pot_scale_int16; + return tflite::CreateAddOptions( + _fbb, + _fused_activation_function, + _pot_scale_int16); +} + +inline MulOptionsT *MulOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new MulOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void MulOptions::UnPackTo(MulOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = fused_activation_function(); _o->fused_activation_function = _e; } +} + +inline flatbuffers::Offset MulOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const MulOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateMulOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateMulOptions(flatbuffers::FlatBufferBuilder &_fbb, const MulOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const MulOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _fused_activation_function = _o->fused_activation_function; + return tflite::CreateMulOptions( + _fbb, + 
_fused_activation_function); +} + +inline L2NormOptionsT *L2NormOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new L2NormOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void L2NormOptions::UnPackTo(L2NormOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = fused_activation_function(); _o->fused_activation_function = _e; } +} + +inline flatbuffers::Offset L2NormOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const L2NormOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateL2NormOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateL2NormOptions(flatbuffers::FlatBufferBuilder &_fbb, const L2NormOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const L2NormOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _fused_activation_function = _o->fused_activation_function; + return tflite::CreateL2NormOptions( + _fbb, + _fused_activation_function); +} + +inline LocalResponseNormalizationOptionsT *LocalResponseNormalizationOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new LocalResponseNormalizationOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void LocalResponseNormalizationOptions::UnPackTo(LocalResponseNormalizationOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = radius(); _o->radius = _e; } + { auto _e = bias(); _o->bias = _e; } + { auto _e = alpha(); _o->alpha = _e; } + { auto _e = beta(); _o->beta = _e; } +} + +inline flatbuffers::Offset LocalResponseNormalizationOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const LocalResponseNormalizationOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateLocalResponseNormalizationOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateLocalResponseNormalizationOptions(flatbuffers::FlatBufferBuilder &_fbb, const LocalResponseNormalizationOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const LocalResponseNormalizationOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _radius = _o->radius; + auto _bias = _o->bias; + auto _alpha = _o->alpha; + auto _beta = _o->beta; + return tflite::CreateLocalResponseNormalizationOptions( + _fbb, + _radius, + _bias, + _alpha, + _beta); +} + +inline LSTMOptionsT *LSTMOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new LSTMOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void LSTMOptions::UnPackTo(LSTMOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = fused_activation_function(); _o->fused_activation_function = _e; } + { auto _e = cell_clip(); _o->cell_clip = _e; } + { auto _e = proj_clip(); _o->proj_clip = _e; } + { auto _e = kernel_type(); _o->kernel_type = _e; } + { auto _e = asymmetric_quantize_inputs(); _o->asymmetric_quantize_inputs = _e; } +} + +inline flatbuffers::Offset 
LSTMOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const LSTMOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateLSTMOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateLSTMOptions(flatbuffers::FlatBufferBuilder &_fbb, const LSTMOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const LSTMOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _fused_activation_function = _o->fused_activation_function; + auto _cell_clip = _o->cell_clip; + auto _proj_clip = _o->proj_clip; + auto _kernel_type = _o->kernel_type; + auto _asymmetric_quantize_inputs = _o->asymmetric_quantize_inputs; + return tflite::CreateLSTMOptions( + _fbb, + _fused_activation_function, + _cell_clip, + _proj_clip, + _kernel_type, + _asymmetric_quantize_inputs); +} + +inline UnidirectionalSequenceLSTMOptionsT *UnidirectionalSequenceLSTMOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new UnidirectionalSequenceLSTMOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void UnidirectionalSequenceLSTMOptions::UnPackTo(UnidirectionalSequenceLSTMOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = fused_activation_function(); _o->fused_activation_function = _e; } + { auto _e = cell_clip(); _o->cell_clip = _e; } + { auto _e = proj_clip(); _o->proj_clip = _e; } + { auto _e = time_major(); _o->time_major = _e; } + { auto _e = asymmetric_quantize_inputs(); _o->asymmetric_quantize_inputs = _e; } + { auto _e = diagonal_recurrent_tensors(); _o->diagonal_recurrent_tensors = _e; } +} + +inline flatbuffers::Offset UnidirectionalSequenceLSTMOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const UnidirectionalSequenceLSTMOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateUnidirectionalSequenceLSTMOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateUnidirectionalSequenceLSTMOptions(flatbuffers::FlatBufferBuilder &_fbb, const UnidirectionalSequenceLSTMOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const UnidirectionalSequenceLSTMOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _fused_activation_function = _o->fused_activation_function; + auto _cell_clip = _o->cell_clip; + auto _proj_clip = _o->proj_clip; + auto _time_major = _o->time_major; + auto _asymmetric_quantize_inputs = _o->asymmetric_quantize_inputs; + auto _diagonal_recurrent_tensors = _o->diagonal_recurrent_tensors; + return tflite::CreateUnidirectionalSequenceLSTMOptions( + _fbb, + _fused_activation_function, + _cell_clip, + _proj_clip, + _time_major, + _asymmetric_quantize_inputs, + _diagonal_recurrent_tensors); +} + +inline BidirectionalSequenceLSTMOptionsT *BidirectionalSequenceLSTMOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new BidirectionalSequenceLSTMOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void BidirectionalSequenceLSTMOptions::UnPackTo(BidirectionalSequenceLSTMOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = 
fused_activation_function(); _o->fused_activation_function = _e; } + { auto _e = cell_clip(); _o->cell_clip = _e; } + { auto _e = proj_clip(); _o->proj_clip = _e; } + { auto _e = merge_outputs(); _o->merge_outputs = _e; } + { auto _e = time_major(); _o->time_major = _e; } + { auto _e = asymmetric_quantize_inputs(); _o->asymmetric_quantize_inputs = _e; } +} + +inline flatbuffers::Offset BidirectionalSequenceLSTMOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const BidirectionalSequenceLSTMOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateBidirectionalSequenceLSTMOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateBidirectionalSequenceLSTMOptions(flatbuffers::FlatBufferBuilder &_fbb, const BidirectionalSequenceLSTMOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const BidirectionalSequenceLSTMOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _fused_activation_function = _o->fused_activation_function; + auto _cell_clip = _o->cell_clip; + auto _proj_clip = _o->proj_clip; + auto _merge_outputs = _o->merge_outputs; + auto _time_major = _o->time_major; + auto _asymmetric_quantize_inputs = _o->asymmetric_quantize_inputs; + return tflite::CreateBidirectionalSequenceLSTMOptions( + _fbb, + _fused_activation_function, + _cell_clip, + _proj_clip, + _merge_outputs, + _time_major, + _asymmetric_quantize_inputs); +} + +inline ResizeBilinearOptionsT *ResizeBilinearOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new ResizeBilinearOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void ResizeBilinearOptions::UnPackTo(ResizeBilinearOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = align_corners(); _o->align_corners = _e; } + { auto _e = half_pixel_centers(); _o->half_pixel_centers = _e; } +} + +inline flatbuffers::Offset ResizeBilinearOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ResizeBilinearOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateResizeBilinearOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateResizeBilinearOptions(flatbuffers::FlatBufferBuilder &_fbb, const ResizeBilinearOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ResizeBilinearOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _align_corners = _o->align_corners; + auto _half_pixel_centers = _o->half_pixel_centers; + return tflite::CreateResizeBilinearOptions( + _fbb, + _align_corners, + _half_pixel_centers); +} + +inline ResizeNearestNeighborOptionsT *ResizeNearestNeighborOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new ResizeNearestNeighborOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void ResizeNearestNeighborOptions::UnPackTo(ResizeNearestNeighborOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = align_corners(); _o->align_corners = _e; } + { auto _e = half_pixel_centers(); _o->half_pixel_centers = _e; } +} + +inline flatbuffers::Offset 
ResizeNearestNeighborOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ResizeNearestNeighborOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateResizeNearestNeighborOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateResizeNearestNeighborOptions(flatbuffers::FlatBufferBuilder &_fbb, const ResizeNearestNeighborOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ResizeNearestNeighborOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _align_corners = _o->align_corners; + auto _half_pixel_centers = _o->half_pixel_centers; + return tflite::CreateResizeNearestNeighborOptions( + _fbb, + _align_corners, + _half_pixel_centers); +} + +inline CallOptionsT *CallOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new CallOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void CallOptions::UnPackTo(CallOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = subgraph(); _o->subgraph = _e; } +} + +inline flatbuffers::Offset CallOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const CallOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateCallOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateCallOptions(flatbuffers::FlatBufferBuilder &_fbb, const CallOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const CallOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _subgraph = _o->subgraph; + return tflite::CreateCallOptions( + _fbb, + _subgraph); +} + +inline PadOptionsT *PadOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new PadOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void PadOptions::UnPackTo(PadOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset PadOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const PadOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreatePadOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreatePadOptions(flatbuffers::FlatBufferBuilder &_fbb, const PadOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const PadOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreatePadOptions( + _fbb); +} + +inline PadV2OptionsT *PadV2Options::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new PadV2OptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void PadV2Options::UnPackTo(PadV2OptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset PadV2Options::Pack(flatbuffers::FlatBufferBuilder &_fbb, const PadV2OptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreatePadV2Options(_fbb, _o, _rehasher); +} + +inline 
flatbuffers::Offset CreatePadV2Options(flatbuffers::FlatBufferBuilder &_fbb, const PadV2OptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const PadV2OptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreatePadV2Options( + _fbb); +} + +inline ReshapeOptionsT *ReshapeOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new ReshapeOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void ReshapeOptions::UnPackTo(ReshapeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = new_shape(); if (_e) { _o->new_shape.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->new_shape[_i] = _e->Get(_i); } } } +} + +inline flatbuffers::Offset ReshapeOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ReshapeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateReshapeOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateReshapeOptions(flatbuffers::FlatBufferBuilder &_fbb, const ReshapeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ReshapeOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _new_shape = _o->new_shape.size() ? _fbb.CreateVector(_o->new_shape) : 0; + return tflite::CreateReshapeOptions( + _fbb, + _new_shape); +} + +inline SpaceToBatchNDOptionsT *SpaceToBatchNDOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new SpaceToBatchNDOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void SpaceToBatchNDOptions::UnPackTo(SpaceToBatchNDOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset SpaceToBatchNDOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SpaceToBatchNDOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateSpaceToBatchNDOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateSpaceToBatchNDOptions(flatbuffers::FlatBufferBuilder &_fbb, const SpaceToBatchNDOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SpaceToBatchNDOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateSpaceToBatchNDOptions( + _fbb); +} + +inline BatchToSpaceNDOptionsT *BatchToSpaceNDOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new BatchToSpaceNDOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void BatchToSpaceNDOptions::UnPackTo(BatchToSpaceNDOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset BatchToSpaceNDOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const BatchToSpaceNDOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateBatchToSpaceNDOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset 
CreateBatchToSpaceNDOptions(flatbuffers::FlatBufferBuilder &_fbb, const BatchToSpaceNDOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const BatchToSpaceNDOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateBatchToSpaceNDOptions( + _fbb); +} + +inline SkipGramOptionsT *SkipGramOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new SkipGramOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void SkipGramOptions::UnPackTo(SkipGramOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = ngram_size(); _o->ngram_size = _e; } + { auto _e = max_skip_size(); _o->max_skip_size = _e; } + { auto _e = include_all_ngrams(); _o->include_all_ngrams = _e; } +} + +inline flatbuffers::Offset SkipGramOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SkipGramOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateSkipGramOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateSkipGramOptions(flatbuffers::FlatBufferBuilder &_fbb, const SkipGramOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SkipGramOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _ngram_size = _o->ngram_size; + auto _max_skip_size = _o->max_skip_size; + auto _include_all_ngrams = _o->include_all_ngrams; + return tflite::CreateSkipGramOptions( + _fbb, + _ngram_size, + _max_skip_size, + _include_all_ngrams); +} + +inline SpaceToDepthOptionsT *SpaceToDepthOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new SpaceToDepthOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void SpaceToDepthOptions::UnPackTo(SpaceToDepthOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = block_size(); _o->block_size = _e; } +} + +inline flatbuffers::Offset SpaceToDepthOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SpaceToDepthOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateSpaceToDepthOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateSpaceToDepthOptions(flatbuffers::FlatBufferBuilder &_fbb, const SpaceToDepthOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SpaceToDepthOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _block_size = _o->block_size; + return tflite::CreateSpaceToDepthOptions( + _fbb, + _block_size); +} + +inline DepthToSpaceOptionsT *DepthToSpaceOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new DepthToSpaceOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void DepthToSpaceOptions::UnPackTo(DepthToSpaceOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = block_size(); _o->block_size = _e; } +} + +inline flatbuffers::Offset 
DepthToSpaceOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const DepthToSpaceOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateDepthToSpaceOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateDepthToSpaceOptions(flatbuffers::FlatBufferBuilder &_fbb, const DepthToSpaceOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const DepthToSpaceOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _block_size = _o->block_size; + return tflite::CreateDepthToSpaceOptions( + _fbb, + _block_size); +} + +inline SubOptionsT *SubOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new SubOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void SubOptions::UnPackTo(SubOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = fused_activation_function(); _o->fused_activation_function = _e; } + { auto _e = pot_scale_int16(); _o->pot_scale_int16 = _e; } +} + +inline flatbuffers::Offset SubOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SubOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateSubOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateSubOptions(flatbuffers::FlatBufferBuilder &_fbb, const SubOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SubOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _fused_activation_function = _o->fused_activation_function; + auto _pot_scale_int16 = _o->pot_scale_int16; + return tflite::CreateSubOptions( + _fbb, + _fused_activation_function, + _pot_scale_int16); +} + +inline DivOptionsT *DivOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new DivOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void DivOptions::UnPackTo(DivOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = fused_activation_function(); _o->fused_activation_function = _e; } +} + +inline flatbuffers::Offset DivOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const DivOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateDivOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateDivOptions(flatbuffers::FlatBufferBuilder &_fbb, const DivOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const DivOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _fused_activation_function = _o->fused_activation_function; + return tflite::CreateDivOptions( + _fbb, + _fused_activation_function); +} + +inline TopKV2OptionsT *TopKV2Options::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new TopKV2OptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void TopKV2Options::UnPackTo(TopKV2OptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline 
flatbuffers::Offset TopKV2Options::Pack(flatbuffers::FlatBufferBuilder &_fbb, const TopKV2OptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateTopKV2Options(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateTopKV2Options(flatbuffers::FlatBufferBuilder &_fbb, const TopKV2OptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const TopKV2OptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateTopKV2Options( + _fbb); +} + +inline EmbeddingLookupSparseOptionsT *EmbeddingLookupSparseOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new EmbeddingLookupSparseOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void EmbeddingLookupSparseOptions::UnPackTo(EmbeddingLookupSparseOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = combiner(); _o->combiner = _e; } +} + +inline flatbuffers::Offset EmbeddingLookupSparseOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const EmbeddingLookupSparseOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateEmbeddingLookupSparseOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateEmbeddingLookupSparseOptions(flatbuffers::FlatBufferBuilder &_fbb, const EmbeddingLookupSparseOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const EmbeddingLookupSparseOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _combiner = _o->combiner; + return tflite::CreateEmbeddingLookupSparseOptions( + _fbb, + _combiner); +} + +inline GatherOptionsT *GatherOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new GatherOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void GatherOptions::UnPackTo(GatherOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = axis(); _o->axis = _e; } + { auto _e = batch_dims(); _o->batch_dims = _e; } +} + +inline flatbuffers::Offset GatherOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const GatherOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateGatherOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateGatherOptions(flatbuffers::FlatBufferBuilder &_fbb, const GatherOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const GatherOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _axis = _o->axis; + auto _batch_dims = _o->batch_dims; + return tflite::CreateGatherOptions( + _fbb, + _axis, + _batch_dims); +} + +inline TransposeOptionsT *TransposeOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new TransposeOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void TransposeOptions::UnPackTo(TransposeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset 
TransposeOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const TransposeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateTransposeOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateTransposeOptions(flatbuffers::FlatBufferBuilder &_fbb, const TransposeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const TransposeOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateTransposeOptions( + _fbb); +} + +inline ExpOptionsT *ExpOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new ExpOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void ExpOptions::UnPackTo(ExpOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset ExpOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ExpOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateExpOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateExpOptions(flatbuffers::FlatBufferBuilder &_fbb, const ExpOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ExpOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateExpOptions( + _fbb); +} + +inline CosOptionsT *CosOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new CosOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void CosOptions::UnPackTo(CosOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset CosOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const CosOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateCosOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateCosOptions(flatbuffers::FlatBufferBuilder &_fbb, const CosOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const CosOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateCosOptions( + _fbb); +} + +inline ReducerOptionsT *ReducerOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new ReducerOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void ReducerOptions::UnPackTo(ReducerOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = keep_dims(); _o->keep_dims = _e; } +} + +inline flatbuffers::Offset ReducerOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ReducerOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateReducerOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateReducerOptions(flatbuffers::FlatBufferBuilder &_fbb, const ReducerOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ReducerOptionsT* 
__o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _keep_dims = _o->keep_dims; + return tflite::CreateReducerOptions( + _fbb, + _keep_dims); +} + +inline SqueezeOptionsT *SqueezeOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new SqueezeOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void SqueezeOptions::UnPackTo(SqueezeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = squeeze_dims(); if (_e) { _o->squeeze_dims.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->squeeze_dims[_i] = _e->Get(_i); } } } +} + +inline flatbuffers::Offset SqueezeOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SqueezeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateSqueezeOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateSqueezeOptions(flatbuffers::FlatBufferBuilder &_fbb, const SqueezeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SqueezeOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _squeeze_dims = _o->squeeze_dims.size() ? _fbb.CreateVector(_o->squeeze_dims) : 0; + return tflite::CreateSqueezeOptions( + _fbb, + _squeeze_dims); +} + +inline SplitOptionsT *SplitOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new SplitOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void SplitOptions::UnPackTo(SplitOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = num_splits(); _o->num_splits = _e; } +} + +inline flatbuffers::Offset SplitOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SplitOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateSplitOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateSplitOptions(flatbuffers::FlatBufferBuilder &_fbb, const SplitOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SplitOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _num_splits = _o->num_splits; + return tflite::CreateSplitOptions( + _fbb, + _num_splits); +} + +inline SplitVOptionsT *SplitVOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new SplitVOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void SplitVOptions::UnPackTo(SplitVOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = num_splits(); _o->num_splits = _e; } +} + +inline flatbuffers::Offset SplitVOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SplitVOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateSplitVOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateSplitVOptions(flatbuffers::FlatBufferBuilder &_fbb, const SplitVOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const 
SplitVOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _num_splits = _o->num_splits; + return tflite::CreateSplitVOptions( + _fbb, + _num_splits); +} + +inline StridedSliceOptionsT *StridedSliceOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new StridedSliceOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void StridedSliceOptions::UnPackTo(StridedSliceOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = begin_mask(); _o->begin_mask = _e; } + { auto _e = end_mask(); _o->end_mask = _e; } + { auto _e = ellipsis_mask(); _o->ellipsis_mask = _e; } + { auto _e = new_axis_mask(); _o->new_axis_mask = _e; } + { auto _e = shrink_axis_mask(); _o->shrink_axis_mask = _e; } +} + +inline flatbuffers::Offset StridedSliceOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const StridedSliceOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateStridedSliceOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateStridedSliceOptions(flatbuffers::FlatBufferBuilder &_fbb, const StridedSliceOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const StridedSliceOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _begin_mask = _o->begin_mask; + auto _end_mask = _o->end_mask; + auto _ellipsis_mask = _o->ellipsis_mask; + auto _new_axis_mask = _o->new_axis_mask; + auto _shrink_axis_mask = _o->shrink_axis_mask; + return tflite::CreateStridedSliceOptions( + _fbb, + _begin_mask, + _end_mask, + _ellipsis_mask, + _new_axis_mask, + _shrink_axis_mask); +} + +inline LogSoftmaxOptionsT *LogSoftmaxOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new LogSoftmaxOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void LogSoftmaxOptions::UnPackTo(LogSoftmaxOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset LogSoftmaxOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const LogSoftmaxOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateLogSoftmaxOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateLogSoftmaxOptions(flatbuffers::FlatBufferBuilder &_fbb, const LogSoftmaxOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const LogSoftmaxOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateLogSoftmaxOptions( + _fbb); +} + +inline CastOptionsT *CastOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new CastOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void CastOptions::UnPackTo(CastOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = in_data_type(); _o->in_data_type = _e; } + { auto _e = out_data_type(); _o->out_data_type = _e; } +} + +inline flatbuffers::Offset CastOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const CastOptionsT* _o, const 
flatbuffers::rehasher_function_t *_rehasher) { + return CreateCastOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateCastOptions(flatbuffers::FlatBufferBuilder &_fbb, const CastOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const CastOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _in_data_type = _o->in_data_type; + auto _out_data_type = _o->out_data_type; + return tflite::CreateCastOptions( + _fbb, + _in_data_type, + _out_data_type); +} + +inline DequantizeOptionsT *DequantizeOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new DequantizeOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void DequantizeOptions::UnPackTo(DequantizeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset DequantizeOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const DequantizeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateDequantizeOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateDequantizeOptions(flatbuffers::FlatBufferBuilder &_fbb, const DequantizeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const DequantizeOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateDequantizeOptions( + _fbb); +} + +inline MaximumMinimumOptionsT *MaximumMinimumOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new MaximumMinimumOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void MaximumMinimumOptions::UnPackTo(MaximumMinimumOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset MaximumMinimumOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const MaximumMinimumOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateMaximumMinimumOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateMaximumMinimumOptions(flatbuffers::FlatBufferBuilder &_fbb, const MaximumMinimumOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const MaximumMinimumOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateMaximumMinimumOptions( + _fbb); +} + +inline TileOptionsT *TileOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new TileOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void TileOptions::UnPackTo(TileOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset TileOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const TileOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateTileOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateTileOptions(flatbuffers::FlatBufferBuilder &_fbb, const TileOptionsT *_o, const 
flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const TileOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateTileOptions( + _fbb); +} + +inline ArgMaxOptionsT *ArgMaxOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new ArgMaxOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void ArgMaxOptions::UnPackTo(ArgMaxOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = output_type(); _o->output_type = _e; } +} + +inline flatbuffers::Offset ArgMaxOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ArgMaxOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateArgMaxOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateArgMaxOptions(flatbuffers::FlatBufferBuilder &_fbb, const ArgMaxOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ArgMaxOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _output_type = _o->output_type; + return tflite::CreateArgMaxOptions( + _fbb, + _output_type); +} + +inline ArgMinOptionsT *ArgMinOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new ArgMinOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void ArgMinOptions::UnPackTo(ArgMinOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = output_type(); _o->output_type = _e; } +} + +inline flatbuffers::Offset ArgMinOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ArgMinOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateArgMinOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateArgMinOptions(flatbuffers::FlatBufferBuilder &_fbb, const ArgMinOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ArgMinOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _output_type = _o->output_type; + return tflite::CreateArgMinOptions( + _fbb, + _output_type); +} + +inline GreaterOptionsT *GreaterOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new GreaterOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void GreaterOptions::UnPackTo(GreaterOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset GreaterOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const GreaterOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateGreaterOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateGreaterOptions(flatbuffers::FlatBufferBuilder &_fbb, const GreaterOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const GreaterOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; 
+ return tflite::CreateGreaterOptions( + _fbb); +} + +inline GreaterEqualOptionsT *GreaterEqualOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new GreaterEqualOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void GreaterEqualOptions::UnPackTo(GreaterEqualOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset GreaterEqualOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const GreaterEqualOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateGreaterEqualOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateGreaterEqualOptions(flatbuffers::FlatBufferBuilder &_fbb, const GreaterEqualOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const GreaterEqualOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateGreaterEqualOptions( + _fbb); +} + +inline LessOptionsT *LessOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new LessOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void LessOptions::UnPackTo(LessOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset LessOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const LessOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateLessOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateLessOptions(flatbuffers::FlatBufferBuilder &_fbb, const LessOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const LessOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateLessOptions( + _fbb); +} + +inline LessEqualOptionsT *LessEqualOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new LessEqualOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void LessEqualOptions::UnPackTo(LessEqualOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset LessEqualOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const LessEqualOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateLessEqualOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateLessEqualOptions(flatbuffers::FlatBufferBuilder &_fbb, const LessEqualOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const LessEqualOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateLessEqualOptions( + _fbb); +} + +inline NegOptionsT *NegOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new NegOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void NegOptions::UnPackTo(NegOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; 
+} + +inline flatbuffers::Offset NegOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const NegOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateNegOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateNegOptions(flatbuffers::FlatBufferBuilder &_fbb, const NegOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const NegOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateNegOptions( + _fbb); +} + +inline SelectOptionsT *SelectOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new SelectOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void SelectOptions::UnPackTo(SelectOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset SelectOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SelectOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateSelectOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateSelectOptions(flatbuffers::FlatBufferBuilder &_fbb, const SelectOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SelectOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateSelectOptions( + _fbb); +} + +inline SliceOptionsT *SliceOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new SliceOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void SliceOptions::UnPackTo(SliceOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset SliceOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SliceOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateSliceOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateSliceOptions(flatbuffers::FlatBufferBuilder &_fbb, const SliceOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SliceOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateSliceOptions( + _fbb); +} + +inline TransposeConvOptionsT *TransposeConvOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new TransposeConvOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void TransposeConvOptions::UnPackTo(TransposeConvOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = padding(); _o->padding = _e; } + { auto _e = stride_w(); _o->stride_w = _e; } + { auto _e = stride_h(); _o->stride_h = _e; } + { auto _e = fused_activation_function(); _o->fused_activation_function = _e; } +} + +inline flatbuffers::Offset TransposeConvOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const TransposeConvOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateTransposeConvOptions(_fbb, _o, _rehasher); +} + +inline 
flatbuffers::Offset CreateTransposeConvOptions(flatbuffers::FlatBufferBuilder &_fbb, const TransposeConvOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const TransposeConvOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _padding = _o->padding; + auto _stride_w = _o->stride_w; + auto _stride_h = _o->stride_h; + auto _fused_activation_function = _o->fused_activation_function; + return tflite::CreateTransposeConvOptions( + _fbb, + _padding, + _stride_w, + _stride_h, + _fused_activation_function); +} + +inline ExpandDimsOptionsT *ExpandDimsOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new ExpandDimsOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void ExpandDimsOptions::UnPackTo(ExpandDimsOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset ExpandDimsOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ExpandDimsOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateExpandDimsOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateExpandDimsOptions(flatbuffers::FlatBufferBuilder &_fbb, const ExpandDimsOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ExpandDimsOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateExpandDimsOptions( + _fbb); +} + +inline SparseToDenseOptionsT *SparseToDenseOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new SparseToDenseOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void SparseToDenseOptions::UnPackTo(SparseToDenseOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = validate_indices(); _o->validate_indices = _e; } +} + +inline flatbuffers::Offset SparseToDenseOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SparseToDenseOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateSparseToDenseOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateSparseToDenseOptions(flatbuffers::FlatBufferBuilder &_fbb, const SparseToDenseOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SparseToDenseOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _validate_indices = _o->validate_indices; + return tflite::CreateSparseToDenseOptions( + _fbb, + _validate_indices); +} + +inline EqualOptionsT *EqualOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new EqualOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void EqualOptions::UnPackTo(EqualOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset EqualOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const EqualOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return 
CreateEqualOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateEqualOptions(flatbuffers::FlatBufferBuilder &_fbb, const EqualOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const EqualOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateEqualOptions( + _fbb); +} + +inline NotEqualOptionsT *NotEqualOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new NotEqualOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void NotEqualOptions::UnPackTo(NotEqualOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset NotEqualOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const NotEqualOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateNotEqualOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateNotEqualOptions(flatbuffers::FlatBufferBuilder &_fbb, const NotEqualOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const NotEqualOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateNotEqualOptions( + _fbb); +} + +inline ShapeOptionsT *ShapeOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new ShapeOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void ShapeOptions::UnPackTo(ShapeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = out_type(); _o->out_type = _e; } +} + +inline flatbuffers::Offset ShapeOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ShapeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateShapeOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateShapeOptions(flatbuffers::FlatBufferBuilder &_fbb, const ShapeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ShapeOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _out_type = _o->out_type; + return tflite::CreateShapeOptions( + _fbb, + _out_type); +} + +inline RankOptionsT *RankOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new RankOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void RankOptions::UnPackTo(RankOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset RankOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const RankOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateRankOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateRankOptions(flatbuffers::FlatBufferBuilder &_fbb, const RankOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const RankOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, 
_rehasher}; (void)_va; + return tflite::CreateRankOptions( + _fbb); +} + +inline PowOptionsT *PowOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new PowOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void PowOptions::UnPackTo(PowOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset PowOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const PowOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreatePowOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreatePowOptions(flatbuffers::FlatBufferBuilder &_fbb, const PowOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const PowOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreatePowOptions( + _fbb); +} + +inline FakeQuantOptionsT *FakeQuantOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new FakeQuantOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void FakeQuantOptions::UnPackTo(FakeQuantOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = min(); _o->min = _e; } + { auto _e = max(); _o->max = _e; } + { auto _e = num_bits(); _o->num_bits = _e; } + { auto _e = narrow_range(); _o->narrow_range = _e; } +} + +inline flatbuffers::Offset FakeQuantOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const FakeQuantOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateFakeQuantOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateFakeQuantOptions(flatbuffers::FlatBufferBuilder &_fbb, const FakeQuantOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const FakeQuantOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _min = _o->min; + auto _max = _o->max; + auto _num_bits = _o->num_bits; + auto _narrow_range = _o->narrow_range; + return tflite::CreateFakeQuantOptions( + _fbb, + _min, + _max, + _num_bits, + _narrow_range); +} + +inline PackOptionsT *PackOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new PackOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void PackOptions::UnPackTo(PackOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = values_count(); _o->values_count = _e; } + { auto _e = axis(); _o->axis = _e; } +} + +inline flatbuffers::Offset PackOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const PackOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreatePackOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreatePackOptions(flatbuffers::FlatBufferBuilder &_fbb, const PackOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const PackOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _values_count = _o->values_count; + auto 
_axis = _o->axis; + return tflite::CreatePackOptions( + _fbb, + _values_count, + _axis); +} + +inline LogicalOrOptionsT *LogicalOrOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new LogicalOrOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void LogicalOrOptions::UnPackTo(LogicalOrOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset LogicalOrOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const LogicalOrOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateLogicalOrOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateLogicalOrOptions(flatbuffers::FlatBufferBuilder &_fbb, const LogicalOrOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const LogicalOrOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateLogicalOrOptions( + _fbb); +} + +inline OneHotOptionsT *OneHotOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new OneHotOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void OneHotOptions::UnPackTo(OneHotOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = axis(); _o->axis = _e; } +} + +inline flatbuffers::Offset OneHotOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const OneHotOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateOneHotOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateOneHotOptions(flatbuffers::FlatBufferBuilder &_fbb, const OneHotOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const OneHotOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _axis = _o->axis; + return tflite::CreateOneHotOptions( + _fbb, + _axis); +} + +inline AbsOptionsT *AbsOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new AbsOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void AbsOptions::UnPackTo(AbsOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset AbsOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const AbsOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateAbsOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateAbsOptions(flatbuffers::FlatBufferBuilder &_fbb, const AbsOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const AbsOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateAbsOptions( + _fbb); +} + +inline HardSwishOptionsT *HardSwishOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new HardSwishOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void HardSwishOptions::UnPackTo(HardSwishOptionsT *_o, const 
flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset HardSwishOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const HardSwishOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateHardSwishOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateHardSwishOptions(flatbuffers::FlatBufferBuilder &_fbb, const HardSwishOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const HardSwishOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateHardSwishOptions( + _fbb); +} + +inline LogicalAndOptionsT *LogicalAndOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new LogicalAndOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void LogicalAndOptions::UnPackTo(LogicalAndOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset LogicalAndOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const LogicalAndOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateLogicalAndOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateLogicalAndOptions(flatbuffers::FlatBufferBuilder &_fbb, const LogicalAndOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const LogicalAndOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateLogicalAndOptions( + _fbb); +} + +inline LogicalNotOptionsT *LogicalNotOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new LogicalNotOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void LogicalNotOptions::UnPackTo(LogicalNotOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset LogicalNotOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const LogicalNotOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateLogicalNotOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateLogicalNotOptions(flatbuffers::FlatBufferBuilder &_fbb, const LogicalNotOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const LogicalNotOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateLogicalNotOptions( + _fbb); +} + +inline UnpackOptionsT *UnpackOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new UnpackOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void UnpackOptions::UnPackTo(UnpackOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = num(); _o->num = _e; } + { auto _e = axis(); _o->axis = _e; } +} + +inline flatbuffers::Offset UnpackOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const UnpackOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateUnpackOptions(_fbb, _o, 
_rehasher); +} + +inline flatbuffers::Offset CreateUnpackOptions(flatbuffers::FlatBufferBuilder &_fbb, const UnpackOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const UnpackOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _num = _o->num; + auto _axis = _o->axis; + return tflite::CreateUnpackOptions( + _fbb, + _num, + _axis); +} + +inline FloorDivOptionsT *FloorDivOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new FloorDivOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void FloorDivOptions::UnPackTo(FloorDivOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset FloorDivOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const FloorDivOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateFloorDivOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateFloorDivOptions(flatbuffers::FlatBufferBuilder &_fbb, const FloorDivOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const FloorDivOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateFloorDivOptions( + _fbb); +} + +inline SquareOptionsT *SquareOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new SquareOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void SquareOptions::UnPackTo(SquareOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset SquareOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SquareOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateSquareOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateSquareOptions(flatbuffers::FlatBufferBuilder &_fbb, const SquareOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SquareOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateSquareOptions( + _fbb); +} + +inline ZerosLikeOptionsT *ZerosLikeOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new ZerosLikeOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void ZerosLikeOptions::UnPackTo(ZerosLikeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset ZerosLikeOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ZerosLikeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateZerosLikeOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateZerosLikeOptions(flatbuffers::FlatBufferBuilder &_fbb, const ZerosLikeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ZerosLikeOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { 
&_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateZerosLikeOptions( + _fbb); +} + +inline FillOptionsT *FillOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new FillOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void FillOptions::UnPackTo(FillOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset FillOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const FillOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateFillOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateFillOptions(flatbuffers::FlatBufferBuilder &_fbb, const FillOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const FillOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateFillOptions( + _fbb); +} + +inline FloorModOptionsT *FloorModOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new FloorModOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void FloorModOptions::UnPackTo(FloorModOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset FloorModOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const FloorModOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateFloorModOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateFloorModOptions(flatbuffers::FlatBufferBuilder &_fbb, const FloorModOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const FloorModOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateFloorModOptions( + _fbb); +} + +inline RangeOptionsT *RangeOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new RangeOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void RangeOptions::UnPackTo(RangeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset RangeOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const RangeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateRangeOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateRangeOptions(flatbuffers::FlatBufferBuilder &_fbb, const RangeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const RangeOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateRangeOptions( + _fbb); +} + +inline LeakyReluOptionsT *LeakyReluOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new LeakyReluOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void LeakyReluOptions::UnPackTo(LeakyReluOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = alpha(); 
_o->alpha = _e; } +} + +inline flatbuffers::Offset LeakyReluOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const LeakyReluOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateLeakyReluOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateLeakyReluOptions(flatbuffers::FlatBufferBuilder &_fbb, const LeakyReluOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const LeakyReluOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _alpha = _o->alpha; + return tflite::CreateLeakyReluOptions( + _fbb, + _alpha); +} + +inline SquaredDifferenceOptionsT *SquaredDifferenceOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new SquaredDifferenceOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void SquaredDifferenceOptions::UnPackTo(SquaredDifferenceOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset SquaredDifferenceOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SquaredDifferenceOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateSquaredDifferenceOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateSquaredDifferenceOptions(flatbuffers::FlatBufferBuilder &_fbb, const SquaredDifferenceOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SquaredDifferenceOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateSquaredDifferenceOptions( + _fbb); +} + +inline MirrorPadOptionsT *MirrorPadOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new MirrorPadOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void MirrorPadOptions::UnPackTo(MirrorPadOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = mode(); _o->mode = _e; } +} + +inline flatbuffers::Offset MirrorPadOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const MirrorPadOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateMirrorPadOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateMirrorPadOptions(flatbuffers::FlatBufferBuilder &_fbb, const MirrorPadOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const MirrorPadOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _mode = _o->mode; + return tflite::CreateMirrorPadOptions( + _fbb, + _mode); +} + +inline UniqueOptionsT *UniqueOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new UniqueOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void UniqueOptions::UnPackTo(UniqueOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = idx_out_type(); _o->idx_out_type = _e; } +} + +inline flatbuffers::Offset UniqueOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const UniqueOptionsT* 
_o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateUniqueOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateUniqueOptions(flatbuffers::FlatBufferBuilder &_fbb, const UniqueOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const UniqueOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _idx_out_type = _o->idx_out_type; + return tflite::CreateUniqueOptions( + _fbb, + _idx_out_type); +} + +inline ReverseV2OptionsT *ReverseV2Options::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new ReverseV2OptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void ReverseV2Options::UnPackTo(ReverseV2OptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset ReverseV2Options::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ReverseV2OptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateReverseV2Options(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateReverseV2Options(flatbuffers::FlatBufferBuilder &_fbb, const ReverseV2OptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ReverseV2OptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateReverseV2Options( + _fbb); +} + +inline AddNOptionsT *AddNOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new AddNOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void AddNOptions::UnPackTo(AddNOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset AddNOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const AddNOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateAddNOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateAddNOptions(flatbuffers::FlatBufferBuilder &_fbb, const AddNOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const AddNOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateAddNOptions( + _fbb); +} + +inline GatherNdOptionsT *GatherNdOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new GatherNdOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void GatherNdOptions::UnPackTo(GatherNdOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset GatherNdOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const GatherNdOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateGatherNdOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateGatherNdOptions(flatbuffers::FlatBufferBuilder &_fbb, const GatherNdOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const GatherNdOptionsT* 
__o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
+  return tflite::CreateGatherNdOptions(
+      _fbb);
+}
+
+inline WhereOptionsT *WhereOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = std::unique_ptr<WhereOptionsT>(new WhereOptionsT());
+  UnPackTo(_o.get(), _resolver);
+  return _o.release();
+}
+
+inline void WhereOptions::UnPackTo(WhereOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;
+  (void)_resolver;
+}
+
+inline flatbuffers::Offset<WhereOptions> WhereOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const WhereOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateWhereOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<WhereOptions> CreateWhereOptions(flatbuffers::FlatBufferBuilder &_fbb, const WhereOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;
+  (void)_o;
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const WhereOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
+  return tflite::CreateWhereOptions(
+      _fbb);
+}
+
+inline ReverseSequenceOptionsT *ReverseSequenceOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = std::unique_ptr<ReverseSequenceOptionsT>(new ReverseSequenceOptionsT());
+  UnPackTo(_o.get(), _resolver);
+  return _o.release();
+}
+
+inline void ReverseSequenceOptions::UnPackTo(ReverseSequenceOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;
+  (void)_resolver;
+  { auto _e = seq_dim(); _o->seq_dim = _e; }
+  { auto _e = batch_dim(); _o->batch_dim = _e; }
+}
+
+inline flatbuffers::Offset<ReverseSequenceOptions> ReverseSequenceOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ReverseSequenceOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateReverseSequenceOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<ReverseSequenceOptions> CreateReverseSequenceOptions(flatbuffers::FlatBufferBuilder &_fbb, const ReverseSequenceOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;
+  (void)_o;
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ReverseSequenceOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
+  auto _seq_dim = _o->seq_dim;
+  auto _batch_dim = _o->batch_dim;
+  return tflite::CreateReverseSequenceOptions(
+      _fbb,
+      _seq_dim,
+      _batch_dim);
+}
+
+inline MatrixDiagOptionsT *MatrixDiagOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = std::unique_ptr<MatrixDiagOptionsT>(new MatrixDiagOptionsT());
+  UnPackTo(_o.get(), _resolver);
+  return _o.release();
+}
+
+inline void MatrixDiagOptions::UnPackTo(MatrixDiagOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;
+  (void)_resolver;
+}
+
+inline flatbuffers::Offset<MatrixDiagOptions> MatrixDiagOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const MatrixDiagOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateMatrixDiagOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<MatrixDiagOptions> CreateMatrixDiagOptions(flatbuffers::FlatBufferBuilder &_fbb, const MatrixDiagOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;
+  (void)_o;
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const MatrixDiagOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
+  return tflite::CreateMatrixDiagOptions(
+      _fbb);
+}
+
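The functions in this generated header form the FlatBuffers object ("T") API for the TFLite schema: for every option table, UnPack/UnPackTo copy the serialized table into a plain C++ struct (the *T type), and Pack/Create* serialize that struct back through a FlatBufferBuilder. A minimal round-trip sketch follows, using SubOptions as the example table; the helper name and the include path are illustrative assumptions and should be adjusted to wherever the pack installs this header.

#include <memory>

#include "schema_generated.h"  // this generated header; the actual path inside the pack may differ

void sub_options_round_trip() {
  flatbuffers::FlatBufferBuilder fbb;

  // Fill the native (object API) struct.
  tflite::SubOptionsT opts;
  opts.fused_activation_function = tflite::ActivationFunctionType_RELU;
  opts.pot_scale_int16 = false;

  // Pack() forwards to CreateSubOptions() and returns a flatbuffers::Offset<SubOptions>.
  fbb.Finish(tflite::SubOptions::Pack(fbb, &opts, nullptr));

  // Read the finished buffer back and UnPack() it into a fresh SubOptionsT.
  auto *table = flatbuffers::GetRoot<tflite::SubOptions>(fbb.GetBufferPointer());
  std::unique_ptr<tflite::SubOptionsT> copy(table->UnPack(nullptr));
}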
+inline QuantizeOptionsT *QuantizeOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new QuantizeOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void QuantizeOptions::UnPackTo(QuantizeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset QuantizeOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const QuantizeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateQuantizeOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateQuantizeOptions(flatbuffers::FlatBufferBuilder &_fbb, const QuantizeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const QuantizeOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateQuantizeOptions( + _fbb); +} + +inline MatrixSetDiagOptionsT *MatrixSetDiagOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new MatrixSetDiagOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void MatrixSetDiagOptions::UnPackTo(MatrixSetDiagOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset MatrixSetDiagOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const MatrixSetDiagOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateMatrixSetDiagOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateMatrixSetDiagOptions(flatbuffers::FlatBufferBuilder &_fbb, const MatrixSetDiagOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const MatrixSetDiagOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateMatrixSetDiagOptions( + _fbb); +} + +inline IfOptionsT *IfOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new IfOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void IfOptions::UnPackTo(IfOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = then_subgraph_index(); _o->then_subgraph_index = _e; } + { auto _e = else_subgraph_index(); _o->else_subgraph_index = _e; } +} + +inline flatbuffers::Offset IfOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const IfOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateIfOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateIfOptions(flatbuffers::FlatBufferBuilder &_fbb, const IfOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const IfOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _then_subgraph_index = _o->then_subgraph_index; + auto _else_subgraph_index = _o->else_subgraph_index; + return tflite::CreateIfOptions( + _fbb, + _then_subgraph_index, + _else_subgraph_index); +} + +inline CallOnceOptionsT *CallOnceOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = 
std::unique_ptr(new CallOnceOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void CallOnceOptions::UnPackTo(CallOnceOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = init_subgraph_index(); _o->init_subgraph_index = _e; } +} + +inline flatbuffers::Offset CallOnceOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const CallOnceOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateCallOnceOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateCallOnceOptions(flatbuffers::FlatBufferBuilder &_fbb, const CallOnceOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const CallOnceOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _init_subgraph_index = _o->init_subgraph_index; + return tflite::CreateCallOnceOptions( + _fbb, + _init_subgraph_index); +} + +inline WhileOptionsT *WhileOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new WhileOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void WhileOptions::UnPackTo(WhileOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = cond_subgraph_index(); _o->cond_subgraph_index = _e; } + { auto _e = body_subgraph_index(); _o->body_subgraph_index = _e; } +} + +inline flatbuffers::Offset WhileOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const WhileOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateWhileOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateWhileOptions(flatbuffers::FlatBufferBuilder &_fbb, const WhileOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const WhileOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _cond_subgraph_index = _o->cond_subgraph_index; + auto _body_subgraph_index = _o->body_subgraph_index; + return tflite::CreateWhileOptions( + _fbb, + _cond_subgraph_index, + _body_subgraph_index); +} + +inline NonMaxSuppressionV4OptionsT *NonMaxSuppressionV4Options::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new NonMaxSuppressionV4OptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void NonMaxSuppressionV4Options::UnPackTo(NonMaxSuppressionV4OptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset NonMaxSuppressionV4Options::Pack(flatbuffers::FlatBufferBuilder &_fbb, const NonMaxSuppressionV4OptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateNonMaxSuppressionV4Options(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateNonMaxSuppressionV4Options(flatbuffers::FlatBufferBuilder &_fbb, const NonMaxSuppressionV4OptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const NonMaxSuppressionV4OptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateNonMaxSuppressionV4Options( + _fbb); 
+} + +inline NonMaxSuppressionV5OptionsT *NonMaxSuppressionV5Options::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new NonMaxSuppressionV5OptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void NonMaxSuppressionV5Options::UnPackTo(NonMaxSuppressionV5OptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset NonMaxSuppressionV5Options::Pack(flatbuffers::FlatBufferBuilder &_fbb, const NonMaxSuppressionV5OptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateNonMaxSuppressionV5Options(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateNonMaxSuppressionV5Options(flatbuffers::FlatBufferBuilder &_fbb, const NonMaxSuppressionV5OptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const NonMaxSuppressionV5OptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateNonMaxSuppressionV5Options( + _fbb); +} + +inline ScatterNdOptionsT *ScatterNdOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new ScatterNdOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void ScatterNdOptions::UnPackTo(ScatterNdOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset ScatterNdOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ScatterNdOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateScatterNdOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateScatterNdOptions(flatbuffers::FlatBufferBuilder &_fbb, const ScatterNdOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ScatterNdOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateScatterNdOptions( + _fbb); +} + +inline SelectV2OptionsT *SelectV2Options::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new SelectV2OptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void SelectV2Options::UnPackTo(SelectV2OptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset SelectV2Options::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SelectV2OptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateSelectV2Options(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateSelectV2Options(flatbuffers::FlatBufferBuilder &_fbb, const SelectV2OptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SelectV2OptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateSelectV2Options( + _fbb); +} + +inline DensifyOptionsT *DensifyOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new DensifyOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void 
DensifyOptions::UnPackTo(DensifyOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset DensifyOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const DensifyOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateDensifyOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateDensifyOptions(flatbuffers::FlatBufferBuilder &_fbb, const DensifyOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const DensifyOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateDensifyOptions( + _fbb); +} + +inline SegmentSumOptionsT *SegmentSumOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new SegmentSumOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void SegmentSumOptions::UnPackTo(SegmentSumOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset SegmentSumOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SegmentSumOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateSegmentSumOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateSegmentSumOptions(flatbuffers::FlatBufferBuilder &_fbb, const SegmentSumOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SegmentSumOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateSegmentSumOptions( + _fbb); +} + +inline BatchMatMulOptionsT *BatchMatMulOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new BatchMatMulOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void BatchMatMulOptions::UnPackTo(BatchMatMulOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = adj_x(); _o->adj_x = _e; } + { auto _e = adj_y(); _o->adj_y = _e; } + { auto _e = asymmetric_quantize_inputs(); _o->asymmetric_quantize_inputs = _e; } +} + +inline flatbuffers::Offset BatchMatMulOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const BatchMatMulOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateBatchMatMulOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateBatchMatMulOptions(flatbuffers::FlatBufferBuilder &_fbb, const BatchMatMulOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const BatchMatMulOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _adj_x = _o->adj_x; + auto _adj_y = _o->adj_y; + auto _asymmetric_quantize_inputs = _o->asymmetric_quantize_inputs; + return tflite::CreateBatchMatMulOptions( + _fbb, + _adj_x, + _adj_y, + _asymmetric_quantize_inputs); +} + +inline CumsumOptionsT *CumsumOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new CumsumOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void 
CumsumOptions::UnPackTo(CumsumOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = exclusive(); _o->exclusive = _e; } + { auto _e = reverse(); _o->reverse = _e; } +} + +inline flatbuffers::Offset CumsumOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const CumsumOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateCumsumOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateCumsumOptions(flatbuffers::FlatBufferBuilder &_fbb, const CumsumOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const CumsumOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _exclusive = _o->exclusive; + auto _reverse = _o->reverse; + return tflite::CreateCumsumOptions( + _fbb, + _exclusive, + _reverse); +} + +inline BroadcastToOptionsT *BroadcastToOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new BroadcastToOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void BroadcastToOptions::UnPackTo(BroadcastToOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset BroadcastToOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const BroadcastToOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateBroadcastToOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateBroadcastToOptions(flatbuffers::FlatBufferBuilder &_fbb, const BroadcastToOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const BroadcastToOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateBroadcastToOptions( + _fbb); +} + +inline Rfft2dOptionsT *Rfft2dOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new Rfft2dOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void Rfft2dOptions::UnPackTo(Rfft2dOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset Rfft2dOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const Rfft2dOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateRfft2dOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateRfft2dOptions(flatbuffers::FlatBufferBuilder &_fbb, const Rfft2dOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const Rfft2dOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateRfft2dOptions( + _fbb); +} + +inline HashtableOptionsT *HashtableOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new HashtableOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void HashtableOptions::UnPackTo(HashtableOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = table_id(); _o->table_id = _e; } + { auto _e = key_dtype(); _o->key_dtype = 
_e; } + { auto _e = value_dtype(); _o->value_dtype = _e; } +} + +inline flatbuffers::Offset HashtableOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const HashtableOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateHashtableOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateHashtableOptions(flatbuffers::FlatBufferBuilder &_fbb, const HashtableOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const HashtableOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _table_id = _o->table_id; + auto _key_dtype = _o->key_dtype; + auto _value_dtype = _o->value_dtype; + return tflite::CreateHashtableOptions( + _fbb, + _table_id, + _key_dtype, + _value_dtype); +} + +inline HashtableFindOptionsT *HashtableFindOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new HashtableFindOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void HashtableFindOptions::UnPackTo(HashtableFindOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset HashtableFindOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const HashtableFindOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateHashtableFindOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateHashtableFindOptions(flatbuffers::FlatBufferBuilder &_fbb, const HashtableFindOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const HashtableFindOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateHashtableFindOptions( + _fbb); +} + +inline HashtableImportOptionsT *HashtableImportOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new HashtableImportOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void HashtableImportOptions::UnPackTo(HashtableImportOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset HashtableImportOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const HashtableImportOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateHashtableImportOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateHashtableImportOptions(flatbuffers::FlatBufferBuilder &_fbb, const HashtableImportOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const HashtableImportOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateHashtableImportOptions( + _fbb); +} + +inline HashtableSizeOptionsT *HashtableSizeOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new HashtableSizeOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void HashtableSizeOptions::UnPackTo(HashtableSizeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline 
flatbuffers::Offset HashtableSizeOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const HashtableSizeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateHashtableSizeOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateHashtableSizeOptions(flatbuffers::FlatBufferBuilder &_fbb, const HashtableSizeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const HashtableSizeOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateHashtableSizeOptions( + _fbb); +} + +inline VarHandleOptionsT *VarHandleOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new VarHandleOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void VarHandleOptions::UnPackTo(VarHandleOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = container(); if (_e) _o->container = _e->str(); } + { auto _e = shared_name(); if (_e) _o->shared_name = _e->str(); } +} + +inline flatbuffers::Offset VarHandleOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const VarHandleOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateVarHandleOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateVarHandleOptions(flatbuffers::FlatBufferBuilder &_fbb, const VarHandleOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const VarHandleOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _container = _o->container.empty() ? 0 : _fbb.CreateString(_o->container); + auto _shared_name = _o->shared_name.empty() ? 
0 : _fbb.CreateString(_o->shared_name); + return tflite::CreateVarHandleOptions( + _fbb, + _container, + _shared_name); +} + +inline ReadVariableOptionsT *ReadVariableOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new ReadVariableOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void ReadVariableOptions::UnPackTo(ReadVariableOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset ReadVariableOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ReadVariableOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateReadVariableOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateReadVariableOptions(flatbuffers::FlatBufferBuilder &_fbb, const ReadVariableOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ReadVariableOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateReadVariableOptions( + _fbb); +} + +inline AssignVariableOptionsT *AssignVariableOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new AssignVariableOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void AssignVariableOptions::UnPackTo(AssignVariableOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset AssignVariableOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const AssignVariableOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateAssignVariableOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateAssignVariableOptions(flatbuffers::FlatBufferBuilder &_fbb, const AssignVariableOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const AssignVariableOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateAssignVariableOptions( + _fbb); +} + +inline RandomOptionsT *RandomOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new RandomOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void RandomOptions::UnPackTo(RandomOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = seed(); _o->seed = _e; } + { auto _e = seed2(); _o->seed2 = _e; } +} + +inline flatbuffers::Offset RandomOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const RandomOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateRandomOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateRandomOptions(flatbuffers::FlatBufferBuilder &_fbb, const RandomOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const RandomOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _seed = _o->seed; + auto _seed2 = _o->seed2; + return tflite::CreateRandomOptions( + _fbb, + _seed, + _seed2); +} + +inline BucketizeOptionsT 
*BucketizeOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new BucketizeOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void BucketizeOptions::UnPackTo(BucketizeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = boundaries(); if (_e) { _o->boundaries.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->boundaries[_i] = _e->Get(_i); } } } +} + +inline flatbuffers::Offset BucketizeOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const BucketizeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateBucketizeOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateBucketizeOptions(flatbuffers::FlatBufferBuilder &_fbb, const BucketizeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const BucketizeOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _boundaries = _o->boundaries.size() ? _fbb.CreateVector(_o->boundaries) : 0; + return tflite::CreateBucketizeOptions( + _fbb, + _boundaries); +} + +inline GeluOptionsT *GeluOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new GeluOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void GeluOptions::UnPackTo(GeluOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = approximate(); _o->approximate = _e; } +} + +inline flatbuffers::Offset GeluOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const GeluOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateGeluOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateGeluOptions(flatbuffers::FlatBufferBuilder &_fbb, const GeluOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const GeluOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _approximate = _o->approximate; + return tflite::CreateGeluOptions( + _fbb, + _approximate); +} + +inline DynamicUpdateSliceOptionsT *DynamicUpdateSliceOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new DynamicUpdateSliceOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void DynamicUpdateSliceOptions::UnPackTo(DynamicUpdateSliceOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset DynamicUpdateSliceOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const DynamicUpdateSliceOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateDynamicUpdateSliceOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateDynamicUpdateSliceOptions(flatbuffers::FlatBufferBuilder &_fbb, const DynamicUpdateSliceOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const DynamicUpdateSliceOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return 
tflite::CreateDynamicUpdateSliceOptions( + _fbb); +} + +inline UnsortedSegmentProdOptionsT *UnsortedSegmentProdOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new UnsortedSegmentProdOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void UnsortedSegmentProdOptions::UnPackTo(UnsortedSegmentProdOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset UnsortedSegmentProdOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const UnsortedSegmentProdOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateUnsortedSegmentProdOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateUnsortedSegmentProdOptions(flatbuffers::FlatBufferBuilder &_fbb, const UnsortedSegmentProdOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const UnsortedSegmentProdOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateUnsortedSegmentProdOptions( + _fbb); +} + +inline UnsortedSegmentMaxOptionsT *UnsortedSegmentMaxOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new UnsortedSegmentMaxOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void UnsortedSegmentMaxOptions::UnPackTo(UnsortedSegmentMaxOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset UnsortedSegmentMaxOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const UnsortedSegmentMaxOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateUnsortedSegmentMaxOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateUnsortedSegmentMaxOptions(flatbuffers::FlatBufferBuilder &_fbb, const UnsortedSegmentMaxOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const UnsortedSegmentMaxOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateUnsortedSegmentMaxOptions( + _fbb); +} + +inline UnsortedSegmentSumOptionsT *UnsortedSegmentSumOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new UnsortedSegmentSumOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void UnsortedSegmentSumOptions::UnPackTo(UnsortedSegmentSumOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset UnsortedSegmentSumOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const UnsortedSegmentSumOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateUnsortedSegmentSumOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateUnsortedSegmentSumOptions(flatbuffers::FlatBufferBuilder &_fbb, const UnsortedSegmentSumOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const UnsortedSegmentSumOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateUnsortedSegmentSumOptions( 
+ _fbb); +} + +inline ATan2OptionsT *ATan2Options::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new ATan2OptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void ATan2Options::UnPackTo(ATan2OptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset ATan2Options::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ATan2OptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateATan2Options(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateATan2Options(flatbuffers::FlatBufferBuilder &_fbb, const ATan2OptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ATan2OptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateATan2Options( + _fbb); +} + +inline UnsortedSegmentMinOptionsT *UnsortedSegmentMinOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new UnsortedSegmentMinOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void UnsortedSegmentMinOptions::UnPackTo(UnsortedSegmentMinOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset UnsortedSegmentMinOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const UnsortedSegmentMinOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateUnsortedSegmentMinOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateUnsortedSegmentMinOptions(flatbuffers::FlatBufferBuilder &_fbb, const UnsortedSegmentMinOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const UnsortedSegmentMinOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateUnsortedSegmentMinOptions( + _fbb); +} + +inline SignOptionsT *SignOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new SignOptionsT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void SignOptions::UnPackTo(SignOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset SignOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SignOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateSignOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateSignOptions(flatbuffers::FlatBufferBuilder &_fbb, const SignOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SignOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateSignOptions( + _fbb); +} + +inline OperatorCodeT *OperatorCode::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new OperatorCodeT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void OperatorCode::UnPackTo(OperatorCodeT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { 
auto _e = deprecated_builtin_code(); _o->deprecated_builtin_code = _e; } + { auto _e = custom_code(); if (_e) _o->custom_code = _e->str(); } + { auto _e = version(); _o->version = _e; } + { auto _e = builtin_code(); _o->builtin_code = _e; } +} + +inline flatbuffers::Offset OperatorCode::Pack(flatbuffers::FlatBufferBuilder &_fbb, const OperatorCodeT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateOperatorCode(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateOperatorCode(flatbuffers::FlatBufferBuilder &_fbb, const OperatorCodeT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const OperatorCodeT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _deprecated_builtin_code = _o->deprecated_builtin_code; + auto _custom_code = _o->custom_code.empty() ? 0 : _fbb.CreateString(_o->custom_code); + auto _version = _o->version; + auto _builtin_code = _o->builtin_code; + return tflite::CreateOperatorCode( + _fbb, + _deprecated_builtin_code, + _custom_code, + _version, + _builtin_code); +} + +inline OperatorT *Operator::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new OperatorT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void Operator::UnPackTo(OperatorT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = opcode_index(); _o->opcode_index = _e; } + { auto _e = inputs(); if (_e) { _o->inputs.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->inputs[_i] = _e->Get(_i); } } } + { auto _e = outputs(); if (_e) { _o->outputs.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->outputs[_i] = _e->Get(_i); } } } + { auto _e = builtin_options_type(); _o->builtin_options.type = _e; } + { auto _e = builtin_options(); if (_e) _o->builtin_options.value = tflite::BuiltinOptionsUnion::UnPack(_e, builtin_options_type(), _resolver); } + { auto _e = custom_options(); if (_e) { _o->custom_options.resize(_e->size()); std::copy(_e->begin(), _e->end(), _o->custom_options.begin()); } } + { auto _e = custom_options_format(); _o->custom_options_format = _e; } + { auto _e = mutating_variable_inputs(); if (_e) { _o->mutating_variable_inputs.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->mutating_variable_inputs[_i] = _e->Get(_i) != 0; } } } + { auto _e = intermediates(); if (_e) { _o->intermediates.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->intermediates[_i] = _e->Get(_i); } } } +} + +inline flatbuffers::Offset Operator::Pack(flatbuffers::FlatBufferBuilder &_fbb, const OperatorT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateOperator(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateOperator(flatbuffers::FlatBufferBuilder &_fbb, const OperatorT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const OperatorT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _opcode_index = _o->opcode_index; + auto _inputs = _o->inputs.size() ? _fbb.CreateVector(_o->inputs) : 0; + auto _outputs = _o->outputs.size() ? 
_fbb.CreateVector(_o->outputs) : 0; + auto _builtin_options_type = _o->builtin_options.type; + auto _builtin_options = _o->builtin_options.Pack(_fbb); + auto _custom_options = _o->custom_options.size() ? _fbb.CreateVector(_o->custom_options) : 0; + auto _custom_options_format = _o->custom_options_format; + auto _mutating_variable_inputs = _o->mutating_variable_inputs.size() ? _fbb.CreateVector(_o->mutating_variable_inputs) : 0; + auto _intermediates = _o->intermediates.size() ? _fbb.CreateVector(_o->intermediates) : 0; + return tflite::CreateOperator( + _fbb, + _opcode_index, + _inputs, + _outputs, + _builtin_options_type, + _builtin_options, + _custom_options, + _custom_options_format, + _mutating_variable_inputs, + _intermediates); +} + +inline SubGraphT::SubGraphT(const SubGraphT &o) + : inputs(o.inputs), + outputs(o.outputs), + name(o.name) { + tensors.reserve(o.tensors.size()); + for (const auto &tensors_ : o.tensors) { tensors.emplace_back((tensors_) ? new tflite::TensorT(*tensors_) : nullptr); } + operators.reserve(o.operators.size()); + for (const auto &operators_ : o.operators) { operators.emplace_back((operators_) ? new tflite::OperatorT(*operators_) : nullptr); } +} + +inline SubGraphT &SubGraphT::operator=(SubGraphT o) FLATBUFFERS_NOEXCEPT { + std::swap(tensors, o.tensors); + std::swap(inputs, o.inputs); + std::swap(outputs, o.outputs); + std::swap(operators, o.operators); + std::swap(name, o.name); + return *this; +} + +inline SubGraphT *SubGraph::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new SubGraphT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void SubGraph::UnPackTo(SubGraphT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = tensors(); if (_e) { _o->tensors.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { if(_o->tensors[_i]) { _e->Get(_i)->UnPackTo(_o->tensors[_i].get(), _resolver); } else { _o->tensors[_i] = std::unique_ptr(_e->Get(_i)->UnPack(_resolver)); }; } } } + { auto _e = inputs(); if (_e) { _o->inputs.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->inputs[_i] = _e->Get(_i); } } } + { auto _e = outputs(); if (_e) { _o->outputs.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->outputs[_i] = _e->Get(_i); } } } + { auto _e = operators(); if (_e) { _o->operators.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { if(_o->operators[_i]) { _e->Get(_i)->UnPackTo(_o->operators[_i].get(), _resolver); } else { _o->operators[_i] = std::unique_ptr(_e->Get(_i)->UnPack(_resolver)); }; } } } + { auto _e = name(); if (_e) _o->name = _e->str(); } +} + +inline flatbuffers::Offset SubGraph::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SubGraphT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateSubGraph(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateSubGraph(flatbuffers::FlatBufferBuilder &_fbb, const SubGraphT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SubGraphT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _tensors = _o->tensors.size() ? 
_fbb.CreateVector> (_o->tensors.size(), [](size_t i, _VectorArgs *__va) { return CreateTensor(*__va->__fbb, __va->__o->tensors[i].get(), __va->__rehasher); }, &_va ) : 0; + auto _inputs = _o->inputs.size() ? _fbb.CreateVector(_o->inputs) : 0; + auto _outputs = _o->outputs.size() ? _fbb.CreateVector(_o->outputs) : 0; + auto _operators = _o->operators.size() ? _fbb.CreateVector> (_o->operators.size(), [](size_t i, _VectorArgs *__va) { return CreateOperator(*__va->__fbb, __va->__o->operators[i].get(), __va->__rehasher); }, &_va ) : 0; + auto _name = _o->name.empty() ? 0 : _fbb.CreateString(_o->name); + return tflite::CreateSubGraph( + _fbb, + _tensors, + _inputs, + _outputs, + _operators, + _name); +} + +inline BufferT *Buffer::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new BufferT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void Buffer::UnPackTo(BufferT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = data(); if (_e) { _o->data.resize(_e->size()); std::copy(_e->begin(), _e->end(), _o->data.begin()); } } +} + +inline flatbuffers::Offset Buffer::Pack(flatbuffers::FlatBufferBuilder &_fbb, const BufferT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateBuffer(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateBuffer(flatbuffers::FlatBufferBuilder &_fbb, const BufferT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const BufferT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + _fbb.ForceVectorAlignment(_o->data.size(), sizeof(uint8_t), 16); + auto _data = _o->data.size() ? _fbb.CreateVector(_o->data) : 0; + return tflite::CreateBuffer( + _fbb, + _data); +} + +inline MetadataT *Metadata::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new MetadataT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void Metadata::UnPackTo(MetadataT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = name(); if (_e) _o->name = _e->str(); } + { auto _e = buffer(); _o->buffer = _e; } +} + +inline flatbuffers::Offset Metadata::Pack(flatbuffers::FlatBufferBuilder &_fbb, const MetadataT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateMetadata(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateMetadata(flatbuffers::FlatBufferBuilder &_fbb, const MetadataT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const MetadataT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _name = _o->name.empty() ? 
0 : _fbb.CreateString(_o->name); + auto _buffer = _o->buffer; + return tflite::CreateMetadata( + _fbb, + _name, + _buffer); +} + +inline TensorMapT *TensorMap::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new TensorMapT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void TensorMap::UnPackTo(TensorMapT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = name(); if (_e) _o->name = _e->str(); } + { auto _e = tensor_index(); _o->tensor_index = _e; } +} + +inline flatbuffers::Offset TensorMap::Pack(flatbuffers::FlatBufferBuilder &_fbb, const TensorMapT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateTensorMap(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateTensorMap(flatbuffers::FlatBufferBuilder &_fbb, const TensorMapT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const TensorMapT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _name = _o->name.empty() ? 0 : _fbb.CreateString(_o->name); + auto _tensor_index = _o->tensor_index; + return tflite::CreateTensorMap( + _fbb, + _name, + _tensor_index); +} + +inline SignatureDefT::SignatureDefT(const SignatureDefT &o) + : signature_key(o.signature_key), + subgraph_index(o.subgraph_index) { + inputs.reserve(o.inputs.size()); + for (const auto &inputs_ : o.inputs) { inputs.emplace_back((inputs_) ? new tflite::TensorMapT(*inputs_) : nullptr); } + outputs.reserve(o.outputs.size()); + for (const auto &outputs_ : o.outputs) { outputs.emplace_back((outputs_) ? new tflite::TensorMapT(*outputs_) : nullptr); } +} + +inline SignatureDefT &SignatureDefT::operator=(SignatureDefT o) FLATBUFFERS_NOEXCEPT { + std::swap(inputs, o.inputs); + std::swap(outputs, o.outputs); + std::swap(signature_key, o.signature_key); + std::swap(subgraph_index, o.subgraph_index); + return *this; +} + +inline SignatureDefT *SignatureDef::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new SignatureDefT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void SignatureDef::UnPackTo(SignatureDefT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = inputs(); if (_e) { _o->inputs.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { if(_o->inputs[_i]) { _e->Get(_i)->UnPackTo(_o->inputs[_i].get(), _resolver); } else { _o->inputs[_i] = std::unique_ptr(_e->Get(_i)->UnPack(_resolver)); }; } } } + { auto _e = outputs(); if (_e) { _o->outputs.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { if(_o->outputs[_i]) { _e->Get(_i)->UnPackTo(_o->outputs[_i].get(), _resolver); } else { _o->outputs[_i] = std::unique_ptr(_e->Get(_i)->UnPack(_resolver)); }; } } } + { auto _e = signature_key(); if (_e) _o->signature_key = _e->str(); } + { auto _e = subgraph_index(); _o->subgraph_index = _e; } +} + +inline flatbuffers::Offset SignatureDef::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SignatureDefT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateSignatureDef(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateSignatureDef(flatbuffers::FlatBufferBuilder &_fbb, const SignatureDefT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; 
+ struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SignatureDefT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _inputs = _o->inputs.size() ? _fbb.CreateVector> (_o->inputs.size(), [](size_t i, _VectorArgs *__va) { return CreateTensorMap(*__va->__fbb, __va->__o->inputs[i].get(), __va->__rehasher); }, &_va ) : 0; + auto _outputs = _o->outputs.size() ? _fbb.CreateVector> (_o->outputs.size(), [](size_t i, _VectorArgs *__va) { return CreateTensorMap(*__va->__fbb, __va->__o->outputs[i].get(), __va->__rehasher); }, &_va ) : 0; + auto _signature_key = _o->signature_key.empty() ? 0 : _fbb.CreateString(_o->signature_key); + auto _subgraph_index = _o->subgraph_index; + return tflite::CreateSignatureDef( + _fbb, + _inputs, + _outputs, + _signature_key, + _subgraph_index); +} + +inline ModelT::ModelT(const ModelT &o) + : version(o.version), + description(o.description), + metadata_buffer(o.metadata_buffer) { + operator_codes.reserve(o.operator_codes.size()); + for (const auto &operator_codes_ : o.operator_codes) { operator_codes.emplace_back((operator_codes_) ? new tflite::OperatorCodeT(*operator_codes_) : nullptr); } + subgraphs.reserve(o.subgraphs.size()); + for (const auto &subgraphs_ : o.subgraphs) { subgraphs.emplace_back((subgraphs_) ? new tflite::SubGraphT(*subgraphs_) : nullptr); } + buffers.reserve(o.buffers.size()); + for (const auto &buffers_ : o.buffers) { buffers.emplace_back((buffers_) ? new tflite::BufferT(*buffers_) : nullptr); } + metadata.reserve(o.metadata.size()); + for (const auto &metadata_ : o.metadata) { metadata.emplace_back((metadata_) ? new tflite::MetadataT(*metadata_) : nullptr); } + signature_defs.reserve(o.signature_defs.size()); + for (const auto &signature_defs_ : o.signature_defs) { signature_defs.emplace_back((signature_defs_) ? 
new tflite::SignatureDefT(*signature_defs_) : nullptr); } +} + +inline ModelT &ModelT::operator=(ModelT o) FLATBUFFERS_NOEXCEPT { + std::swap(version, o.version); + std::swap(operator_codes, o.operator_codes); + std::swap(subgraphs, o.subgraphs); + std::swap(description, o.description); + std::swap(buffers, o.buffers); + std::swap(metadata_buffer, o.metadata_buffer); + std::swap(metadata, o.metadata); + std::swap(signature_defs, o.signature_defs); + return *this; +} + +inline ModelT *Model::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = std::unique_ptr(new ModelT()); + UnPackTo(_o.get(), _resolver); + return _o.release(); +} + +inline void Model::UnPackTo(ModelT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = version(); _o->version = _e; } + { auto _e = operator_codes(); if (_e) { _o->operator_codes.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { if(_o->operator_codes[_i]) { _e->Get(_i)->UnPackTo(_o->operator_codes[_i].get(), _resolver); } else { _o->operator_codes[_i] = std::unique_ptr(_e->Get(_i)->UnPack(_resolver)); }; } } } + { auto _e = subgraphs(); if (_e) { _o->subgraphs.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { if(_o->subgraphs[_i]) { _e->Get(_i)->UnPackTo(_o->subgraphs[_i].get(), _resolver); } else { _o->subgraphs[_i] = std::unique_ptr(_e->Get(_i)->UnPack(_resolver)); }; } } } + { auto _e = description(); if (_e) _o->description = _e->str(); } + { auto _e = buffers(); if (_e) { _o->buffers.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { if(_o->buffers[_i]) { _e->Get(_i)->UnPackTo(_o->buffers[_i].get(), _resolver); } else { _o->buffers[_i] = std::unique_ptr(_e->Get(_i)->UnPack(_resolver)); }; } } } + { auto _e = metadata_buffer(); if (_e) { _o->metadata_buffer.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->metadata_buffer[_i] = _e->Get(_i); } } } + { auto _e = metadata(); if (_e) { _o->metadata.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { if(_o->metadata[_i]) { _e->Get(_i)->UnPackTo(_o->metadata[_i].get(), _resolver); } else { _o->metadata[_i] = std::unique_ptr(_e->Get(_i)->UnPack(_resolver)); }; } } } + { auto _e = signature_defs(); if (_e) { _o->signature_defs.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { if(_o->signature_defs[_i]) { _e->Get(_i)->UnPackTo(_o->signature_defs[_i].get(), _resolver); } else { _o->signature_defs[_i] = std::unique_ptr(_e->Get(_i)->UnPack(_resolver)); }; } } } +} + +inline flatbuffers::Offset Model::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ModelT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateModel(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateModel(flatbuffers::FlatBufferBuilder &_fbb, const ModelT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ModelT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _version = _o->version; + auto _operator_codes = _o->operator_codes.size() ? _fbb.CreateVector> (_o->operator_codes.size(), [](size_t i, _VectorArgs *__va) { return CreateOperatorCode(*__va->__fbb, __va->__o->operator_codes[i].get(), __va->__rehasher); }, &_va ) : 0; + auto _subgraphs = _o->subgraphs.size() ? 
_fbb.CreateVector> (_o->subgraphs.size(), [](size_t i, _VectorArgs *__va) { return CreateSubGraph(*__va->__fbb, __va->__o->subgraphs[i].get(), __va->__rehasher); }, &_va ) : 0; + auto _description = _o->description.empty() ? 0 : _fbb.CreateString(_o->description); + auto _buffers = _o->buffers.size() ? _fbb.CreateVector> (_o->buffers.size(), [](size_t i, _VectorArgs *__va) { return CreateBuffer(*__va->__fbb, __va->__o->buffers[i].get(), __va->__rehasher); }, &_va ) : 0; + auto _metadata_buffer = _o->metadata_buffer.size() ? _fbb.CreateVector(_o->metadata_buffer) : 0; + auto _metadata = _o->metadata.size() ? _fbb.CreateVector> (_o->metadata.size(), [](size_t i, _VectorArgs *__va) { return CreateMetadata(*__va->__fbb, __va->__o->metadata[i].get(), __va->__rehasher); }, &_va ) : 0; + auto _signature_defs = _o->signature_defs.size() ? _fbb.CreateVector> (_o->signature_defs.size(), [](size_t i, _VectorArgs *__va) { return CreateSignatureDef(*__va->__fbb, __va->__o->signature_defs[i].get(), __va->__rehasher); }, &_va ) : 0; + return tflite::CreateModel( + _fbb, + _version, + _operator_codes, + _subgraphs, + _description, + _buffers, + _metadata_buffer, + _metadata, + _signature_defs); +} + +inline bool VerifyQuantizationDetails(flatbuffers::Verifier &verifier, const void *obj, QuantizationDetails type) { + switch (type) { + case QuantizationDetails_NONE: { + return true; + } + case QuantizationDetails_CustomQuantization: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + default: return true; + } +} + +inline bool VerifyQuantizationDetailsVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector> *values, const flatbuffers::Vector *types) { + if (!values || !types) return !values && !types; + if (values->size() != types->size()) return false; + for (flatbuffers::uoffset_t i = 0; i < values->size(); ++i) { + if (!VerifyQuantizationDetails( + verifier, values->Get(i), types->GetEnum(i))) { + return false; + } + } + return true; +} + +inline void *QuantizationDetailsUnion::UnPack(const void *obj, QuantizationDetails type, const flatbuffers::resolver_function_t *resolver) { + (void)resolver; + switch (type) { + case QuantizationDetails_CustomQuantization: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + default: return nullptr; + } +} + +inline flatbuffers::Offset QuantizationDetailsUnion::Pack(flatbuffers::FlatBufferBuilder &_fbb, const flatbuffers::rehasher_function_t *_rehasher) const { + (void)_rehasher; + switch (type) { + case QuantizationDetails_CustomQuantization: { + auto ptr = reinterpret_cast(value); + return CreateCustomQuantization(_fbb, ptr, _rehasher).Union(); + } + default: return 0; + } +} + +inline QuantizationDetailsUnion::QuantizationDetailsUnion(const QuantizationDetailsUnion &u) : type(u.type), value(nullptr) { + switch (type) { + case QuantizationDetails_CustomQuantization: { + value = new tflite::CustomQuantizationT(*reinterpret_cast(u.value)); + break; + } + default: + break; + } +} + +inline void QuantizationDetailsUnion::Reset() { + switch (type) { + case QuantizationDetails_CustomQuantization: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + default: break; + } + value = nullptr; + type = QuantizationDetails_NONE; +} + +inline bool VerifySparseIndexVector(flatbuffers::Verifier &verifier, const void *obj, SparseIndexVector type) { + switch (type) { + case SparseIndexVector_NONE: { + return true; + } + case SparseIndexVector_Int32Vector: { + auto ptr = reinterpret_cast(obj); + return 
+    }
+    case SparseIndexVector_Uint16Vector: {
+      auto ptr = reinterpret_cast<const tflite::Uint16Vector *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case SparseIndexVector_Uint8Vector: {
+      auto ptr = reinterpret_cast<const tflite::Uint8Vector *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    default: return true;
+  }
+}
+
+inline bool VerifySparseIndexVectorVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector<flatbuffers::Offset<void>> *values, const flatbuffers::Vector<uint8_t> *types) {
+  if (!values || !types) return !values && !types;
+  if (values->size() != types->size()) return false;
+  for (flatbuffers::uoffset_t i = 0; i < values->size(); ++i) {
+    if (!VerifySparseIndexVector(
+        verifier, values->Get(i), types->GetEnum<SparseIndexVector>(i))) {
+      return false;
+    }
+  }
+  return true;
+}
+
+inline void *SparseIndexVectorUnion::UnPack(const void *obj, SparseIndexVector type, const flatbuffers::resolver_function_t *resolver) {
+  (void)resolver;
+  switch (type) {
+    case SparseIndexVector_Int32Vector: {
+      auto ptr = reinterpret_cast<const tflite::Int32Vector *>(obj);
+      return ptr->UnPack(resolver);
+    }
+    case SparseIndexVector_Uint16Vector: {
+      auto ptr = reinterpret_cast<const tflite::Uint16Vector *>(obj);
+      return ptr->UnPack(resolver);
+    }
+    case SparseIndexVector_Uint8Vector: {
+      auto ptr = reinterpret_cast<const tflite::Uint8Vector *>(obj);
+      return ptr->UnPack(resolver);
+    }
+    default: return nullptr;
+  }
+}
+
+inline flatbuffers::Offset<void> SparseIndexVectorUnion::Pack(flatbuffers::FlatBufferBuilder &_fbb, const flatbuffers::rehasher_function_t *_rehasher) const {
+  (void)_rehasher;
+  switch (type) {
+    case SparseIndexVector_Int32Vector: {
+      auto ptr = reinterpret_cast<const tflite::Int32VectorT *>(value);
+      return CreateInt32Vector(_fbb, ptr, _rehasher).Union();
+    }
+    case SparseIndexVector_Uint16Vector: {
+      auto ptr = reinterpret_cast<const tflite::Uint16VectorT *>(value);
+      return CreateUint16Vector(_fbb, ptr, _rehasher).Union();
+    }
+    case SparseIndexVector_Uint8Vector: {
+      auto ptr = reinterpret_cast<const tflite::Uint8VectorT *>(value);
+      return CreateUint8Vector(_fbb, ptr, _rehasher).Union();
+    }
+    default: return 0;
+  }
+}
+
+inline SparseIndexVectorUnion::SparseIndexVectorUnion(const SparseIndexVectorUnion &u) : type(u.type), value(nullptr) {
+  switch (type) {
+    case SparseIndexVector_Int32Vector: {
+      value = new tflite::Int32VectorT(*reinterpret_cast<tflite::Int32VectorT *>(u.value));
+      break;
+    }
+    case SparseIndexVector_Uint16Vector: {
+      value = new tflite::Uint16VectorT(*reinterpret_cast<tflite::Uint16VectorT *>(u.value));
+      break;
+    }
+    case SparseIndexVector_Uint8Vector: {
+      value = new tflite::Uint8VectorT(*reinterpret_cast<tflite::Uint8VectorT *>(u.value));
+      break;
+    }
+    default:
+      break;
+  }
+}
+
+inline void SparseIndexVectorUnion::Reset() {
+  switch (type) {
+    case SparseIndexVector_Int32Vector: {
+      auto ptr = reinterpret_cast<tflite::Int32VectorT *>(value);
+      delete ptr;
+      break;
+    }
+    case SparseIndexVector_Uint16Vector: {
+      auto ptr = reinterpret_cast<tflite::Uint16VectorT *>(value);
+      delete ptr;
+      break;
+    }
+    case SparseIndexVector_Uint8Vector: {
+      auto ptr = reinterpret_cast<tflite::Uint8VectorT *>(value);
+      delete ptr;
+      break;
+    }
+    default: break;
+  }
+  value = nullptr;
+  type = SparseIndexVector_NONE;
+}
+
+inline bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj, BuiltinOptions type) {
+  switch (type) {
+    case BuiltinOptions_NONE: {
+      return true;
+    }
+    case BuiltinOptions_Conv2DOptions: {
+      auto ptr = reinterpret_cast<const tflite::Conv2DOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_DepthwiseConv2DOptions: {
+      auto ptr = reinterpret_cast<const tflite::DepthwiseConv2DOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_ConcatEmbeddingsOptions: {
+      auto ptr = reinterpret_cast<const tflite::ConcatEmbeddingsOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_LSHProjectionOptions: {
+      auto ptr = reinterpret_cast<const tflite::LSHProjectionOptions *>(obj);
+      return
verifier.VerifyTable(ptr); + } + case BuiltinOptions_Pool2DOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SVDFOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_RNNOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_FullyConnectedOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SoftmaxOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ConcatenationOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_AddOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_L2NormOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_LocalResponseNormalizationOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_LSTMOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ResizeBilinearOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_CallOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ReshapeOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SkipGramOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SpaceToDepthOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_EmbeddingLookupSparseOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_MulOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_PadOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_GatherOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_BatchToSpaceNDOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SpaceToBatchNDOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_TransposeOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ReducerOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SubOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_DivOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SqueezeOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SequenceRNNOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_StridedSliceOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ExpOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_TopKV2Options: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } 
+ case BuiltinOptions_SplitOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_LogSoftmaxOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_CastOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_DequantizeOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_MaximumMinimumOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ArgMaxOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_LessOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_NegOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_PadV2Options: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_GreaterOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_GreaterEqualOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_LessEqualOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SelectOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SliceOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_TransposeConvOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SparseToDenseOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_TileOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ExpandDimsOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_EqualOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_NotEqualOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ShapeOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_PowOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ArgMinOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_FakeQuantOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_PackOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_LogicalOrOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_OneHotOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_LogicalAndOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_LogicalNotOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_UnpackOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_FloorDivOptions: { + auto ptr = 
reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SquareOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ZerosLikeOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_FillOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_BidirectionalSequenceLSTMOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_BidirectionalSequenceRNNOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_UnidirectionalSequenceLSTMOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_FloorModOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_RangeOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ResizeNearestNeighborOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_LeakyReluOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SquaredDifferenceOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_MirrorPadOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_AbsOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SplitVOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_UniqueOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ReverseV2Options: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_AddNOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_GatherNdOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_CosOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_WhereOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_RankOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ReverseSequenceOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_MatrixDiagOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_QuantizeOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_MatrixSetDiagOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_HardSwishOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_IfOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_WhileOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_DepthToSpaceOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case 
BuiltinOptions_NonMaxSuppressionV4Options: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_NonMaxSuppressionV5Options: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ScatterNdOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SelectV2Options: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_DensifyOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SegmentSumOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_BatchMatMulOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_CumsumOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_CallOnceOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_BroadcastToOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_Rfft2dOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_Conv3DOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_HashtableOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_HashtableFindOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_HashtableImportOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_HashtableSizeOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_VarHandleOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ReadVariableOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_AssignVariableOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_RandomOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_BucketizeOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_GeluOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_DynamicUpdateSliceOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_UnsortedSegmentProdOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_UnsortedSegmentMaxOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_UnsortedSegmentMinOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_UnsortedSegmentSumOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ATan2Options: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SignOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + default: return true; + } +} + +inline bool 
VerifyBuiltinOptionsVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector> *values, const flatbuffers::Vector *types) { + if (!values || !types) return !values && !types; + if (values->size() != types->size()) return false; + for (flatbuffers::uoffset_t i = 0; i < values->size(); ++i) { + if (!VerifyBuiltinOptions( + verifier, values->Get(i), types->GetEnum(i))) { + return false; + } + } + return true; +} + +inline void *BuiltinOptionsUnion::UnPack(const void *obj, BuiltinOptions type, const flatbuffers::resolver_function_t *resolver) { + (void)resolver; + switch (type) { + case BuiltinOptions_Conv2DOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_DepthwiseConv2DOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_ConcatEmbeddingsOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_LSHProjectionOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_Pool2DOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_SVDFOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_RNNOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_FullyConnectedOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_SoftmaxOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_ConcatenationOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_AddOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_L2NormOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_LocalResponseNormalizationOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_LSTMOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_ResizeBilinearOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_CallOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_ReshapeOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_SkipGramOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_SpaceToDepthOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_EmbeddingLookupSparseOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_MulOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_PadOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_GatherOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_BatchToSpaceNDOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_SpaceToBatchNDOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_TransposeOptions: { + auto ptr = reinterpret_cast(obj); + return 
ptr->UnPack(resolver); + } + case BuiltinOptions_ReducerOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_SubOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_DivOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_SqueezeOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_SequenceRNNOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_StridedSliceOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_ExpOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_TopKV2Options: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_SplitOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_LogSoftmaxOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_CastOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_DequantizeOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_MaximumMinimumOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_ArgMaxOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_LessOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_NegOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_PadV2Options: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_GreaterOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_GreaterEqualOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_LessEqualOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_SelectOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_SliceOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_TransposeConvOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_SparseToDenseOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_TileOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_ExpandDimsOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_EqualOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_NotEqualOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_ShapeOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_PowOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_ArgMinOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_FakeQuantOptions: { + auto ptr 
= reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_PackOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_LogicalOrOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_OneHotOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_LogicalAndOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_LogicalNotOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_UnpackOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_FloorDivOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_SquareOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_ZerosLikeOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_FillOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_BidirectionalSequenceLSTMOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_BidirectionalSequenceRNNOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_UnidirectionalSequenceLSTMOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_FloorModOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_RangeOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_ResizeNearestNeighborOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_LeakyReluOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_SquaredDifferenceOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_MirrorPadOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_AbsOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_SplitVOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_UniqueOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_ReverseV2Options: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_AddNOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_GatherNdOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_CosOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_WhereOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_RankOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_ReverseSequenceOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_MatrixDiagOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_QuantizeOptions: { + auto ptr = 
reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_MatrixSetDiagOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_HardSwishOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_IfOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_WhileOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_DepthToSpaceOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_NonMaxSuppressionV4Options: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_NonMaxSuppressionV5Options: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_ScatterNdOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_SelectV2Options: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_DensifyOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_SegmentSumOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_BatchMatMulOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_CumsumOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_CallOnceOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_BroadcastToOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_Rfft2dOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_Conv3DOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_HashtableOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_HashtableFindOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_HashtableImportOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_HashtableSizeOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_VarHandleOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_ReadVariableOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_AssignVariableOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_RandomOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_BucketizeOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_GeluOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_DynamicUpdateSliceOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_UnsortedSegmentProdOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_UnsortedSegmentMaxOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case 
BuiltinOptions_UnsortedSegmentMinOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_UnsortedSegmentSumOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_ATan2Options: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_SignOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + default: return nullptr; + } +} + +inline flatbuffers::Offset BuiltinOptionsUnion::Pack(flatbuffers::FlatBufferBuilder &_fbb, const flatbuffers::rehasher_function_t *_rehasher) const { + (void)_rehasher; + switch (type) { + case BuiltinOptions_Conv2DOptions: { + auto ptr = reinterpret_cast(value); + return CreateConv2DOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_DepthwiseConv2DOptions: { + auto ptr = reinterpret_cast(value); + return CreateDepthwiseConv2DOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_ConcatEmbeddingsOptions: { + auto ptr = reinterpret_cast(value); + return CreateConcatEmbeddingsOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_LSHProjectionOptions: { + auto ptr = reinterpret_cast(value); + return CreateLSHProjectionOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_Pool2DOptions: { + auto ptr = reinterpret_cast(value); + return CreatePool2DOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_SVDFOptions: { + auto ptr = reinterpret_cast(value); + return CreateSVDFOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_RNNOptions: { + auto ptr = reinterpret_cast(value); + return CreateRNNOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_FullyConnectedOptions: { + auto ptr = reinterpret_cast(value); + return CreateFullyConnectedOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_SoftmaxOptions: { + auto ptr = reinterpret_cast(value); + return CreateSoftmaxOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_ConcatenationOptions: { + auto ptr = reinterpret_cast(value); + return CreateConcatenationOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_AddOptions: { + auto ptr = reinterpret_cast(value); + return CreateAddOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_L2NormOptions: { + auto ptr = reinterpret_cast(value); + return CreateL2NormOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_LocalResponseNormalizationOptions: { + auto ptr = reinterpret_cast(value); + return CreateLocalResponseNormalizationOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_LSTMOptions: { + auto ptr = reinterpret_cast(value); + return CreateLSTMOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_ResizeBilinearOptions: { + auto ptr = reinterpret_cast(value); + return CreateResizeBilinearOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_CallOptions: { + auto ptr = reinterpret_cast(value); + return CreateCallOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_ReshapeOptions: { + auto ptr = reinterpret_cast(value); + return CreateReshapeOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_SkipGramOptions: { + auto ptr = reinterpret_cast(value); + return CreateSkipGramOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_SpaceToDepthOptions: { + auto ptr = reinterpret_cast(value); + return CreateSpaceToDepthOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_EmbeddingLookupSparseOptions: { + 
auto ptr = reinterpret_cast(value); + return CreateEmbeddingLookupSparseOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_MulOptions: { + auto ptr = reinterpret_cast(value); + return CreateMulOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_PadOptions: { + auto ptr = reinterpret_cast(value); + return CreatePadOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_GatherOptions: { + auto ptr = reinterpret_cast(value); + return CreateGatherOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_BatchToSpaceNDOptions: { + auto ptr = reinterpret_cast(value); + return CreateBatchToSpaceNDOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_SpaceToBatchNDOptions: { + auto ptr = reinterpret_cast(value); + return CreateSpaceToBatchNDOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_TransposeOptions: { + auto ptr = reinterpret_cast(value); + return CreateTransposeOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_ReducerOptions: { + auto ptr = reinterpret_cast(value); + return CreateReducerOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_SubOptions: { + auto ptr = reinterpret_cast(value); + return CreateSubOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_DivOptions: { + auto ptr = reinterpret_cast(value); + return CreateDivOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_SqueezeOptions: { + auto ptr = reinterpret_cast(value); + return CreateSqueezeOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_SequenceRNNOptions: { + auto ptr = reinterpret_cast(value); + return CreateSequenceRNNOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_StridedSliceOptions: { + auto ptr = reinterpret_cast(value); + return CreateStridedSliceOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_ExpOptions: { + auto ptr = reinterpret_cast(value); + return CreateExpOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_TopKV2Options: { + auto ptr = reinterpret_cast(value); + return CreateTopKV2Options(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_SplitOptions: { + auto ptr = reinterpret_cast(value); + return CreateSplitOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_LogSoftmaxOptions: { + auto ptr = reinterpret_cast(value); + return CreateLogSoftmaxOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_CastOptions: { + auto ptr = reinterpret_cast(value); + return CreateCastOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_DequantizeOptions: { + auto ptr = reinterpret_cast(value); + return CreateDequantizeOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_MaximumMinimumOptions: { + auto ptr = reinterpret_cast(value); + return CreateMaximumMinimumOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_ArgMaxOptions: { + auto ptr = reinterpret_cast(value); + return CreateArgMaxOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_LessOptions: { + auto ptr = reinterpret_cast(value); + return CreateLessOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_NegOptions: { + auto ptr = reinterpret_cast(value); + return CreateNegOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_PadV2Options: { + auto ptr = reinterpret_cast(value); + return CreatePadV2Options(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_GreaterOptions: { + auto ptr = reinterpret_cast(value); + return CreateGreaterOptions(_fbb, ptr, _rehasher).Union(); + } + 
case BuiltinOptions_GreaterEqualOptions: { + auto ptr = reinterpret_cast(value); + return CreateGreaterEqualOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_LessEqualOptions: { + auto ptr = reinterpret_cast(value); + return CreateLessEqualOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_SelectOptions: { + auto ptr = reinterpret_cast(value); + return CreateSelectOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_SliceOptions: { + auto ptr = reinterpret_cast(value); + return CreateSliceOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_TransposeConvOptions: { + auto ptr = reinterpret_cast(value); + return CreateTransposeConvOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_SparseToDenseOptions: { + auto ptr = reinterpret_cast(value); + return CreateSparseToDenseOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_TileOptions: { + auto ptr = reinterpret_cast(value); + return CreateTileOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_ExpandDimsOptions: { + auto ptr = reinterpret_cast(value); + return CreateExpandDimsOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_EqualOptions: { + auto ptr = reinterpret_cast(value); + return CreateEqualOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_NotEqualOptions: { + auto ptr = reinterpret_cast(value); + return CreateNotEqualOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_ShapeOptions: { + auto ptr = reinterpret_cast(value); + return CreateShapeOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_PowOptions: { + auto ptr = reinterpret_cast(value); + return CreatePowOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_ArgMinOptions: { + auto ptr = reinterpret_cast(value); + return CreateArgMinOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_FakeQuantOptions: { + auto ptr = reinterpret_cast(value); + return CreateFakeQuantOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_PackOptions: { + auto ptr = reinterpret_cast(value); + return CreatePackOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_LogicalOrOptions: { + auto ptr = reinterpret_cast(value); + return CreateLogicalOrOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_OneHotOptions: { + auto ptr = reinterpret_cast(value); + return CreateOneHotOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_LogicalAndOptions: { + auto ptr = reinterpret_cast(value); + return CreateLogicalAndOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_LogicalNotOptions: { + auto ptr = reinterpret_cast(value); + return CreateLogicalNotOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_UnpackOptions: { + auto ptr = reinterpret_cast(value); + return CreateUnpackOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_FloorDivOptions: { + auto ptr = reinterpret_cast(value); + return CreateFloorDivOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_SquareOptions: { + auto ptr = reinterpret_cast(value); + return CreateSquareOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_ZerosLikeOptions: { + auto ptr = reinterpret_cast(value); + return CreateZerosLikeOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_FillOptions: { + auto ptr = reinterpret_cast(value); + return CreateFillOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_BidirectionalSequenceLSTMOptions: { + auto ptr = reinterpret_cast(value); + 
return CreateBidirectionalSequenceLSTMOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_BidirectionalSequenceRNNOptions: { + auto ptr = reinterpret_cast(value); + return CreateBidirectionalSequenceRNNOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_UnidirectionalSequenceLSTMOptions: { + auto ptr = reinterpret_cast(value); + return CreateUnidirectionalSequenceLSTMOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_FloorModOptions: { + auto ptr = reinterpret_cast(value); + return CreateFloorModOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_RangeOptions: { + auto ptr = reinterpret_cast(value); + return CreateRangeOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_ResizeNearestNeighborOptions: { + auto ptr = reinterpret_cast(value); + return CreateResizeNearestNeighborOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_LeakyReluOptions: { + auto ptr = reinterpret_cast(value); + return CreateLeakyReluOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_SquaredDifferenceOptions: { + auto ptr = reinterpret_cast(value); + return CreateSquaredDifferenceOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_MirrorPadOptions: { + auto ptr = reinterpret_cast(value); + return CreateMirrorPadOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_AbsOptions: { + auto ptr = reinterpret_cast(value); + return CreateAbsOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_SplitVOptions: { + auto ptr = reinterpret_cast(value); + return CreateSplitVOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_UniqueOptions: { + auto ptr = reinterpret_cast(value); + return CreateUniqueOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_ReverseV2Options: { + auto ptr = reinterpret_cast(value); + return CreateReverseV2Options(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_AddNOptions: { + auto ptr = reinterpret_cast(value); + return CreateAddNOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_GatherNdOptions: { + auto ptr = reinterpret_cast(value); + return CreateGatherNdOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_CosOptions: { + auto ptr = reinterpret_cast(value); + return CreateCosOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_WhereOptions: { + auto ptr = reinterpret_cast(value); + return CreateWhereOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_RankOptions: { + auto ptr = reinterpret_cast(value); + return CreateRankOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_ReverseSequenceOptions: { + auto ptr = reinterpret_cast(value); + return CreateReverseSequenceOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_MatrixDiagOptions: { + auto ptr = reinterpret_cast(value); + return CreateMatrixDiagOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_QuantizeOptions: { + auto ptr = reinterpret_cast(value); + return CreateQuantizeOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_MatrixSetDiagOptions: { + auto ptr = reinterpret_cast(value); + return CreateMatrixSetDiagOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_HardSwishOptions: { + auto ptr = reinterpret_cast(value); + return CreateHardSwishOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_IfOptions: { + auto ptr = reinterpret_cast(value); + return CreateIfOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_WhileOptions: { + auto ptr = 
reinterpret_cast(value); + return CreateWhileOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_DepthToSpaceOptions: { + auto ptr = reinterpret_cast(value); + return CreateDepthToSpaceOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_NonMaxSuppressionV4Options: { + auto ptr = reinterpret_cast(value); + return CreateNonMaxSuppressionV4Options(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_NonMaxSuppressionV5Options: { + auto ptr = reinterpret_cast(value); + return CreateNonMaxSuppressionV5Options(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_ScatterNdOptions: { + auto ptr = reinterpret_cast(value); + return CreateScatterNdOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_SelectV2Options: { + auto ptr = reinterpret_cast(value); + return CreateSelectV2Options(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_DensifyOptions: { + auto ptr = reinterpret_cast(value); + return CreateDensifyOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_SegmentSumOptions: { + auto ptr = reinterpret_cast(value); + return CreateSegmentSumOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_BatchMatMulOptions: { + auto ptr = reinterpret_cast(value); + return CreateBatchMatMulOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_CumsumOptions: { + auto ptr = reinterpret_cast(value); + return CreateCumsumOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_CallOnceOptions: { + auto ptr = reinterpret_cast(value); + return CreateCallOnceOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_BroadcastToOptions: { + auto ptr = reinterpret_cast(value); + return CreateBroadcastToOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_Rfft2dOptions: { + auto ptr = reinterpret_cast(value); + return CreateRfft2dOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_Conv3DOptions: { + auto ptr = reinterpret_cast(value); + return CreateConv3DOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_HashtableOptions: { + auto ptr = reinterpret_cast(value); + return CreateHashtableOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_HashtableFindOptions: { + auto ptr = reinterpret_cast(value); + return CreateHashtableFindOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_HashtableImportOptions: { + auto ptr = reinterpret_cast(value); + return CreateHashtableImportOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_HashtableSizeOptions: { + auto ptr = reinterpret_cast(value); + return CreateHashtableSizeOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_VarHandleOptions: { + auto ptr = reinterpret_cast(value); + return CreateVarHandleOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_ReadVariableOptions: { + auto ptr = reinterpret_cast(value); + return CreateReadVariableOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_AssignVariableOptions: { + auto ptr = reinterpret_cast(value); + return CreateAssignVariableOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_RandomOptions: { + auto ptr = reinterpret_cast(value); + return CreateRandomOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_BucketizeOptions: { + auto ptr = reinterpret_cast(value); + return CreateBucketizeOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_GeluOptions: { + auto ptr = reinterpret_cast(value); + return CreateGeluOptions(_fbb, ptr, _rehasher).Union(); + } + case 
BuiltinOptions_DynamicUpdateSliceOptions: { + auto ptr = reinterpret_cast(value); + return CreateDynamicUpdateSliceOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_UnsortedSegmentProdOptions: { + auto ptr = reinterpret_cast(value); + return CreateUnsortedSegmentProdOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_UnsortedSegmentMaxOptions: { + auto ptr = reinterpret_cast(value); + return CreateUnsortedSegmentMaxOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_UnsortedSegmentMinOptions: { + auto ptr = reinterpret_cast(value); + return CreateUnsortedSegmentMinOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_UnsortedSegmentSumOptions: { + auto ptr = reinterpret_cast(value); + return CreateUnsortedSegmentSumOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_ATan2Options: { + auto ptr = reinterpret_cast(value); + return CreateATan2Options(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_SignOptions: { + auto ptr = reinterpret_cast(value); + return CreateSignOptions(_fbb, ptr, _rehasher).Union(); + } + default: return 0; + } +} + +inline BuiltinOptionsUnion::BuiltinOptionsUnion(const BuiltinOptionsUnion &u) : type(u.type), value(nullptr) { + switch (type) { + case BuiltinOptions_Conv2DOptions: { + value = new tflite::Conv2DOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_DepthwiseConv2DOptions: { + value = new tflite::DepthwiseConv2DOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_ConcatEmbeddingsOptions: { + value = new tflite::ConcatEmbeddingsOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_LSHProjectionOptions: { + value = new tflite::LSHProjectionOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_Pool2DOptions: { + value = new tflite::Pool2DOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_SVDFOptions: { + value = new tflite::SVDFOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_RNNOptions: { + value = new tflite::RNNOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_FullyConnectedOptions: { + value = new tflite::FullyConnectedOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_SoftmaxOptions: { + value = new tflite::SoftmaxOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_ConcatenationOptions: { + value = new tflite::ConcatenationOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_AddOptions: { + value = new tflite::AddOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_L2NormOptions: { + value = new tflite::L2NormOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_LocalResponseNormalizationOptions: { + value = new tflite::LocalResponseNormalizationOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_LSTMOptions: { + value = new tflite::LSTMOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_ResizeBilinearOptions: { + value = new tflite::ResizeBilinearOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_CallOptions: { + value = new tflite::CallOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_ReshapeOptions: { + value = new tflite::ReshapeOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_SkipGramOptions: { + value = new tflite::SkipGramOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_SpaceToDepthOptions: { + value 
= new tflite::SpaceToDepthOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_EmbeddingLookupSparseOptions: { + value = new tflite::EmbeddingLookupSparseOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_MulOptions: { + value = new tflite::MulOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_PadOptions: { + value = new tflite::PadOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_GatherOptions: { + value = new tflite::GatherOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_BatchToSpaceNDOptions: { + value = new tflite::BatchToSpaceNDOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_SpaceToBatchNDOptions: { + value = new tflite::SpaceToBatchNDOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_TransposeOptions: { + value = new tflite::TransposeOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_ReducerOptions: { + value = new tflite::ReducerOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_SubOptions: { + value = new tflite::SubOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_DivOptions: { + value = new tflite::DivOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_SqueezeOptions: { + value = new tflite::SqueezeOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_SequenceRNNOptions: { + value = new tflite::SequenceRNNOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_StridedSliceOptions: { + value = new tflite::StridedSliceOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_ExpOptions: { + value = new tflite::ExpOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_TopKV2Options: { + value = new tflite::TopKV2OptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_SplitOptions: { + value = new tflite::SplitOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_LogSoftmaxOptions: { + value = new tflite::LogSoftmaxOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_CastOptions: { + value = new tflite::CastOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_DequantizeOptions: { + value = new tflite::DequantizeOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_MaximumMinimumOptions: { + value = new tflite::MaximumMinimumOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_ArgMaxOptions: { + value = new tflite::ArgMaxOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_LessOptions: { + value = new tflite::LessOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_NegOptions: { + value = new tflite::NegOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_PadV2Options: { + value = new tflite::PadV2OptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_GreaterOptions: { + value = new tflite::GreaterOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_GreaterEqualOptions: { + value = new tflite::GreaterEqualOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_LessEqualOptions: { + value = new tflite::LessEqualOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_SelectOptions: { + value = new tflite::SelectOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_SliceOptions: { + value = new 
tflite::SliceOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_TransposeConvOptions: { + value = new tflite::TransposeConvOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_SparseToDenseOptions: { + value = new tflite::SparseToDenseOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_TileOptions: { + value = new tflite::TileOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_ExpandDimsOptions: { + value = new tflite::ExpandDimsOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_EqualOptions: { + value = new tflite::EqualOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_NotEqualOptions: { + value = new tflite::NotEqualOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_ShapeOptions: { + value = new tflite::ShapeOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_PowOptions: { + value = new tflite::PowOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_ArgMinOptions: { + value = new tflite::ArgMinOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_FakeQuantOptions: { + value = new tflite::FakeQuantOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_PackOptions: { + value = new tflite::PackOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_LogicalOrOptions: { + value = new tflite::LogicalOrOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_OneHotOptions: { + value = new tflite::OneHotOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_LogicalAndOptions: { + value = new tflite::LogicalAndOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_LogicalNotOptions: { + value = new tflite::LogicalNotOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_UnpackOptions: { + value = new tflite::UnpackOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_FloorDivOptions: { + value = new tflite::FloorDivOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_SquareOptions: { + value = new tflite::SquareOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_ZerosLikeOptions: { + value = new tflite::ZerosLikeOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_FillOptions: { + value = new tflite::FillOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_BidirectionalSequenceLSTMOptions: { + value = new tflite::BidirectionalSequenceLSTMOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_BidirectionalSequenceRNNOptions: { + value = new tflite::BidirectionalSequenceRNNOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_UnidirectionalSequenceLSTMOptions: { + value = new tflite::UnidirectionalSequenceLSTMOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_FloorModOptions: { + value = new tflite::FloorModOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_RangeOptions: { + value = new tflite::RangeOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_ResizeNearestNeighborOptions: { + value = new tflite::ResizeNearestNeighborOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_LeakyReluOptions: { + value = new tflite::LeakyReluOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_SquaredDifferenceOptions: { + value = new 
tflite::SquaredDifferenceOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_MirrorPadOptions: { + value = new tflite::MirrorPadOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_AbsOptions: { + value = new tflite::AbsOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_SplitVOptions: { + value = new tflite::SplitVOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_UniqueOptions: { + value = new tflite::UniqueOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_ReverseV2Options: { + value = new tflite::ReverseV2OptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_AddNOptions: { + value = new tflite::AddNOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_GatherNdOptions: { + value = new tflite::GatherNdOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_CosOptions: { + value = new tflite::CosOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_WhereOptions: { + value = new tflite::WhereOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_RankOptions: { + value = new tflite::RankOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_ReverseSequenceOptions: { + value = new tflite::ReverseSequenceOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_MatrixDiagOptions: { + value = new tflite::MatrixDiagOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_QuantizeOptions: { + value = new tflite::QuantizeOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_MatrixSetDiagOptions: { + value = new tflite::MatrixSetDiagOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_HardSwishOptions: { + value = new tflite::HardSwishOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_IfOptions: { + value = new tflite::IfOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_WhileOptions: { + value = new tflite::WhileOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_DepthToSpaceOptions: { + value = new tflite::DepthToSpaceOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_NonMaxSuppressionV4Options: { + value = new tflite::NonMaxSuppressionV4OptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_NonMaxSuppressionV5Options: { + value = new tflite::NonMaxSuppressionV5OptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_ScatterNdOptions: { + value = new tflite::ScatterNdOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_SelectV2Options: { + value = new tflite::SelectV2OptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_DensifyOptions: { + value = new tflite::DensifyOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_SegmentSumOptions: { + value = new tflite::SegmentSumOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_BatchMatMulOptions: { + value = new tflite::BatchMatMulOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_CumsumOptions: { + value = new tflite::CumsumOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_CallOnceOptions: { + value = new tflite::CallOnceOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_BroadcastToOptions: { + value = new tflite::BroadcastToOptionsT(*reinterpret_cast(u.value)); + break; + } + case 
BuiltinOptions_Rfft2dOptions: { + value = new tflite::Rfft2dOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_Conv3DOptions: { + value = new tflite::Conv3DOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_HashtableOptions: { + value = new tflite::HashtableOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_HashtableFindOptions: { + value = new tflite::HashtableFindOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_HashtableImportOptions: { + value = new tflite::HashtableImportOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_HashtableSizeOptions: { + value = new tflite::HashtableSizeOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_VarHandleOptions: { + value = new tflite::VarHandleOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_ReadVariableOptions: { + value = new tflite::ReadVariableOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_AssignVariableOptions: { + value = new tflite::AssignVariableOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_RandomOptions: { + value = new tflite::RandomOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_BucketizeOptions: { + value = new tflite::BucketizeOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_GeluOptions: { + value = new tflite::GeluOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_DynamicUpdateSliceOptions: { + value = new tflite::DynamicUpdateSliceOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_UnsortedSegmentProdOptions: { + value = new tflite::UnsortedSegmentProdOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_UnsortedSegmentMaxOptions: { + value = new tflite::UnsortedSegmentMaxOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_UnsortedSegmentMinOptions: { + value = new tflite::UnsortedSegmentMinOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_UnsortedSegmentSumOptions: { + value = new tflite::UnsortedSegmentSumOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_ATan2Options: { + value = new tflite::ATan2OptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_SignOptions: { + value = new tflite::SignOptionsT(*reinterpret_cast(u.value)); + break; + } + default: + break; + } +} + +inline void BuiltinOptionsUnion::Reset() { + switch (type) { + case BuiltinOptions_Conv2DOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_DepthwiseConv2DOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_ConcatEmbeddingsOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_LSHProjectionOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_Pool2DOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_SVDFOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_RNNOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_FullyConnectedOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_SoftmaxOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_ConcatenationOptions: { + auto ptr = 
reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_AddOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_L2NormOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_LocalResponseNormalizationOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_LSTMOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_ResizeBilinearOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_CallOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_ReshapeOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_SkipGramOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_SpaceToDepthOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_EmbeddingLookupSparseOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_MulOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_PadOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_GatherOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_BatchToSpaceNDOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_SpaceToBatchNDOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_TransposeOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_ReducerOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_SubOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_DivOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_SqueezeOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_SequenceRNNOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_StridedSliceOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_ExpOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_TopKV2Options: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_SplitOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_LogSoftmaxOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_CastOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_DequantizeOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_MaximumMinimumOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_ArgMaxOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_LessOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_NegOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_PadV2Options: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + 
case BuiltinOptions_GreaterOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_GreaterEqualOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_LessEqualOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_SelectOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_SliceOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_TransposeConvOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_SparseToDenseOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_TileOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_ExpandDimsOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_EqualOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_NotEqualOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_ShapeOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_PowOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_ArgMinOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_FakeQuantOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_PackOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_LogicalOrOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_OneHotOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_LogicalAndOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_LogicalNotOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_UnpackOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_FloorDivOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_SquareOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_ZerosLikeOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_FillOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_BidirectionalSequenceLSTMOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_BidirectionalSequenceRNNOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_UnidirectionalSequenceLSTMOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_FloorModOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_RangeOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_ResizeNearestNeighborOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_LeakyReluOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_SquaredDifferenceOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } 
+ case BuiltinOptions_MirrorPadOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_AbsOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_SplitVOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_UniqueOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_ReverseV2Options: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_AddNOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_GatherNdOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_CosOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_WhereOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_RankOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_ReverseSequenceOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_MatrixDiagOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_QuantizeOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_MatrixSetDiagOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_HardSwishOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_IfOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_WhileOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_DepthToSpaceOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_NonMaxSuppressionV4Options: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_NonMaxSuppressionV5Options: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_ScatterNdOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_SelectV2Options: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_DensifyOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_SegmentSumOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_BatchMatMulOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_CumsumOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_CallOnceOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_BroadcastToOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_Rfft2dOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_Conv3DOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_HashtableOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_HashtableFindOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_HashtableImportOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case 
BuiltinOptions_HashtableSizeOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_VarHandleOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_ReadVariableOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_AssignVariableOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_RandomOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_BucketizeOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_GeluOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_DynamicUpdateSliceOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_UnsortedSegmentProdOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_UnsortedSegmentMaxOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_UnsortedSegmentMinOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_UnsortedSegmentSumOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_ATan2Options: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_SignOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + default: break; + } + value = nullptr; + type = BuiltinOptions_NONE; +} + +inline const tflite::Model *GetModel(const void *buf) { + return flatbuffers::GetRoot(buf); +} + +inline const tflite::Model *GetSizePrefixedModel(const void *buf) { + return flatbuffers::GetSizePrefixedRoot(buf); +} + +inline const char *ModelIdentifier() { + return "TFL3"; +} + +inline bool ModelBufferHasIdentifier(const void *buf) { + return flatbuffers::BufferHasIdentifier( + buf, ModelIdentifier()); +} + +inline bool SizePrefixedModelBufferHasIdentifier(const void *buf) { + return flatbuffers::BufferHasIdentifier( + buf, ModelIdentifier(), true); +} + +inline bool VerifyModelBuffer( + flatbuffers::Verifier &verifier) { + return verifier.VerifyBuffer(ModelIdentifier()); +} + +inline bool VerifySizePrefixedModelBuffer( + flatbuffers::Verifier &verifier) { + return verifier.VerifySizePrefixedBuffer(ModelIdentifier()); +} + +inline const char *ModelExtension() { + return "tflite"; +} + +inline void FinishModelBuffer( + flatbuffers::FlatBufferBuilder &fbb, + flatbuffers::Offset root) { + fbb.Finish(root, ModelIdentifier()); +} + +inline void FinishSizePrefixedModelBuffer( + flatbuffers::FlatBufferBuilder &fbb, + flatbuffers::Offset root) { + fbb.FinishSizePrefixed(root, ModelIdentifier()); +} + +inline std::unique_ptr UnPackModel( + const void *buf, + const flatbuffers::resolver_function_t *res = nullptr) { + return std::unique_ptr(GetModel(buf)->UnPack(res)); +} + +inline std::unique_ptr UnPackSizePrefixedModel( + const void *buf, + const flatbuffers::resolver_function_t *res = nullptr) { + return std::unique_ptr(GetSizePrefixedModel(buf)->UnPack(res)); +} + +} // namespace tflite + +#endif // FLATBUFFERS_GENERATED_SCHEMA_TFLITE_H_ diff --git a/edgeimpulse/edge-impulse-sdk/tensorflow/lite/schema/schema_utils.h b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/schema/schema_utils.h new file mode 100644 index 0000000..d810c4e --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/tensorflow/lite/schema/schema_utils.h @@ 
-0,0 +1,33 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_SCHEMA_SCHEMA_UTILS_H_ +#define TENSORFLOW_LITE_SCHEMA_SCHEMA_UTILS_H_ + +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/flatbuffers.h" +#include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h" + +namespace tflite { + +// The following methods are introduced to resolve op builtin code shortage +// problem. The new builtin operator will be assigned to the extended builtin +// code field in the flatbuffer schema. Those methods helps to hide builtin code +// details. +BuiltinOperator GetBuiltinCode(const OperatorCode *op_code); + +BuiltinOperator GetBuiltinCode(const OperatorCodeT *op_code); + +} // namespace tflite + +#endif // TENSORFLOW_LITE_SCHEMA_SCHEMA_UTILS_H_ diff --git a/edgeimpulse/edge-impulse-sdk/third_party/arc_mli_package/LICENSE b/edgeimpulse/edge-impulse-sdk/third_party/arc_mli_package/LICENSE new file mode 100644 index 0000000..5ad4eaf --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/third_party/arc_mli_package/LICENSE @@ -0,0 +1,13 @@ +embARC Machine Learning Inference (embARC MLI) library + +Copyright (c) 2019-2020 Synopsys, Inc. All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +1) Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + +2) Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +3) Neither the name of the Synopsys, Inc., nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
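+// Editorial example (not part of the patch itself): a minimal sketch of how the schema
+// helpers added above are typically combined, namely tflite::VerifyModelBuffer() and
+// tflite::GetModel() from schema_generated.h together with tflite::GetBuiltinCode() from
+// schema_utils.h. The buffer `model_data` and its length are placeholders supplied by the caller.
+//
+//   #include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h"
+//   #include "edge-impulse-sdk/tensorflow/lite/schema/schema_utils.h"
+//
+//   bool inspect_model(const void *model_data, size_t model_len) {
+//     flatbuffers::Verifier verifier(static_cast<const uint8_t *>(model_data), model_len);
+//     if (!tflite::VerifyModelBuffer(verifier)) {
+//       return false;  // buffer is not a valid TFL3 flatbuffer
+//     }
+//     const tflite::Model *model = tflite::GetModel(model_data);
+//     if (model->operator_codes() != nullptr) {
+//       for (const tflite::OperatorCode *op_code : *model->operator_codes()) {
+//         // GetBuiltinCode() hides the extended builtin-code field described above.
+//         tflite::BuiltinOperator op = tflite::GetBuiltinCode(op_code);
+//         (void)op;
+//       }
+//     }
+//     return true;
+//   }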
\ No newline at end of file diff --git a/edgeimpulse/edge-impulse-sdk/third_party/arc_mli_package/bin/emsdp_em11d_em9d_dfss/release/libmli.a b/edgeimpulse/edge-impulse-sdk/third_party/arc_mli_package/bin/emsdp_em11d_em9d_dfss/release/libmli.a new file mode 100644 index 0000000..2020ff2 Binary files /dev/null and b/edgeimpulse/edge-impulse-sdk/third_party/arc_mli_package/bin/emsdp_em11d_em9d_dfss/release/libmli.a differ diff --git a/edgeimpulse/edge-impulse-sdk/third_party/arc_mli_package/include/api/mli_helpers_api.h b/edgeimpulse/edge-impulse-sdk/third_party/arc_mli_package/include/api/mli_helpers_api.h new file mode 100644 index 0000000..2daa802 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/third_party/arc_mli_package/include/api/mli_helpers_api.h @@ -0,0 +1,145 @@ +/* +* Copyright 2019-2020, Synopsys, Inc. +* All rights reserved. +* +* This source code is licensed under the BSD-3-Clause license found in +* the LICENSE file in the root directory of this source tree. +* +*/ + +/** + * @file MLI Library Helpers API + * + * @brief This header includes declarations for helpers set of functions + */ + +#ifndef _MLI_HELPERS_API_H_ +#define _MLI_HELPERS_API_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "mli_types.h" + +/** + * Set of helper defines to index the shape array. + */ +#define FMAP_H_DIM_CHW 1 // height +#define FMAP_W_DIM_CHW 2 // width +#define FMAP_C_DIM_CHW 0 // channels + +#define KRNL_H_DIM_CHW 2 // kernel height +#define KRNL_W_DIM_CHW 3 // kernel width +#define KRNL_D_DIM_CHW 1 // kernel depth aka input channels +#define KRNL_C_DIM_CHW 0 // output channels + +#define FMAP_H_DIM_HWC 0 // height +#define FMAP_W_DIM_HWC 1 // width +#define FMAP_C_DIM_HWC 2 // channels + +#define KRNL_H_DIM_HWC 1 // kernel height +#define KRNL_W_DIM_HWC 2 // kernel width +#define KRNL_D_DIM_HWC 3 // kernel depth aka input channels +#define KRNL_C_DIM_HWC 0 // output channels + +// for Depthwise convolution hwc kernel +#define KRNL_DW_D_DIM_HWC 0 // Depthwise convolution hwc kernel depth (must be == 1) +#define KRNL_DW_H_DIM_HWC 1 // Depthwise convolution hwc kernel height +#define KRNL_DW_W_DIM_HWC 2 // Depthwise convolution hwc kernel width +#define KRNL_DW_C_DIM_HWC 3 // Depthwise convolution hwc output channels + +/** + * @brief Count Number of Elements in Tensor + * + * @detail Function calculates the number of elements in a tensor starting from the provided + * dimension number (dimension numbering starts from 0). If start_dim=0 function calculates total number of elements + * Function returns 0 if input tensor rank is invalid or start_dim is bigger then input rank + * + * For more info on primitive see MLI Documentation + * + * @param in [I] Input tensor (of any shape) + * @param start_dim [I] Start dimension for counting + * + * @return Number of elements in tensor + */ +uint32_t mli_hlp_count_elem_num(const mli_tensor *in, uint32_t start_dim); + +/** + * @brief Get Tensor Basic Element Size + * + * @detail This function returns size of tensor basic element in bytes. It returns 0 if + * pointer to tensor is invalid, or tensor contains unsupported element type. + * + * For more info on primitive see MLI Documentation + * + * @param in [I] Input tensor (of any shape) + * + * @return Size of tensor basic element (bytes) + */ +uint32_t mli_hlp_tensor_element_size(const mli_tensor *in); + +/** + * @brief Convert Tensor + * + * @detail This function copies elements from input tensor to output with data conversion according to + * the output tensor type parameters. 
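+ *
+ * Editorial sketch (not part of the original header): sizing the destination of a conversion
+ * with the two helpers declared above; `src` and `dst` are hypothetical caller-owned tensors.
+ * @code
+ *   uint32_t elems = mli_hlp_count_elem_num(&src, 0);            // total elements in the source
+ *   uint32_t bytes = elems * mli_hlp_tensor_element_size(&dst);  // bytes needed for dst element type
+ *   // caller guarantees that dst's data buffer can hold `bytes`, then:
+ *   mli_status status = mli_hlp_convert_tensor(&src, &dst);
+ * @endcode
+ *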
This operation does not change tensor shape. It copies it from input to output. + * + * For more info on primitive see MLI Documentation + * + * @param in [I] Input tensor (of any shape) + * @param out [O] Output tensor. Result will be stored here + * + * @return MLI status code + */ +mli_status mli_hlp_convert_tensor(mli_tensor *in, mli_tensor *out); + +/** + * @brief Point to Sub-Tensor + * + * @detail This function points to sub tensors in input tensor. This can be considered as indexing in + * a multidimensional array. This function performs operations on pointers and doesn’t copy data + * (only points to subsequence of data in input). For this reason, this function takes only parameters that + * can be translated to starting coordinates and size of required data. + * + * For more info on primitive see MLI Documentation + * + * @param in [I] Input tensor (of any shape) + * @param cfg [I] Configuration structure (for more info see @ref mli_point_to_subtsr_cfg) + * @param out [O] Output tensor. Result will be stored here + * + * @return MLI status code + */ +mli_status mli_hlp_point_to_subtensor(const mli_tensor *in, const mli_point_to_subtsr_cfg *cfg, mli_tensor *out); + +/** + * @brief Create a Sub-Tensor from a larger tensor + * + * @detail This function points to sub tensors in input tensor. This function performs operations + * on pointers and doesn’t copy data (only points to subsequence of data in input). + * For this reason, depending on the parameters, it can happen that the sub tensor contains + * data that is not adjacent in memory. + * + * For more info on primitive see MLI Documentation + * + * @param in [I] Input tensor (of any shape) + * @param cfg [I] Configuration structure (for more info see @ref mli_sub_tensor_cfg) + * @param out [O] Output tensor. Result will be stored here + * + * @return MLI status code + */ +mli_status mli_hlp_create_subtensor(const mli_tensor *in, const mli_sub_tensor_cfg *cfg, mli_tensor *out); + +uint32_t mli_hlp_tensor_scale_shift(const mli_tensor *in); + +int32_t mli_hlp_tensor_scale(const mli_tensor *in, const uint32_t scale_idx); + +int16_t mli_hlp_tensor_zero_offset(const mli_tensor *in, const uint32_t zero_idx); + + + +#ifdef __cplusplus +} +#endif + +#endif //_MLI_HELPERS_API_H_ diff --git a/edgeimpulse/edge-impulse-sdk/third_party/arc_mli_package/include/api/mli_kernels_api.h b/edgeimpulse/edge-impulse-sdk/third_party/arc_mli_package/include/api/mli_kernels_api.h new file mode 100644 index 0000000..927526d --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/third_party/arc_mli_package/include/api/mli_kernels_api.h @@ -0,0 +1,667 @@ +/* +* Copyright 2019-2020, Synopsys, Inc. +* All rights reserved. +* +* This source code is licensed under the BSD-3-Clause license found in +* the LICENSE file in the root directory of this source tree. +* +*/ + +/** + * @file MLI Library Kernels API + * + * @brief This header includes declarations for kernels set of functions + */ + +#ifndef _MLI_KERNELS_API_H_ +#define _MLI_KERNELS_API_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "mli_krn_avepool_spec_api.h" +#include "mli_krn_conv2d_spec_api.h" +#include "mli_krn_depthwise_conv2d_spec_api.h" +#include "mli_krn_maxpool_spec_api.h" +#include "mli_types.h" + + + +//================================================ +// +// Convolution group of kernels +// +//================================================ +/** + * @brief 2D convolution + * + * @detail This kernel implements a general 2D convolution operation. 
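+ *
+ * Editorial sketch (not part of the original header): a typical fx16 invocation, assuming the
+ * caller has already populated the `in`, `weights`, `bias` and `out` tensors and a
+ * `mli_conv2d_cfg` structure (stride, padding and ReLU settings live in the cfg).
+ * @code
+ *   mli_status status = mli_krn_conv2d_chw_fx16(&in, &weights, &bias, &cfg, &out);
+ *   if (status != MLI_STATUS_OK) {
+ *     // handle the error; MLI_STATUS_OK signals success
+ *   }
+ * @endcode
+ *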
It applies each filter of weights tensor + * to each framed area of the size of input tensor + * + * To implicitly insert additional points to sides of feature map (considering only width and height dimensions), + * ensure that you set the padding parameters. Padding influences how feature map is divided into patches + * for applying kernels because values of padded points are always zero. + * + * ReLU activation function may be applied to result of convolution. + * + * For full list of specialized and highly optimized versions of kernel see @ref mli_krn_conv2d_spec_api.h + * For more info on primitive see MLI Documentation + * + * @param in [I] Input feature map tensor (3-dimensional tensor) + * @param weights [I] Convolution filters weights tensor (4-dimensional tensor) + * @param bias [I] Convolution filters biases tensor (1-dimensional tensor) + * @param cfg [I] Convolution parameters structure (for more info see @ref mli_conv2d_cfg) + * @param out [O] Output feature map tensor. Result is stored here + * + * @return MLI status code + */ +mli_status mli_krn_conv2d_chw_fx8( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx16( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8w16d( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8_generic( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx16_generic( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_hwc_fx8( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_hwc_fx16( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_hwc_fx8w16d( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_nhwc_sa8_sa8_sa32( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_nhwc_sa8_sa8_sa32_generic( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +char * mli_debug_krn_conv2d_nhwc_sa8_sa8_sa32( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +/** + * @brief 2D Depthwise convolution + * + * @detail This kernel implements a 2D depthwise convolution operation applying each filter channel + * to each input channel separatelly. As a result, output image depth is the same as input image depth. + * + * MLI implementation of depthwise convolution is compatible with caffe implementation of convolution layer + * with group parameter equal to number of input channels. 
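+ *
+ * Editorial sketch (not part of the original header): an fx16 call, assuming the caller has
+ * prepared all tensors and the `mli_conv2d_cfg` structure, exactly as for the regular
+ * 2D convolution above.
+ * @code
+ *   mli_status status = mli_krn_depthwise_conv2d_chw_fx16(&in, &weights, &bias, &cfg, &out);
+ * @endcode
+ *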
In comparison with TensorFlow implementation + * (tf.nn.depthwise_conv2d in python API), this implementation does not support channel multiplier feature. + * Hence, the last dimension of weights tensor must be equal to 1. + * + * ReLU activation function may be applied to result of convolution. + * + * For full list of specialized and highly optimized versions of kernel see @ref mli_krn_depthwise_conv2d_spec_api.h + * For more info on primitive see MLI Documentation + * + * @param in [I] Input feature map tensor (3-dimensional tensor) + * @param weights [I] Convolution filters weights tensor (4-dimensional tensor) + * @param bias [I] Convolution filters biases tensor (1-dimensional tensor) + * @param cfg [I] Convolution parameters structure (for more info see @ref mli_conv2d_cfg) + * @param out [O] Output feature map tensor. Result will be stored here + * + * @return MLI status code + */ +mli_status mli_krn_depthwise_conv2d_chw_fx8( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx16( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8w16d( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); +mli_status mli_krn_depthwise_conv2d_hwcn_sa8_sa8_sa32( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8_generic( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx16_generic( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_hwcn_sa8_sa8_sa32_generic( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +char * mli_debug_krn_depthwise_conv2d_hwcn_sa8_sa8_sa32( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +//================================================ +// +// Pooling group of kernels +// +//================================================ +/** + * @brief Average pooling + * + * @detail This kernel implements an average pooling operation. Each channel of input is considered independently, + * which means that the analysis fragment includes only neighbor points of the channel. For each fragment + * of input tensor, average value over all considered ponts is defined as the output value. + * The fragment size is defined in configuration structure according to kernel_width and kernel_height values. + * + * Window positioning and moving is performed according to stride and padding parameters. + * This logic is similar to convolution 2D operation. Average Pooling primitive does not analyze an area smaller + * than kernel size (typically, this occurs on the right and bottom borders). In this case, ensure that you set + * padding parameters explicitly in order not to miss valid border values. Padded values do not participate + * in the calculations. 
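+ *
+ * Editorial sketch (not part of the original header): calling the generic fx16 entry point,
+ * assuming `in`, `out` and a `mli_pool_cfg` structure (kernel_width/kernel_height plus stride
+ * and padding, as described above) have been prepared by the caller.
+ * @code
+ *   mli_status status = mli_krn_avepool_chw_fx16(&in, &cfg, &out);
+ *   // A fixed-size specialization such as mli_krn_avepool_chw_fx16_k2x2_krnpad() may be used
+ *   // instead when the window size is known at compile time (see mli_krn_avepool_spec_api.h);
+ *   // padded points never contribute to the average.
+ * @endcode
+ *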
So when a fragment includes padded values, only the existing values are analysed + * (this also implies reducing of divider for average calculation). + * + * For full list of specialized and highly optimized versions of kernel see @ref mli_krn_avepool_spec_api.h + * For more info on primitive see MLI Documentation + * + * @param in [I] Input feature map tensor (3-dimensional tensor) + * @param cfg [I] Pooling parameters structure (for more info see @ref mli_pool_cfg) + * @param out [O] Output feature map tensor. Result will be stored here + * + * @return MLI status code + */ +mli_status mli_krn_avepool_chw_fx8(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_chw_fx16(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_chw_fx8_generic(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_chw_fx16_generic(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_hwc_fx8(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_hwc_fx16(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_hwc_sa8(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); + +/** + * @brief MAX pooling + * + * @detail This kernel implements a max pooling operation. Each channel of input is considered independently, + * which means that the analysis fragment includes only neighbor points of the channel. For each fragment + * of input tensor, maximum value is being defined as the output value. The fragment size is defined in configuration + * structure according to kernel_width and kernel_height values. + * + * Splitting input on fragments is performed according to stride and padding parameters. This logic is similar to + * convolution 2D operation + * + * For full list of specialized and highly optimized versions of kernel see @ref mli_krn_maxpool_spec_api.h + * For more info on primitive see MLI Documentation + + * @param in [I] Input feature map tensor (3-dimensional tensor) + * @param cfg [I] Pooling parameters structure (for more info see @ref mli_pool_cfg) + * @param out [O] Output feature map tensor. 
Result will be stored here + * + * @return MLI status code + */ +mli_status mli_krn_maxpool_chw_fx8(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx16(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx8_generic(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx16_generic(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_hwc_fx8(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_hwc_fx16(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_hwc_sa8(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_hwc_fx8_generic(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_hwc_fx16_generic(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_hwc_sa8_generic(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); + +//================================================ +// +// Common group of kernels +// +//================================================ +/** + * @brief Fully connected + * + * @detail This kernel implements fully connected layer, also usually referred to as the inner product or dense layer. + * + * Ensure that the weight for this kernel is a 2-dimensional tensor (matrix of shape [M, N]), + * and Bias must be 1-dimensional tensor of shape [M]. Shape of input tensor is not considered and only total number + * of elements is considered (must be equal to N). Kernel outputs a 1-dimensional tensor of shape [M]. + * + * For more info on primitive see MLI Documentation + * + * @param in [I] Input feature tensor (of any shape) + * @param weights [I] Weights tensor (2-dimensional tensor) + * @param bias [I] Biases tensor (1-dimensional tensor) + * @param out [O] Output feature tensor. Result will be stored here + * + * @return MLI status code + */ +mli_status mli_krn_fully_connected_fx8( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + mli_tensor * out); + +mli_status mli_krn_fully_connected_fx16( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + mli_tensor * out); + +mli_status mli_krn_fully_connected_fx8w16d( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + mli_tensor * out); + +mli_status mli_krn_fully_connected_sa8_sa8_sa32( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + mli_tensor * out); +/** + * @brief Long Short Term Memory (LSTM) Cell + * + * @detail This kernel implements the default non-peephole implementation of long short term memory (LSTM) cell + * + * The Kernel supports three types of output activation: Sigmoid, Hyperbolic tangent and No activation (identity function) + * Kernel supports three modes of input processing: ONE_TO_ONE, BATCH_TO_BATCH, BATCH_TO_LAST + * Kernel REQUIRES extra intermediate tensor for calculations. It must be passed through configuration structure. + * + * For more info on primitive see MLI Documentation. 
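+ *
+ * Editorial sketch (not part of the original header): one fx16 step of the cell, assuming the
+ * caller has prepared every tensor and the `mli_rnn_cell_cfg` structure (including the required
+ * intermediate tensor mentioned above).
+ * @code
+ *   mli_status status = mli_krn_lstm_cell_fx16(&in, &prev_out, &weights, &bias, &cfg, &cell, &out);
+ *   // `cell` is updated in place; `out` receives the new output (a single vector or a batch,
+ *   // depending on the processing mode selected in `cfg`).
+ * @endcode
+ *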
+ * + * @param in [I] Input feature tensor (of any shape or with the batchsize in the first dimensions for batched modes) + * @param prev_out [I] Previous output feature tensor (1-dimensional tensor) + * @param weights [I] Weights tensor (set of 4 matrixes in the [i,g,f,o] order: 3-dimensional tensor) + * @param bias [I] Biases tensor (set of 4 vectors in the [i,g,f,o] order: 2-dimensional tensor) + * @param cfg [I] LSTM Configuration structure (for more info see @ref mli_rnn_cell_cfg) + * @param cell [I/O] Cell memory state (1-dimensional tensor) + * @param out [O] Output feature tensor. Result will be stored here (single output or batch of outputs depending on mode) + * + * @return MLI status code + */ +mli_status mli_krn_lstm_cell_fx8( + const mli_tensor * in, + const mli_tensor * prev_out, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_rnn_cell_cfg * cfg, + mli_tensor * cell, + mli_tensor * out); + +mli_status mli_krn_lstm_cell_fx16( + const mli_tensor * in, + const mli_tensor * prev_out, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_rnn_cell_cfg * cfg, + mli_tensor * cell, + mli_tensor * out); + +mli_status mli_krn_lstm_cell_fx8w16d( + const mli_tensor * in, + const mli_tensor * prev_out, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_rnn_cell_cfg * cfg, + mli_tensor * cell, + mli_tensor * out); + +/** + * @brief Basic Recurrent Neural Network Cell + * + * @detail This kernel implements the basic recurrent cell without memory state. + * + * The Kernel supports three types of output activation: Sigmoid, Hyperbolic tangent and No activation (identity function) + * Kernel supports three modes of input processing: ONE_TO_ONE, BATCH_TO_BATCH, BATCH_TO_LAST + * To support user-specific complex recurrent cells beside LSTM, basic RNN cell kernel in One-to-One mode + * can work with matrices with stacked weights to produce stacked output tensor. For example, if weights tensor + * is 3-dimensionl tensor of shape [L, M, M+N], and Bias of shape [L, M], the output tensor is of shape [L, M]. + * Kernel REQUIRES extra intermediate tensor for calculations in BATCH-TO-LAST mode. It must be passed through + * configuration structure. + * + * For more info on primitive see MLI Documentation. + * + * @param in [I] Input feature tensor (of any shape or with the batchsize in the first dimensions for batched modes) + * @param prev_out [I] Previous output feature tensor (1-dimensional tensor) + * @param weights [I] Weights tensor (2-dimensional tensor. 3-dimensional tensor in case of stacked output for ONE-TO_ONE mode) + * @param bias [I] Biases tensor (1-dimensional tensor. 2-dimensional tensor in case of stacked output for ONE-TO_ONE mode) + * @param cfg [I] Configuration structure (for more info see @ref mli_rnn_cell_cfg) + * @param out [O] Output feature tensor. 
Result will be stored here (single output or batch of outputs depending on mode) + * + * @return MLI status code + */ +mli_status mli_krn_basic_rnn_cell_fx8( + const mli_tensor * in, + const mli_tensor * prev_out, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_rnn_cell_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_basic_rnn_cell_fx16( + const mli_tensor * in, + const mli_tensor * prev_out, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_rnn_cell_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_basic_rnn_cell_fx8w16d( + const mli_tensor * in, + const mli_tensor * prev_out, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_rnn_cell_cfg * cfg, + mli_tensor * out); + + + +//================================================ +// +// Activation group of kernels +// +//================================================ +/** + * @brief ReLU Activation function + * + * @detail This kernel represents Rectified Linear unit (ReLU). It performs various types of the rectifier activation on input. + * The following types of ReLU supported by this type of kernel: General ReLU, ReLU1, ReLU6 + * Kernel outputs a tensor of the same shape and type as input tensor. + * + * For more info on primitive see MLI Documentation + * + * @param in [I] Input feature tensor (of any shape) + * @param cfg [I] Configuration structure (for more info see @ref mli_relu_cfg) + * @param out [O] Output feature tensor. Result will be stored here + * + * @return MLI status code + */ +mli_status mli_krn_relu_fx8(const mli_tensor * in, const mli_relu_cfg * cfg, mli_tensor * out); +mli_status mli_krn_relu_fx16(const mli_tensor * in, const mli_relu_cfg * cfg, mli_tensor * out); + +/** + * @brief Leaky ReLU Activation function + * + * @detail This kernel represents Rectified Linear unit (ReLU) with a negative slope. + * The function accepts two tensors as input and one as output. The first input tensor is the feature map + * to be processed by the kernel, and the second input is a tensor-scalar that holds + * a negative slope coefficient(Note, special tensor-scalar form can be used. see mli_tensor description in MLI Documentation). + * Kernel outputs a tensor of the same shape and type as input tensor. + * + * For more info on primitive see MLI Documentation + * + * @param in [I] Input feature tensor (of any shape) + * @param slope_coeff [I] Slope coefficient scalar tensor + * @param out [O] Output feature tensor. Result will be stored here + * + * @return MLI status code + */ +mli_status mli_krn_leaky_relu_fx8(const mli_tensor * in, const mli_tensor * slope_coeff, mli_tensor * out); +mli_status mli_krn_leaky_relu_fx16(const mli_tensor * in, const mli_tensor * slope_coeff, mli_tensor * out); + +/** + * @brief Sigmoid Activation function + * + * @detail This kernel performs sigmoid (also mentioned as logistic) activation function on input tensor element-wise + * and stores the result to the output tensor. Kernel outputs a tensor of the same shape and type as input tensor. + * + * For more info on primitive see MLI Documentation + * + * @param in [I] Input feature tensor (of any shape) + * @param out [O] Output feature tensor. 
Result will be stored here + * + * @return MLI status code + */ +mli_status mli_krn_sigm_fx8(const mli_tensor * in, mli_tensor * out); +mli_status mli_krn_sigm_fx16(const mli_tensor * in, mli_tensor * out); + +/** + * @brief Hyperbolic Tangent Activation function + * + * @detail This kernel performs hyperbolic tangent activation function on input tensor element-wise + * and store result to the output tensor. Kernel outputs a tensor of the same shape and type as input tensor. + * + * For more info on primitive see MLI Documentation + * + * @param in [I] Input feature tensor (of any shape) + * @param out [O] Output feature tensor. Result will be stored here + * + * @return MLI status code + */ +mli_status mli_krn_tanh_fx8(const mli_tensor * in, mli_tensor * out); +mli_status mli_krn_tanh_fx16(const mli_tensor * in, mli_tensor * out); + +/** + * @brief Softmax + * + * @detail This kernel performs activation function which is a generalization of the logistic function. + * The SoftMax function is often used as the final layer of a neural network-based classifier and it's output can be considered + * as a probability distribution over N different possible outcomes. The sum of all the entries tends to 1 + * + * For more info on primitive see MLI Documentation + * + * @param in [I] Input feature tensor (of any shape) + * @param out [O] Output feature tensor. Result will be stored here + * + * @return MLI status code + */ +mli_status mli_krn_softmax_fx8(const mli_tensor * in, mli_tensor * out); +mli_status mli_krn_softmax_fx16(const mli_tensor * in, mli_tensor * out); + + + +//================================================ +// +// Elementwise group of kernels +// +//================================================ +/* + * @brief Elementwise Addition + * + * @detail This kernel adds two tensors of the same shape element-wise and stores results to the output tensor + * saving the shape of inputs. It supports simple broadcasting of single value (scalar tensor) on general tensor. + * One of the operands can be a scalar (Note, special tensor-scalar form can be used. see mli_tensor description in + * MLI Documentation) + * + * For more info on primitive see MLI Documentation + * + * @param in1 [I] First input feature tensor (or scalar tensor) + * @param in2 [I] Second input feature tensor (or scalar tensor) + * @param out [O] Output feature tensor. Result will be stored here + * + * @return MLI status code + */ +mli_status mli_krn_eltwise_add_fx8(const mli_tensor * in1, const mli_tensor * in2, mli_tensor * out); +mli_status mli_krn_eltwise_add_fx16(const mli_tensor * in1, const mli_tensor * in2, mli_tensor * out); + +/* + * @brief Elementwise Subtraction + * + * @detail This kernel subtracts element-wise, the second input tensor (subtrahend) from the first input tensor (minuend) + * and stores results to the output tensor It supports simple broadcasting of single value (scalar tensor) on general tensor. + * One of the operands can be a scalar (Note, special tensor-scalar form can be used. see mli_tensor description in MLI Documentation) + * + * For more info on primitive see MLI Documentation + * + * @param in1 [I] Minuend input feature tensor (or scalar tensor) + * @param in2 [I] Subtrahend input feature tensor (or scalar tensor) + * @param out [O] Output feature tensor. 
Result will be stored here + * + * @return MLI status code + */ +mli_status mli_krn_eltwise_sub_fx8(const mli_tensor * in1, const mli_tensor * in2, mli_tensor * out); +mli_status mli_krn_eltwise_sub_fx16(const mli_tensor * in1, const mli_tensor * in2, mli_tensor * out); + +/* @brief Elementwise Multiplication + * + * @detail This kernel multiplies two tensors of the same shape element-wise and store results to the output tensor + * saving the shape of inputs. It supports simple broadcasting of single value (scalar tensor) on general tensor. + * One of the operands can be a scalar (Note, special tensor-scalar form can be used. see mli_tensor description in MLI Documentation) + * + * For more info on primitive see MLI Documentation + * + * @param in1 [I] First input feature tensor (or scalar tensor) + * @param in2 [I] Second input feature tensor (or scalar tensor) + * @param out [O] Output feature tensor. Result will be stored here + * + * @return MLI status code + */ +mli_status mli_krn_eltwise_mul_fx8(const mli_tensor * in1, const mli_tensor * in2, mli_tensor * out); +mli_status mli_krn_eltwise_mul_fx16(const mli_tensor * in1, const mli_tensor * in2, mli_tensor * out); + +/* @brief Elementwise MAX/MIN + * + * @detail This kernel finds element-wise maximum / minimum of inputs operands and store results to the output tensor + * saving the shape of inputs. It supports simple broadcasting of single value (scalar tensor) on general tensor. + * One of the operands can be a scalar (Note, special tensor-scalar form can be used. see mli_tensor description in MLI Documentation) + * + * For more info on primitive see MLI Documentation + * + * @param in1 [I] First input feature tensor (or scalar tensor) + * @param in2 [I] Second input feature tensor (or scalar tensor) + * @param out [O] Output feature tensor. Result will be stored here + * + * @return MLI status code + */ +mli_status mli_krn_eltwise_min_fx8(const mli_tensor * in1, const mli_tensor * in2, mli_tensor * out); +mli_status mli_krn_eltwise_min_fx16(const mli_tensor * in1, const mli_tensor * in2, mli_tensor * out); +mli_status mli_krn_eltwise_max_fx8(const mli_tensor * in1, const mli_tensor * in2, mli_tensor * out); +mli_status mli_krn_eltwise_max_fx16(const mli_tensor * in1, const mli_tensor * in2, mli_tensor * out); + + + +//================================================ +// +// Data manipulation group of kernels +// +//================================================ +/** + * @brief Permute Tensor + * + * @detail The kernel permutes dimensions of input tensor according to provided order. In other words, it transposes input tensors. + * The new order of dimensions is given by perm_dim array of kernel configuration structure. Output dimension #idx + * corresponds to the dimension of input tensor with #perm_dim[idx]. Tensor's data is reordered according to new shape. + * + * For more info on primitive see MLI Documentation + * + * @param in [I] Input tensor (of any shape) + * @param cfg [I] Permute parameters structure (for more info see @ref mli_permute_cfg) + * @param out [O] Output tensor. Result will be stored here + * + * @return MLI status code + */ +mli_status mli_krn_permute_fx8(const mli_tensor * in, const mli_permute_cfg * cfg, mli_tensor * out); +mli_status mli_krn_permute_fx16(const mli_tensor * in, const mli_permute_cfg * cfg, mli_tensor * out); + +/** + * @brief Concatenation + * + * @detail This kernel concatenates multiple input tensors along one dimension to produce a single output tensor. 
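+ *
+ * Editorial sketch (not part of the original header): joining two feature maps, assuming `a`
+ * and `b` have identical shapes except along the chosen axis and `cfg` carries the number of
+ * inputs (2) and that axis.
+ * @code
+ *   const mli_tensor *inputs[2] = { &a, &b };
+ *   mli_status status = mli_krn_concat_fx16(inputs, &cfg, &out);
+ * @endcode
+ *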
+/**
+ * @brief Concatenation
+ *
+ * @detail This kernel concatenates multiple input tensors along one dimension to produce a single output tensor.
+ * The kernel takes an array of pointers to the input tensors. The kernel configuration structure holds the number of
+ * input tensors (the number of pointers in the array) and the axis along which concatenation should be performed.
+ * The shape of all input tensors must be the same except along the target dimension for concatenation.
+ *
+ * For more info on the primitive see the MLI Documentation.
+ *
+ * @param inputs  [I] Tensors for concatenation
+ * @param cfg     [I] Concatenation configuration structure (for more info see @ref mli_concat_cfg)
+ * @param out     [O] Output tensor. Result will be stored here
+ *
+ * @return MLI status code
+ */
+mli_status mli_krn_concat_fx8(const mli_tensor ** inputs, const mli_concat_cfg * cfg, mli_tensor * out);
+mli_status mli_krn_concat_fx16(const mli_tensor ** inputs, const mli_concat_cfg * cfg, mli_tensor * out);
+
+/**
+ * @brief 2D Padding
+ *
+ * @detail The kernel performs zero padding of the borders across the height and width dimensions of vision-specific input
+ * feature maps. Padding for each side of the image (top, bottom, left, right) is configured separately according to the input
+ * configuration structure, but the same padding for each side is used across all channels. Padding for the HWC and CHW layouts
+ * of the input tensor is implemented as separate functions.
+ *
+ * For more info on the primitive see the MLI Documentation.
+ *
+ * @param in      [I] Input feature map tensor (3-dimensional tensor)
+ * @param cfg     [I] 2D Padding configuration structure (for more info see @ref mli_padding2d_cfg)
+ * @param out     [O] Output tensor. Result will be stored here
+ *
+ * @return MLI status code
+ */
+mli_status mli_krn_padding2d_chw_fx8(const mli_tensor * in, const mli_padding2d_cfg * cfg, mli_tensor * out);
+mli_status mli_krn_padding2d_chw_fx16(const mli_tensor * in, const mli_padding2d_cfg * cfg, mli_tensor * out);
+mli_status mli_krn_padding2d_hwc_fx8(const mli_tensor * in, const mli_padding2d_cfg * cfg, mli_tensor * out);
+mli_status mli_krn_padding2d_hwc_fx16(const mli_tensor * in, const mli_padding2d_cfg * cfg, mli_tensor * out);
+
+#ifdef __cplusplus
+}
+#endif
+#endif //_MLI_KERNELS_API_H_
diff --git a/edgeimpulse/edge-impulse-sdk/third_party/arc_mli_package/include/api/mli_krn_avepool_spec_api.h b/edgeimpulse/edge-impulse-sdk/third_party/arc_mli_package/include/api/mli_krn_avepool_spec_api.h
new file mode 100644
index 0000000..10f1200
--- /dev/null
+++ b/edgeimpulse/edge-impulse-sdk/third_party/arc_mli_package/include/api/mli_krn_avepool_spec_api.h
@@ -0,0 +1,117 @@
+/* This file is generated, do not edit!
+ * edit following template file instead:
+ * header_filetemplate.txt
+ */
+/*
+* Copyright 2019-2020, Synopsys, Inc.
+* All rights reserved.
+*
+* This source code is licensed under the BSD-3-Clause license found in
+* the LICENSE file in the root directory of this source tree.
+* +*/ + +#ifndef _MLI_KRN_AVEPOOL_SPEC_API_H_ +#define _MLI_KRN_AVEPOOL_SPEC_API_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "mli_types.h" + +//=================================================================== +// AvePooling specialization kernels implementation +//=================================================================== +char * mli_debug_krn_avepool_chw_fx16(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); + +mli_status mli_krn_avepool_chw_fx16_k2x2_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_chw_fx16_k3x3_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_chw_fx16_k4x4_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_chw_fx16_k5x5_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_chw_fx16_k6x6_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_chw_fx16_k7x7_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_chw_fx16_k8x8_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_chw_fx16_k9x9_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_chw_fx16_k10x10_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_chw_fx16_k2x2_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_chw_fx16_k3x3_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_chw_fx16_k4x4_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_chw_fx16_k5x5_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_chw_fx16_k6x6_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_chw_fx16_k7x7_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_chw_fx16_k8x8_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_chw_fx16_k9x9_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_chw_fx16_k10x10_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_chw_fx16_k1xn_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_chw_fx16_k1x2_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_chw_fx16_k1x3_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_chw_fx16_knx1_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_chw_fx16_k2x1_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_chw_fx16_k3x1_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_chw_fx16_k1xn_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_chw_fx16_k1x2_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_chw_fx16_k1x3_nopad(const 
mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_chw_fx16_knx1_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_chw_fx16_k2x1_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_chw_fx16_k3x1_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_chw_fx16_generic(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +char * mli_debug_krn_avepool_chw_fx8(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); + +mli_status mli_krn_avepool_chw_fx8_k2x2_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_chw_fx8_k3x3_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_chw_fx8_k4x4_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_chw_fx8_k5x5_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_chw_fx8_k6x6_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_chw_fx8_k7x7_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_chw_fx8_k8x8_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_chw_fx8_k9x9_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_chw_fx8_k10x10_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_chw_fx8_k2x2_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_chw_fx8_k3x3_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_chw_fx8_k4x4_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_chw_fx8_k5x5_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_chw_fx8_k6x6_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_chw_fx8_k7x7_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_chw_fx8_k8x8_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_chw_fx8_k9x9_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_chw_fx8_k10x10_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_chw_fx8_k1xn_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_chw_fx8_k1x2_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_chw_fx8_k1x3_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_chw_fx8_knx1_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_chw_fx8_k2x1_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_chw_fx8_k3x1_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_chw_fx8_k1xn_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status 
mli_krn_avepool_chw_fx8_k1x2_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_chw_fx8_k1x3_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_chw_fx8_knx1_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_chw_fx8_k2x1_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_chw_fx8_k3x1_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_chw_fx8_generic(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +char * mli_debug_krn_avepool_hwc_fx16(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); + +mli_status mli_krn_avepool_hwc_fx16_k2x2_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_hwc_fx16_k3x3_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_hwc_fx16_k2x2_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_hwc_fx16_k3x3_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_hwc_fx16_generic(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +char * mli_debug_krn_avepool_hwc_fx8(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); + +mli_status mli_krn_avepool_hwc_fx8_k2x2_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_hwc_fx8_k3x3_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_hwc_fx8_k2x2_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_hwc_fx8_k3x3_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_hwc_fx8_generic(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +char * mli_debug_krn_avepool_hwc_sa8(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); + +mli_status mli_krn_avepool_hwc_sa8_k2x2_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_hwc_sa8_k3x3_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_hwc_sa8_k2x2_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_hwc_sa8_k3x3_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_avepool_hwc_sa8_generic(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); + +#ifdef __cplusplus +} +#endif +#endif //_MLI_KRN_AVEPOOL_SPEC_API_H_ diff --git a/edgeimpulse/edge-impulse-sdk/third_party/arc_mli_package/include/api/mli_krn_conv2d_spec_api.h b/edgeimpulse/edge-impulse-sdk/third_party/arc_mli_package/include/api/mli_krn_conv2d_spec_api.h new file mode 100644 index 0000000..bec0a54 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/third_party/arc_mli_package/include/api/mli_krn_conv2d_spec_api.h @@ -0,0 +1,828 @@ +/* This file is generated, do not edit! + * edit following template file instead: + * header_filetemplate.txt + */ +/* +* Copyright 2019-2020, Synopsys, Inc. +* All rights reserved. +* +* This source code is licensed under the BSD-3-Clause license found in +* the LICENSE file in the root directory of this source tree. 
+* +*/ + +#ifndef _MLI_KRN_CONV2D_SPEC_API_H_ +#define _MLI_KRN_CONV2D_SPEC_API_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "mli_types.h" + +//=================================================================== +// Convolution 2d specialization kernels implementation +//=================================================================== +char * mli_debug_krn_conv2d_chw_fx16( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx16_k1x1_str1_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx16_k1x1_ch1_str1_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx16_k1x1_ch3_str1_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx16_k1x1_ch4_str1_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx16_k2x2_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx16_k2x2_ch1_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx16_k3x3_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx16_k3x3_ch1_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx16_k4x4_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx16_k4x4_ch1_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx16_k5x5_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx16_k5x5_ch1_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx16_k6x6_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx16_k6x6_ch1_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx16_k7x7_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx16_k7x7_ch1_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg 
* cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx16_k5x5_str1_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx16_k5x5_ch1_str1_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx16_k1x2_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx16_k1x3_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx16_k2x1_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx16_k3x1_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx16_k1xn_str1( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx16_knx1_str1( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx16_ch1_str1( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx16_str1( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx16_k1x1_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx16_k1x1_ch1_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx16_k1x1_ch3_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx16_k1x1_ch4_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx16_k1x1_ch8_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx16_k2x2_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx16_k2x2_ch1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx16_k3x3_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx16_k3x3_ch1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * 
out); + +mli_status mli_krn_conv2d_chw_fx16_generic( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +char * mli_debug_krn_conv2d_chw_fx8( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8_k1x1_str1_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8_k1x1_ch1_str1_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8_k1x1_ch3_str1_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8_k1x1_ch4_str1_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8_k2x2_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8_k2x2_ch1_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8_k3x3_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8_k3x3_ch1_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8_k4x4_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8_k4x4_ch1_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8_k5x5_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8_k5x5_ch1_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8_k6x6_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8_k6x6_ch1_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8_k7x7_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8_k7x7_ch1_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8_k1x2_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const 
mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8_k1x3_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8_k2x1_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8_k3x1_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8_k1xn_str1( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8_knx1_str1( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8_ch1_str1( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8_str1( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8_k1x1_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8_k1x1_ch1_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8_k1x1_ch3_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8_k1x1_ch4_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8_k1x1_ch8_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8_k2x2_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8_k2x2_ch1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8_k3x3_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8_k3x3_ch1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8_generic( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +char * mli_debug_krn_conv2d_chw_fx8w16d( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8w16d_k1x1_str1_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status 
mli_krn_conv2d_chw_fx8w16d_k1x1_ch1_str1_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8w16d_k1x1_ch3_str1_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8w16d_k1x1_ch4_str1_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8w16d_k2x2_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8w16d_k2x2_ch1_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8w16d_k3x3_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8w16d_k3x3_ch1_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8w16d_k4x4_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8w16d_k4x4_ch1_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8w16d_k5x5_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8w16d_k5x5_ch1_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8w16d_k6x6_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8w16d_k6x6_ch1_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8w16d_k7x7_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8w16d_k7x7_ch1_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8w16d_k1x2_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8w16d_k1x3_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8w16d_k2x1_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8w16d_k3x1_str1_krnpad( + const mli_tensor * 
in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8w16d_k1xn_str1( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8w16d_knx1_str1( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8w16d_ch1_str1( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8w16d_str1( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8w16d_k1x1_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8w16d_k1x1_ch1_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8w16d_k1x1_ch3_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8w16d_k1x1_ch4_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8w16d_k1x1_ch8_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8w16d_k2x2_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8w16d_k2x2_ch1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8w16d_k3x3_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8w16d_k3x3_ch1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_chw_fx8w16d_generic( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +char * mli_debug_krn_conv2d_nhwc_sa8_sa8_sa32( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_nhwc_sa8_sa8_sa32_k3x3_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_nhwc_sa8_sa8_sa32_k5x5_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_nhwc_sa8_sa8_sa32_k3x3_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_nhwc_sa8_sa8_sa32_k5x5_nopad( + 
const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_nhwc_sa8_sa8_sa32_k1x1_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_conv2d_nhwc_sa8_sa8_sa32_generic( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + + +#ifdef __cplusplus +} +#endif +#endif //_MLI_KRN_CONV2D_SPEC_API_H_ diff --git a/edgeimpulse/edge-impulse-sdk/third_party/arc_mli_package/include/api/mli_krn_depthwise_conv2d_spec_api.h b/edgeimpulse/edge-impulse-sdk/third_party/arc_mli_package/include/api/mli_krn_depthwise_conv2d_spec_api.h new file mode 100644 index 0000000..6567c15 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/third_party/arc_mli_package/include/api/mli_krn_depthwise_conv2d_spec_api.h @@ -0,0 +1,786 @@ +/* This file is generated, do not edit! + * edit following template file instead: + * header_filetemplate.txt + */ +/* +* Copyright 2019-2020, Synopsys, Inc. +* All rights reserved. +* +* This source code is licensed under the BSD-3-Clause license found in +* the LICENSE file in the root directory of this source tree. +* +*/ + +#ifndef _MLI_KRN_DEPTHWISE_CONV2D_SPEC_API_H_ +#define _MLI_KRN_DEPTHWISE_CONV2D_SPEC_API_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "mli_types.h" + +//=================================================================== +// Depthwise convolution 2d specialization kernels implementation +//=================================================================== +char * mli_debug_krn_depthwise_conv2d_chw_fx16( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx16_k1x2_ch1_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx16_k2x1_ch1_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx16_k2x2_ch1_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx16_k3x3_ch1_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx16_k4x4_ch1_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx16_k5x5_ch1_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx16_k6x6_ch1_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx16_k7x7_ch1_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status 
mli_krn_depthwise_conv2d_chw_fx16_k2x2_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx16_k3x3_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx16_k4x4_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx16_k5x5_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx16_k6x6_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx16_k7x7_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx16_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx16_k1x2_ch1_str1_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx16_k2x1_ch1_str1_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx16_k2x2_ch1_str1_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx16_k3x3_ch1_str1_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx16_k4x4_ch1_str1_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx16_k5x5_ch1_str1_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx16_k6x6_ch1_str1_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx16_k7x7_ch1_str1_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx16_k2x2_str1_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx16_k3x3_str1_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx16_k4x4_str1_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const 
mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx16_k5x5_str1_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx16_k6x6_str1_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx16_k7x7_str1_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx16_k1xn_str1( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx16_knx1_str1( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx16_ch1_str1( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx16_generic( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +char * mli_debug_krn_depthwise_conv2d_chw_fx8( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8_k1x2_ch1_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8_k2x1_ch1_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8_k2x2_ch1_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8_k3x3_ch1_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8_k4x4_ch1_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8_k5x5_ch1_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8_k6x6_ch1_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8_k7x7_ch1_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8_k2x2_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8_k3x3_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const 
mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8_k4x4_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8_k5x5_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8_k6x6_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8_k7x7_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8_k1x2_ch1_str1_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8_k2x1_ch1_str1_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8_k2x2_ch1_str1_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8_k3x3_ch1_str1_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8_k4x4_ch1_str1_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8_k5x5_ch1_str1_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8_k6x6_ch1_str1_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8_k7x7_ch1_str1_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8_k2x2_str1_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8_k3x3_str1_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8_k4x4_str1_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8_k5x5_str1_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8_k6x6_str1_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const 
mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8_k7x7_str1_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8_k1xn_str1( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8_knx1_str1( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8_ch1_str1( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8_generic( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +char * mli_debug_krn_depthwise_conv2d_chw_fx8w16d( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8w16d_k1x2_ch1_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8w16d_k2x1_ch1_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8w16d_k2x2_ch1_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8w16d_k3x3_ch1_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8w16d_k4x4_ch1_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8w16d_k5x5_ch1_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8w16d_k6x6_ch1_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8w16d_k7x7_ch1_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8w16d_k2x2_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8w16d_k3x3_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8w16d_k4x4_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8w16d_k5x5_str1_krnpad( + const 
mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8w16d_k6x6_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8w16d_k7x7_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8w16d_str1_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8w16d_k1x2_ch1_str1_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8w16d_k2x1_ch1_str1_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8w16d_k2x2_ch1_str1_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8w16d_k3x3_ch1_str1_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8w16d_k4x4_ch1_str1_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8w16d_k5x5_ch1_str1_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8w16d_k6x6_ch1_str1_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8w16d_k7x7_ch1_str1_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8w16d_k2x2_str1_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8w16d_k3x3_str1_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8w16d_k4x4_str1_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8w16d_k5x5_str1_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8w16d_k6x6_str1_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8w16d_k7x7_str1_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + 
mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8w16d_k1xn_str1( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8w16d_knx1_str1( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8w16d_ch1_str1( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_chw_fx8w16d_generic( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +char * mli_debug_krn_depthwise_conv2d_hwcn_sa8_sa8_sa32( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_hwcn_sa8_sa8_sa32_k3x3_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_hwcn_sa8_sa8_sa32_k5x5_krnpad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_hwcn_sa8_sa8_sa32_k3x3_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_hwcn_sa8_sa8_sa32_k5x5_nopad( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + +mli_status mli_krn_depthwise_conv2d_hwcn_sa8_sa8_sa32_generic( + const mli_tensor * in, + const mli_tensor * weights, + const mli_tensor * bias, + const mli_conv2d_cfg * cfg, + mli_tensor * out); + + +#ifdef __cplusplus +} +#endif +#endif //_MLI_KRN_DEPTHWISE_CONV2D_SPEC_API_H_ diff --git a/edgeimpulse/edge-impulse-sdk/third_party/arc_mli_package/include/api/mli_krn_maxpool_spec_api.h b/edgeimpulse/edge-impulse-sdk/third_party/arc_mli_package/include/api/mli_krn_maxpool_spec_api.h new file mode 100644 index 0000000..edfa654 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/third_party/arc_mli_package/include/api/mli_krn_maxpool_spec_api.h @@ -0,0 +1,119 @@ +/* This file is generated, do not edit! + * edit following template file instead: + * header_filetemplate.txt + */ +/* +* Copyright 2019-2020, Synopsys, Inc. +* All rights reserved. +* +* This source code is licensed under the BSD-3-Clause license found in +* the LICENSE file in the root directory of this source tree. 
+* +*/ + +#ifndef _MLI_KRN_MAXPOOL_SPEC_API_H_ +#define _MLI_KRN_MAXPOOL_SPEC_API_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "mli_types.h" + +//=================================================================== +// MaxPooling specialization kernels implementation +//=================================================================== +char * mli_debug_krn_maxpool_chw_fx16(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); + +mli_status mli_krn_maxpool_chw_fx16_k2x2_str1_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx16_k2x2_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx16_k3x3_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx16_k4x4_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx16_k5x5_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx16_k6x6_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx16_k7x7_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx16_k8x8_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx16_k9x9_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx16_k10x10_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx16_k1x2_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx16_k1x3_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx16_k2x1_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx16_k3x1_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx16_k2x2_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx16_k3x3_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx16_k4x4_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx16_k5x5_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx16_k6x6_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx16_k7x7_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx16_k8x8_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx16_k9x9_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx16_k10x10_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx16_k1x2_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx16_k1x3_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx16_k2x1_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx16_k3x1_krnpad(const 
mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx16_k1xn(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx16_knx1(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx16_k2x2(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx16_k3x3(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx16_generic(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +char * mli_debug_krn_maxpool_chw_fx8(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); + +mli_status mli_krn_maxpool_chw_fx8_k2x2_str1_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx8_k2x2_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx8_k3x3_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx8_k4x4_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx8_k5x5_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx8_k6x6_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx8_k7x7_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx8_k8x8_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx8_k9x9_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx8_k10x10_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx8_k1x2_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx8_k1x3_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx8_k2x1_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx8_k3x1_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx8_k2x2_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx8_k3x3_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx8_k4x4_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx8_k5x5_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx8_k6x6_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx8_k7x7_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx8_k8x8_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx8_k9x9_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx8_k10x10_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx8_k1x2_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status 
mli_krn_maxpool_chw_fx8_k1x3_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx8_k2x1_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx8_k3x1_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx8_k1xn(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx8_knx1(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx8_k2x2(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx8_k3x3(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_chw_fx8_generic(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +char * mli_debug_krn_maxpool_hwc_fx8(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); + +mli_status mli_krn_maxpool_hwc_fx8_k2x2_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_hwc_fx8_k3x3_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_hwc_fx8_k2x2_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_hwc_fx8_k3x3_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_hwc_fx8_generic(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +char * mli_debug_krn_maxpool_hwc_fx16(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); + +mli_status mli_krn_maxpool_hwc_fx16_k2x2_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_hwc_fx16_k3x3_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_hwc_fx16_k2x2_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_hwc_fx16_k3x3_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_hwc_fx16_generic(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +char * mli_debug_krn_maxpool_hwc_sa8(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); + +mli_status mli_krn_maxpool_hwc_sa8_k2x2_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_hwc_sa8_k3x3_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_hwc_sa8_k2x2_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_hwc_sa8_k3x3_krnpad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); +mli_status mli_krn_maxpool_hwc_sa8_generic(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out); + +#ifdef __cplusplus +} +#endif +#endif //_MLI_KRN_MAXPOOL_SPEC_API_H_ diff --git a/edgeimpulse/edge-impulse-sdk/third_party/arc_mli_package/include/api/mli_mov_api.h b/edgeimpulse/edge-impulse-sdk/third_party/arc_mli_package/include/api/mli_mov_api.h new file mode 100644 index 0000000..4d7353d --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/third_party/arc_mli_package/include/api/mli_mov_api.h @@ -0,0 +1,369 @@ +/* +* Copyright 2019-2020, Synopsys, Inc. +* All rights reserved. 
+* +* This source code is licensed under the BSD-3-Clause license found in +* the LICENSE file in the root directory of this source tree. +* +*/ + +/** + * @file MLI Data Move API + * + * @brief This header includes declarations for data movement functions + */ + +#ifndef _MLI_MOV_API_H_ +#define _MLI_MOV_API_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +#include "mli_types.h" + +typedef struct _mli_mov_cfg { + uint32_t offset[MLI_MAX_RANK]; + uint32_t size[MLI_MAX_RANK]; // if zero, compute from input and other parameters + uint32_t sub_sample_step[MLI_MAX_RANK]; + uint32_t dst_offset[MLI_MAX_RANK]; + int32_t dst_mem_stride[MLI_MAX_RANK]; // if zero, compute from input and other parameters + uint8_t perm_dim[MLI_MAX_RANK]; + uint8_t padding_pre[MLI_MAX_RANK]; + uint8_t padding_post[MLI_MAX_RANK]; +} mli_mov_cfg_t; + +typedef enum _mli_mov_state { + MLI_MOV_STATE_INVALID = 0, + MLI_MOV_STATE_OPEN, + MLI_MOV_STATE_PREPARED, + MLI_MOV_STATE_DMA_CONFIGURED, + MLI_MOV_STATE_DMA_RUNNING, + MLI_MOV_STATE_DONE +} mli_mov_state; + +typedef struct _mli_mov_handle_t { + int dma_ch; + int num_ch; + mli_mov_state state; +} mli_mov_handle_t; + +//--------------------------------------------------------------------- +// Synchronous data movement functions +//--------------------------------------------------------------------- + +/** + * @brief Synchronous copy from src tensor to dst tensor + * + * @detail This function will perform a data copy from the src tensor to the dst tensor + * according to the settings in the cfg struct. + * The destination tensor needs to contain a valid pointer to a large enough buffer. + * the size of this buffer needs to be specified in the capacity field of the dst tensor. + * the other fields of the dst tensor will be filled by the copy function. + * + * The function will return once the complete data transfer is finished. + * + * @param src [I] pointer to source tensor. + * @param dst [I] pointer to destination tensor. + * @param cfg [I] pointer to config struct + * + * @return MLI status code + */ +mli_status +mli_mov_tensor_sync(const mli_tensor* src, const mli_mov_cfg_t* cfg, mli_tensor* dst); + + +//--------------------------------------------------------------------- +// Asynchronous data movement functions +//--------------------------------------------------------------------- + +/** + * @brief Prepare asynchronous copy from src to dst + * + * @detail This function will prepare a data copy from the src tensor to the dst tensor + * according to the settings in the cfg struct. + * The destination tensor needs to contain a valid pointer to a large enough buffer. + * the size of this buffer needs to be specified in the capacity field of the dst tensor. + * the other fields of the dst tensor will be filled by the copy function. + * The handle needs to be obtained using the mli_mov_acquire_handle() function. + * + * The function returns after the transfer has been prepared. It still needs to be started + * by the mli_mov_start() function. + * + * @param h [I] pointer to a handle for an available dma channel. + * @param src [I] pointer to source tensor. + * @param dst [I] pointer to destination tensor. 
+ * @param cfg [I] pointer to config struct + * + * @return MLI status code + */ +mli_status +mli_mov_prepare(mli_mov_handle_t* h, const mli_tensor* src, const mli_mov_cfg_t* cfg, mli_tensor* dst); + +/** + * @brief Register a callback for a datatransfer + * + * @detail This function will register a callback function that will be called after + * the data transfer has been completed. Note that the callback needs to be registered before the + * transfer has been started. otherwise in case of a fast transfer it could happen that the transfer + * finished before the callback got registered. + * the callback function takes one parameter, and the value of cookie is passed to the callback function. + * The handle needs to be obtained using the mli_mov_acquire_handle() function. + * + * Registration of a callback function is optional. + * + * @param h [I] pointer to a handle for an available dma channel. + * @param cb [I] function pointer to a callback. + * @param cookie [I] this parameter will be passed to the callback function. + * + * @return MLI status code + */ +mli_status +mli_mov_registercallback(mli_mov_handle_t* h, void (*cb)(int32_t), int32_t cookie); + +/** + * @brief Start asynchronous copy from src to dst + * + * @detail This function will start the data copy from the src tensor to the dst tensor + * as prepared by the prepare function. + * Before this function is called the mli_mov_prepare() has to be called. + * + * The function returns after the transfer has been started. A callback or wait for done + * can be used to synchronize on the transfer complete + * + * @param h [I] pointer to a handle for an available dma channel. + * @param src [I] pointer to source tensor. + * @param dst [I] pointer to destination tensor. + * @param cfg [I] pointer to config struct + * + * @return MLI status code + */ +mli_status +mli_mov_start(mli_mov_handle_t* h, const mli_tensor* src, const mli_mov_cfg_t* cfg, mli_tensor* dst); + +/** + * @brief Polling function to detect if transfer has completed + * + * @detail This function will return true when the transfer is completed, and false in all + * other cases + * + * @param h [I] pointer to a handle for an available dma channel. + * + * @return bool transfer is done. + */ +bool +mli_mov_isdone(mli_mov_handle_t* h); + +/** + * @brief Synchronize to transfer complete + * + * @detail This function will do active polling and return after the transfer has completed. + * + * @param h [I] pointer to a handle for an available dma channel. + * + * @return MLI status code + */ +mli_status +mli_mov_wait(mli_mov_handle_t* h); + + +//--------------------------------------------------------------------- +// functions to set available resources (e.g. dma channels) +//--------------------------------------------------------------------- +/** + * @brief set dma channels that can be used by mli_mov functions + * + * @detail This function is used to set a pool of the dma channels + * that can be used by the mli_mov functions. + * These channels should not be used by other functions. 
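+ *
+ * For illustration only, a typical asynchronous sequence built on such a pool could look
+ * as follows (the src/dst tensor setup is omitted and the channel numbers are placeholders):
+ *
+ *     mli_mov_handle_t h;
+ *     mli_mov_cfg_t cfg;
+ *     mli_mov_set_num_dma_ch(4, 2);          // reserve channels 4 and 5 for mli_mov use
+ *     mli_mov_acquire_handle(1, &h);         // take one channel from the pool
+ *     mli_mov_cfg_for_copy(&cfg);            // full-tensor copy with neutral settings
+ *     mli_mov_prepare(&h, &src, &cfg, &dst);
+ *     mli_mov_start(&h, &src, &cfg, &dst);
+ *     // ... other work while the DMA transfer runs ...
+ *     mli_mov_wait(&h);                      // or poll with mli_mov_isdone(&h)
+ *     mli_mov_release_handle(&h);
+ *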
+ * the acquire and release functions can be used to obtain channels from this pool + * + * @param ch_offset [I] first dma channel that can by used + * @param num_ch [I] number of dma channels that can be used + * + * @return MLI status code + */ +mli_status +mli_mov_set_num_dma_ch(int ch_offset, int num_ch); + +/** + * @brief Acquire dma channel(s) + * + * @detail This function is used to obtain one or more dma channels from the pool + * + * @param num_ch [I] number of requested dma channels + * @param h [O] pointer a handle that will be filled with the dma channel offset that can be used. + * + * @return MLI status code + */ +mli_status +mli_mov_acquire_handle(int num_ch, mli_mov_handle_t* h); + +/** + * @brief Release dma channle(s) + * + * @detail This function will release the dma channels from the handle h back to the pool. + * + * @param h [I] pointer to a handle for an available dma channel. + * + * @return MLI status code + */ +mli_status +mli_mov_release_handle(mli_mov_handle_t* h); + + +//--------------------------------------------------------------------- +// Helper functions to fill mli_mov_cfg_t +//--------------------------------------------------------------------- + +/** + * @brief Construction of cfg struct for full tensor copy + * + * @detail This function will fill the cfg struct with the values needed for a full tensor copy + * it will put all the other fields to a neutral value. + * + * @param cfg [O] pointer to config struct + * + * @return MLI status code + */ +mli_status +mli_mov_cfg_for_copy(mli_mov_cfg_t* cfg); + +/** + * @brief Construction of cfg struct for slicing + * + * @detail This function will fill the cfg struct with the values needed for copying a slice + * from the source to the destination tensor. + * + * @param cfg [O] pointer to config struct + * @param offsets [I] array of size MLI_MAX_RANK that contains the top left coordinate of the slice + * @param sizes [I] array of size MLI_MAX_RANK that contains the size of the slice + * @param dst_mem_stride [I] array of size MLI_MAX_RANK that contains the number of elements to the next dimension in the destination tensor + + * + * @return MLI status code + */ +mli_status +mli_mov_cfg_for_slice(mli_mov_cfg_t* cfg, int* offsets, int* sizes, int* dst_mem_stride); + +/** + * @brief Construction of cfg struct for concatenation + * + * @detail This function will fill the cfg struct with the values needed for copying a complete tensor + * into a larger tensor at a specified position. + * + * @param cfg [O] pointer to config struct + * @param dst_offsets [I] array of size MLI_MAX_RANK that contains the top left coordinate in the dst tensor where the src needs to be copied + * @param dst_mem_stride [I] array of size MLI_MAX_RANK that contains the number of elements to the next dimension in the destination tensor + + * + * @return MLI status code + */ +mli_status +mli_mov_cfg_for_concat(mli_mov_cfg_t* cfg, int* dst_offsets, int* dst_mem_stride); + +/** + * @brief Construction of cfg struct for subsampling + * + * @detail This function will fill the cfg struct with the values needed for subsampling a tensor + * a subsample step of 3 means that every third sample is copied to the output. + * + * @param cfg [O] pointer to config struct + * @param subsample_step [I] array of size MLI_MAX_RANK that contains the subsample step for each dimension. 
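+ *                            (for illustration, a step array of {1, 2, 2, 1} would keep every
+ *                            second sample along the two middle dimensions and every sample
+ *                            along the first and last)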
+ * @param dst_mem_stride [I] array of size MLI_MAX_RANK that contains the number of elements to the next dimension in the destination tensor + + * + * @return MLI status code + */ +mli_status +mli_mov_cfg_for_subsample(mli_mov_cfg_t* cfg, int* sub_sample_step, int* dst_mem_stride); + +/** + * @brief Construction of cfg struct for permutaion or transposing a tensor + * + * @detail This function will fill the cfg struct with the values needed for reordering the order of the dimensions in a tensor. + * + * @param cfg [O] pointer to config struct + * @param perm_dim [I] array of size MLI_MAX_RANK that contains the index of the source dimension for each output dimension + * + * @return MLI status code + */ +mli_status +mli_mov_cfg_for_permute(mli_mov_cfg_t* cfg, uint8_t* perm_dim); + +/** + * @brief Construction of cfg struct for padding + * + * @detail This function will fill the cfg struct with the values needed adding zero padding to a tensor in CHW layout. + * + * @param cfg [O] pointer to config struct + * @param padleft [I] amount of pixels to padd to the left + * @param padright [I] amount of pixels to padd to the right + * @param padtop [I] amount of pixels to padd to the top + * @param padbot [I] amount of pixels to padd to the bottom + * @param dst_mem_stride [I] array of size MLI_MAX_RANK that contains the number of elements to the next dimension in the destination tensor + * + * @return MLI status code + */ +mli_status +mli_mov_cfg_for_padding2d_chw(mli_mov_cfg_t* cfg, uint8_t padleft, uint8_t padright, uint8_t padtop, uint8_t padbot, int* dst_mem_stride); + +/** + * @brief Construction of cfg struct for padding + * + * @detail This function will fill the cfg struct with the values needed adding zero padding to a tensor in HWC layout. + * + * @param cfg [O] pointer to config struct + * @param padleft [I] amount of pixels to padd to the left + * @param padright [I] amount of pixels to padd to the right + * @param padtop [I] amount of pixels to padd to the top + * @param padbot [I] amount of pixels to padd to the bottom + * @param dst_mem_stride [I] array of size MLI_MAX_RANK that contains the number of elements to the next dimension in the destination tensor + * + * @return MLI status code + */ +mli_status +mli_mov_cfg_for_padding2d_hwc(mli_mov_cfg_t* cfg, uint8_t padleft, uint8_t padright, uint8_t padtop, uint8_t padbot, int* dst_mem_stride); + +/** + * @brief Construction of cfg struct + * + * @detail This function will fill the cfg struct + * + * @param cfg [O] pointer to config struct + * @param offsets [I] array of size MLI_MAX_RANK that contains the top left coordinate of the slice + * @param sizes [I] array of size MLI_MAX_RANK that contains the size of the slice + * @param subsample_step [I] array of size MLI_MAX_RANK that contains the subsample step for each dimension. 
+ * @param dst_offsets [I] array of size MLI_MAX_RANK that contains the top left coordinate in the dst tensor where the src needs to be copied + * @param dst_mem_stride [I] array of size MLI_MAX_RANK that contains the number of elements to the next dimension in the destination tensor + * @param perm_dim [I] array of size MLI_MAX_RANK that contains the index of the source dimension for each output dimension + * @param padleft [I] amount of pixels to padd to the left + * @param padright [I] amount of pixels to padd to the right + * @param padtop [I] amount of pixels to padd to the top + * @param padbot [I] amount of pixels to padd to the bottom + * + * @return MLI status code + */ + +mli_status +mli_mov_cfg_all( + mli_mov_cfg_t* cfg, + int* offsets, + int* sizes, + int* subsample_step, + int* dst_offsets, + int* dst_mem_stride, + uint8_t* perm_dim, + uint8_t padleft, + uint8_t padright, + uint8_t padtop, + uint8_t padbot); + + +#ifdef __cplusplus +} +#endif + +#endif //_MLI_MOV_API_H_ diff --git a/edgeimpulse/edge-impulse-sdk/third_party/arc_mli_package/include/mli_api.h b/edgeimpulse/edge-impulse-sdk/third_party/arc_mli_package/include/mli_api.h new file mode 100644 index 0000000..6e7dad3 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/third_party/arc_mli_package/include/mli_api.h @@ -0,0 +1,25 @@ +/* +* Copyright 2019-2020, Synopsys, Inc. +* All rights reserved. +* +* This source code is licensed under the BSD-3-Clause license found in +* the LICENSE file in the root directory of this source tree. +* +*/ + +/** + * @file MLI Library API + * + * @brief This header includes all necessary files for using MLI Library + */ + +#ifndef _MLI_API_H_ +#define _MLI_API_H_ + +#include "mli_types.h" + +#include "api/mli_helpers_api.h" +#include "api/mli_kernels_api.h" +#include "api/mli_mov_api.h" + +#endif //#ifndef _MLI_API_H_ diff --git a/edgeimpulse/edge-impulse-sdk/third_party/arc_mli_package/include/mli_config.h b/edgeimpulse/edge-impulse-sdk/third_party/arc_mli_package/include/mli_config.h new file mode 100644 index 0000000..960c232 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/third_party/arc_mli_package/include/mli_config.h @@ -0,0 +1,115 @@ +/* +* Copyright 2019-2020, Synopsys, Inc. +* All rights reserved. +* +* This source code is licensed under the BSD-3-Clause license found in +* the LICENSE file in the root directory of this source tree. +* +*/ + +/** + * @file MLI Library Configuration header + * + * @brief This header defines MLI Library configuration + */ + +#ifndef _MLI_CONFIG_H_ +#define _MLI_CONFIG_H_ +/** +* Define Library build configuration options +*/ + +/** +* Concatenate primitive: Maximum number of tensors that might be concatenated. +*/ +#define MLI_CONCAT_MAX_TENSORS (8) + +/** +* Library Debug mode +*/ +#define DBG_MODE_RELEASE (0) /*< No debug. Messages:OFF; Assertions:OFF; ReturnCodes: Always OK */ +#define DBG_MODE_RET_CODES (1) /*< Return codes mode. Messages:OFF; Assertions:OFF; ReturnCodes: Valid Return*/ +#define DBG_MODE_ASSERT (2) /*< Assert. Messages:OFF; Assertions:ON; Extra Assertions:OFF; ReturnCodes: Valid Return */ +#define DBG_MODE_DEBUG (3) /*< Debug. Messages:ON; Assertions:ON; Extra Assertions:OFF; ReturnCodes: Valid Return */ +#define DBG_MODE_FULL (4) /*< Full Debug. 
Messages:ON; Assertions:ON; Extra Assertions:ON; ReturnCodes: Valid Return */ + +#ifndef MLI_DEBUG_MODE +#define MLI_DEBUG_MODE (DBG_MODE_RELEASE) +#endif + +/** +* Define platform specific data +*/ +#include + +#if defined (__CCAC__) + +#include + +#ifdef __FXAPI__ +#include +#else +#error "ARC FX Library (FXAPI) is a required dependency" +#endif + +#endif // if defined (__CCAC__) + + +/* +* Define the platform (according to pre-defined macro or according to HW config) +* 1 - ARCV2DSP ISA +* 2 - ARCV2DSP ISA with XY Memory +* 3 - ARCV2DSP ISA with 64bit operands (HS Only) +*/ + +#if defined(V2DSP_XY) || ((defined __Xxy) && !(defined(V2DSP) || defined(V2DSP_WIDE))) +/* Platform with XY memory (EM9D or EM11D) */ +#undef V2DSP_XY +#define ARC_PLATFORM (2) +#define ARC_PLATFORM_STR "ARCv2DSP XY" + +#elif defined(V2DSP_WIDE) || ((defined __Xdsp_wide) && !(defined(V2DSP) || defined(V2DSP_XY))) +/* Platform with wide DSP ISA (HS45D or HS47D) */ +#undef V2DSP_WIDE +#define ARC_PLATFORM (3) +#define ARC_PLATFORM_STR "ARCv2DSP Wide" + +#elif defined(V2DSP) || ((defined(__Xdsp2) || defined(__Xdsp_complex)) && !(defined(V2DSP_XY) || defined(V2DSP_WIDE))) +/* Platform with DSP ISA (EM5D or EM7D) */ +#undef V2DSP +#define ARC_PLATFORM (1) +#define ARC_PLATFORM_STR "ARCv2DSP" + +#else +#error "Target platform is undefined or defined incorrectly" +#endif + +#define V2DSP (1) +#define V2DSP_XY (2) +#define V2DSP_WIDE (3) + +/* +* Re-define ML pointers for XY specific platform +* +* MLI_PTR is used for all the read pointers +* MLI_CONV_OUT_PTR is used for the output buffers of all weigths based kernels. +* this means all the kernels that perform a convolution like operation between inputs and weights. +* MLI_OUT_PTR is used for the output of all other kernels. +*/ +#if (ARC_PLATFORM == V2DSP_XY) +#define MLI_PTR(p) __xy p * +#define MLI_PTR_IS_XY true +#define MLI_OUT_PTR(p) __xy p * +#define MLI_OUT_PTR_IS_XY true +#define MLI_CONV_OUT_PTR(p) p * +#define MLI_CONV_OUT_PTR_IS_XY false +#else +#define MLI_PTR(p) p * +#define MLI_PTR_IS_XY false +#define MLI_OUT_PTR(p) p * +#define MLI_OUT_PTR_IS_XY false +#define MLI_CONV_OUT_PTR(p) p * +#define MLI_CONV_OUT_PTR_IS_XY false +#endif + +#endif // _MLI_CONFIG_H_ diff --git a/edgeimpulse/edge-impulse-sdk/third_party/arc_mli_package/include/mli_types.h b/edgeimpulse/edge-impulse-sdk/third_party/arc_mli_package/include/mli_types.h new file mode 100644 index 0000000..03df3db --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/third_party/arc_mli_package/include/mli_types.h @@ -0,0 +1,339 @@ +/* +* Copyright 2019-2020, Synopsys, Inc. +* All rights reserved. +* +* This source code is licensed under the BSD-3-Clause license found in +* the LICENSE file in the root directory of this source tree. +* +*/ + +/** + * @file MLI Library Public Types + * + * @brief This header defines MLI Library data types + */ + +#ifndef _MLI_TYPES_H_ +#define _MLI_TYPES_H_ + +#include + + +//================================================================ +// +// Library return codes +// +//================================================================= + +/** + * @brief return codes + * + * All functions return value of mli_status enumeration type to indicate if there was an error. 
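+ *
+ * A typical call-site check could look as follows (sketch only; the kernel and the
+ * in/cfg/out objects are just an example, and with MLI_DEBUG_MODE set to
+ * DBG_MODE_RELEASE the library always reports MLI_STATUS_OK):
+ *
+ *     mli_status status = mli_krn_maxpool_hwc_sa8_generic(&in, &cfg, &out);
+ *     if (status != MLI_STATUS_OK) {
+ *         // e.g. bad tensor, shape mismatch, or not enough output capacity
+ *     }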
+ * + */ +typedef enum _mli_status{ + MLI_STATUS_OK = 0, /**< No error occurred */ + MLI_STATUS_BAD_TENSOR, /**< Invalid tensor is passed to the function */ + MLI_STATUS_SHAPE_MISMATCH, /**< Shape of tensors are not compatible for the function */ + MLI_STATUS_INCOMPATEBLE_TENSORS, + MLI_STATUS_BAD_FUNC_CFG, /**< Not valid configuration structure is passed */ + MLI_STATUS_NOT_ENGH_MEM, /**< Capacity of output tensor is not enough for function result */ + MLI_STATUS_NOT_SUPPORTED, /**< Function is not yet implemented, or inputs combinations is not supported */ + MLI_STATUS_SPEC_PARAM_MISMATCH, + + MLI_STATUS_ARGUMENT_ERROR, + MLI_STATUS_LENGTH_ERROR, + MLI_STATUS_SIZE_MISMATCH, + + MLI_STATUS_RANK_MISMATCH, + MLI_STATUS_TYPE_MISMATCH, + /* other return codes*/ + + MLI_STATUS_LARGE_ENUM = 0x02000000 /**< Utility field. Prevent size optimization of public enums */ +} mli_status; + +//================================================================ +// +// Basic Data Types Definition +// +//================================================================= + +#define MLI_MAX_RANK (4) /**< Maximum tensor rank (number of dimensions) supported by the library */ +/** + * @brief Tensor's basic element type and it's parameters + * + * Defines basic element type stored in tensor structure. + * Based on this information library functions may define sizes, + * algorithms for processing, and other implementation specific things. + */ +typedef enum { + MLI_EL_FX_8 = 0, /**< 8 bit depth fixed point data with configurable number + of fractional bits Data container is int8_t*/ + MLI_EL_FX_16, /**< 16 bit depth fixed point data with configurable number + of fractional bits Data container is int16_t*/ + MLI_EL_ASYM_I8, /**< 8 bit asymetrical signed data with configurable zero offset vector + and multiplier vector. Data container is int8_t */ + MLI_EL_ASYM_I32, /**< 32 bit asymetrical signed data with configurable zero offset vector + and multiplier vector. Data container is int32_t */ + MLI_EL_LARGE_ENUM = 0x02000000 /**< Utility field. Prevent size optimization of public enums */ +} mli_element_type; + +/** + * @brief Container union to represent polymorphic data. + * + * Stores pointer to data or a single value that intend to be directly used in arithmetical operations. + * + * NOTE: As compiler pointer to XY memory (or another fast memory) is a separate type, it should be somehow reflected if we are going to + * use it in tensor type. + */ +typedef union _mli_data_container { + int32_t* pi32; + int16_t* pi16; + int8_t* pi8; + int32_t i32; + int16_t i16; + int8_t i8; +} mli_data_container; + +/** + * @brief type parameters for arithmetical operations with tensor elements. + * + * Stores data type parameters required for arithmetical operations with tensor elements. + * These parameters wrapped into union for future library extensibility but current version + * supports only fixed point data with configurable number of fractional bits. + * The union can be interpreted only as this structure. + */ +typedef union _mli_element_params { + struct { + uint8_t frac_bits; /**< Number of fractional bits */ + } fx; + + struct { + mli_data_container zero_point; /**< 16bit signed zero point offset. Single value for all data in tensor if dim < 0 + or pointer to an array of zero points regarding configured dimension (dim) otherwise. + In case of array it's size can be looked up in the shape using the dimension to which the scales apply*/ + mli_data_container scale; /** 16bit signed scale factors. 
Single value for all data in tensor if dim < 0
+                                      or pointer to an array of scale factors regarding configured dimension (dim) otherwise.
+                                      In case of an array, its size can be looked up in the shape using the dimension to which the scales apply*/
+        int32_t dim;            /**< dimension of the tensor to which the arrays of quantization parameters apply */
+        int8_t scale_frac_bits; /**< number of fractional bits in the elements of the scales array */
+    } asym;
+} mli_element_params;
+
+
+/**
+ * @brief Tensor type - main data container for all ML API algorithms
+ *
+ * Tensor is the main container type for all input and output data which must be processed by an ML algorithm.
+ * In general, data for neural networks and other machine learning tasks is a multi-dimensional array of some
+ * particular shape. So the tensor structure includes not only data, but its shape, its type, and other data specific
+ * parameters. To be more precise, by "data" we mean input features, output features, layer weights and biases,
+ * but not layer parameters like padding or stride for convolutional layers.
+ */
+typedef struct _mli_tensor {
+
+    void *data; /**< main data. Layer cast this pointer to actual type (XY ptr for L1) */
+    uint32_t capacity; /**< data buffer size in bytes. Necessary for auxiliary tensors where dimensions are variable. */
+
+    int32_t mem_stride[MLI_MAX_RANK]; /**< Array with the distance (in elements) to the next element in the same dimension.
+                                      To compute the size in bytes, the number of elements needs to be multiplied by the bytes per element.
+                                      For example, for a matrix A[rows][columns], mem_stride[0] contains the distance
+                                      to the next element (=1 in this example), and mem_stride[1] contains the distance from
+                                      one row to the next (=columns in this example). The size of the array is defined by MLI_MAX_RANK. */
+
+    uint32_t shape[MLI_MAX_RANK]; /**< Array with tensors dimensions. Dimensions must be stored in direct order
+                                  starting from least changing one. For example:
+                                  for matrix of shape [rows][columns], shape[0] = rows and shape[1] = columns */
+    uint32_t rank; /**< Tensors rank with amount of dimensions. Must be smaller or equal MLI_MAX_RANK value */
+
+    mli_element_type el_type; /**< Type of elements stored in tensor */
+    mli_element_params el_params; /**< Parameters of elements stored in tensor. */
+} mli_tensor;
+
+
+//================================================================
+//
+// Layers Configurations Definition
+//
+//=================================================================
+
+
+/**
+ * @brief RELU layer config definition
+ *
+ * enum used for selection of the type of ReLu activation function
+ */
+typedef enum {
+    MLI_RELU_NONE = 0, /**< no ReLu activation */
+    MLI_RELU_GEN,      /**< General ReLU with output range [0, MAX_VAL]*/
+    MLI_RELU_1,        /**< ReLU with output range [-1, 1] */
+    MLI_RELU_6,        /**< ReLU with output range [0, 6] */
+    MLI_RELU_LARGE_ENUM = 0x02000000 /**< Utility field. Prevent size optimization of public enums */
+} mli_relu_type;
+
+
+
+/**
+ * @brief RELU layer config
+ *
+ * configuration struct to store the ReLu type
+ */
+typedef struct {
+    mli_relu_type type;
+} mli_relu_cfg;
+
+
+
+/**
+ * @brief Convolutional layer config definition
+ *
+ * Data structure to provide the configuration for a 2D convolution function.
+ * The stride parameters can be used to get a subsampled output.
(by stepping through the input) + */ +typedef struct { + mli_relu_cfg relu;/**< Type of ReLU activation applied to output values.*/ + uint8_t stride_width; /**< Stride (step) of filter across width dimension of input.*/ + uint8_t stride_height;/**< Stride (step) of filter across height dimension of input.*/ + uint8_t padding_left; /**< Number of zero points implicitly added to the left side of input (width dimension).*/ + uint8_t padding_right;/**< Number of zero points implicitly added to the right side of input (width dimension).*/ + uint8_t padding_top; /**< Number of zero points implicitly added to the upper side of input (height dimension).*/ + uint8_t padding_bottom;/**< Number of zero points implicitly added to the bottom side of input (height dimension).*/ +} mli_conv2d_cfg; + + + +/** + * @brief Pooling layer config definition + * + * Data structure to provide the configuration for pooling primitives. + */ +typedef struct { + uint8_t kernel_width; /**< Width for pooling function applying */ + uint8_t kernel_height; /**< Height for pooling function applying */ + uint8_t stride_width; /**< Step to next pooling in width dimension of input*/ + uint8_t stride_height; /**< Step to next pooling in height dimension of input*/ + uint8_t padding_left; /**< Number of points implicitly added to the left side of input (width dimension).*/ + uint8_t padding_right;/**< Number of points implicitly added to the right side of input (width dimension).*/ + uint8_t padding_top; /**< Number of points implicitly added to the upper side of input (height dimension).*/ + uint8_t padding_bottom;/**< Number of points implicitly added to the bottom side of input (height dimension).*/ +} mli_pool_cfg; + +/** + * @brief Recurent layers processing mode definition + * + * enum used for selection of the type of processing mode for LSTM and Basic RNN primitives + */ +typedef enum { + RNN_ONE_TO_ONE = 0, /**< One-to-one mode. Process input tensor as single input frame */ + RNN_BATCH_TO_BATCH, /**< Batch-to-batch mode. Process input tensor as sequence of frames to produce a sequence of outputs of the same size */ + RNN_BATCH_TO_LAST, /**< Batch-to-last mode. Process input tensor as sequence of frames to produce a single (last in the sequence) output */ + RNN_MODE_LARGE_ENUM = 0x02000000 /**< Utility field. Prevent size optimization of public enums */ +} mli_rnn_mode; + + + +/** + * @brief Recurent layers output activation definition + * + * enum used for selection of the type of output activation for LSTM and Basic RNN primitives + */ +typedef enum { + RNN_ACT_NONE = 0, /**< No activation.*/ + RNN_ACT_TANH, /**< Hyperbolic tangent activation.*/ + RNN_ACT_SIGM, /**< sigmoid (logistic) activation function.*/ + RNN_ACT_LARGE_ENUM = 0x02000000 /**< Utility field. Prevent size optimization of public enums */ +} mli_rnn_out_activation; + + + +/** + * @brief Recurrent layers config definition + * + * Data structure to provide the configuration for LSTM and Basic RNN primitives. + */ +typedef struct { + mli_rnn_mode mode; /**< Recurrent layer processing mode.*/ + mli_rnn_out_activation act; /**< Output activation type.*/ + mli_tensor *ir_tsr; /**< Pointer to tensor for holding intermediate results. */ +} mli_rnn_cell_cfg; + + + +/** + * @brief Permute layer config definition + * + * Data structure to provide the permutation order to functions. + */ +typedef struct { + uint8_t perm_dim[MLI_MAX_RANK]; /**< A permutation array. Dimensions order for output tensor. 
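+                                       For example, reordering a rank-3 CHW tensor into HWC order
+                                       could use perm_dim[0..2] = {1, 2, 0} (illustrative values only).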
*/ +} mli_permute_cfg; + + + +/** + * @brief Padding2D layer config definition + * + * Data structure to provide the configuration for padding2D primitives. + */ +typedef struct { + uint8_t padding_left; /**< Number of zero points to be added to the left side of input (width dimension).*/ + uint8_t padding_right; /**< Number of zero points to be added to the right side of input (width dimension).*/ + uint8_t padding_top; /**< Number of zero points to be added to the upper side of input (height dimension).*/ + uint8_t padding_bottom;/**< Number of zero points to be added to the bottom side of input (height dimension).*/ +} mli_padding2d_cfg; + + + +/** + * @brief Concatenation layer config definition + * + * Data structure to provide the configuration for Concatenation primitives. + */ +typedef struct { + uint8_t tensors_num; /**< Number of tensors to concatenate (number of pointers in “inputs†array) */ + uint8_t axis; /**< Axis for concatenation (dimension number starting from 0)*/ +} mli_concat_cfg; + + +/** + * @brief Point-to-subtensor helper config + * + * Data structure to provide coordinates and size of required subtensor in the input tensor + */ +typedef struct { + uint32_t start_coord[MLI_MAX_RANK]; /**< subtensor start coodinates in the input tensor */ + uint8_t coord_num; /**< number of coodrdinates in the array */ + uint8_t first_out_dim_size; /**< First output dimension size */ +} mli_point_to_subtsr_cfg; + +/** + * @brief Create Subtensor helper config + * + * Data structure to provide coordinates and sizes of required subtensor in the input tensor + * The size can be reduced in any dimension. + */ +typedef struct { + uint32_t offset[MLI_MAX_RANK]; /**< subtensor start coordinates in the input tensor + The size of this array is determined by the rank of the input tensor*/ + uint32_t size[MLI_MAX_RANK]; /**< Size of the sub tensor in elements per dimension + the number of entries in this array is determind by the input tensor */ + uint32_t sub_tensor_rank; /**< Rank of the sub tensor that will be produced */ +} mli_sub_tensor_cfg; + +/** + * @brief Data layout type for vision kernels (convolutions/pooloing mostly). + * + * Provide information on how to interprete dimensions in input and params tensors: + * which dimension are height/ width/ channels + * + * LAYOUT_HWC - Data is stored in next order: [Height; Width; Channels] + * weights in [Filters(out channel); Height; Width; In Channels] + * LAYOUT_HWCN - Data is stored as for HWC + * weights are [Height; Width; In Channels; Filters(out channel)] + */ + typedef enum { + LAYOUT_HWC, + LAYOUT_HWCN + } mli_layout_type; + +#endif // _MLI_TYPES_H_ diff --git a/edgeimpulse/edge-impulse-sdk/third_party/flatbuffers/LICENSE.txt b/edgeimpulse/edge-impulse-sdk/third_party/flatbuffers/LICENSE.txt new file mode 100644 index 0000000..d645695 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/third_party/flatbuffers/LICENSE.txt @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/edgeimpulse/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_allocator.h b/edgeimpulse/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_allocator.h new file mode 100644 index 0000000..9274c98 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_allocator.h @@ -0,0 +1,68 @@ +/* + * Copyright 2021 Google Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FLATBUFFERS_ALLOCATOR_H_ +#define FLATBUFFERS_ALLOCATOR_H_ + +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_base.h" + +namespace flatbuffers { + +// Allocator interface. This is flatbuffers-specific and meant only for +// `vector_downward` usage. +class Allocator { + public: + virtual ~Allocator() {} + + // Allocate `size` bytes of memory. + virtual uint8_t *allocate(size_t size) = 0; + + // Deallocate `size` bytes of memory at `p` allocated by this allocator. + virtual void deallocate(uint8_t *p, size_t size) = 0; + + // Reallocate `new_size` bytes of memory, replacing the old region of size + // `old_size` at `p`. In contrast to a normal realloc, this grows downwards, + // and is intended specifcally for `vector_downward` use. + // `in_use_back` and `in_use_front` indicate how much of `old_size` is + // actually in use at each end, and needs to be copied. + virtual uint8_t *reallocate_downward(uint8_t *old_p, size_t old_size, + size_t new_size, size_t in_use_back, + size_t in_use_front) { + FLATBUFFERS_ASSERT(new_size > old_size); // vector_downward only grows + uint8_t *new_p = allocate(new_size); + memcpy_downward(old_p, old_size, new_p, new_size, in_use_back, + in_use_front); + deallocate(old_p, old_size); + return new_p; + } + + protected: + // Called by `reallocate_downward` to copy memory from `old_p` of `old_size` + // to `new_p` of `new_size`. Only memory of size `in_use_front` and + // `in_use_back` will be copied from the front and back of the old memory + // allocation. + void memcpy_downward(uint8_t *old_p, size_t old_size, uint8_t *new_p, + size_t new_size, size_t in_use_back, + size_t in_use_front) { + memcpy(new_p + new_size - in_use_back, old_p + old_size - in_use_back, + in_use_back); + memcpy(new_p, old_p, in_use_front); + } +}; + +} // namespace flatbuffers + +#endif // FLATBUFFERS_ALLOCATOR_H_ \ No newline at end of file diff --git a/edgeimpulse/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_array.h b/edgeimpulse/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_array.h new file mode 100644 index 0000000..286db9d --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_array.h @@ -0,0 +1,243 @@ +/* + * Copyright 2021 Google Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FLATBUFFERS_ARRAY_H_ +#define FLATBUFFERS_ARRAY_H_ + +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_base.h" +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_stl_emulation.h" +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_vector.h" + +namespace flatbuffers { + +// This is used as a helper type for accessing arrays. +template class Array { + // Array can carry only POD data types (scalars or structs). + typedef typename flatbuffers::bool_constant::value> + scalar_tag; + typedef + typename flatbuffers::conditional::type + IndirectHelperType; + + public: + typedef uint16_t size_type; + typedef typename IndirectHelper::return_type return_type; + typedef VectorIterator const_iterator; + typedef VectorReverseIterator const_reverse_iterator; + + // If T is a LE-scalar or a struct (!scalar_tag::value). + static FLATBUFFERS_CONSTEXPR bool is_span_observable = + (scalar_tag::value && (FLATBUFFERS_LITTLEENDIAN || sizeof(T) == 1)) || + !scalar_tag::value; + + FLATBUFFERS_CONSTEXPR uint16_t size() const { return length; } + + return_type Get(uoffset_t i) const { + FLATBUFFERS_ASSERT(i < size()); + return IndirectHelper::Read(Data(), i); + } + + return_type operator[](uoffset_t i) const { return Get(i); } + + // If this is a Vector of enums, T will be its storage type, not the enum + // type. This function makes it convenient to retrieve value with enum + // type E. + template E GetEnum(uoffset_t i) const { + return static_cast(Get(i)); + } + + const_iterator begin() const { return const_iterator(Data(), 0); } + const_iterator end() const { return const_iterator(Data(), size()); } + + const_reverse_iterator rbegin() const { + return const_reverse_iterator(end()); + } + const_reverse_iterator rend() const { + return const_reverse_iterator(begin()); + } + + const_iterator cbegin() const { return begin(); } + const_iterator cend() const { return end(); } + + const_reverse_iterator crbegin() const { return rbegin(); } + const_reverse_iterator crend() const { return rend(); } + + // Get a mutable pointer to elements inside this array. + // This method used to mutate arrays of structs followed by a @p Mutate + // operation. For primitive types use @p Mutate directly. + // @warning Assignments and reads to/from the dereferenced pointer are not + // automatically converted to the correct endianness. + typename flatbuffers::conditional::type + GetMutablePointer(uoffset_t i) const { + FLATBUFFERS_ASSERT(i < size()); + return const_cast(&data()[i]); + } + + // Change elements if you have a non-const pointer to this object. + void Mutate(uoffset_t i, const T &val) { MutateImpl(scalar_tag(), i, val); } + + // The raw data in little endian format. Use with care. + const uint8_t *Data() const { return data_; } + + uint8_t *Data() { return data_; } + + // Similarly, but typed, much like std::vector::data + const T *data() const { return reinterpret_cast(Data()); } + T *data() { return reinterpret_cast(Data()); } + + // Copy data from a span with endian conversion. 
+ // If this Array and the span overlap, the behavior is undefined. + void CopyFromSpan(flatbuffers::span src) { + const auto p1 = reinterpret_cast(src.data()); + const auto p2 = Data(); + FLATBUFFERS_ASSERT(!(p1 >= p2 && p1 < (p2 + length)) && + !(p2 >= p1 && p2 < (p1 + length))); + (void)p1; + (void)p2; + CopyFromSpanImpl(flatbuffers::bool_constant(), src); + } + + protected: + void MutateImpl(flatbuffers::true_type, uoffset_t i, const T &val) { + FLATBUFFERS_ASSERT(i < size()); + WriteScalar(data() + i, val); + } + + void MutateImpl(flatbuffers::false_type, uoffset_t i, const T &val) { + *(GetMutablePointer(i)) = val; + } + + void CopyFromSpanImpl(flatbuffers::true_type, + flatbuffers::span src) { + // Use std::memcpy() instead of std::copy() to avoid performance degradation + // due to aliasing if T is char or unsigned char. + // The size is known at compile time, so memcpy would be inlined. + std::memcpy(data(), src.data(), length * sizeof(T)); + } + + // Copy data from flatbuffers::span with endian conversion. + void CopyFromSpanImpl(flatbuffers::false_type, + flatbuffers::span src) { + for (size_type k = 0; k < length; k++) { Mutate(k, src[k]); } + } + + // This class is only used to access pre-existing data. Don't ever + // try to construct these manually. + // 'constexpr' allows us to use 'size()' at compile time. + // @note Must not use 'FLATBUFFERS_CONSTEXPR' here, as const is not allowed on + // a constructor. +#if defined(__cpp_constexpr) + constexpr Array(); +#else + Array(); +#endif + + uint8_t data_[length * sizeof(T)]; + + private: + // This class is a pointer. Copying will therefore create an invalid object. + // Private and unimplemented copy constructor. + Array(const Array &); + Array &operator=(const Array &); +}; + +// Specialization for Array[struct] with access using Offset pointer. +// This specialization used by idl_gen_text.cpp. +template class Array, length> { + static_assert(flatbuffers::is_same::value, "unexpected type T"); + + public: + typedef const void *return_type; + + const uint8_t *Data() const { return data_; } + + // Make idl_gen_text.cpp::PrintContainer happy. + return_type operator[](uoffset_t) const { + FLATBUFFERS_ASSERT(false); + return nullptr; + } + + private: + // This class is only used to access pre-existing data. + Array(); + Array(const Array &); + Array &operator=(const Array &); + + uint8_t data_[1]; +}; + +template +FLATBUFFERS_CONSTEXPR_CPP11 flatbuffers::span make_span(Array &arr) + FLATBUFFERS_NOEXCEPT { + static_assert( + Array::is_span_observable, + "wrong type U, only plain struct, LE-scalar, or byte types are allowed"); + return span(arr.data(), N); +} + +template +FLATBUFFERS_CONSTEXPR_CPP11 flatbuffers::span make_span( + const Array &arr) FLATBUFFERS_NOEXCEPT { + static_assert( + Array::is_span_observable, + "wrong type U, only plain struct, LE-scalar, or byte types are allowed"); + return span(arr.data(), N); +} + +template +FLATBUFFERS_CONSTEXPR_CPP11 flatbuffers::span +make_bytes_span(Array &arr) FLATBUFFERS_NOEXCEPT { + static_assert(Array::is_span_observable, + "internal error, Array might hold only scalars or structs"); + return span(arr.Data(), sizeof(U) * N); +} + +template +FLATBUFFERS_CONSTEXPR_CPP11 flatbuffers::span +make_bytes_span(const Array &arr) FLATBUFFERS_NOEXCEPT { + static_assert(Array::is_span_observable, + "internal error, Array might hold only scalars or structs"); + return span(arr.Data(), sizeof(U) * N); +} + +// Cast a raw T[length] to a raw flatbuffers::Array +// without endian conversion. 
Use with care. +// TODO: move these Cast-methods to `internal` namespace. +template +Array &CastToArray(T (&arr)[length]) { + return *reinterpret_cast *>(arr); +} + +template +const Array &CastToArray(const T (&arr)[length]) { + return *reinterpret_cast *>(arr); +} + +template +Array &CastToArrayOfEnum(T (&arr)[length]) { + static_assert(sizeof(E) == sizeof(T), "invalid enum type E"); + return *reinterpret_cast *>(arr); +} + +template +const Array &CastToArrayOfEnum(const T (&arr)[length]) { + static_assert(sizeof(E) == sizeof(T), "invalid enum type E"); + return *reinterpret_cast *>(arr); +} + +} // namespace flatbuffers + +#endif // FLATBUFFERS_ARRAY_H_ diff --git a/edgeimpulse/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_base.h b/edgeimpulse/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_base.h new file mode 100644 index 0000000..525a8e5 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_base.h @@ -0,0 +1,496 @@ +#ifndef FLATBUFFERS_BASE_H_ +#define FLATBUFFERS_BASE_H_ + +// For TFLM, we always want FLATBUFFERS_LOCALE_INDEPENDENT to be defined as 0. +// We could achieve this by adding -DFLATBUFFERS_LOCALE_INDEPENDENT=0 to the +// TFLM Makefile. However, for (at least) the Arduino, adding additional build +// flags during the compilation can be a bit awkward. As such, we have instead +// made a decision to change the default to be FLATBUFFERS_LOCALE_INDEPENDENT=0 +// for TFLM to make it easier for external IDE integration. +#ifndef FLATBUFFERS_LOCALE_INDEPENDENT +#define FLATBUFFERS_LOCALE_INDEPENDENT 0 +#endif + +// clang-format off + +// If activate should be declared and included first. +#if defined(FLATBUFFERS_MEMORY_LEAK_TRACKING) && \ + defined(_MSC_VER) && defined(_DEBUG) + // The _CRTDBG_MAP_ALLOC inside will replace + // calloc/free (etc) to its debug version using #define directives. + #define _CRTDBG_MAP_ALLOC + #include + #include + // Replace operator new by trace-enabled version. + #define DEBUG_NEW new(_NORMAL_BLOCK, __FILE__, __LINE__) + #define new DEBUG_NEW +#endif + +#if !defined(FLATBUFFERS_ASSERT) +#include +#define FLATBUFFERS_ASSERT assert +#elif defined(FLATBUFFERS_ASSERT_INCLUDE) +// Include file with forward declaration +#include FLATBUFFERS_ASSERT_INCLUDE +#endif + +#ifndef ARDUINO +#include +#endif + +#include +#include +#include + +#if defined(ARDUINO) && !defined(ARDUINOSTL_M_H) + #include +#else + #include +#endif + +#include +#include +#include +#include +#include +#include +#include + +#if defined(__unix__) && !defined(FLATBUFFERS_LOCALE_INDEPENDENT) + #include +#endif + +#ifdef __ANDROID__ + #include +#endif + +#if defined(__ICCARM__) +#include +#endif + +// Note the __clang__ check is needed, because clang presents itself +// as an older GNUC compiler (4.2). +// Clang 3.3 and later implement all of the ISO C++ 2011 standard. +// Clang 3.4 and later implement all of the ISO C++ 2014 standard. +// http://clang.llvm.org/cxx_status.html + +// Note the MSVC value '__cplusplus' may be incorrect: +// The '__cplusplus' predefined macro in the MSVC stuck at the value 199711L, +// indicating (erroneously!) that the compiler conformed to the C++98 Standard. +// This value should be correct starting from MSVC2017-15.7-Preview-3. +// The '__cplusplus' will be valid only if MSVC2017-15.7-P3 and the `/Zc:__cplusplus` switch is set. +// Workaround (for details see MSDN): +// Use the _MSC_VER and _MSVC_LANG definition instead of the __cplusplus for compatibility. 
+// The _MSVC_LANG macro reports the Standard version regardless of the '/Zc:__cplusplus' switch. + +#if defined(__GNUC__) && !defined(__clang__) + #define FLATBUFFERS_GCC (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) +#else + #define FLATBUFFERS_GCC 0 +#endif + +#if defined(__clang__) + #define FLATBUFFERS_CLANG (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__) +#else + #define FLATBUFFERS_CLANG 0 +#endif + +/// @cond FLATBUFFERS_INTERNAL +#if __cplusplus <= 199711L && \ + (!defined(_MSC_VER) || _MSC_VER < 1600) && \ + (!defined(__GNUC__) || \ + (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__ < 40400)) + #error A C++11 compatible compiler with support for the auto typing is \ + required for FlatBuffers. + #error __cplusplus _MSC_VER __GNUC__ __GNUC_MINOR__ __GNUC_PATCHLEVEL__ +#endif + +#if !defined(__clang__) && \ + defined(__GNUC__) && \ + (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__ < 40600) + // Backwards compatibility for g++ 4.4, and 4.5 which don't have the nullptr + // and constexpr keywords. Note the __clang__ check is needed, because clang + // presents itself as an older GNUC compiler. + #ifndef nullptr_t + const class nullptr_t { + public: + template inline operator T*() const { return 0; } + private: + void operator&() const; + } nullptr = {}; + #endif + #ifndef constexpr + #define constexpr const + #endif +#endif + +// The wire format uses a little endian encoding (since that's efficient for +// the common platforms). +#if defined(__s390x__) + #define FLATBUFFERS_LITTLEENDIAN 0 +#endif // __s390x__ +#if !defined(FLATBUFFERS_LITTLEENDIAN) + #if defined(__GNUC__) || defined(__clang__) || defined(__ICCARM__) + #if (defined(__BIG_ENDIAN__) || \ + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)) + #define FLATBUFFERS_LITTLEENDIAN 0 + #else + #define FLATBUFFERS_LITTLEENDIAN 1 + #endif // __BIG_ENDIAN__ + #elif defined(_MSC_VER) + #if defined(_M_PPC) + #define FLATBUFFERS_LITTLEENDIAN 0 + #else + #define FLATBUFFERS_LITTLEENDIAN 1 + #endif + #else + #error Unable to determine endianness, define FLATBUFFERS_LITTLEENDIAN. + #endif +#endif // !defined(FLATBUFFERS_LITTLEENDIAN) + +#define FLATBUFFERS_VERSION_MAJOR 2 +#define FLATBUFFERS_VERSION_MINOR 0 +#define FLATBUFFERS_VERSION_REVISION 6 +#define FLATBUFFERS_STRING_EXPAND(X) #X +#define FLATBUFFERS_STRING(X) FLATBUFFERS_STRING_EXPAND(X) +namespace flatbuffers { + // Returns version as string "MAJOR.MINOR.REVISION". 
+ const char* FLATBUFFERS_VERSION(); +} + +#if (!defined(_MSC_VER) || _MSC_VER > 1600) && \ + (!defined(__GNUC__) || (__GNUC__ * 100 + __GNUC_MINOR__ >= 407)) || \ + defined(__clang__) + #define FLATBUFFERS_FINAL_CLASS final + #define FLATBUFFERS_OVERRIDE override + #define FLATBUFFERS_EXPLICIT_CPP11 explicit + #define FLATBUFFERS_VTABLE_UNDERLYING_TYPE : flatbuffers::voffset_t +#else + #define FLATBUFFERS_FINAL_CLASS + #define FLATBUFFERS_OVERRIDE + #define FLATBUFFERS_EXPLICIT_CPP11 + #define FLATBUFFERS_VTABLE_UNDERLYING_TYPE +#endif + +#if (!defined(_MSC_VER) || _MSC_VER >= 1900) && \ + (!defined(__GNUC__) || (__GNUC__ * 100 + __GNUC_MINOR__ >= 406)) || \ + (defined(__cpp_constexpr) && __cpp_constexpr >= 200704) + #define FLATBUFFERS_CONSTEXPR constexpr + #define FLATBUFFERS_CONSTEXPR_CPP11 constexpr + #define FLATBUFFERS_CONSTEXPR_DEFINED +#else + #define FLATBUFFERS_CONSTEXPR const + #define FLATBUFFERS_CONSTEXPR_CPP11 +#endif + +#if (defined(__cplusplus) && __cplusplus >= 201402L) || \ + (defined(__cpp_constexpr) && __cpp_constexpr >= 201304) + #define FLATBUFFERS_CONSTEXPR_CPP14 FLATBUFFERS_CONSTEXPR_CPP11 +#else + #define FLATBUFFERS_CONSTEXPR_CPP14 +#endif + +#if (defined(__GXX_EXPERIMENTAL_CXX0X__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 406)) || \ + (defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 190023026)) || \ + defined(__clang__) + #define FLATBUFFERS_NOEXCEPT noexcept +#else + #define FLATBUFFERS_NOEXCEPT +#endif + +// NOTE: the FLATBUFFERS_DELETE_FUNC macro may change the access mode to +// private, so be sure to put it at the end or reset access mode explicitly. +#if (!defined(_MSC_VER) || _MSC_FULL_VER >= 180020827) && \ + (!defined(__GNUC__) || (__GNUC__ * 100 + __GNUC_MINOR__ >= 404)) || \ + defined(__clang__) + #define FLATBUFFERS_DELETE_FUNC(func) func = delete +#else + #define FLATBUFFERS_DELETE_FUNC(func) private: func +#endif + +#if (!defined(_MSC_VER) || _MSC_VER >= 1900) && \ + (!defined(__GNUC__) || (__GNUC__ * 100 + __GNUC_MINOR__ >= 409)) || \ + defined(__clang__) + #define FLATBUFFERS_DEFAULT_DECLARATION +#endif + +// Check if we can use template aliases +// Not possible if Microsoft Compiler before 2012 +// Possible is the language feature __cpp_alias_templates is defined well +// Or possible if the C++ std is C+11 or newer +#if (defined(_MSC_VER) && _MSC_VER > 1700 /* MSVC2012 */) \ + || (defined(__cpp_alias_templates) && __cpp_alias_templates >= 200704) \ + || (defined(__cplusplus) && __cplusplus >= 201103L) + #define FLATBUFFERS_TEMPLATES_ALIASES +#endif + +#ifndef FLATBUFFERS_HAS_STRING_VIEW + // Only provide flatbuffers::string_view if __has_include can be used + // to detect a header that provides an implementation + #if defined(__has_include) + // Check for std::string_view (in c++17) + #if __has_include() && (__cplusplus >= 201606 || (defined(_HAS_CXX17) && _HAS_CXX17)) + #include + namespace flatbuffers { + typedef std::string_view string_view; + } + #define FLATBUFFERS_HAS_STRING_VIEW 1 + // Check for std::experimental::string_view (in c++14, compiler-dependent) + #elif __has_include() && (__cplusplus >= 201411) + #include + namespace flatbuffers { + typedef std::experimental::string_view string_view; + } + #define FLATBUFFERS_HAS_STRING_VIEW 1 + // Check for absl::string_view + #elif __has_include("absl/strings/string_view.h") + #include "absl/strings/string_view.h" + namespace flatbuffers { + typedef absl::string_view string_view; + } + #define FLATBUFFERS_HAS_STRING_VIEW 1 + #endif + #endif // __has_include +#endif // !FLATBUFFERS_HAS_STRING_VIEW 
+ +#ifndef FLATBUFFERS_GENERAL_HEAP_ALLOC_OK + // Allow heap allocations to be used + #define FLATBUFFERS_GENERAL_HEAP_ALLOC_OK 1 +#endif // !FLATBUFFERS_GENERAL_HEAP_ALLOC_OK + +#ifndef FLATBUFFERS_HAS_NEW_STRTOD + // Modern (C++11) strtod and strtof functions are available for use. + // 1) nan/inf strings as argument of strtod; + // 2) hex-float as argument of strtod/strtof. + #if (defined(_MSC_VER) && _MSC_VER >= 1900) || \ + (defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 409)) || \ + (defined(__clang__)) + #define FLATBUFFERS_HAS_NEW_STRTOD 1 + #endif +#endif // !FLATBUFFERS_HAS_NEW_STRTOD + +#ifndef FLATBUFFERS_LOCALE_INDEPENDENT + // Enable locale independent functions {strtof_l, strtod_l,strtoll_l, + // strtoull_l}. + #if (defined(_MSC_VER) && _MSC_VER >= 1800) || \ + (defined(__ANDROID_API__) && __ANDROID_API__>= 21) || \ + (defined(_XOPEN_VERSION) && (_XOPEN_VERSION >= 700)) && \ + (!defined(__Fuchsia__) && !defined(__ANDROID_API__)) + #define FLATBUFFERS_LOCALE_INDEPENDENT 1 + #else + #define FLATBUFFERS_LOCALE_INDEPENDENT 0 + #endif +#endif // !FLATBUFFERS_LOCALE_INDEPENDENT + +// Suppress Undefined Behavior Sanitizer (recoverable only). Usage: +// - __supress_ubsan__("undefined") +// - __supress_ubsan__("signed-integer-overflow") +#if defined(__clang__) && (__clang_major__ > 3 || (__clang_major__ == 3 && __clang_minor__ >=7)) + #define __supress_ubsan__(type) __attribute__((no_sanitize(type))) +#elif defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 409) + #define __supress_ubsan__(type) __attribute__((no_sanitize_undefined)) +#else + #define __supress_ubsan__(type) +#endif + +// This is constexpr function used for checking compile-time constants. +// Avoid `#pragma warning(disable: 4127) // C4127: expression is constant`. +template FLATBUFFERS_CONSTEXPR inline bool IsConstTrue(T t) { + return !!t; +} + +// Enable C++ attribute [[]] if std:c++17 or higher. +#if ((__cplusplus >= 201703L) \ + || (defined(_MSVC_LANG) && (_MSVC_LANG >= 201703L))) + // All attributes unknown to an implementation are ignored without causing an error. + #define FLATBUFFERS_ATTRIBUTE(attr) attr + + #define FLATBUFFERS_FALLTHROUGH() [[fallthrough]] +#else + #define FLATBUFFERS_ATTRIBUTE(attr) + + #if FLATBUFFERS_CLANG >= 30800 + #define FLATBUFFERS_FALLTHROUGH() [[clang::fallthrough]] + #elif FLATBUFFERS_GCC >= 70300 + #define FLATBUFFERS_FALLTHROUGH() [[gnu::fallthrough]] + #else + #define FLATBUFFERS_FALLTHROUGH() + #endif +#endif + +/// @endcond + +/// @file +namespace flatbuffers { + +/// @cond FLATBUFFERS_INTERNAL +// Our default offset / size type, 32bit on purpose on 64bit systems. +// Also, using a consistent offset type maintains compatibility of serialized +// offset values between 32bit and 64bit systems. +typedef uint32_t uoffset_t; + +// Signed offsets for references that can go in both directions. +typedef int32_t soffset_t; + +// Offset/index used in v-tables, can be changed to uint8_t in +// format forks to save a bit of space if desired. +typedef uint16_t voffset_t; + +typedef uintmax_t largest_scalar_t; + +// In 32bits, this evaluates to 2GB - 1 +#define FLATBUFFERS_MAX_BUFFER_SIZE ((1ULL << (sizeof(::flatbuffers::soffset_t) * 8 - 1)) - 1) + +// The minimum size buffer that can be a valid flatbuffer. +// Includes the offset to the root table (uoffset_t), the offset to the vtable +// of the root table (soffset_t), the size of the vtable (uint16_t), and the +// size of the referring table (uint16_t). 
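+// With the 32-bit uoffset_t and soffset_t defined above, this works out to
+// 4 + 4 + 2 + 2 = 12 bytes.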
+#define FLATBUFFERS_MIN_BUFFER_SIZE sizeof(uoffset_t) + sizeof(soffset_t) + \ + sizeof(uint16_t) + sizeof(uint16_t) + +// We support aligning the contents of buffers up to this size. +#ifndef FLATBUFFERS_MAX_ALIGNMENT + #define FLATBUFFERS_MAX_ALIGNMENT 32 +#endif + +/// @brief The length of a FlatBuffer file header. +static const size_t kFileIdentifierLength = 4; + +inline bool VerifyAlignmentRequirements(size_t align, size_t min_align = 1) { + return (min_align <= align) && (align <= (FLATBUFFERS_MAX_ALIGNMENT)) && + (align & (align - 1)) == 0; // must be power of 2 +} + +#if defined(_MSC_VER) + #pragma warning(disable: 4351) // C4351: new behavior: elements of array ... will be default initialized + #pragma warning(push) + #pragma warning(disable: 4127) // C4127: conditional expression is constant +#endif + +template T EndianSwap(T t) { + #if defined(_MSC_VER) + #define FLATBUFFERS_BYTESWAP16 _byteswap_ushort + #define FLATBUFFERS_BYTESWAP32 _byteswap_ulong + #define FLATBUFFERS_BYTESWAP64 _byteswap_uint64 + #elif defined(__ICCARM__) + #define FLATBUFFERS_BYTESWAP16 __REV16 + #define FLATBUFFERS_BYTESWAP32 __REV + #define FLATBUFFERS_BYTESWAP64(x) \ + ((__REV(static_cast(x >> 32U))) | (static_cast(__REV(static_cast(x)))) << 32U) + #else + #if defined(__GNUC__) && __GNUC__ * 100 + __GNUC_MINOR__ < 408 && !defined(__clang__) + // __builtin_bswap16 was missing prior to GCC 4.8. + #define FLATBUFFERS_BYTESWAP16(x) \ + static_cast(__builtin_bswap32(static_cast(x) << 16)) + #else + #define FLATBUFFERS_BYTESWAP16 __builtin_bswap16 + #endif + #define FLATBUFFERS_BYTESWAP32 __builtin_bswap32 + #define FLATBUFFERS_BYTESWAP64 __builtin_bswap64 + #endif + if (sizeof(T) == 1) { // Compile-time if-then's. + return t; + } else if (sizeof(T) == 2) { + union { T t; uint16_t i; } u = { t }; + u.i = FLATBUFFERS_BYTESWAP16(u.i); + return u.t; + } else if (sizeof(T) == 4) { + union { T t; uint32_t i; } u = { t }; + u.i = FLATBUFFERS_BYTESWAP32(u.i); + return u.t; + } else if (sizeof(T) == 8) { + union { T t; uint64_t i; } u = { t }; + u.i = FLATBUFFERS_BYTESWAP64(u.i); + return u.t; + } else { + FLATBUFFERS_ASSERT(0); + return t; + } +} + +#if defined(_MSC_VER) + #pragma warning(pop) +#endif + + +template T EndianScalar(T t) { + #if FLATBUFFERS_LITTLEENDIAN + return t; + #else + return EndianSwap(t); + #endif +} + +template +// UBSAN: C++ aliasing type rules, see std::bit_cast<> for details. +__supress_ubsan__("alignment") +T ReadScalar(const void *p) { + return EndianScalar(*reinterpret_cast(p)); +} + +// See https://github.com/google/flatbuffers/issues/5950 + +#if (FLATBUFFERS_GCC >= 100000) && (FLATBUFFERS_GCC < 110000) + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wstringop-overflow" +#endif + +template +// UBSAN: C++ aliasing type rules, see std::bit_cast<> for details. +__supress_ubsan__("alignment") +void WriteScalar(void *p, T t) { + *reinterpret_cast(p) = EndianScalar(t); +} + +template struct Offset; +template __supress_ubsan__("alignment") void WriteScalar(void *p, Offset t) { + *reinterpret_cast(p) = EndianScalar(t.o); +} + +#if (FLATBUFFERS_GCC >= 100000) && (FLATBUFFERS_GCC < 110000) + #pragma GCC diagnostic pop +#endif + +// Computes how many bytes you'd have to pad to be able to write an +// "scalar_size" scalar if the buffer had grown to "buf_size" (downwards in +// memory). 
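+// For example, PaddingBytes(3, 4) == 1: one byte of padding brings the buffer
+// size to 4, a multiple of 4, so the next 4-byte scalar is written aligned.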
+__supress_ubsan__("unsigned-integer-overflow") +inline size_t PaddingBytes(size_t buf_size, size_t scalar_size) { + return ((~buf_size) + 1) & (scalar_size - 1); +} + +// Generic 'operator==' with conditional specialisations. +// T e - new value of a scalar field. +// T def - default of scalar (is known at compile-time). +template inline bool IsTheSameAs(T e, T def) { return e == def; } + +#if defined(FLATBUFFERS_NAN_DEFAULTS) && \ + defined(FLATBUFFERS_HAS_NEW_STRTOD) && (FLATBUFFERS_HAS_NEW_STRTOD > 0) +// Like `operator==(e, def)` with weak NaN if T=(float|double). +template inline bool IsFloatTheSameAs(T e, T def) { + return (e == def) || ((def != def) && (e != e)); +} +template<> inline bool IsTheSameAs(float e, float def) { + return IsFloatTheSameAs(e, def); +} +template<> inline bool IsTheSameAs(double e, double def) { + return IsFloatTheSameAs(e, def); +} +#endif + +// Check 'v' is out of closed range [low; high]. +// Workaround for GCC warning [-Werror=type-limits]: +// comparison is always true due to limited range of data type. +template +inline bool IsOutRange(const T &v, const T &low, const T &high) { + return (v < low) || (high < v); +} + +// Check 'v' is in closed range [low; high]. +template +inline bool IsInRange(const T &v, const T &low, const T &high) { + return !IsOutRange(v, low, high); +} + +} // namespace flatbuffers +#endif // FLATBUFFERS_BASE_H_ diff --git a/edgeimpulse/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_buffer.h b/edgeimpulse/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_buffer.h new file mode 100644 index 0000000..fba7de2 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_buffer.h @@ -0,0 +1,142 @@ +/* + * Copyright 2021 Google Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FLATBUFFERS_BUFFER_H_ +#define FLATBUFFERS_BUFFER_H_ + +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_base.h" + +namespace flatbuffers { + +// Wrapper for uoffset_t to allow safe template specialization. +// Value is allowed to be 0 to indicate a null object (see e.g. AddOffset). +template struct Offset { + uoffset_t o; + Offset() : o(0) {} + Offset(uoffset_t _o) : o(_o) {} + Offset Union() const { return Offset(o); } + bool IsNull() const { return !o; } +}; + +inline void EndianCheck() { + int endiantest = 1; + // If this fails, see FLATBUFFERS_LITTLEENDIAN above. + FLATBUFFERS_ASSERT(*reinterpret_cast(&endiantest) == + FLATBUFFERS_LITTLEENDIAN); + (void)endiantest; +} + +template FLATBUFFERS_CONSTEXPR size_t AlignOf() { + // clang-format off + #ifdef _MSC_VER + return __alignof(T); + #else + #ifndef alignof + return __alignof__(T); + #else + return alignof(T); + #endif + #endif + // clang-format on +} + +// Lexicographically compare two strings (possibly containing nulls), and +// return true if the first is less than the second. 
+static inline bool StringLessThan(const char *a_data, uoffset_t a_size, + const char *b_data, uoffset_t b_size) { + const auto cmp = memcmp(a_data, b_data, (std::min)(a_size, b_size)); + return cmp == 0 ? a_size < b_size : cmp < 0; +} + +// When we read serialized data from memory, in the case of most scalars, +// we want to just read T, but in the case of Offset, we want to actually +// perform the indirection and return a pointer. +// The template specialization below does just that. +// It is wrapped in a struct since function templates can't overload on the +// return type like this. +// The typedef is for the convenience of callers of this function +// (avoiding the need for a trailing return decltype) +template struct IndirectHelper { + typedef T return_type; + typedef T mutable_return_type; + static const size_t element_stride = sizeof(T); + static return_type Read(const uint8_t *p, uoffset_t i) { + return EndianScalar((reinterpret_cast(p))[i]); + } +}; +template struct IndirectHelper> { + typedef const T *return_type; + typedef T *mutable_return_type; + static const size_t element_stride = sizeof(uoffset_t); + static return_type Read(const uint8_t *p, uoffset_t i) { + p += i * sizeof(uoffset_t); + return reinterpret_cast(p + ReadScalar(p)); + } +}; +template struct IndirectHelper { + typedef const T *return_type; + typedef T *mutable_return_type; + static const size_t element_stride = sizeof(T); + static return_type Read(const uint8_t *p, uoffset_t i) { + return reinterpret_cast(p + i * sizeof(T)); + } +}; + +/// @brief Get a pointer to the the file_identifier section of the buffer. +/// @return Returns a const char pointer to the start of the file_identifier +/// characters in the buffer. The returned char * has length +/// 'flatbuffers::FlatBufferBuilder::kFileIdentifierLength'. +/// This function is UNDEFINED for FlatBuffers whose schema does not include +/// a file_identifier (likely points at padding or the start of a the root +/// vtable). +inline const char *GetBufferIdentifier(const void *buf, + bool size_prefixed = false) { + return reinterpret_cast(buf) + + ((size_prefixed) ? 2 * sizeof(uoffset_t) : sizeof(uoffset_t)); +} + +// Helper to see if the identifier in a buffer has the expected value. +inline bool BufferHasIdentifier(const void *buf, const char *identifier, + bool size_prefixed = false) { + return strncmp(GetBufferIdentifier(buf, size_prefixed), identifier, + flatbuffers::kFileIdentifierLength) == 0; +} + +/// @cond FLATBUFFERS_INTERNAL +// Helpers to get a typed pointer to the root object contained in the buffer. 
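+// A typical read path looks roughly like this (MyTable stands in for a
+// schema-generated type):
+//   const uint8_t *buf = /* finished flatbuffer bytes */;
+//   auto root = flatbuffers::GetRoot<MyTable>(buf);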
+template T *GetMutableRoot(void *buf) { + EndianCheck(); + return reinterpret_cast( + reinterpret_cast(buf) + + EndianScalar(*reinterpret_cast(buf))); +} + +template T *GetMutableSizePrefixedRoot(void *buf) { + return GetMutableRoot(reinterpret_cast(buf) + + sizeof(uoffset_t)); +} + +template const T *GetRoot(const void *buf) { + return GetMutableRoot(const_cast(buf)); +} + +template const T *GetSizePrefixedRoot(const void *buf) { + return GetRoot(reinterpret_cast(buf) + sizeof(uoffset_t)); +} + +} // namespace flatbuffers + +#endif // FLATBUFFERS_BUFFER_H_ \ No newline at end of file diff --git a/edgeimpulse/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_buffer_ref.h b/edgeimpulse/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_buffer_ref.h new file mode 100644 index 0000000..56eb281 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_buffer_ref.h @@ -0,0 +1,53 @@ +/* + * Copyright 2021 Google Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FLATBUFFERS_BUFFER_REF_H_ +#define FLATBUFFERS_BUFFER_REF_H_ + +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_base.h" +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_verifier.h" + +namespace flatbuffers { + +// Convenient way to bundle a buffer and its length, to pass it around +// typed by its root. +// A BufferRef does not own its buffer. +struct BufferRefBase {}; // for std::is_base_of + +template struct BufferRef : BufferRefBase { + BufferRef() : buf(nullptr), len(0), must_free(false) {} + BufferRef(uint8_t *_buf, uoffset_t _len) + : buf(_buf), len(_len), must_free(false) {} + + ~BufferRef() { + if (must_free) free(buf); + } + + const T *GetRoot() const { return flatbuffers::GetRoot(buf); } + + bool Verify() { + Verifier verifier(buf, len); + return verifier.VerifyBuffer(nullptr); + } + + uint8_t *buf; + uoffset_t len; + bool must_free; +}; + +} // namespace flatbuffers + +#endif // FLATBUFFERS_BUFFER_REF_H_ \ No newline at end of file diff --git a/edgeimpulse/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_default_allocator.h b/edgeimpulse/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_default_allocator.h new file mode 100644 index 0000000..fff5db7 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_default_allocator.h @@ -0,0 +1,58 @@ +/* + * Copyright 2021 Google Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FLATBUFFERS_DEFAULT_ALLOCATOR_H_ +#define FLATBUFFERS_DEFAULT_ALLOCATOR_H_ + +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_allocator.h" +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_base.h" + +namespace flatbuffers { + +// DefaultAllocator uses new/delete to allocate memory regions +class DefaultAllocator : public Allocator { + public: + uint8_t *allocate(size_t size) FLATBUFFERS_OVERRIDE { + return new uint8_t[size]; + } + + void deallocate(uint8_t *p, size_t) FLATBUFFERS_OVERRIDE { delete[] p; } + + static void dealloc(void *p, size_t) { delete[] static_cast(p); } +}; + +// These functions allow for a null allocator to mean use the default allocator, +// as used by DetachedBuffer and vector_downward below. +// This is to avoid having a statically or dynamically allocated default +// allocator, or having to move it between the classes that may own it. +inline uint8_t *Allocate(Allocator *allocator, size_t size) { + return allocator->allocate(size); +} + +inline void Deallocate(Allocator *allocator, uint8_t *p, size_t size) { + allocator->deallocate(p, size); +} + +inline uint8_t *ReallocateDownward(Allocator *allocator, uint8_t *old_p, + size_t old_size, size_t new_size, + size_t in_use_back, size_t in_use_front) { + return allocator->reallocate_downward(old_p, old_size, new_size, in_use_back, + in_use_front); +} + +} // namespace flatbuffers + +#endif // FLATBUFFERS_DEFAULT_ALLOCATOR_H_ diff --git a/edgeimpulse/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_detached_buffer.h b/edgeimpulse/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_detached_buffer.h new file mode 100644 index 0000000..2d8ebac --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_detached_buffer.h @@ -0,0 +1,114 @@ +/* + * Copyright 2021 Google Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FLATBUFFERS_DETACHED_BUFFER_H_ +#define FLATBUFFERS_DETACHED_BUFFER_H_ + +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_allocator.h" +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_base.h" +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_default_allocator.h" + +namespace flatbuffers { + +// DetachedBuffer is a finished flatbuffer memory region, detached from its +// builder. The original memory region and allocator are also stored so that +// the DetachedBuffer can manage the memory lifetime. 
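+// A typical lifetime sketch: keep the DetachedBuffer returned by
+// FlatBufferBuilder::Release() alive for as long as the bytes are in use, e.g.
+//   flatbuffers::DetachedBuffer done = fbb.Release();
+//   consume(done.data(), done.size());  // consume() is a placeholder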
+class DetachedBuffer { + public: + DetachedBuffer() + : allocator_(nullptr), + own_allocator_(false), + buf_(nullptr), + reserved_(0), + cur_(nullptr), + size_(0) {} + + DetachedBuffer(Allocator *allocator, bool own_allocator, uint8_t *buf, + size_t reserved, uint8_t *cur, size_t sz) + : allocator_(allocator), + own_allocator_(own_allocator), + buf_(buf), + reserved_(reserved), + cur_(cur), + size_(sz) {} + + DetachedBuffer(DetachedBuffer &&other) + : allocator_(other.allocator_), + own_allocator_(other.own_allocator_), + buf_(other.buf_), + reserved_(other.reserved_), + cur_(other.cur_), + size_(other.size_) { + other.reset(); + } + + DetachedBuffer &operator=(DetachedBuffer &&other) { + if (this == &other) return *this; + + destroy(); + + allocator_ = other.allocator_; + own_allocator_ = other.own_allocator_; + buf_ = other.buf_; + reserved_ = other.reserved_; + cur_ = other.cur_; + size_ = other.size_; + + other.reset(); + + return *this; + } + + ~DetachedBuffer() { destroy(); } + + const uint8_t *data() const { return cur_; } + + uint8_t *data() { return cur_; } + + size_t size() const { return size_; } + + // These may change access mode, leave these at end of public section + FLATBUFFERS_DELETE_FUNC(DetachedBuffer(const DetachedBuffer &other)); + FLATBUFFERS_DELETE_FUNC( + DetachedBuffer &operator=(const DetachedBuffer &other)); + + protected: + Allocator *allocator_; + bool own_allocator_; + uint8_t *buf_; + size_t reserved_; + uint8_t *cur_; + size_t size_; + + inline void destroy() { + if (buf_) Deallocate(allocator_, buf_, reserved_); + if (own_allocator_ && allocator_) { delete allocator_; } + reset(); + } + + inline void reset() { + allocator_ = nullptr; + own_allocator_ = false; + buf_ = nullptr; + reserved_ = 0; + cur_ = nullptr; + size_ = 0; + } +}; + +} // namespace flatbuffers + +#endif // FLATBUFFERS_DETACHED_BUFFER_H_ diff --git a/edgeimpulse/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_flatbuffer_builder.h b/edgeimpulse/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_flatbuffer_builder.h new file mode 100644 index 0000000..9aedf7b --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_flatbuffer_builder.h @@ -0,0 +1,1214 @@ +/* + * Copyright 2021 Google Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef FLATBUFFERS_FLATBUFFER_BUILDER_H_ +#define FLATBUFFERS_FLATBUFFER_BUILDER_H_ + +#include +#include + +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_allocator.h" +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_array.h" +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_base.h" +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_buffer_ref.h" +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_default_allocator.h" +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_detached_buffer.h" +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_stl_emulation.h" +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_string.h" +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_struct.h" +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_table.h" +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_vector.h" +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_vector_downward.h" +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_verifier.h" + +namespace flatbuffers { + +// Converts a Field ID to a virtual table offset. +inline voffset_t FieldIndexToOffset(voffset_t field_id) { + // Should correspond to what EndTable() below builds up. + const int fixed_fields = 2; // Vtable size and Object Size. + return static_cast((field_id + fixed_fields) * sizeof(voffset_t)); +} + +template> +const T *data(const std::vector &v) { + // Eventually the returned pointer gets passed down to memcpy, so + // we need it to be non-null to avoid undefined behavior. + static uint8_t t; + return v.empty() ? reinterpret_cast(&t) : &v.front(); +} +template> +T *data(std::vector &v) { + // Eventually the returned pointer gets passed down to memcpy, so + // we need it to be non-null to avoid undefined behavior. + static uint8_t t; + return v.empty() ? reinterpret_cast(&t) : &v.front(); +} + +/// @addtogroup flatbuffers_cpp_api +/// @{ +/// @class FlatBufferBuilder +/// @brief Helper class to hold data needed in creation of a FlatBuffer. +/// To serialize data, you typically call one of the `Create*()` functions in +/// the generated code, which in turn call a sequence of `StartTable`/ +/// `PushElement`/`AddElement`/`EndTable`, or the builtin `CreateString`/ +/// `CreateVector` functions. Do this is depth-first order to build up a tree to +/// the root. `Finish()` wraps up the buffer ready for transport. +class FlatBufferBuilder { + public: + /// @brief Default constructor for FlatBufferBuilder. + /// @param[in] initial_size The initial size of the buffer, in bytes. Defaults + /// to `1024`. + /// @param[in] allocator An `Allocator` to use. If null will use + /// `DefaultAllocator`. + /// @param[in] own_allocator Whether the builder/vector should own the + /// allocator. Defaults to / `false`. + /// @param[in] buffer_minalign Force the buffer to be aligned to the given + /// minimum alignment upon reallocation. Only needed if you intend to store + /// types with custom alignment AND you wish to read the buffer in-place + /// directly after creation. 
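+  /// A minimal usage sketch: `FlatBufferBuilder fbb(2048);` reserves 2 KB up
+  /// front, which avoids early reallocations when the finished buffer is
+  /// expected to outgrow the 1024-byte default.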
+ explicit FlatBufferBuilder( + size_t initial_size = 1024, Allocator *allocator = nullptr, + bool own_allocator = false, + size_t buffer_minalign = AlignOf()) + : buf_(initial_size, allocator, own_allocator, buffer_minalign), + num_field_loc(0), + max_voffset_(0), + nested(false), + finished(false), + minalign_(1), + force_defaults_(false), + dedup_vtables_(true), + string_pool(nullptr) { + EndianCheck(); + } + + /// @brief Move constructor for FlatBufferBuilder. + FlatBufferBuilder(FlatBufferBuilder &&other) + : buf_(1024, nullptr, false, AlignOf()), + num_field_loc(0), + max_voffset_(0), + nested(false), + finished(false), + minalign_(1), + force_defaults_(false), + dedup_vtables_(true), + string_pool(nullptr) { + EndianCheck(); + // Default construct and swap idiom. + // Lack of delegating constructors in vs2010 makes it more verbose than + // needed. + Swap(other); + } + + /// @brief Move assignment operator for FlatBufferBuilder. + FlatBufferBuilder &operator=(FlatBufferBuilder &&other) { + // Move construct a temporary and swap idiom + FlatBufferBuilder temp(std::move(other)); + Swap(temp); + return *this; + } + + void Swap(FlatBufferBuilder &other) { + using std::swap; + buf_.swap(other.buf_); + swap(num_field_loc, other.num_field_loc); + swap(max_voffset_, other.max_voffset_); + swap(nested, other.nested); + swap(finished, other.finished); + swap(minalign_, other.minalign_); + swap(force_defaults_, other.force_defaults_); + swap(dedup_vtables_, other.dedup_vtables_); + swap(string_pool, other.string_pool); + } + + ~FlatBufferBuilder() { + if (string_pool) delete string_pool; + } + + void Reset() { + Clear(); // clear builder state + buf_.reset(); // deallocate buffer + } + + /// @brief Reset all the state in this FlatBufferBuilder so it can be reused + /// to construct another buffer. + void Clear() { + ClearOffsets(); + buf_.clear(); + nested = false; + finished = false; + minalign_ = 1; + if (string_pool) string_pool->clear(); + } + + /// @brief The current size of the serialized buffer, counting from the end. + /// @return Returns an `uoffset_t` with the current size of the buffer. + uoffset_t GetSize() const { return buf_.size(); } + + /// @brief Get the serialized buffer (after you call `Finish()`). + /// @return Returns an `uint8_t` pointer to the FlatBuffer data inside the + /// buffer. + uint8_t *GetBufferPointer() const { + Finished(); + return buf_.data(); + } + + /// @brief Get the serialized buffer (after you call `Finish()`) as a span. + /// @return Returns a constructed flatbuffers::span that is a view over the + /// FlatBuffer data inside the buffer. + flatbuffers::span GetBufferSpan() const { + Finished(); + return flatbuffers::span(buf_.data(), buf_.size()); + } + + /// @brief Get a pointer to an unfinished buffer. + /// @return Returns a `uint8_t` pointer to the unfinished buffer. + uint8_t *GetCurrentBufferPointer() const { return buf_.data(); } + + /// @brief Get the released pointer to the serialized buffer. + /// @warning Do NOT attempt to use this FlatBufferBuilder afterwards! + /// @return A `FlatBuffer` that owns the buffer and its allocator and + /// behaves similar to a `unique_ptr` with a deleter. + FLATBUFFERS_ATTRIBUTE([[deprecated("use Release() instead")]]) + DetachedBuffer ReleaseBufferPointer() { + Finished(); + return buf_.release(); + } + + /// @brief Get the released DetachedBuffer. + /// @return A `DetachedBuffer` that owns the buffer and its allocator. 
+ DetachedBuffer Release() { + Finished(); + return buf_.release(); + } + + /// @brief Get the released pointer to the serialized buffer. + /// @param size The size of the memory block containing + /// the serialized `FlatBuffer`. + /// @param offset The offset from the released pointer where the finished + /// `FlatBuffer` starts. + /// @return A raw pointer to the start of the memory block containing + /// the serialized `FlatBuffer`. + /// @remark If the allocator is owned, it gets deleted when the destructor is + /// called.. + uint8_t *ReleaseRaw(size_t &size, size_t &offset) { + Finished(); + return buf_.release_raw(size, offset); + } + + /// @brief get the minimum alignment this buffer needs to be accessed + /// properly. This is only known once all elements have been written (after + /// you call Finish()). You can use this information if you need to embed + /// a FlatBuffer in some other buffer, such that you can later read it + /// without first having to copy it into its own buffer. + size_t GetBufferMinAlignment() const { + Finished(); + return minalign_; + } + + /// @cond FLATBUFFERS_INTERNAL + void Finished() const { + // If you get this assert, you're attempting to get access a buffer + // which hasn't been finished yet. Be sure to call + // FlatBufferBuilder::Finish with your root table. + // If you really need to access an unfinished buffer, call + // GetCurrentBufferPointer instead. + FLATBUFFERS_ASSERT(finished); + } + /// @endcond + + /// @brief In order to save space, fields that are set to their default value + /// don't get serialized into the buffer. + /// @param[in] fd When set to `true`, always serializes default values that + /// are set. Optional fields which are not set explicitly, will still not be + /// serialized. + void ForceDefaults(bool fd) { force_defaults_ = fd; } + + /// @brief By default vtables are deduped in order to save space. + /// @param[in] dedup When set to `true`, dedup vtables. + void DedupVtables(bool dedup) { dedup_vtables_ = dedup; } + + /// @cond FLATBUFFERS_INTERNAL + void Pad(size_t num_bytes) { buf_.fill(num_bytes); } + + void TrackMinAlign(size_t elem_size) { + if (elem_size > minalign_) minalign_ = elem_size; + } + + void Align(size_t elem_size) { + TrackMinAlign(elem_size); + buf_.fill(PaddingBytes(buf_.size(), elem_size)); + } + + void PushFlatBuffer(const uint8_t *bytes, size_t size) { + PushBytes(bytes, size); + finished = true; + } + + void PushBytes(const uint8_t *bytes, size_t size) { buf_.push(bytes, size); } + + void PopBytes(size_t amount) { buf_.pop(amount); } + + template void AssertScalarT() { + // The code assumes power of 2 sizes and endian-swap-ability. + static_assert(flatbuffers::is_scalar::value, "T must be a scalar type"); + } + + // Write a single aligned scalar to the buffer + template uoffset_t PushElement(T element) { + AssertScalarT(); + Align(sizeof(T)); + buf_.push_small(EndianScalar(element)); + return GetSize(); + } + + template uoffset_t PushElement(Offset off) { + // Special case for offsets: see ReferTo below. + return PushElement(ReferTo(off.o)); + } + + // When writing fields, we track where they are, so we can create correct + // vtables later. + void TrackField(voffset_t field, uoffset_t off) { + FieldLoc fl = { off, field }; + buf_.scratch_push_small(fl); + num_field_loc++; + if (field > max_voffset_) { max_voffset_ = field; } + } + + // Like PushElement, but additionally tracks the field this represents. 
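+  // For instance, generated table builders typically call this as roughly
+  //   fbb_.AddElement<int16_t>(/* vtable offset */ FieldIndexToOffset(0), e, /* default */ 0);
+  // so a value equal to its default is skipped unless ForceDefaults(true) is set.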
+ template void AddElement(voffset_t field, T e, T def) { + // We don't serialize values equal to the default. + if (IsTheSameAs(e, def) && !force_defaults_) return; + TrackField(field, PushElement(e)); + } + + template void AddElement(voffset_t field, T e) { + TrackField(field, PushElement(e)); + } + + template void AddOffset(voffset_t field, Offset off) { + if (off.IsNull()) return; // Don't store. + AddElement(field, ReferTo(off.o), static_cast(0)); + } + + template void AddStruct(voffset_t field, const T *structptr) { + if (!structptr) return; // Default, don't store. + Align(AlignOf()); + buf_.push_small(*structptr); + TrackField(field, GetSize()); + } + + void AddStructOffset(voffset_t field, uoffset_t off) { + TrackField(field, off); + } + + // Offsets initially are relative to the end of the buffer (downwards). + // This function converts them to be relative to the current location + // in the buffer (when stored here), pointing upwards. + uoffset_t ReferTo(uoffset_t off) { + // Align to ensure GetSize() below is correct. + Align(sizeof(uoffset_t)); + // Offset must refer to something already in buffer. + const uoffset_t size = GetSize(); + FLATBUFFERS_ASSERT(off && off <= size); + return size - off + static_cast(sizeof(uoffset_t)); + } + + void NotNested() { + // If you hit this, you're trying to construct a Table/Vector/String + // during the construction of its parent table (between the MyTableBuilder + // and table.Finish(). + // Move the creation of these sub-objects to above the MyTableBuilder to + // not get this assert. + // Ignoring this assert may appear to work in simple cases, but the reason + // it is here is that storing objects in-line may cause vtable offsets + // to not fit anymore. It also leads to vtable duplication. + FLATBUFFERS_ASSERT(!nested); + // If you hit this, fields were added outside the scope of a table. + FLATBUFFERS_ASSERT(!num_field_loc); + } + + // From generated code (or from the parser), we call StartTable/EndTable + // with a sequence of AddElement calls in between. + uoffset_t StartTable() { + NotNested(); + nested = true; + return GetSize(); + } + + // This finishes one serialized object by generating the vtable if it's a + // table, comparing it against existing vtables, and writing the + // resulting vtable offset. + uoffset_t EndTable(uoffset_t start) { + // If you get this assert, a corresponding StartTable wasn't called. + FLATBUFFERS_ASSERT(nested); + // Write the vtable offset, which is the start of any Table. + // We fill it's value later. + auto vtableoffsetloc = PushElement(0); + // Write a vtable, which consists entirely of voffset_t elements. + // It starts with the number of offsets, followed by a type id, followed + // by the offsets themselves. In reverse: + // Include space for the last offset and ensure empty tables have a + // minimum size. + max_voffset_ = + (std::max)(static_cast(max_voffset_ + sizeof(voffset_t)), + FieldIndexToOffset(0)); + buf_.fill_big(max_voffset_); + auto table_object_size = vtableoffsetloc - start; + // Vtable use 16bit offsets. 
+ FLATBUFFERS_ASSERT(table_object_size < 0x10000); + WriteScalar(buf_.data() + sizeof(voffset_t), + static_cast(table_object_size)); + WriteScalar(buf_.data(), max_voffset_); + // Write the offsets into the table + for (auto it = buf_.scratch_end() - num_field_loc * sizeof(FieldLoc); + it < buf_.scratch_end(); it += sizeof(FieldLoc)) { + auto field_location = reinterpret_cast(it); + auto pos = static_cast(vtableoffsetloc - field_location->off); + // If this asserts, it means you've set a field twice. + FLATBUFFERS_ASSERT( + !ReadScalar(buf_.data() + field_location->id)); + WriteScalar(buf_.data() + field_location->id, pos); + } + ClearOffsets(); + auto vt1 = reinterpret_cast(buf_.data()); + auto vt1_size = ReadScalar(vt1); + auto vt_use = GetSize(); + // See if we already have generated a vtable with this exact same + // layout before. If so, make it point to the old one, remove this one. + if (dedup_vtables_) { + for (auto it = buf_.scratch_data(); it < buf_.scratch_end(); + it += sizeof(uoffset_t)) { + auto vt_offset_ptr = reinterpret_cast(it); + auto vt2 = reinterpret_cast(buf_.data_at(*vt_offset_ptr)); + auto vt2_size = ReadScalar(vt2); + if (vt1_size != vt2_size || 0 != memcmp(vt2, vt1, vt1_size)) continue; + vt_use = *vt_offset_ptr; + buf_.pop(GetSize() - vtableoffsetloc); + break; + } + } + // If this is a new vtable, remember it. + if (vt_use == GetSize()) { buf_.scratch_push_small(vt_use); } + // Fill the vtable offset we created above. + // The offset points from the beginning of the object to where the + // vtable is stored. + // Offsets default direction is downward in memory for future format + // flexibility (storing all vtables at the start of the file). + WriteScalar(buf_.data_at(vtableoffsetloc), + static_cast(vt_use) - + static_cast(vtableoffsetloc)); + + nested = false; + return vtableoffsetloc; + } + + FLATBUFFERS_ATTRIBUTE([[deprecated("call the version above instead")]]) + uoffset_t EndTable(uoffset_t start, voffset_t /*numfields*/) { + return EndTable(start); + } + + // This checks a required field has been set in a given table that has + // just been constructed. + template void Required(Offset table, voffset_t field); + + uoffset_t StartStruct(size_t alignment) { + Align(alignment); + return GetSize(); + } + + uoffset_t EndStruct() { return GetSize(); } + + void ClearOffsets() { + buf_.scratch_pop(num_field_loc * sizeof(FieldLoc)); + num_field_loc = 0; + max_voffset_ = 0; + } + + // Aligns such that when "len" bytes are written, an object can be written + // after it with "alignment" without padding. + void PreAlign(size_t len, size_t alignment) { + if (len == 0) return; + TrackMinAlign(alignment); + buf_.fill(PaddingBytes(GetSize() + len, alignment)); + } + template void PreAlign(size_t len) { + AssertScalarT(); + PreAlign(len, sizeof(T)); + } + /// @endcond + + /// @brief Store a string in the buffer, which can contain any binary data. + /// @param[in] str A const char pointer to the data to be stored as a string. + /// @param[in] len The number of bytes that should be stored from `str`. + /// @return Returns the offset in the buffer where the string starts. + Offset CreateString(const char *str, size_t len) { + NotNested(); + PreAlign(len + 1); // Always 0-terminated. + buf_.fill(1); + PushBytes(reinterpret_cast(str), len); + PushElement(static_cast(len)); + return Offset(GetSize()); + } + + /// @brief Store a string in the buffer, which is null-terminated. + /// @param[in] str A const char pointer to a C-string to add to the buffer. 
+ /// @return Returns the offset in the buffer where the string starts. + Offset CreateString(const char *str) { + return CreateString(str, strlen(str)); + } + + /// @brief Store a string in the buffer, which is null-terminated. + /// @param[in] str A char pointer to a C-string to add to the buffer. + /// @return Returns the offset in the buffer where the string starts. + Offset CreateString(char *str) { + return CreateString(str, strlen(str)); + } + + /// @brief Store a string in the buffer, which can contain any binary data. + /// @param[in] str A const reference to a std::string to store in the buffer. + /// @return Returns the offset in the buffer where the string starts. + Offset CreateString(const std::string &str) { + return CreateString(str.c_str(), str.length()); + } + + // clang-format off + #ifdef FLATBUFFERS_HAS_STRING_VIEW + /// @brief Store a string in the buffer, which can contain any binary data. + /// @param[in] str A const string_view to copy in to the buffer. + /// @return Returns the offset in the buffer where the string starts. + Offset CreateString(flatbuffers::string_view str) { + return CreateString(str.data(), str.size()); + } + #endif // FLATBUFFERS_HAS_STRING_VIEW + // clang-format on + + /// @brief Store a string in the buffer, which can contain any binary data. + /// @param[in] str A const pointer to a `String` struct to add to the buffer. + /// @return Returns the offset in the buffer where the string starts + Offset CreateString(const String *str) { + return str ? CreateString(str->c_str(), str->size()) : 0; + } + + /// @brief Store a string in the buffer, which can contain any binary data. + /// @param[in] str A const reference to a std::string like type with support + /// of T::c_str() and T::length() to store in the buffer. + /// @return Returns the offset in the buffer where the string starts. + template Offset CreateString(const T &str) { + return CreateString(str.c_str(), str.length()); + } + + /// @brief Store a string in the buffer, which can contain any binary data. + /// If a string with this exact contents has already been serialized before, + /// instead simply returns the offset of the existing string. This uses a map + /// stored on the heap, but only stores the numerical offsets. + /// @param[in] str A const char pointer to the data to be stored as a string. + /// @param[in] len The number of bytes that should be stored from `str`. + /// @return Returns the offset in the buffer where the string starts. + Offset CreateSharedString(const char *str, size_t len) { + FLATBUFFERS_ASSERT(FLATBUFFERS_GENERAL_HEAP_ALLOC_OK); + if (!string_pool) + string_pool = new StringOffsetMap(StringOffsetCompare(buf_)); + auto size_before_string = buf_.size(); + // Must first serialize the string, since the set is all offsets into + // buffer. + auto off = CreateString(str, len); + auto it = string_pool->find(off); + // If it exists we reuse existing serialized data! + if (it != string_pool->end()) { + // We can remove the string we serialized. + buf_.pop(buf_.size() - size_before_string); + return *it; + } + // Record this string for future use. + string_pool->insert(off); + return off; + } + +#ifdef FLATBUFFERS_HAS_STRING_VIEW + /// @brief Store a string in the buffer, which can contain any binary data. + /// If a string with this exact contents has already been serialized before, + /// instead simply returns the offset of the existing string. This uses a map + /// stored on the heap, but only stores the numerical offsets. 
+ /// @param[in] str A const std::string_view to store in the buffer. + /// @return Returns the offset in the buffer where the string starts + Offset CreateSharedString(const flatbuffers::string_view str) { + return CreateSharedString(str.data(), str.size()); + } +#else + /// @brief Store a string in the buffer, which null-terminated. + /// If a string with this exact contents has already been serialized before, + /// instead simply returns the offset of the existing string. This uses a map + /// stored on the heap, but only stores the numerical offsets. + /// @param[in] str A const char pointer to a C-string to add to the buffer. + /// @return Returns the offset in the buffer where the string starts. + Offset CreateSharedString(const char *str) { + return CreateSharedString(str, strlen(str)); + } + + /// @brief Store a string in the buffer, which can contain any binary data. + /// If a string with this exact contents has already been serialized before, + /// instead simply returns the offset of the existing string. This uses a map + /// stored on the heap, but only stores the numerical offsets. + /// @param[in] str A const reference to a std::string to store in the buffer. + /// @return Returns the offset in the buffer where the string starts. + Offset CreateSharedString(const std::string &str) { + return CreateSharedString(str.c_str(), str.length()); + } +#endif + + /// @brief Store a string in the buffer, which can contain any binary data. + /// If a string with this exact contents has already been serialized before, + /// instead simply returns the offset of the existing string. This uses a map + /// stored on the heap, but only stores the numerical offsets. + /// @param[in] str A const pointer to a `String` struct to add to the buffer. + /// @return Returns the offset in the buffer where the string starts + Offset CreateSharedString(const String *str) { + return CreateSharedString(str->c_str(), str->size()); + } + + /// @cond FLATBUFFERS_INTERNAL + uoffset_t EndVector(size_t len) { + FLATBUFFERS_ASSERT(nested); // Hit if no corresponding StartVector. + nested = false; + return PushElement(static_cast(len)); + } + + void StartVector(size_t len, size_t elemsize) { + NotNested(); + nested = true; + PreAlign(len * elemsize); + PreAlign(len * elemsize, elemsize); // Just in case elemsize > uoffset_t. + } + + // Call this right before StartVector/CreateVector if you want to force the + // alignment to be something different than what the element size would + // normally dictate. + // This is useful when storing a nested_flatbuffer in a vector of bytes, + // or when storing SIMD floats, etc. + void ForceVectorAlignment(size_t len, size_t elemsize, size_t alignment) { + if (len == 0) return; + FLATBUFFERS_ASSERT(VerifyAlignmentRequirements(alignment)); + PreAlign(len * elemsize, alignment); + } + + // Similar to ForceVectorAlignment but for String fields. + void ForceStringAlignment(size_t len, size_t alignment) { + if (len == 0) return; + FLATBUFFERS_ASSERT(VerifyAlignmentRequirements(alignment)); + PreAlign((len + 1) * sizeof(char), alignment); + } + + /// @endcond + + /// @brief Serialize an array into a FlatBuffer `vector`. + /// @tparam T The data type of the array elements. + /// @param[in] v A pointer to the array of type `T` to serialize into the + /// buffer as a `vector`. + /// @param[in] len The number of elements to serialize. + /// @return Returns a typed `Offset` into the serialized data indicating + /// where the vector is stored. 
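The scalar CreateVector overload documented above (its declaration follows just below) copies `len` elements of `T` into the buffer and returns a typed offset. A minimal usage sketch, illustrative only and not part of the vendored header; it assumes the FlatBufferBuilder header is already included, and the generated `MyTableBuilder` mentioned in the comment is hypothetical:

#include <cstdint>

// Illustrative sketch: serialize a plain scalar array as a FlatBuffers vector.
// (Assumes the vendored FlatBufferBuilder header is already included.)
inline void CreateVectorSketch() {
  flatbuffers::FlatBufferBuilder fbb;
  const int32_t values[] = { 1, 2, 3, 4 };
  auto vec = fbb.CreateVector(values, 4);  // Offset<Vector<int32_t>>
  // 'vec' would normally be stored in a table field through generated code
  // (e.g. a hypothetical MyTableBuilder) before calling fbb.Finish(...).
  (void)vec;
}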
+ template Offset> CreateVector(const T *v, size_t len) { + // If this assert hits, you're specifying a template argument that is + // causing the wrong overload to be selected, remove it. + AssertScalarT(); + StartVector(len, sizeof(T)); + if (len == 0) { return Offset>(EndVector(len)); } + // clang-format off + #if FLATBUFFERS_LITTLEENDIAN + PushBytes(reinterpret_cast(v), len * sizeof(T)); + #else + if (sizeof(T) == 1) { + PushBytes(reinterpret_cast(v), len); + } else { + for (auto i = len; i > 0; ) { + PushElement(v[--i]); + } + } + #endif + // clang-format on + return Offset>(EndVector(len)); + } + + /// @brief Serialize an array like object into a FlatBuffer `vector`. + /// @tparam T The data type of the array elements. + /// @tparam C The type of the array. + /// @param[in] array A reference to an array like object of type `T` to + /// serialize into the buffer as a `vector`. + /// @return Returns a typed `Offset` into the serialized data indicating + /// where the vector is stored. + template Offset> CreateVector(const C &array) { + return CreateVector(array.data(), array.size()); + } + + /// @brief Serialize an initializer list into a FlatBuffer `vector`. + /// @tparam T The data type of the initializer list elements. + /// @param[in] v The value of the initializer list. + /// @return Returns a typed `Offset` into the serialized data indicating + /// where the vector is stored. + template + Offset> CreateVector(std::initializer_list v) { + return CreateVector(v.begin(), v.size()); + } + + template + Offset>> CreateVector(const Offset *v, size_t len) { + StartVector(len, sizeof(Offset)); + for (auto i = len; i > 0;) { PushElement(v[--i]); } + return Offset>>(EndVector(len)); + } + + /// @brief Serialize a `std::vector` into a FlatBuffer `vector`. + /// @tparam T The data type of the `std::vector` elements. + /// @param v A const reference to the `std::vector` to serialize into the + /// buffer as a `vector`. + /// @return Returns a typed `Offset` into the serialized data indicating + /// where the vector is stored. + template> + Offset> CreateVector(const std::vector &v) { + return CreateVector(data(v), v.size()); + } + + // vector may be implemented using a bit-set, so we can't access it as + // an array. Instead, read elements manually. + // Background: https://isocpp.org/blog/2012/11/on-vectorbool + Offset> CreateVector(const std::vector &v) { + StartVector(v.size(), sizeof(uint8_t)); + for (auto i = v.size(); i > 0;) { + PushElement(static_cast(v[--i])); + } + return Offset>(EndVector(v.size())); + } + + /// @brief Serialize values returned by a function into a FlatBuffer `vector`. + /// This is a convenience function that takes care of iteration for you. + /// @tparam T The data type of the `std::vector` elements. + /// @param f A function that takes the current iteration 0..vector_size-1 and + /// returns any type that you can construct a FlatBuffers vector out of. + /// @return Returns a typed `Offset` into the serialized data indicating + /// where the vector is stored. + template + Offset> CreateVector(size_t vector_size, + const std::function &f) { + FLATBUFFERS_ASSERT(FLATBUFFERS_GENERAL_HEAP_ALLOC_OK); + std::vector elems(vector_size); + for (size_t i = 0; i < vector_size; i++) elems[i] = f(i); + return CreateVector(elems); + } + + /// @brief Serialize values returned by a function into a FlatBuffer `vector`. + /// This is a convenience function that takes care of iteration for you. 
This + /// uses a vector stored on the heap to store the intermediate results of the + /// iteration. + /// @tparam T The data type of the `std::vector` elements. + /// @param f A function that takes the current iteration 0..vector_size-1, + /// and the state parameter returning any type that you can construct a + /// FlatBuffers vector out of. + /// @param state State passed to f. + /// @return Returns a typed `Offset` into the serialized data indicating + /// where the vector is stored. + template + Offset> CreateVector(size_t vector_size, F f, S *state) { + FLATBUFFERS_ASSERT(FLATBUFFERS_GENERAL_HEAP_ALLOC_OK); + std::vector elems(vector_size); + for (size_t i = 0; i < vector_size; i++) elems[i] = f(i, state); + return CreateVector(elems); + } + + /// @brief Serialize a `std::vector` into a FlatBuffer `vector`. + /// whereas StringType is any type that is accepted by the CreateString() + /// overloads. + /// This is a convenience function for a common case. + /// @param v A const reference to the `std::vector` to serialize into the + /// buffer as a `vector`. + /// @return Returns a typed `Offset` into the serialized data indicating + /// where the vector is stored. + template> + Offset>> CreateVectorOfStrings( + const std::vector &v) { + return CreateVectorOfStrings(v.cbegin(), v.cend()); + } + + /// @brief Serialize a collection of Strings into a FlatBuffer `vector`. + /// This is a convenience function for a common case. + /// @param begin The begining iterator of the collection + /// @param end The ending iterator of the collection + /// @return Returns a typed `Offset` into the serialized data indicating + /// where the vector is stored. + template + Offset>> CreateVectorOfStrings(It begin, It end) { + auto size = std::distance(begin, end); + auto scratch_buffer_usage = size * sizeof(Offset); + // If there is not enough space to store the offsets, there definitely won't + // be enough space to store all the strings. So ensuring space for the + // scratch region is OK, for it it fails, it would have failed later. + buf_.ensure_space(scratch_buffer_usage); + for (auto it = begin; it != end; ++it) { + buf_.scratch_push_small(CreateString(*it)); + } + StartVector(size, sizeof(Offset)); + for (auto i = 1; i <= size; i++) { + // Note we re-evaluate the buf location each iteration to account for any + // underlying buffer resizing that may occur. + PushElement(*reinterpret_cast *>( + buf_.scratch_end() - i * sizeof(Offset))); + } + buf_.scratch_pop(scratch_buffer_usage); + return Offset>>(EndVector(size)); + } + + /// @brief Serialize an array of structs into a FlatBuffer `vector`. + /// @tparam T The data type of the struct array elements. + /// @param[in] v A pointer to the array of type `T` to serialize into the + /// buffer as a `vector`. + /// @param[in] len The number of elements to serialize. + /// @return Returns a typed `Offset` into the serialized data indicating + /// where the vector is stored. + template + Offset> CreateVectorOfStructs(const T *v, size_t len) { + StartVector(len * sizeof(T) / AlignOf(), AlignOf()); + if (len > 0) { + PushBytes(reinterpret_cast(v), sizeof(T) * len); + } + return Offset>(EndVector(len)); + } + + /// @brief Serialize an array of native structs into a FlatBuffer `vector`. + /// @tparam T The data type of the struct array elements. + /// @tparam S The data type of the native struct array elements. + /// @param[in] v A pointer to the array of type `S` to serialize into the + /// buffer as a `vector`. 
+ /// @param[in] len The number of elements to serialize. + /// @param[in] pack_func Pointer to a function to convert the native struct + /// to the FlatBuffer struct. + /// @return Returns a typed `Offset` into the serialized data indicating + /// where the vector is stored. + template + Offset> CreateVectorOfNativeStructs( + const S *v, size_t len, T (*const pack_func)(const S &)) { + FLATBUFFERS_ASSERT(pack_func); + auto structs = StartVectorOfStructs(len); + for (size_t i = 0; i < len; i++) { structs[i] = pack_func(v[i]); } + return EndVectorOfStructs(len); + } + + /// @brief Serialize an array of native structs into a FlatBuffer `vector`. + /// @tparam T The data type of the struct array elements. + /// @tparam S The data type of the native struct array elements. + /// @param[in] v A pointer to the array of type `S` to serialize into the + /// buffer as a `vector`. + /// @param[in] len The number of elements to serialize. + /// @return Returns a typed `Offset` into the serialized data indicating + /// where the vector is stored. + template + Offset> CreateVectorOfNativeStructs(const S *v, + size_t len) { + extern T Pack(const S &); + return CreateVectorOfNativeStructs(v, len, Pack); + } + + /// @brief Serialize an array of structs into a FlatBuffer `vector`. + /// @tparam T The data type of the struct array elements. + /// @param[in] filler A function that takes the current iteration + /// 0..vector_size-1 and a pointer to the struct that must be filled. + /// @return Returns a typed `Offset` into the serialized data indicating + /// where the vector is stored. + /// This is mostly useful when flatbuffers are generated with mutation + /// accessors. + template + Offset> CreateVectorOfStructs( + size_t vector_size, const std::function &filler) { + T *structs = StartVectorOfStructs(vector_size); + for (size_t i = 0; i < vector_size; i++) { + filler(i, structs); + structs++; + } + return EndVectorOfStructs(vector_size); + } + + /// @brief Serialize an array of structs into a FlatBuffer `vector`. + /// @tparam T The data type of the struct array elements. + /// @param[in] f A function that takes the current iteration 0..vector_size-1, + /// a pointer to the struct that must be filled and the state argument. + /// @param[in] state Arbitrary state to pass to f. + /// @return Returns a typed `Offset` into the serialized data indicating + /// where the vector is stored. + /// This is mostly useful when flatbuffers are generated with mutation + /// accessors. + template + Offset> CreateVectorOfStructs(size_t vector_size, F f, + S *state) { + T *structs = StartVectorOfStructs(vector_size); + for (size_t i = 0; i < vector_size; i++) { + f(i, structs, state); + structs++; + } + return EndVectorOfStructs(vector_size); + } + + /// @brief Serialize a `std::vector` of structs into a FlatBuffer `vector`. + /// @tparam T The data type of the `std::vector` struct elements. + /// @param[in] v A const reference to the `std::vector` of structs to + /// serialize into the buffer as a `vector`. + /// @return Returns a typed `Offset` into the serialized data indicating + /// where the vector is stored. + template> + Offset> CreateVectorOfStructs( + const std::vector &v) { + return CreateVectorOfStructs(data(v), v.size()); + } + + /// @brief Serialize a `std::vector` of native structs into a FlatBuffer + /// `vector`. + /// @tparam T The data type of the `std::vector` struct elements. + /// @tparam S The data type of the `std::vector` native struct elements. 
+ /// @param[in] v A const reference to the `std::vector` of structs to + /// serialize into the buffer as a `vector`. + /// @param[in] pack_func Pointer to a function to convert the native struct + /// to the FlatBuffer struct. + /// @return Returns a typed `Offset` into the serialized data indicating + /// where the vector is stored. + template> + Offset> CreateVectorOfNativeStructs( + const std::vector &v, T (*const pack_func)(const S &)) { + return CreateVectorOfNativeStructs(data(v), v.size(), pack_func); + } + + /// @brief Serialize a `std::vector` of native structs into a FlatBuffer + /// `vector`. + /// @tparam T The data type of the `std::vector` struct elements. + /// @tparam S The data type of the `std::vector` native struct elements. + /// @param[in] v A const reference to the `std::vector` of structs to + /// serialize into the buffer as a `vector`. + /// @return Returns a typed `Offset` into the serialized data indicating + /// where the vector is stored. + template> + Offset> CreateVectorOfNativeStructs( + const std::vector &v) { + return CreateVectorOfNativeStructs(data(v), v.size()); + } + + /// @cond FLATBUFFERS_INTERNAL + template struct StructKeyComparator { + bool operator()(const T &a, const T &b) const { + return a.KeyCompareLessThan(&b); + } + }; + /// @endcond + + /// @brief Serialize a `std::vector` of structs into a FlatBuffer `vector` + /// in sorted order. + /// @tparam T The data type of the `std::vector` struct elements. + /// @param[in] v A const reference to the `std::vector` of structs to + /// serialize into the buffer as a `vector`. + /// @return Returns a typed `Offset` into the serialized data indicating + /// where the vector is stored. + template> + Offset> CreateVectorOfSortedStructs( + std::vector *v) { + return CreateVectorOfSortedStructs(data(*v), v->size()); + } + + /// @brief Serialize a `std::vector` of native structs into a FlatBuffer + /// `vector` in sorted order. + /// @tparam T The data type of the `std::vector` struct elements. + /// @tparam S The data type of the `std::vector` native struct elements. + /// @param[in] v A const reference to the `std::vector` of structs to + /// serialize into the buffer as a `vector`. + /// @return Returns a typed `Offset` into the serialized data indicating + /// where the vector is stored. + template> + Offset> CreateVectorOfSortedNativeStructs( + std::vector *v) { + return CreateVectorOfSortedNativeStructs(data(*v), v->size()); + } + + /// @brief Serialize an array of structs into a FlatBuffer `vector` in sorted + /// order. + /// @tparam T The data type of the struct array elements. + /// @param[in] v A pointer to the array of type `T` to serialize into the + /// buffer as a `vector`. + /// @param[in] len The number of elements to serialize. + /// @return Returns a typed `Offset` into the serialized data indicating + /// where the vector is stored. + template + Offset> CreateVectorOfSortedStructs(T *v, size_t len) { + std::stable_sort(v, v + len, StructKeyComparator()); + return CreateVectorOfStructs(v, len); + } + + /// @brief Serialize an array of native structs into a FlatBuffer `vector` in + /// sorted order. + /// @tparam T The data type of the struct array elements. + /// @tparam S The data type of the native struct array elements. + /// @param[in] v A pointer to the array of type `S` to serialize into the + /// buffer as a `vector`. + /// @param[in] len The number of elements to serialize. 
+ /// @return Returns a typed `Offset` into the serialized data indicating + /// where the vector is stored. + template + Offset> CreateVectorOfSortedNativeStructs(S *v, + size_t len) { + extern T Pack(const S &); + auto structs = StartVectorOfStructs(len); + for (size_t i = 0; i < len; i++) { structs[i] = Pack(v[i]); } + std::stable_sort(structs, structs + len, StructKeyComparator()); + return EndVectorOfStructs(len); + } + + /// @cond FLATBUFFERS_INTERNAL + template struct TableKeyComparator { + TableKeyComparator(vector_downward &buf) : buf_(buf) {} + TableKeyComparator(const TableKeyComparator &other) : buf_(other.buf_) {} + bool operator()(const Offset &a, const Offset &b) const { + auto table_a = reinterpret_cast(buf_.data_at(a.o)); + auto table_b = reinterpret_cast(buf_.data_at(b.o)); + return table_a->KeyCompareLessThan(table_b); + } + vector_downward &buf_; + + private: + FLATBUFFERS_DELETE_FUNC( + TableKeyComparator &operator=(const TableKeyComparator &other)); + }; + /// @endcond + + /// @brief Serialize an array of `table` offsets as a `vector` in the buffer + /// in sorted order. + /// @tparam T The data type that the offset refers to. + /// @param[in] v An array of type `Offset` that contains the `table` + /// offsets to store in the buffer in sorted order. + /// @param[in] len The number of elements to store in the `vector`. + /// @return Returns a typed `Offset` into the serialized data indicating + /// where the vector is stored. + template + Offset>> CreateVectorOfSortedTables(Offset *v, + size_t len) { + std::stable_sort(v, v + len, TableKeyComparator(buf_)); + return CreateVector(v, len); + } + + /// @brief Serialize an array of `table` offsets as a `vector` in the buffer + /// in sorted order. + /// @tparam T The data type that the offset refers to. + /// @param[in] v An array of type `Offset` that contains the `table` + /// offsets to store in the buffer in sorted order. + /// @return Returns a typed `Offset` into the serialized data indicating + /// where the vector is stored. + template> + Offset>> CreateVectorOfSortedTables( + std::vector, Alloc> *v) { + return CreateVectorOfSortedTables(data(*v), v->size()); + } + + /// @brief Specialized version of `CreateVector` for non-copying use cases. + /// Write the data any time later to the returned buffer pointer `buf`. + /// @param[in] len The number of elements to store in the `vector`. + /// @param[in] elemsize The size of each element in the `vector`. + /// @param[out] buf A pointer to a `uint8_t` pointer that can be + /// written to at a later time to serialize the data into a `vector` + /// in the buffer. + uoffset_t CreateUninitializedVector(size_t len, size_t elemsize, + uint8_t **buf) { + NotNested(); + StartVector(len, elemsize); + buf_.make_space(len * elemsize); + auto vec_start = GetSize(); + auto vec_end = EndVector(len); + *buf = buf_.data_at(vec_start); + return vec_end; + } + + /// @brief Specialized version of `CreateVector` for non-copying use cases. + /// Write the data any time later to the returned buffer pointer `buf`. + /// @tparam T The data type of the data that will be stored in the buffer + /// as a `vector`. + /// @param[in] len The number of elements to store in the `vector`. + /// @param[out] buf A pointer to a pointer of type `T` that can be + /// written to at a later time to serialize the data into a `vector` + /// in the buffer. 
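A usage sketch for the typed CreateUninitializedVector overload documented above (the overload itself follows just below). Illustrative only, not part of the vendored header: reserve the space first, then write the payload through the returned pointer.

// Illustrative sketch: fill the vector contents after reserving space for it.
// (Assumes the vendored FlatBufferBuilder header is already included.)
inline void CreateUninitializedVectorSketch(flatbuffers::FlatBufferBuilder &fbb) {
  uint8_t *dst = nullptr;
  auto vec = fbb.CreateUninitializedVector(16, &dst);  // Offset<Vector<uint8_t>>
  // Write through 'dst'; doing it right away is simplest, since later
  // allocations can grow (and move) the underlying buffer.
  for (uint8_t i = 0; i < 16; i++) dst[i] = i;
  (void)vec;
}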
+ template + Offset> CreateUninitializedVector(size_t len, T **buf) { + AssertScalarT(); + return CreateUninitializedVector(len, sizeof(T), + reinterpret_cast(buf)); + } + + template + Offset> CreateUninitializedVectorOfStructs(size_t len, + T **buf) { + return CreateUninitializedVector(len, sizeof(T), + reinterpret_cast(buf)); + } + + // @brief Create a vector of scalar type T given as input a vector of scalar + // type U, useful with e.g. pre "enum class" enums, or any existing scalar + // data of the wrong type. + template + Offset> CreateVectorScalarCast(const U *v, size_t len) { + AssertScalarT(); + AssertScalarT(); + StartVector(len, sizeof(T)); + for (auto i = len; i > 0;) { PushElement(static_cast(v[--i])); } + return Offset>(EndVector(len)); + } + + /// @brief Write a struct by itself, typically to be part of a union. + template Offset CreateStruct(const T &structobj) { + NotNested(); + Align(AlignOf()); + buf_.push_small(structobj); + return Offset(GetSize()); + } + + /// @brief Finish serializing a buffer by writing the root offset. + /// @param[in] file_identifier If a `file_identifier` is given, the buffer + /// will be prefixed with a standard FlatBuffers file header. + template + void Finish(Offset root, const char *file_identifier = nullptr) { + Finish(root.o, file_identifier, false); + } + + /// @brief Finish a buffer with a 32 bit size field pre-fixed (size of the + /// buffer following the size field). These buffers are NOT compatible + /// with standard buffers created by Finish, i.e. you can't call GetRoot + /// on them, you have to use GetSizePrefixedRoot instead. + /// All >32 bit quantities in this buffer will be aligned when the whole + /// size pre-fixed buffer is aligned. + /// These kinds of buffers are useful for creating a stream of FlatBuffers. + template + void FinishSizePrefixed(Offset root, + const char *file_identifier = nullptr) { + Finish(root.o, file_identifier, true); + } + + void SwapBufAllocator(FlatBufferBuilder &other) { + buf_.swap_allocator(other.buf_); + } + + /// @brief The length of a FlatBuffer file header. + static const size_t kFileIdentifierLength = + ::flatbuffers::kFileIdentifierLength; + + protected: + // You shouldn't really be copying instances of this class. + FlatBufferBuilder(const FlatBufferBuilder &); + FlatBufferBuilder &operator=(const FlatBufferBuilder &); + + void Finish(uoffset_t root, const char *file_identifier, bool size_prefix) { + NotNested(); + buf_.clear_scratch(); + // This will cause the whole buffer to be aligned. + PreAlign((size_prefix ? sizeof(uoffset_t) : 0) + sizeof(uoffset_t) + + (file_identifier ? kFileIdentifierLength : 0), + minalign_); + if (file_identifier) { + FLATBUFFERS_ASSERT(strlen(file_identifier) == kFileIdentifierLength); + PushBytes(reinterpret_cast(file_identifier), + kFileIdentifierLength); + } + PushElement(ReferTo(root)); // Location of root. + if (size_prefix) { PushElement(GetSize()); } + finished = true; + } + + struct FieldLoc { + uoffset_t off; + voffset_t id; + }; + + vector_downward buf_; + + // Accumulating offsets of table members while it is being built. + // We store these in the scratch pad of buf_, after the vtable offsets. + uoffset_t num_field_loc; + // Track how much of the vtable is in use, so we can output the most compact + // possible vtable. + voffset_t max_voffset_; + + // Ensure objects are not nested. + bool nested; + + // Ensure the buffer is finished before it is being accessed. 
+ bool finished; + + size_t minalign_; + + bool force_defaults_; // Serialize values equal to their defaults anyway. + + bool dedup_vtables_; + + struct StringOffsetCompare { + StringOffsetCompare(const vector_downward &buf) : buf_(&buf) {} + bool operator()(const Offset &a, const Offset &b) const { + auto stra = reinterpret_cast(buf_->data_at(a.o)); + auto strb = reinterpret_cast(buf_->data_at(b.o)); + return StringLessThan(stra->data(), stra->size(), strb->data(), + strb->size()); + } + const vector_downward *buf_; + }; + + // For use with CreateSharedString. Instantiated on first use only. + typedef std::set, StringOffsetCompare> StringOffsetMap; + StringOffsetMap *string_pool; + + private: + // Allocates space for a vector of structures. + // Must be completed with EndVectorOfStructs(). + template T *StartVectorOfStructs(size_t vector_size) { + StartVector(vector_size * sizeof(T) / AlignOf(), AlignOf()); + return reinterpret_cast(buf_.make_space(vector_size * sizeof(T))); + } + + // End the vector of structures in the flatbuffers. + // Vector should have previously be started with StartVectorOfStructs(). + template + Offset> EndVectorOfStructs(size_t vector_size) { + return Offset>(EndVector(vector_size)); + } +}; +/// @} + +/// Helpers to get a typed pointer to objects that are currently being built. +/// @warning Creating new objects will lead to reallocations and invalidates +/// the pointer! +template +T *GetMutableTemporaryPointer(FlatBufferBuilder &fbb, Offset offset) { + return reinterpret_cast(fbb.GetCurrentBufferPointer() + fbb.GetSize() - + offset.o); +} + +template +const T *GetTemporaryPointer(FlatBufferBuilder &fbb, Offset offset) { + return GetMutableTemporaryPointer(fbb, offset); +} + +template +void FlatBufferBuilder::Required(Offset table, voffset_t field) { + auto table_ptr = reinterpret_cast(buf_.data_at(table.o)); + bool ok = table_ptr->GetOptionalFieldOffset(field) != 0; + // If this fails, the caller will show what field needs to be set. + FLATBUFFERS_ASSERT(ok); + (void)ok; +} + +} // namespace flatbuffers + +#endif // FLATBUFFERS_VECTOR_DOWNWARD_H_ diff --git a/edgeimpulse/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_stl_emulation.h b/edgeimpulse/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_stl_emulation.h new file mode 100644 index 0000000..7166d4f --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_stl_emulation.h @@ -0,0 +1,509 @@ +/* + * Copyright 2017 Google Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FLATBUFFERS_STL_EMULATION_H_ +#define FLATBUFFERS_STL_EMULATION_H_ + +// clang-format off +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_base.h" + +#include +#include +#include +#include +#include + +#ifndef FLATBUFFERS_USE_STD_OPTIONAL + // Detect C++17 compatible compiler. + // __cplusplus >= 201703L - a compiler has support of 'static inline' variables. 
+ #if (defined(__cplusplus) && __cplusplus >= 201703L) \ + || (defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) + #define FLATBUFFERS_USE_STD_OPTIONAL 1 + #else + #define FLATBUFFERS_USE_STD_OPTIONAL 0 + #endif // (defined(__cplusplus) && __cplusplus >= 201703L) ... +#endif // FLATBUFFERS_USE_STD_OPTIONAL + +#if FLATBUFFERS_USE_STD_OPTIONAL + #include +#endif + +// The __cpp_lib_span is the predefined feature macro. +#if defined(FLATBUFFERS_USE_STD_SPAN) + #include +#elif defined(__cpp_lib_span) && defined(__has_include) + #if __has_include() + #include + #define FLATBUFFERS_USE_STD_SPAN + #endif +#else + // Disable non-trivial ctors if FLATBUFFERS_SPAN_MINIMAL defined. + #if !defined(FLATBUFFERS_TEMPLATES_ALIASES) + #define FLATBUFFERS_SPAN_MINIMAL + #else + // Enable implicit construction of a span from a std::array. + #include + #endif +#endif // defined(FLATBUFFERS_USE_STD_SPAN) + +// This header provides backwards compatibility for older versions of the STL. +namespace flatbuffers { + +#if defined(FLATBUFFERS_TEMPLATES_ALIASES) + template + using numeric_limits = std::numeric_limits; +#else + template class numeric_limits : + public std::numeric_limits {}; +#endif // defined(FLATBUFFERS_TEMPLATES_ALIASES) + +#if defined(FLATBUFFERS_TEMPLATES_ALIASES) + template using is_scalar = std::is_scalar; + template using is_same = std::is_same; + template using is_floating_point = std::is_floating_point; + template using is_unsigned = std::is_unsigned; + template using is_enum = std::is_enum; + template using make_unsigned = std::make_unsigned; + template + using conditional = std::conditional; + template + using integral_constant = std::integral_constant; + template + using bool_constant = integral_constant; + using true_type = std::true_type; + using false_type = std::false_type; +#else + // MSVC 2010 doesn't support C++11 aliases. + template struct is_scalar : public std::is_scalar {}; + template struct is_same : public std::is_same {}; + template struct is_floating_point : + public std::is_floating_point {}; + template struct is_unsigned : public std::is_unsigned {}; + template struct is_enum : public std::is_enum {}; + template struct make_unsigned : public std::make_unsigned {}; + template + struct conditional : public std::conditional {}; + template + struct integral_constant : public std::integral_constant {}; + template + struct bool_constant : public integral_constant {}; + typedef bool_constant true_type; + typedef bool_constant false_type; +#endif // defined(FLATBUFFERS_TEMPLATES_ALIASES) + +#if defined(FLATBUFFERS_TEMPLATES_ALIASES) + template using unique_ptr = std::unique_ptr; +#else + // MSVC 2010 doesn't support C++11 aliases. + // We're manually "aliasing" the class here as we want to bring unique_ptr + // into the flatbuffers namespace. We have unique_ptr in the flatbuffers + // namespace we have a completely independent implementation (see below) + // for C++98 STL implementations. 
+ template class unique_ptr : public std::unique_ptr { + public: + unique_ptr() {} + explicit unique_ptr(T* p) : std::unique_ptr(p) {} + unique_ptr(std::unique_ptr&& u) { *this = std::move(u); } + unique_ptr(unique_ptr&& u) { *this = std::move(u); } + unique_ptr& operator=(std::unique_ptr&& u) { + std::unique_ptr::reset(u.release()); + return *this; + } + unique_ptr& operator=(unique_ptr&& u) { + std::unique_ptr::reset(u.release()); + return *this; + } + unique_ptr& operator=(T* p) { + return std::unique_ptr::operator=(p); + } + }; +#endif // defined(FLATBUFFERS_TEMPLATES_ALIASES) + +#if FLATBUFFERS_USE_STD_OPTIONAL +template +using Optional = std::optional; +using nullopt_t = std::nullopt_t; +inline constexpr nullopt_t nullopt = std::nullopt; + +#else +// Limited implementation of Optional type for a scalar T. +// This implementation limited by trivial types compatible with +// std::is_arithmetic or std::is_enum type traits. + +// A tag to indicate an empty flatbuffers::optional. +struct nullopt_t { + explicit FLATBUFFERS_CONSTEXPR_CPP11 nullopt_t(int) {} +}; + +#if defined(FLATBUFFERS_CONSTEXPR_DEFINED) + namespace internal { + template struct nullopt_holder { + static constexpr nullopt_t instance_ = nullopt_t(0); + }; + template + constexpr nullopt_t nullopt_holder::instance_; + } + static constexpr const nullopt_t &nullopt = internal::nullopt_holder::instance_; + +#else + namespace internal { + template struct nullopt_holder { + static const nullopt_t instance_; + }; + template + const nullopt_t nullopt_holder::instance_ = nullopt_t(0); + } + static const nullopt_t &nullopt = internal::nullopt_holder::instance_; + +#endif + +template +class Optional FLATBUFFERS_FINAL_CLASS { + // Non-scalar 'T' would extremely complicated Optional. + // Use is_scalar checking because flatbuffers flatbuffers::is_arithmetic + // isn't implemented. 
+ static_assert(flatbuffers::is_scalar::value, "unexpected type T"); + + public: + ~Optional() {} + + FLATBUFFERS_CONSTEXPR_CPP11 Optional() FLATBUFFERS_NOEXCEPT + : value_(), has_value_(false) {} + + FLATBUFFERS_CONSTEXPR_CPP11 Optional(nullopt_t) FLATBUFFERS_NOEXCEPT + : value_(), has_value_(false) {} + + FLATBUFFERS_CONSTEXPR_CPP11 Optional(T val) FLATBUFFERS_NOEXCEPT + : value_(val), has_value_(true) {} + + FLATBUFFERS_CONSTEXPR_CPP11 Optional(const Optional &other) FLATBUFFERS_NOEXCEPT + : value_(other.value_), has_value_(other.has_value_) {} + + FLATBUFFERS_CONSTEXPR_CPP14 Optional &operator=(const Optional &other) FLATBUFFERS_NOEXCEPT { + value_ = other.value_; + has_value_ = other.has_value_; + return *this; + } + + FLATBUFFERS_CONSTEXPR_CPP14 Optional &operator=(nullopt_t) FLATBUFFERS_NOEXCEPT { + value_ = T(); + has_value_ = false; + return *this; + } + + FLATBUFFERS_CONSTEXPR_CPP14 Optional &operator=(T val) FLATBUFFERS_NOEXCEPT { + value_ = val; + has_value_ = true; + return *this; + } + + void reset() FLATBUFFERS_NOEXCEPT { + *this = nullopt; + } + + void swap(Optional &other) FLATBUFFERS_NOEXCEPT { + std::swap(value_, other.value_); + std::swap(has_value_, other.has_value_); + } + + FLATBUFFERS_CONSTEXPR_CPP11 FLATBUFFERS_EXPLICIT_CPP11 operator bool() const FLATBUFFERS_NOEXCEPT { + return has_value_; + } + + FLATBUFFERS_CONSTEXPR_CPP11 bool has_value() const FLATBUFFERS_NOEXCEPT { + return has_value_; + } + + FLATBUFFERS_CONSTEXPR_CPP11 const T& operator*() const FLATBUFFERS_NOEXCEPT { + return value_; + } + + const T& value() const { + FLATBUFFERS_ASSERT(has_value()); + return value_; + } + + T value_or(T default_value) const FLATBUFFERS_NOEXCEPT { + return has_value() ? value_ : default_value; + } + + private: + T value_; + bool has_value_; +}; + +template +FLATBUFFERS_CONSTEXPR_CPP11 bool operator==(const Optional& opt, nullopt_t) FLATBUFFERS_NOEXCEPT { + return !opt; +} +template +FLATBUFFERS_CONSTEXPR_CPP11 bool operator==(nullopt_t, const Optional& opt) FLATBUFFERS_NOEXCEPT { + return !opt; +} + +template +FLATBUFFERS_CONSTEXPR_CPP11 bool operator==(const Optional& lhs, const U& rhs) FLATBUFFERS_NOEXCEPT { + return static_cast(lhs) && (*lhs == rhs); +} + +template +FLATBUFFERS_CONSTEXPR_CPP11 bool operator==(const T& lhs, const Optional& rhs) FLATBUFFERS_NOEXCEPT { + return static_cast(rhs) && (lhs == *rhs); +} + +template +FLATBUFFERS_CONSTEXPR_CPP11 bool operator==(const Optional& lhs, const Optional& rhs) FLATBUFFERS_NOEXCEPT { + return static_cast(lhs) != static_cast(rhs) + ? false + : !static_cast(lhs) ? false : (*lhs == *rhs); +} +#endif // FLATBUFFERS_USE_STD_OPTIONAL + + +// Very limited and naive partial implementation of C++20 std::span. +#if defined(FLATBUFFERS_USE_STD_SPAN) + inline constexpr std::size_t dynamic_extent = std::dynamic_extent; + template + using span = std::span; + +#else // !defined(FLATBUFFERS_USE_STD_SPAN) +FLATBUFFERS_CONSTEXPR std::size_t dynamic_extent = static_cast(-1); + +// Exclude this code if MSVC2010 or non-STL Android is active. +// The non-STL Android doesn't have `std::is_convertible` required for SFINAE. +#if !defined(FLATBUFFERS_SPAN_MINIMAL) +namespace internal { + // This is SFINAE helper class for checking of a common condition: + // > This overload only participates in overload resolution + // > Check whether a pointer to an array of From can be converted + // > to a pointer to an array of To. + // This helper is used for checking of 'From -> const From'. 
+ template + struct is_span_convertable { + using type = + typename std::conditional::value + && (Extent == dynamic_extent || N == Extent), + int, void>::type; + }; + + template + struct SpanIterator { + // TODO: upgrade to std::random_access_iterator_tag. + using iterator_category = std::forward_iterator_tag; + using difference_type = std::ptrdiff_t; + using value_type = typename std::remove_cv::type; + using reference = T&; + using pointer = T*; + + // Convince MSVC compiler that this iterator is trusted (it is verified). + #ifdef _MSC_VER + using _Unchecked_type = pointer; + #endif // _MSC_VER + + SpanIterator(pointer ptr) : ptr_(ptr) {} + reference operator*() const { return *ptr_; } + pointer operator->() { return ptr_; } + SpanIterator& operator++() { ptr_++; return *this; } + SpanIterator operator++(int) { auto tmp = *this; ++(*this); return tmp; } + + friend bool operator== (const SpanIterator& lhs, const SpanIterator& rhs) { return lhs.ptr_ == rhs.ptr_; } + friend bool operator!= (const SpanIterator& lhs, const SpanIterator& rhs) { return lhs.ptr_ != rhs.ptr_; } + + private: + pointer ptr_; + }; +} // namespace internal +#endif // !defined(FLATBUFFERS_SPAN_MINIMAL) + +// T - element type; must be a complete type that is not an abstract +// class type. +// Extent - the number of elements in the sequence, or dynamic. +template +class span FLATBUFFERS_FINAL_CLASS { + public: + typedef T element_type; + typedef T& reference; + typedef const T& const_reference; + typedef T* pointer; + typedef const T* const_pointer; + typedef std::size_t size_type; + + static FLATBUFFERS_CONSTEXPR size_type extent = Extent; + + // Returns the number of elements in the span. + FLATBUFFERS_CONSTEXPR_CPP11 size_type size() const FLATBUFFERS_NOEXCEPT { + return count_; + } + + // Returns the size of the sequence in bytes. + FLATBUFFERS_CONSTEXPR_CPP11 + size_type size_bytes() const FLATBUFFERS_NOEXCEPT { + return size() * sizeof(element_type); + } + + // Checks if the span is empty. + FLATBUFFERS_CONSTEXPR_CPP11 bool empty() const FLATBUFFERS_NOEXCEPT { + return size() == 0; + } + + // Returns a pointer to the beginning of the sequence. + FLATBUFFERS_CONSTEXPR_CPP11 pointer data() const FLATBUFFERS_NOEXCEPT { + return data_; + } + + #if !defined(FLATBUFFERS_SPAN_MINIMAL) + using Iterator = internal::SpanIterator; + + Iterator begin() const { return Iterator(data()); } + Iterator end() const { return Iterator(data() + size()); } + #endif + + // Returns a reference to the idx-th element of the sequence. + // The behavior is undefined if the idx is greater than or equal to size(). + FLATBUFFERS_CONSTEXPR_CPP11 reference operator[](size_type idx) const { + return data()[idx]; + } + + FLATBUFFERS_CONSTEXPR_CPP11 span(const span &other) FLATBUFFERS_NOEXCEPT + : data_(other.data_), count_(other.count_) {} + + FLATBUFFERS_CONSTEXPR_CPP14 span &operator=(const span &other) + FLATBUFFERS_NOEXCEPT { + data_ = other.data_; + count_ = other.count_; + } + + // Limited implementation of + // `template constexpr std::span(It first, size_type count);`. + // + // Constructs a span that is a view over the range [first, first + count); + // the resulting span has: data() == first and size() == count. + // The behavior is undefined if [first, first + count) is not a valid range, + // or if (extent != flatbuffers::dynamic_extent && count != extent). + FLATBUFFERS_CONSTEXPR_CPP11 + explicit span(pointer first, size_type count) FLATBUFFERS_NOEXCEPT + : data_ (Extent == dynamic_extent ? first : (Extent == count ? 
first : nullptr)), + count_(Extent == dynamic_extent ? count : (Extent == count ? Extent : 0)) { + // Make span empty if the count argument is incompatible with span. + } + + // Exclude this code if MSVC2010 is active. The MSVC2010 isn't C++11 + // compliant, it doesn't support default template arguments for functions. + #if defined(FLATBUFFERS_SPAN_MINIMAL) + FLATBUFFERS_CONSTEXPR_CPP11 span() FLATBUFFERS_NOEXCEPT : data_(nullptr), + count_(0) { + static_assert(extent == 0 || extent == dynamic_extent, "invalid span"); + } + + #else + // Constructs an empty span whose data() == nullptr and size() == 0. + // This overload only participates in overload resolution if + // extent == 0 || extent == flatbuffers::dynamic_extent. + // A dummy template argument N is need dependency for SFINAE. + template::type = 0> + FLATBUFFERS_CONSTEXPR_CPP11 span() FLATBUFFERS_NOEXCEPT : data_(nullptr), + count_(0) { + static_assert(extent == 0 || extent == dynamic_extent, "invalid span"); + } + + // Constructs a span that is a view over the array arr; the resulting span + // has size() == N and data() == std::data(arr). These overloads only + // participate in overload resolution if + // extent == std::dynamic_extent || N == extent is true and + // std::remove_pointer_t(*)[] + // is convertible to element_type (*)[]. + template::type = 0> + FLATBUFFERS_CONSTEXPR_CPP11 span(element_type (&arr)[N]) FLATBUFFERS_NOEXCEPT + : data_(arr), count_(N) {} + + template::type = 0> + FLATBUFFERS_CONSTEXPR_CPP11 span(std::array &arr) FLATBUFFERS_NOEXCEPT + : data_(arr.data()), count_(N) {} + + //template + //FLATBUFFERS_CONSTEXPR_CPP11 span(std::array &arr) FLATBUFFERS_NOEXCEPT + // : data_(arr.data()), count_(N) {} + + template::type = 0> + FLATBUFFERS_CONSTEXPR_CPP11 span(const std::array &arr) FLATBUFFERS_NOEXCEPT + : data_(arr.data()), count_(N) {} + + // Converting constructor from another span s; + // the resulting span has size() == s.size() and data() == s.data(). + // This overload only participates in overload resolution + // if extent == std::dynamic_extent || N == extent is true and U (*)[] + // is convertible to element_type (*)[]. + template::type = 0> + FLATBUFFERS_CONSTEXPR_CPP11 span(const flatbuffers::span &s) FLATBUFFERS_NOEXCEPT + : span(s.data(), s.size()) { + } + + #endif // !defined(FLATBUFFERS_SPAN_MINIMAL) + + private: + // This is a naive implementation with 'count_' member even if (Extent != dynamic_extent). 
+ pointer const data_; + size_type count_; +}; +#endif // defined(FLATBUFFERS_USE_STD_SPAN) + +#if !defined(FLATBUFFERS_SPAN_MINIMAL) +template +FLATBUFFERS_CONSTEXPR_CPP11 +flatbuffers::span make_span(ElementType(&arr)[Extent]) FLATBUFFERS_NOEXCEPT { + return span(arr); +} + +template +FLATBUFFERS_CONSTEXPR_CPP11 +flatbuffers::span make_span(const ElementType(&arr)[Extent]) FLATBUFFERS_NOEXCEPT { + return span(arr); +} + +template +FLATBUFFERS_CONSTEXPR_CPP11 +flatbuffers::span make_span(std::array &arr) FLATBUFFERS_NOEXCEPT { + return span(arr); +} + +template +FLATBUFFERS_CONSTEXPR_CPP11 +flatbuffers::span make_span(const std::array &arr) FLATBUFFERS_NOEXCEPT { + return span(arr); +} + +template +FLATBUFFERS_CONSTEXPR_CPP11 +flatbuffers::span make_span(ElementType *first, std::size_t count) FLATBUFFERS_NOEXCEPT { + return span(first, count); +} + +template +FLATBUFFERS_CONSTEXPR_CPP11 +flatbuffers::span make_span(const ElementType *first, std::size_t count) FLATBUFFERS_NOEXCEPT { + return span(first, count); +} +#endif // !defined(FLATBUFFERS_SPAN_MINIMAL) + +} // namespace flatbuffers + +#endif // FLATBUFFERS_STL_EMULATION_H_ diff --git a/edgeimpulse/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_string.h b/edgeimpulse/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_string.h new file mode 100644 index 0000000..45cecf2 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_string.h @@ -0,0 +1,64 @@ +/* + * Copyright 2021 Google Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FLATBUFFERS_STRING_H_ +#define FLATBUFFERS_STRING_H_ + +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_base.h" +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_vector.h" + +namespace flatbuffers { + +struct String : public Vector { + const char *c_str() const { return reinterpret_cast(Data()); } + std::string str() const { return std::string(c_str(), size()); } + + // clang-format off + #ifdef FLATBUFFERS_HAS_STRING_VIEW + flatbuffers::string_view string_view() const { + return flatbuffers::string_view(c_str(), size()); + } + #endif // FLATBUFFERS_HAS_STRING_VIEW + // clang-format on + + bool operator<(const String &o) const { + return StringLessThan(this->data(), this->size(), o.data(), o.size()); + } +}; + +// Convenience function to get std::string from a String returning an empty +// string on null pointer. +static inline std::string GetString(const String *str) { + return str ? str->str() : ""; +} + +// Convenience function to get char* from a String returning an empty string on +// null pointer. +static inline const char *GetCstring(const String *str) { + return str ? str->c_str() : ""; +} + +#ifdef FLATBUFFERS_HAS_STRING_VIEW +// Convenience function to get string_view from a String returning an empty +// string_view on null pointer. +static inline flatbuffers::string_view GetStringView(const String *str) { + return str ? 
str->string_view() : flatbuffers::string_view(); +} +#endif // FLATBUFFERS_HAS_STRING_VIEW + +} // namespace flatbuffers + +#endif // FLATBUFFERS_STRING_H_ \ No newline at end of file diff --git a/edgeimpulse/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_struct.h b/edgeimpulse/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_struct.h new file mode 100644 index 0000000..385d648 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_struct.h @@ -0,0 +1,53 @@ +/* + * Copyright 2021 Google Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FLATBUFFERS_STRUCT_H_ +#define FLATBUFFERS_STRUCT_H_ + +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_base.h" + +namespace flatbuffers { + +// "structs" are flat structures that do not have an offset table, thus +// always have all members present and do not support forwards/backwards +// compatible extensions. + +class Struct FLATBUFFERS_FINAL_CLASS { + public: + template T GetField(uoffset_t o) const { + return ReadScalar(&data_[o]); + } + + template T GetStruct(uoffset_t o) const { + return reinterpret_cast(&data_[o]); + } + + const uint8_t *GetAddressOf(uoffset_t o) const { return &data_[o]; } + uint8_t *GetAddressOf(uoffset_t o) { return &data_[o]; } + + private: + // private constructor & copy constructor: you obtain instances of this + // class by pointing to existing data only + Struct(); + Struct(const Struct &); + Struct &operator=(const Struct &); + + uint8_t data_[1]; +}; + +} // namespace flatbuffers + +#endif // FLATBUFFERS_STRUCT_H_ \ No newline at end of file diff --git a/edgeimpulse/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_table.h b/edgeimpulse/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_table.h new file mode 100644 index 0000000..3aca63f --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_table.h @@ -0,0 +1,168 @@ +/* + * Copyright 2021 Google Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef FLATBUFFERS_TABLE_H_ +#define FLATBUFFERS_TABLE_H_ + +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_base.h" +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_verifier.h" + +namespace flatbuffers { + +// "tables" use an offset table (possibly shared) that allows fields to be +// omitted and added at will, but uses an extra indirection to read. +class Table { + public: + const uint8_t *GetVTable() const { + return data_ - ReadScalar(data_); + } + + // This gets the field offset for any of the functions below it, or 0 + // if the field was not present. + voffset_t GetOptionalFieldOffset(voffset_t field) const { + // The vtable offset is always at the start. + auto vtable = GetVTable(); + // The first element is the size of the vtable (fields + type id + itself). + auto vtsize = ReadScalar(vtable); + // If the field we're accessing is outside the vtable, we're reading older + // data, so it's the same as if the offset was 0 (not present). + return field < vtsize ? ReadScalar(vtable + field) : 0; + } + + template T GetField(voffset_t field, T defaultval) const { + auto field_offset = GetOptionalFieldOffset(field); + return field_offset ? ReadScalar(data_ + field_offset) : defaultval; + } + + template P GetPointer(voffset_t field) { + auto field_offset = GetOptionalFieldOffset(field); + auto p = data_ + field_offset; + return field_offset ? reinterpret_cast
<P>
(p + ReadScalar(p)) + : nullptr; + } + template P GetPointer(voffset_t field) const { + return const_cast(this)->GetPointer
<P>
(field); + } + + template P GetStruct(voffset_t field) const { + auto field_offset = GetOptionalFieldOffset(field); + auto p = const_cast(data_ + field_offset); + return field_offset ? reinterpret_cast
<P>
(p) : nullptr; + } + + template + flatbuffers::Optional GetOptional(voffset_t field) const { + auto field_offset = GetOptionalFieldOffset(field); + auto p = data_ + field_offset; + return field_offset ? Optional(static_cast(ReadScalar(p))) + : Optional(); + } + + template bool SetField(voffset_t field, T val, T def) { + auto field_offset = GetOptionalFieldOffset(field); + if (!field_offset) return IsTheSameAs(val, def); + WriteScalar(data_ + field_offset, val); + return true; + } + template bool SetField(voffset_t field, T val) { + auto field_offset = GetOptionalFieldOffset(field); + if (!field_offset) return false; + WriteScalar(data_ + field_offset, val); + return true; + } + + bool SetPointer(voffset_t field, const uint8_t *val) { + auto field_offset = GetOptionalFieldOffset(field); + if (!field_offset) return false; + WriteScalar(data_ + field_offset, + static_cast(val - (data_ + field_offset))); + return true; + } + + uint8_t *GetAddressOf(voffset_t field) { + auto field_offset = GetOptionalFieldOffset(field); + return field_offset ? data_ + field_offset : nullptr; + } + const uint8_t *GetAddressOf(voffset_t field) const { + return const_cast
<Table *>
(this)->GetAddressOf(field); + } + + bool CheckField(voffset_t field) const { + return GetOptionalFieldOffset(field) != 0; + } + + // Verify the vtable of this table. + // Call this once per table, followed by VerifyField once per field. + bool VerifyTableStart(Verifier &verifier) const { + return verifier.VerifyTableStart(data_); + } + + // Verify a particular field. + template + bool VerifyField(const Verifier &verifier, voffset_t field, + size_t align) const { + // Calling GetOptionalFieldOffset should be safe now thanks to + // VerifyTable(). + auto field_offset = GetOptionalFieldOffset(field); + // Check the actual field. + return !field_offset || verifier.VerifyField(data_, field_offset, align); + } + + // VerifyField for required fields. + template + bool VerifyFieldRequired(const Verifier &verifier, voffset_t field, + size_t align) const { + auto field_offset = GetOptionalFieldOffset(field); + return verifier.Check(field_offset != 0) && + verifier.VerifyField(data_, field_offset, align); + } + + // Versions for offsets. + bool VerifyOffset(const Verifier &verifier, voffset_t field) const { + auto field_offset = GetOptionalFieldOffset(field); + return !field_offset || verifier.VerifyOffset(data_, field_offset); + } + + bool VerifyOffsetRequired(const Verifier &verifier, voffset_t field) const { + auto field_offset = GetOptionalFieldOffset(field); + return verifier.Check(field_offset != 0) && + verifier.VerifyOffset(data_, field_offset); + } + + private: + // private constructor & copy constructor: you obtain instances of this + // class by pointing to existing data only + Table(); + Table(const Table &other); + Table &operator=(const Table &); + + uint8_t data_[1]; +}; + +// This specialization allows avoiding warnings like: +// MSVC C4800: type: forcing value to bool 'true' or 'false'. +template<> +inline flatbuffers::Optional Table::GetOptional( + voffset_t field) const { + auto field_offset = GetOptionalFieldOffset(field); + auto p = data_ + field_offset; + return field_offset ? Optional(ReadScalar(p) != 0) + : Optional(); +} + +} // namespace flatbuffers + +#endif // FLATBUFFERS_TABLE_H_ diff --git a/edgeimpulse/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_util.h b/edgeimpulse/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_util.h new file mode 100644 index 0000000..10138be --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_util.h @@ -0,0 +1,725 @@ +/* + * Copyright 2014 Google Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FLATBUFFERS_UTIL_H_ +#define FLATBUFFERS_UTIL_H_ + +#include +#include + +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_base.h" +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_stl_emulation.h" + +// For TFLM we always want to use FLATBUFFERS_PREFER_PRINTF=1. See +// http://b/211811553 for more context. 
+#ifndef FLATBUFFERS_PREFER_PRINTF +#define FLATBUFFERS_PREFER_PRINTF 1 +#endif + +#ifndef FLATBUFFERS_PREFER_PRINTF +# include +# include +#else // FLATBUFFERS_PREFER_PRINTF +# include +# include +#endif // FLATBUFFERS_PREFER_PRINTF + +#include + +namespace flatbuffers { + +// @locale-independent functions for ASCII characters set. + +// Fast checking that character lies in closed range: [a <= x <= b] +// using one compare (conditional branch) operator. +inline bool check_ascii_range(char x, char a, char b) { + FLATBUFFERS_ASSERT(a <= b); + // (Hacker's Delight): `a <= x <= b` <=> `(x-a) <={u} (b-a)`. + // The x, a, b will be promoted to int and subtracted without overflow. + return static_cast(x - a) <= static_cast(b - a); +} + +// Case-insensitive isalpha +inline bool is_alpha(char c) { + // ASCII only: alpha to upper case => reset bit 0x20 (~0x20 = 0xDF). + return check_ascii_range(c & 0xDF, 'a' & 0xDF, 'z' & 0xDF); +} + +// Check for uppercase alpha +inline bool is_alpha_upper(char c) { return check_ascii_range(c, 'A', 'Z'); } + +// Check (case-insensitive) that `c` is equal to alpha. +inline bool is_alpha_char(char c, char alpha) { + FLATBUFFERS_ASSERT(is_alpha(alpha)); + // ASCII only: alpha to upper case => reset bit 0x20 (~0x20 = 0xDF). + return ((c & 0xDF) == (alpha & 0xDF)); +} + +// https://en.cppreference.com/w/cpp/string/byte/isxdigit +// isdigit and isxdigit are the only standard narrow character classification +// functions that are not affected by the currently installed C locale. although +// some implementations (e.g. Microsoft in 1252 codepage) may classify +// additional single-byte characters as digits. +inline bool is_digit(char c) { return check_ascii_range(c, '0', '9'); } + +inline bool is_xdigit(char c) { + // Replace by look-up table. + return is_digit(c) || check_ascii_range(c & 0xDF, 'a' & 0xDF, 'f' & 0xDF); +} + +// Case-insensitive isalnum +inline bool is_alnum(char c) { return is_alpha(c) || is_digit(c); } + +inline char CharToUpper(char c) { + return static_cast(::toupper(static_cast(c))); +} + +inline char CharToLower(char c) { + return static_cast(::tolower(static_cast(c))); +} + +// @end-locale-independent functions for ASCII character set + +#ifdef FLATBUFFERS_PREFER_PRINTF +template size_t IntToDigitCount(T t) { + size_t digit_count = 0; + // Count the sign for negative numbers + if (t < 0) digit_count++; + // Count a single 0 left of the dot for fractional numbers + if (-1 < t && t < 1) digit_count++; + // Count digits until fractional part + T eps = std::numeric_limits::epsilon(); + while (t <= (-1 + eps) || (1 - eps) <= t) { + t /= 10; + digit_count++; + } + return digit_count; +} + +template size_t NumToStringWidth(T t, int precision = 0) { + size_t string_width = IntToDigitCount(t); + // Count the dot for floating point numbers + if (precision) string_width += (precision + 1); + return string_width; +} + +template +std::string NumToStringImplWrapper(T t, const char *fmt, int precision = 0) { + size_t string_width = NumToStringWidth(t, precision); + std::string s(string_width, 0x00); + // Allow snprintf to use std::string trailing null to detect buffer overflow + snprintf(const_cast(s.data()), (s.size() + 1), fmt, string_width, t); + return s; +} +#endif // FLATBUFFERS_PREFER_PRINTF + +// Convert an integer or floating point value to a string. +// In contrast to std::stringstream, "char" values are +// converted to a string of digits, and we don't use scientific notation. 
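A small usage sketch of the conversion helpers documented above (the NumToString template follows just below). Illustrative only, not part of the vendored header:

// Illustrative sketch of the NumToString behavior described in the comment
// above. (Assumes this header is already included.)
#include <string>

inline void NumToStringSketch() {
  std::string a = flatbuffers::NumToString(42);        // "42"
  std::string b = flatbuffers::NumToString<char>(7);   // digits, not a character: "7"
  std::string c = flatbuffers::NumToString(1.5f);      // fixed notation, trailing zeros stripped: "1.5"
  (void)a; (void)b; (void)c;
}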
+template std::string NumToString(T t) { + // clang-format off + + #ifndef FLATBUFFERS_PREFER_PRINTF + std::stringstream ss; + ss << t; + return ss.str(); + #else // FLATBUFFERS_PREFER_PRINTF + auto v = static_cast(t); + return NumToStringImplWrapper(v, "%.*lld"); + #endif // FLATBUFFERS_PREFER_PRINTF + // clang-format on +} +// Avoid char types used as character data. +template<> inline std::string NumToString(signed char t) { + return NumToString(static_cast(t)); +} +template<> inline std::string NumToString(unsigned char t) { + return NumToString(static_cast(t)); +} +template<> inline std::string NumToString(char t) { + return NumToString(static_cast(t)); +} + +// Special versions for floats/doubles. +template std::string FloatToString(T t, int precision) { + // clang-format off + + #ifndef FLATBUFFERS_PREFER_PRINTF + // to_string() prints different numbers of digits for floats depending on + // platform and isn't available on Android, so we use stringstream + std::stringstream ss; + // Use std::fixed to suppress scientific notation. + ss << std::fixed; + // Default precision is 6, we want that to be higher for doubles. + ss << std::setprecision(precision); + ss << t; + auto s = ss.str(); + #else // FLATBUFFERS_PREFER_PRINTF + auto v = static_cast(t); + auto s = NumToStringImplWrapper(v, "%0.*f", precision); + #endif // FLATBUFFERS_PREFER_PRINTF + // clang-format on + // Sadly, std::fixed turns "1" into "1.00000", so here we undo that. + auto p = s.find_last_not_of('0'); + if (p != std::string::npos) { + // Strip trailing zeroes. If it is a whole number, keep one zero. + s.resize(p + (s[p] == '.' ? 2 : 1)); + } + return s; +} + +template<> inline std::string NumToString(double t) { + return FloatToString(t, 12); +} +template<> inline std::string NumToString(float t) { + return FloatToString(t, 6); +} + +// Convert an integer value to a hexadecimal string. +// The returned string length is always xdigits long, prefixed by 0 digits. +// For example, IntToStringHex(0x23, 8) returns the string "00000023". +inline std::string IntToStringHex(int i, int xdigits) { + FLATBUFFERS_ASSERT(i >= 0); + // clang-format off + + #ifndef FLATBUFFERS_PREFER_PRINTF + std::stringstream ss; + ss << std::setw(xdigits) << std::setfill('0') << std::hex << std::uppercase + << i; + return ss.str(); + #else // FLATBUFFERS_PREFER_PRINTF + return NumToStringImplWrapper(i, "%.*X", xdigits); + #endif // FLATBUFFERS_PREFER_PRINTF + // clang-format on +} + +// clang-format off +// Use locale independent functions {strtod_l, strtof_l, strtoll_l, strtoull_l}. 
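Expected outputs for the conversion helpers defined above, written as a standalone sketch; the function name and the values in the comments are illustrative, derived from the code and comments above:

#include <string>
#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_util.h"

// Illustrative expectations for NumToString / FloatToString / IntToStringHex.
inline void num_to_string_sketch() {
  std::string a = flatbuffers::NumToString(static_cast<char>(65)); // "65" (digits, not 'A')
  std::string b = flatbuffers::FloatToString(1.0, 6);              // "1.0"  (whole numbers keep one zero)
  std::string c = flatbuffers::FloatToString(2.5000, 6);           // "2.5"  (trailing zeros stripped)
  std::string d = flatbuffers::IntToStringHex(0x23, 8);            // "00000023"
  (void)a; (void)b; (void)c; (void)d;
}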
+#if defined(FLATBUFFERS_LOCALE_INDEPENDENT) && (FLATBUFFERS_LOCALE_INDEPENDENT > 0) + class ClassicLocale { + #ifdef _MSC_VER + typedef _locale_t locale_type; + #else + typedef locale_t locale_type; // POSIX.1-2008 locale_t type + #endif + ClassicLocale(); + ~ClassicLocale(); + locale_type locale_; + static ClassicLocale instance_; + public: + static locale_type Get() { return instance_.locale_; } + }; + + #ifdef _MSC_VER + #define __strtoull_impl(s, pe, b) _strtoui64_l(s, pe, b, ClassicLocale::Get()) + #define __strtoll_impl(s, pe, b) _strtoi64_l(s, pe, b, ClassicLocale::Get()) + #define __strtod_impl(s, pe) _strtod_l(s, pe, ClassicLocale::Get()) + #define __strtof_impl(s, pe) _strtof_l(s, pe, ClassicLocale::Get()) + #else + #define __strtoull_impl(s, pe, b) strtoull_l(s, pe, b, ClassicLocale::Get()) + #define __strtoll_impl(s, pe, b) strtoll_l(s, pe, b, ClassicLocale::Get()) + #define __strtod_impl(s, pe) strtod_l(s, pe, ClassicLocale::Get()) + #define __strtof_impl(s, pe) strtof_l(s, pe, ClassicLocale::Get()) + #endif +#else + #define __strtod_impl(s, pe) strtod(s, pe) + #define __strtof_impl(s, pe) static_cast(strtod(s, pe)) + #ifdef _MSC_VER + #define __strtoull_impl(s, pe, b) _strtoui64(s, pe, b) + #define __strtoll_impl(s, pe, b) _strtoi64(s, pe, b) + #else + #define __strtoull_impl(s, pe, b) strtoull(s, pe, b) + #define __strtoll_impl(s, pe, b) strtoll(s, pe, b) + #endif +#endif + +inline void strtoval_impl(int64_t *val, const char *str, char **endptr, + int base) { + *val = __strtoll_impl(str, endptr, base); +} + +inline void strtoval_impl(uint64_t *val, const char *str, char **endptr, + int base) { + *val = __strtoull_impl(str, endptr, base); +} + +inline void strtoval_impl(double *val, const char *str, char **endptr) { + *val = __strtod_impl(str, endptr); +} + +// UBSAN: double to float is safe if numeric_limits::is_iec559 is true. +__supress_ubsan__("float-cast-overflow") +inline void strtoval_impl(float *val, const char *str, char **endptr) { + *val = __strtof_impl(str, endptr); +} +#undef __strtoull_impl +#undef __strtoll_impl +#undef __strtod_impl +#undef __strtof_impl +// clang-format on + +// Adaptor for strtoull()/strtoll(). +// Flatbuffers accepts numbers with any count of leading zeros (-009 is -9), +// while strtoll with base=0 interprets first leading zero as octal prefix. +// In future, it is possible to add prefixed 0b0101. +// 1) Checks errno code for overflow condition (out of range). +// 2) If base <= 0, function try to detect base of number by prefix. +// +// Return value (like strtoull and strtoll, but reject partial result): +// - If successful, an integer value corresponding to the str is returned. +// - If full string conversion can't be performed, 0 is returned. +// - If the converted value falls out of range of corresponding return type, a +// range error occurs. In this case value MAX(T)/MIN(T) is returned. 
+template +inline bool StringToIntegerImpl(T *val, const char *const str, + const int base = 0, + const bool check_errno = true) { + // T is int64_t or uint64_T + FLATBUFFERS_ASSERT(str); + if (base <= 0) { + auto s = str; + while (*s && !is_digit(*s)) s++; + if (s[0] == '0' && is_alpha_char(s[1], 'X')) + return StringToIntegerImpl(val, str, 16, check_errno); + // if a prefix not match, try base=10 + return StringToIntegerImpl(val, str, 10, check_errno); + } else { + if (check_errno) errno = 0; // clear thread-local errno + auto endptr = str; + strtoval_impl(val, str, const_cast(&endptr), base); + if ((*endptr != '\0') || (endptr == str)) { + *val = 0; // erase partial result + return false; // invalid string + } + // errno is out-of-range, return MAX/MIN + if (check_errno && errno) return false; + return true; + } +} + +template +inline bool StringToFloatImpl(T *val, const char *const str) { + // Type T must be either float or double. + FLATBUFFERS_ASSERT(str && val); + auto end = str; + strtoval_impl(val, str, const_cast(&end)); + auto done = (end != str) && (*end == '\0'); + if (!done) *val = 0; // erase partial result + return done; +} + +// Convert a string to an instance of T. +// Return value (matched with StringToInteger64Impl and strtod): +// - If successful, a numeric value corresponding to the str is returned. +// - If full string conversion can't be performed, 0 is returned. +// - If the converted value falls out of range of corresponding return type, a +// range error occurs. In this case value MAX(T)/MIN(T) is returned. +template inline bool StringToNumber(const char *s, T *val) { + // Assert on `unsigned long` and `signed long` on LP64. + // If it is necessary, it could be solved with flatbuffers::enable_if. + static_assert(sizeof(T) < sizeof(int64_t), "unexpected type T"); + FLATBUFFERS_ASSERT(s && val); + int64_t i64; + // The errno check isn't needed, will return MAX/MIN on overflow. + if (StringToIntegerImpl(&i64, s, 0, false)) { + const int64_t max = (flatbuffers::numeric_limits::max)(); + const int64_t min = flatbuffers::numeric_limits::lowest(); + if (i64 > max) { + *val = static_cast(max); + return false; + } + if (i64 < min) { + // For unsigned types return max to distinguish from + // "no conversion can be performed" when 0 is returned. + *val = static_cast(flatbuffers::is_unsigned::value ? max : min); + return false; + } + *val = static_cast(i64); + return true; + } + *val = 0; + return false; +} + +template<> inline bool StringToNumber(const char *str, int64_t *val) { + return StringToIntegerImpl(val, str); +} + +template<> +inline bool StringToNumber(const char *str, uint64_t *val) { + if (!StringToIntegerImpl(val, str)) return false; + // The strtoull accepts negative numbers: + // If the minus sign was part of the input sequence, the numeric value + // calculated from the sequence of digits is negated as if by unary minus + // in the result type, which applies unsigned integer wraparound rules. + // Fix this behaviour (except -0). + if (*val) { + auto s = str; + while (*s && !is_digit(*s)) s++; + s = (s > str) ? (s - 1) : s; // step back to one symbol + if (*s == '-') { + // For unsigned types return the max to distinguish from + // "no conversion can be performed". 
+ *val = (flatbuffers::numeric_limits::max)(); + return false; + } + } + return true; +} + +template<> inline bool StringToNumber(const char *s, float *val) { + return StringToFloatImpl(val, s); +} + +template<> inline bool StringToNumber(const char *s, double *val) { + return StringToFloatImpl(val, s); +} + +inline int64_t StringToInt(const char *s, int base = 10) { + int64_t val; + return StringToIntegerImpl(&val, s, base) ? val : 0; +} + +inline uint64_t StringToUInt(const char *s, int base = 10) { + uint64_t val; + return StringToIntegerImpl(&val, s, base) ? val : 0; +} + +typedef bool (*LoadFileFunction)(const char *filename, bool binary, + std::string *dest); +typedef bool (*FileExistsFunction)(const char *filename); + +LoadFileFunction SetLoadFileFunction(LoadFileFunction load_file_function); + +FileExistsFunction SetFileExistsFunction( + FileExistsFunction file_exists_function); + +// Check if file "name" exists. +bool FileExists(const char *name); + +// Check if "name" exists and it is also a directory. +bool DirExists(const char *name); + +// Load file "name" into "buf" returning true if successful +// false otherwise. If "binary" is false data is read +// using ifstream's text mode, otherwise data is read with +// no transcoding. +bool LoadFile(const char *name, bool binary, std::string *buf); + +// Save data "buf" of length "len" bytes into a file +// "name" returning true if successful, false otherwise. +// If "binary" is false data is written using ifstream's +// text mode, otherwise data is written with no +// transcoding. +bool SaveFile(const char *name, const char *buf, size_t len, bool binary); + +// Save data "buf" into file "name" returning true if +// successful, false otherwise. If "binary" is false +// data is written using ifstream's text mode, otherwise +// data is written with no transcoding. +inline bool SaveFile(const char *name, const std::string &buf, bool binary) { + return SaveFile(name, buf.c_str(), buf.size(), binary); +} + +// Functionality for minimalistic portable path handling. + +// The functions below behave correctly regardless of whether posix ('/') or +// Windows ('/' or '\\') separators are used. + +// Any new separators inserted are always posix. +FLATBUFFERS_CONSTEXPR char kPathSeparator = '/'; + +// Returns the path with the extension, if any, removed. +std::string StripExtension(const std::string &filepath); + +// Returns the extension, if any. +std::string GetExtension(const std::string &filepath); + +// Return the last component of the path, after the last separator. +std::string StripPath(const std::string &filepath); + +// Strip the last component of the path + separator. +std::string StripFileName(const std::string &filepath); + +std::string StripPrefix(const std::string &filepath, + const std::string &prefix_to_remove); + +// Concatenates a path with a filename, regardless of whether the path +// ends in a separator or not. +std::string ConCatPathFileName(const std::string &path, + const std::string &filename); + +// Replaces any '\\' separators with '/' +std::string PosixPath(const char *path); +std::string PosixPath(const std::string &path); + +// This function ensure a directory exists, by recursively +// creating dirs for any parts of the path that don't exist yet. +void EnsureDirExists(const std::string &filepath); + +// Obtains the absolute path from any other path. +// Returns the input path if the absolute path couldn't be resolved. 
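The string-to-number helpers above insist on full-string conversions and clamp out-of-range values, as documented in their comments. A hedged usage sketch; the function name and the expected results in the comments are illustrative:

#include <cstdint>
#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_util.h"

// Illustrative expectations for StringToNumber / StringToInt.
inline void string_to_number_sketch() {
  int32_t i = 0;
  bool ok = flatbuffers::StringToNumber("123", &i);     // ok == true,  i == 123
  ok = flatbuffers::StringToNumber("123abc", &i);       // ok == false, partial parses are rejected
  ok = flatbuffers::StringToNumber("99999999999", &i);  // ok == false, i clamped to INT32_MAX

  uint8_t u = 0;
  ok = flatbuffers::StringToNumber("-5", &u);           // ok == false, u == UINT8_MAX (see comment above)

  int64_t v = flatbuffers::StringToInt("0x1A", 16);     // 26 (explicit base)
  (void)ok; (void)v;
}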
+std::string AbsolutePath(const std::string &filepath); + +// Returns files relative to the --project_root path, prefixed with `//`. +std::string RelativeToRootPath(const std::string &project, + const std::string &filepath); + +// To and from UTF-8 unicode conversion functions + +// Convert a unicode code point into a UTF-8 representation by appending it +// to a string. Returns the number of bytes generated. +inline int ToUTF8(uint32_t ucc, std::string *out) { + FLATBUFFERS_ASSERT(!(ucc & 0x80000000)); // Top bit can't be set. + // 6 possible encodings: http://en.wikipedia.org/wiki/UTF-8 + for (int i = 0; i < 6; i++) { + // Max bits this encoding can represent. + uint32_t max_bits = 6 + i * 5 + static_cast(!i); + if (ucc < (1u << max_bits)) { // does it fit? + // Remaining bits not encoded in the first byte, store 6 bits each + uint32_t remain_bits = i * 6; + // Store first byte: + (*out) += static_cast((0xFE << (max_bits - remain_bits)) | + (ucc >> remain_bits)); + // Store remaining bytes: + for (int j = i - 1; j >= 0; j--) { + (*out) += static_cast(((ucc >> (j * 6)) & 0x3F) | 0x80); + } + return i + 1; // Return the number of bytes added. + } + } + FLATBUFFERS_ASSERT(0); // Impossible to arrive here. + return -1; +} + +// Converts whatever prefix of the incoming string corresponds to a valid +// UTF-8 sequence into a unicode code. The incoming pointer will have been +// advanced past all bytes parsed. +// returns -1 upon corrupt UTF-8 encoding (ignore the incoming pointer in +// this case). +inline int FromUTF8(const char **in) { + int len = 0; + // Count leading 1 bits. + for (int mask = 0x80; mask >= 0x04; mask >>= 1) { + if (**in & mask) { + len++; + } else { + break; + } + } + if ((static_cast(**in) << len) & 0x80) + return -1; // Bit after leading 1's must be 0. + if (!len) return *(*in)++; + // UTF-8 encoded values with a length are between 2 and 4 bytes. + if (len < 2 || len > 4) { return -1; } + // Grab initial bits of the code. + int ucc = *(*in)++ & ((1 << (7 - len)) - 1); + for (int i = 0; i < len - 1; i++) { + if ((**in & 0xC0) != 0x80) return -1; // Upper bits must 1 0. + ucc <<= 6; + ucc |= *(*in)++ & 0x3F; // Grab 6 more bits of the code. + } + // UTF-8 cannot encode values between 0xD800 and 0xDFFF (reserved for + // UTF-16 surrogate pairs). + if (ucc >= 0xD800 && ucc <= 0xDFFF) { return -1; } + // UTF-8 must represent code points in their shortest possible encoding. + switch (len) { + case 2: + // Two bytes of UTF-8 can represent code points from U+0080 to U+07FF. + if (ucc < 0x0080 || ucc > 0x07FF) { return -1; } + break; + case 3: + // Three bytes of UTF-8 can represent code points from U+0800 to U+FFFF. + if (ucc < 0x0800 || ucc > 0xFFFF) { return -1; } + break; + case 4: + // Four bytes of UTF-8 can represent code points from U+10000 to U+10FFFF. + if (ucc < 0x10000 || ucc > 0x10FFFF) { return -1; } + break; + } + return ucc; +} + +#ifndef FLATBUFFERS_PREFER_PRINTF +// Wraps a string to a maximum length, inserting new lines where necessary. Any +// existing whitespace will be collapsed down to a single space. A prefix or +// suffix can be provided, which will be inserted before or after a wrapped +// line, respectively. 
+inline std::string WordWrap(const std::string in, size_t max_length, + const std::string wrapped_line_prefix, + const std::string wrapped_line_suffix) { + std::istringstream in_stream(in); + std::string wrapped, line, word; + + in_stream >> word; + line = word; + + while (in_stream >> word) { + if ((line.length() + 1 + word.length() + wrapped_line_suffix.length()) < + max_length) { + line += " " + word; + } else { + wrapped += line + wrapped_line_suffix + "\n"; + line = wrapped_line_prefix + word; + } + } + wrapped += line; + + return wrapped; +} +#endif // !FLATBUFFERS_PREFER_PRINTF + +inline bool EscapeString(const char *s, size_t length, std::string *_text, + bool allow_non_utf8, bool natural_utf8) { + std::string &text = *_text; + text += "\""; + for (uoffset_t i = 0; i < length; i++) { + char c = s[i]; + switch (c) { + case '\n': text += "\\n"; break; + case '\t': text += "\\t"; break; + case '\r': text += "\\r"; break; + case '\b': text += "\\b"; break; + case '\f': text += "\\f"; break; + case '\"': text += "\\\""; break; + case '\\': text += "\\\\"; break; + default: + if (c >= ' ' && c <= '~') { + text += c; + } else { + // Not printable ASCII data. Let's see if it's valid UTF-8 first: + const char *utf8 = s + i; + int ucc = FromUTF8(&utf8); + if (ucc < 0) { + if (allow_non_utf8) { + text += "\\x"; + text += IntToStringHex(static_cast(c), 2); + } else { + // There are two cases here: + // + // 1) We reached here by parsing an IDL file. In that case, + // we previously checked for non-UTF-8, so we shouldn't reach + // here. + // + // 2) We reached here by someone calling GenerateText() + // on a previously-serialized flatbuffer. The data might have + // non-UTF-8 Strings, or might be corrupt. + // + // In both cases, we have to give up and inform the caller + // they have no JSON. + return false; + } + } else { + if (natural_utf8) { + // utf8 points to past all utf-8 bytes parsed + text.append(s + i, static_cast(utf8 - s - i)); + } else if (ucc <= 0xFFFF) { + // Parses as Unicode within JSON's \uXXXX range, so use that. + text += "\\u"; + text += IntToStringHex(ucc, 4); + } else if (ucc <= 0x10FFFF) { + // Encode Unicode SMP values to a surrogate pair using two \u + // escapes. + uint32_t base = ucc - 0x10000; + auto high_surrogate = (base >> 10) + 0xD800; + auto low_surrogate = (base & 0x03FF) + 0xDC00; + text += "\\u"; + text += IntToStringHex(high_surrogate, 4); + text += "\\u"; + text += IntToStringHex(low_surrogate, 4); + } + // Skip past characters recognized. + i = static_cast(utf8 - s - 1); + } + } + break; + } + } + text += "\""; + return true; +} + +inline std::string BufferToHexText(const void *buffer, size_t buffer_size, + size_t max_length, + const std::string &wrapped_line_prefix, + const std::string &wrapped_line_suffix) { + std::string text = wrapped_line_prefix; + size_t start_offset = 0; + const char *s = reinterpret_cast(buffer); + for (size_t i = 0; s && i < buffer_size; i++) { + // Last iteration or do we have more? + bool have_more = i + 1 < buffer_size; + text += "0x"; + text += IntToStringHex(static_cast(s[i]), 2); + if (have_more) { text += ','; } + // If we have more to process and we reached max_length + if (have_more && + text.size() + wrapped_line_suffix.size() >= start_offset + max_length) { + text += wrapped_line_suffix; + text += '\n'; + start_offset = text.size(); + text += wrapped_line_prefix; + } + } + text += wrapped_line_suffix; + return text; +} + +// Remove paired quotes in a string: "text"|'text' -> text. 
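A round-trip sketch for the UTF-8 helpers and EscapeString defined above; the expected byte values and the function name are illustrative, derived from the comments in this header:

#include <string>
#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_util.h"

// Illustrative round trip: code point -> UTF-8 bytes -> code point.
inline void utf8_sketch() {
  std::string out;
  int n = flatbuffers::ToUTF8(0x00E9, &out);   // U+00E9 -> "\xC3\xA9", n == 2

  const char *p = out.c_str();
  int cp = flatbuffers::FromUTF8(&p);          // cp == 0x00E9, p advanced past both bytes

  // JSON-style escaping: the result is quoted and control characters are escaped.
  std::string text;
  bool ok = flatbuffers::EscapeString("a\tb", 3, &text, false, false);
  // ok == true, text == "\"a\\tb\""
  (void)n; (void)cp; (void)ok;
}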
+std::string RemoveStringQuotes(const std::string &s); + +// Change th global C-locale to locale with name . +// Returns an actual locale name in <_value>, useful if locale_name is "" or +// null. +bool SetGlobalTestLocale(const char *locale_name, + std::string *_value = nullptr); + +// Read (or test) a value of environment variable. +bool ReadEnvironmentVariable(const char *var_name, + std::string *_value = nullptr); + +// MSVC specific: Send all assert reports to STDOUT to prevent CI hangs. +void SetupDefaultCRTReportMode(); + +enum class Case { + kUnknown = 0, + // TheQuickBrownFox + kUpperCamel = 1, + // theQuickBrownFox + kLowerCamel = 2, + // the_quick_brown_fox + kSnake = 3, + // THE_QUICK_BROWN_FOX + kScreamingSnake = 4, + // THEQUICKBROWNFOX + kAllUpper = 5, + // thequickbrownfox + kAllLower = 6, + // the-quick-brown-fox + kDasher = 7, + // THEQuiCKBr_ownFox (or whatever you want, we won't change it) + kKeep = 8, + // the_quick_brown_fox123 (as opposed to the_quick_brown_fox_123) + kSnake2 = 9, +}; + +// Convert the `input` string of case `input_case` to the specified `output_case`. +std::string ConvertCase(const std::string &input, Case output_case, + Case input_case = Case::kSnake); + +} // namespace flatbuffers + +#endif // FLATBUFFERS_UTIL_H_ diff --git a/edgeimpulse/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_vector.h b/edgeimpulse/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_vector.h new file mode 100644 index 0000000..81f583b --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_vector.h @@ -0,0 +1,389 @@ +/* + * Copyright 2021 Google Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FLATBUFFERS_VECTOR_H_ +#define FLATBUFFERS_VECTOR_H_ + +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_base.h" +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_buffer.h" +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_stl_emulation.h" + +namespace flatbuffers { + +struct String; + +// An STL compatible iterator implementation for Vector below, effectively +// calling Get() for every element. 
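The Case enum and ConvertCase declared at the end of fb_util.h above are only prototypes here; their definitions live in the library's util implementation. Assuming that implementation is linked in, the behaviour documented by the enum comments looks like this, with an illustrative function name and expected strings:

#include <string>
#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_util.h"

// Illustrative expectations for ConvertCase (input defaults to snake_case).
inline void convert_case_sketch() {
  using flatbuffers::Case;
  std::string camel = flatbuffers::ConvertCase("the_quick_brown_fox", Case::kUpperCamel);
  // camel == "TheQuickBrownFox"
  std::string shout = flatbuffers::ConvertCase("the_quick_brown_fox", Case::kScreamingSnake);
  // shout == "THE_QUICK_BROWN_FOX"
  (void)camel; (void)shout;
}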
+template struct VectorIterator { + typedef std::random_access_iterator_tag iterator_category; + typedef IT value_type; + typedef ptrdiff_t difference_type; + typedef IT *pointer; + typedef IT &reference; + + VectorIterator(const uint8_t *data, uoffset_t i) + : data_(data + IndirectHelper::element_stride * i) {} + VectorIterator(const VectorIterator &other) : data_(other.data_) {} + VectorIterator() : data_(nullptr) {} + + VectorIterator &operator=(const VectorIterator &other) { + data_ = other.data_; + return *this; + } + + VectorIterator &operator=(VectorIterator &&other) { + data_ = other.data_; + return *this; + } + + bool operator==(const VectorIterator &other) const { + return data_ == other.data_; + } + + bool operator<(const VectorIterator &other) const { + return data_ < other.data_; + } + + bool operator!=(const VectorIterator &other) const { + return data_ != other.data_; + } + + difference_type operator-(const VectorIterator &other) const { + return (data_ - other.data_) / IndirectHelper::element_stride; + } + + // Note: return type is incompatible with the standard + // `reference operator*()`. + IT operator*() const { return IndirectHelper::Read(data_, 0); } + + // Note: return type is incompatible with the standard + // `pointer operator->()`. + IT operator->() const { return IndirectHelper::Read(data_, 0); } + + VectorIterator &operator++() { + data_ += IndirectHelper::element_stride; + return *this; + } + + VectorIterator operator++(int) { + VectorIterator temp(data_, 0); + data_ += IndirectHelper::element_stride; + return temp; + } + + VectorIterator operator+(const uoffset_t &offset) const { + return VectorIterator(data_ + offset * IndirectHelper::element_stride, + 0); + } + + VectorIterator &operator+=(const uoffset_t &offset) { + data_ += offset * IndirectHelper::element_stride; + return *this; + } + + VectorIterator &operator--() { + data_ -= IndirectHelper::element_stride; + return *this; + } + + VectorIterator operator--(int) { + VectorIterator temp(data_, 0); + data_ -= IndirectHelper::element_stride; + return temp; + } + + VectorIterator operator-(const uoffset_t &offset) const { + return VectorIterator(data_ - offset * IndirectHelper::element_stride, + 0); + } + + VectorIterator &operator-=(const uoffset_t &offset) { + data_ -= offset * IndirectHelper::element_stride; + return *this; + } + + private: + const uint8_t *data_; +}; + +template +struct VectorReverseIterator : public std::reverse_iterator { + explicit VectorReverseIterator(Iterator iter) + : std::reverse_iterator(iter) {} + + // Note: return type is incompatible with the standard + // `reference operator*()`. + typename Iterator::value_type operator*() const { + auto tmp = std::reverse_iterator::current; + return *--tmp; + } + + // Note: return type is incompatible with the standard + // `pointer operator->()`. + typename Iterator::value_type operator->() const { + auto tmp = std::reverse_iterator::current; + return *--tmp; + } +}; + +// This is used as a helper type for accessing vectors. +// Vector::data() assumes the vector elements start after the length field. 
+template class Vector { + public: + typedef VectorIterator::mutable_return_type> + iterator; + typedef VectorIterator::return_type> + const_iterator; + typedef VectorReverseIterator reverse_iterator; + typedef VectorReverseIterator const_reverse_iterator; + + typedef typename flatbuffers::bool_constant::value> + scalar_tag; + + static FLATBUFFERS_CONSTEXPR bool is_span_observable = + scalar_tag::value && (FLATBUFFERS_LITTLEENDIAN || sizeof(T) == 1); + + uoffset_t size() const { return EndianScalar(length_); } + + // Deprecated: use size(). Here for backwards compatibility. + FLATBUFFERS_ATTRIBUTE([[deprecated("use size() instead")]]) + uoffset_t Length() const { return size(); } + + typedef typename IndirectHelper::return_type return_type; + typedef typename IndirectHelper::mutable_return_type mutable_return_type; + typedef return_type value_type; + + return_type Get(uoffset_t i) const { + FLATBUFFERS_ASSERT(i < size()); + return IndirectHelper::Read(Data(), i); + } + + return_type operator[](uoffset_t i) const { return Get(i); } + + // If this is a Vector of enums, T will be its storage type, not the enum + // type. This function makes it convenient to retrieve value with enum + // type E. + template E GetEnum(uoffset_t i) const { + return static_cast(Get(i)); + } + + // If this a vector of unions, this does the cast for you. There's no check + // to make sure this is the right type! + template const U *GetAs(uoffset_t i) const { + return reinterpret_cast(Get(i)); + } + + // If this a vector of unions, this does the cast for you. There's no check + // to make sure this is actually a string! + const String *GetAsString(uoffset_t i) const { + return reinterpret_cast(Get(i)); + } + + const void *GetStructFromOffset(size_t o) const { + return reinterpret_cast(Data() + o); + } + + iterator begin() { return iterator(Data(), 0); } + const_iterator begin() const { return const_iterator(Data(), 0); } + + iterator end() { return iterator(Data(), size()); } + const_iterator end() const { return const_iterator(Data(), size()); } + + reverse_iterator rbegin() { return reverse_iterator(end()); } + const_reverse_iterator rbegin() const { + return const_reverse_iterator(end()); + } + + reverse_iterator rend() { return reverse_iterator(begin()); } + const_reverse_iterator rend() const { + return const_reverse_iterator(begin()); + } + + const_iterator cbegin() const { return begin(); } + + const_iterator cend() const { return end(); } + + const_reverse_iterator crbegin() const { return rbegin(); } + + const_reverse_iterator crend() const { return rend(); } + + // Change elements if you have a non-const pointer to this object. + // Scalars only. See reflection.h, and the documentation. + void Mutate(uoffset_t i, const T &val) { + FLATBUFFERS_ASSERT(i < size()); + WriteScalar(data() + i, val); + } + + // Change an element of a vector of tables (or strings). + // "val" points to the new table/string, as you can obtain from + // e.g. reflection::AddFlatBuffer(). + void MutateOffset(uoffset_t i, const uint8_t *val) { + FLATBUFFERS_ASSERT(i < size()); + static_assert(sizeof(T) == sizeof(uoffset_t), "Unrelated types"); + WriteScalar(data() + i, + static_cast(val - (Data() + i * sizeof(uoffset_t)))); + } + + // Get a mutable pointer to tables/strings inside this vector. + mutable_return_type GetMutableObject(uoffset_t i) const { + FLATBUFFERS_ASSERT(i < size()); + return const_cast(IndirectHelper::Read(Data(), i)); + } + + // The raw data in little endian format. Use with care. 
+ const uint8_t *Data() const { + return reinterpret_cast(&length_ + 1); + } + + uint8_t *Data() { return reinterpret_cast(&length_ + 1); } + + // Similarly, but typed, much like std::vector::data + const T *data() const { return reinterpret_cast(Data()); } + T *data() { return reinterpret_cast(Data()); } + + template return_type LookupByKey(K key) const { + void *search_result = std::bsearch( + &key, Data(), size(), IndirectHelper::element_stride, KeyCompare); + + if (!search_result) { + return nullptr; // Key not found. + } + + const uint8_t *element = reinterpret_cast(search_result); + + return IndirectHelper::Read(element, 0); + } + + template mutable_return_type MutableLookupByKey(K key) { + return const_cast(LookupByKey(key)); + } + + protected: + // This class is only used to access pre-existing data. Don't ever + // try to construct these manually. + Vector(); + + uoffset_t length_; + + private: + // This class is a pointer. Copying will therefore create an invalid object. + // Private and unimplemented copy constructor. + Vector(const Vector &); + Vector &operator=(const Vector &); + + template static int KeyCompare(const void *ap, const void *bp) { + const K *key = reinterpret_cast(ap); + const uint8_t *data = reinterpret_cast(bp); + auto table = IndirectHelper::Read(data, 0); + + // std::bsearch compares with the operands transposed, so we negate the + // result here. + return -table->KeyCompareWithValue(*key); + } +}; + +template +FLATBUFFERS_CONSTEXPR_CPP11 flatbuffers::span make_span(Vector &vec) + FLATBUFFERS_NOEXCEPT { + static_assert(Vector::is_span_observable, + "wrong type U, only LE-scalar, or byte types are allowed"); + return span(vec.data(), vec.size()); +} + +template +FLATBUFFERS_CONSTEXPR_CPP11 flatbuffers::span make_span( + const Vector &vec) FLATBUFFERS_NOEXCEPT { + static_assert(Vector::is_span_observable, + "wrong type U, only LE-scalar, or byte types are allowed"); + return span(vec.data(), vec.size()); +} + +template +FLATBUFFERS_CONSTEXPR_CPP11 flatbuffers::span make_bytes_span( + Vector &vec) FLATBUFFERS_NOEXCEPT { + static_assert(Vector::scalar_tag::value, + "wrong type U, only LE-scalar, or byte types are allowed"); + return span(vec.Data(), vec.size() * sizeof(U)); +} + +template +FLATBUFFERS_CONSTEXPR_CPP11 flatbuffers::span make_bytes_span( + const Vector &vec) FLATBUFFERS_NOEXCEPT { + static_assert(Vector::scalar_tag::value, + "wrong type U, only LE-scalar, or byte types are allowed"); + return span(vec.Data(), vec.size() * sizeof(U)); +} + +// Convenient helper functions to get a span of any vector, regardless +// of whether it is null or not (the field is not set). +template +FLATBUFFERS_CONSTEXPR_CPP11 flatbuffers::span make_span(Vector *ptr) + FLATBUFFERS_NOEXCEPT { + static_assert(Vector::is_span_observable, + "wrong type U, only LE-scalar, or byte types are allowed"); + return ptr ? make_span(*ptr) : span(); +} + +template +FLATBUFFERS_CONSTEXPR_CPP11 flatbuffers::span make_span( + const Vector *ptr) FLATBUFFERS_NOEXCEPT { + static_assert(Vector::is_span_observable, + "wrong type U, only LE-scalar, or byte types are allowed"); + return ptr ? make_span(*ptr) : span(); +} + +// Represent a vector much like the template above, but in this case we +// don't know what the element types are (used with reflection.h). 
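A read-only access sketch for Vector<T> as it would appear in user code: instances are only ever obtained from accessors on a deserialized buffer (the constructor above is private). The function name and field semantics are illustrative; the span helper assumes a little-endian target, as the static_assert above requires:

#include <cstdint>
#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_vector.h"

// `values` would come from a generated accessor such as table->values().
inline int32_t sum_values(const flatbuffers::Vector<int32_t> *values) {
  if (!values) return 0;                       // optional field not present
  int32_t total = 0;
  for (flatbuffers::uoffset_t i = 0; i < values->size(); i++) {
    total += values->Get(i);                   // scalar reads are endian-corrected
  }
  auto view = flatbuffers::make_span(*values); // span<const int32_t> over the same storage
  (void)view;
  return total;
}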
+class VectorOfAny { + public: + uoffset_t size() const { return EndianScalar(length_); } + + const uint8_t *Data() const { + return reinterpret_cast(&length_ + 1); + } + uint8_t *Data() { return reinterpret_cast(&length_ + 1); } + + protected: + VectorOfAny(); + + uoffset_t length_; + + private: + VectorOfAny(const VectorOfAny &); + VectorOfAny &operator=(const VectorOfAny &); +}; + +template +Vector> *VectorCast(Vector> *ptr) { + static_assert(std::is_base_of::value, "Unrelated types"); + return reinterpret_cast> *>(ptr); +} + +template +const Vector> *VectorCast(const Vector> *ptr) { + static_assert(std::is_base_of::value, "Unrelated types"); + return reinterpret_cast> *>(ptr); +} + +// Convenient helper function to get the length of any vector, regardless +// of whether it is null or not (the field is not set). +template static inline size_t VectorLength(const Vector *v) { + return v ? v->size() : 0; +} + +} // namespace flatbuffers + +#endif // FLATBUFFERS_VERIFIER_H_ diff --git a/edgeimpulse/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_vector_downward.h b/edgeimpulse/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_vector_downward.h new file mode 100644 index 0000000..6ff86a9 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_vector_downward.h @@ -0,0 +1,271 @@ +/* + * Copyright 2021 Google Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FLATBUFFERS_VECTOR_DOWNWARD_H_ +#define FLATBUFFERS_VECTOR_DOWNWARD_H_ + +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_base.h" +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_default_allocator.h" +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_detached_buffer.h" + +namespace flatbuffers { + +// This is a minimal replication of std::vector functionality, +// except growing from higher to lower addresses. i.e push_back() inserts data +// in the lowest address in the vector. +// Since this vector leaves the lower part unused, we support a "scratch-pad" +// that can be stored there for temporary data, to share the allocated space. +// Essentially, this supports 2 std::vectors in a single buffer. 
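VectorLength above is the null-tolerant way to size an optional vector field, while VectorOfAny and VectorCast are reflection-only helpers. A one-line sketch with an illustrative function name:

#include <cstddef>
#include <cstdint>
#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_vector.h"

// Returns 0 when the field is absent (v == nullptr), v->size() otherwise.
inline size_t payload_size(const flatbuffers::Vector<uint8_t> *v) {
  return flatbuffers::VectorLength(v);
}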
+class vector_downward { + public: + explicit vector_downward(size_t initial_size, Allocator *allocator, + bool own_allocator, size_t buffer_minalign) + : allocator_(allocator), + own_allocator_(own_allocator), + initial_size_(initial_size), + buffer_minalign_(buffer_minalign), + reserved_(0), + size_(0), + buf_(nullptr), + cur_(nullptr), + scratch_(nullptr) {} + + vector_downward(vector_downward &&other) + // clang-format on + : allocator_(other.allocator_), + own_allocator_(other.own_allocator_), + initial_size_(other.initial_size_), + buffer_minalign_(other.buffer_minalign_), + reserved_(other.reserved_), + size_(other.size_), + buf_(other.buf_), + cur_(other.cur_), + scratch_(other.scratch_) { + // No change in other.allocator_ + // No change in other.initial_size_ + // No change in other.buffer_minalign_ + other.own_allocator_ = false; + other.reserved_ = 0; + other.buf_ = nullptr; + other.cur_ = nullptr; + other.scratch_ = nullptr; + } + + vector_downward &operator=(vector_downward &&other) { + // Move construct a temporary and swap idiom + vector_downward temp(std::move(other)); + swap(temp); + return *this; + } + + ~vector_downward() { + clear_buffer(); + clear_allocator(); + } + + void reset() { + clear_buffer(); + clear(); + } + + void clear() { + if (buf_) { + cur_ = buf_ + reserved_; + } else { + reserved_ = 0; + cur_ = nullptr; + } + size_ = 0; + clear_scratch(); + } + + void clear_scratch() { scratch_ = buf_; } + + void clear_allocator() { + if (own_allocator_ && allocator_) { delete allocator_; } + allocator_ = nullptr; + own_allocator_ = false; + } + + void clear_buffer() { + if (buf_) Deallocate(allocator_, buf_, reserved_); + buf_ = nullptr; + } + + // Relinquish the pointer to the caller. + uint8_t *release_raw(size_t &allocated_bytes, size_t &offset) { + auto *buf = buf_; + allocated_bytes = reserved_; + offset = static_cast(cur_ - buf_); + + // release_raw only relinquishes the buffer ownership. + // Does not deallocate or reset the allocator. Destructor will do that. + buf_ = nullptr; + clear(); + return buf; + } + + // Relinquish the pointer to the caller. + DetachedBuffer release() { + // allocator ownership (if any) is transferred to DetachedBuffer. + DetachedBuffer fb(allocator_, own_allocator_, buf_, reserved_, cur_, + size()); + if (own_allocator_) { + allocator_ = nullptr; + own_allocator_ = false; + } + buf_ = nullptr; + clear(); + return fb; + } + + size_t ensure_space(size_t len) { + FLATBUFFERS_ASSERT(cur_ >= scratch_ && scratch_ >= buf_); + if (len > static_cast(cur_ - scratch_)) { reallocate(len); } + // Beyond this, signed offsets may not have enough range: + // (FlatBuffers > 2GB not supported). + FLATBUFFERS_ASSERT(size() < FLATBUFFERS_MAX_BUFFER_SIZE); + return len; + } + + inline uint8_t *make_space(size_t len) { + if (len) { + ensure_space(len); + cur_ -= len; + size_ += static_cast(len); + } + return cur_; + } + + // Returns nullptr if using the DefaultAllocator. 
+ Allocator *get_custom_allocator() { return allocator_; } + + inline uoffset_t size() const { return size_; } + + uoffset_t scratch_size() const { + return static_cast(scratch_ - buf_); + } + + size_t capacity() const { return reserved_; } + + uint8_t *data() const { + FLATBUFFERS_ASSERT(cur_); + return cur_; + } + + uint8_t *scratch_data() const { + FLATBUFFERS_ASSERT(buf_); + return buf_; + } + + uint8_t *scratch_end() const { + FLATBUFFERS_ASSERT(scratch_); + return scratch_; + } + + uint8_t *data_at(size_t offset) const { return buf_ + reserved_ - offset; } + + void push(const uint8_t *bytes, size_t num) { + if (num > 0) { memcpy(make_space(num), bytes, num); } + } + + // Specialized version of push() that avoids memcpy call for small data. + template void push_small(const T &little_endian_t) { + make_space(sizeof(T)); + *reinterpret_cast(cur_) = little_endian_t; + } + + template void scratch_push_small(const T &t) { + ensure_space(sizeof(T)); + *reinterpret_cast(scratch_) = t; + scratch_ += sizeof(T); + } + + // fill() is most frequently called with small byte counts (<= 4), + // which is why we're using loops rather than calling memset. + void fill(size_t zero_pad_bytes) { + make_space(zero_pad_bytes); + for (size_t i = 0; i < zero_pad_bytes; i++) cur_[i] = 0; + } + + // Version for when we know the size is larger. + // Precondition: zero_pad_bytes > 0 + void fill_big(size_t zero_pad_bytes) { + memset(make_space(zero_pad_bytes), 0, zero_pad_bytes); + } + + void pop(size_t bytes_to_remove) { + cur_ += bytes_to_remove; + size_ -= static_cast(bytes_to_remove); + } + + void scratch_pop(size_t bytes_to_remove) { scratch_ -= bytes_to_remove; } + + void swap(vector_downward &other) { + using std::swap; + swap(allocator_, other.allocator_); + swap(own_allocator_, other.own_allocator_); + swap(initial_size_, other.initial_size_); + swap(buffer_minalign_, other.buffer_minalign_); + swap(reserved_, other.reserved_); + swap(size_, other.size_); + swap(buf_, other.buf_); + swap(cur_, other.cur_); + swap(scratch_, other.scratch_); + } + + void swap_allocator(vector_downward &other) { + using std::swap; + swap(allocator_, other.allocator_); + swap(own_allocator_, other.own_allocator_); + } + + private: + // You shouldn't really be copying instances of this class. + FLATBUFFERS_DELETE_FUNC(vector_downward(const vector_downward &)); + FLATBUFFERS_DELETE_FUNC(vector_downward &operator=(const vector_downward &)); + + Allocator *allocator_; + bool own_allocator_; + size_t initial_size_; + size_t buffer_minalign_; + size_t reserved_; + uoffset_t size_; + uint8_t *buf_; + uint8_t *cur_; // Points at location between empty (below) and used (above). + uint8_t *scratch_; // Points to the end of the scratchpad in use. + + void reallocate(size_t len) { + auto old_reserved = reserved_; + auto old_size = size(); + auto old_scratch_size = scratch_size(); + reserved_ += + (std::max)(len, old_reserved ? 
old_reserved / 2 : initial_size_); + reserved_ = (reserved_ + buffer_minalign_ - 1) & ~(buffer_minalign_ - 1); + if (buf_) { + buf_ = ReallocateDownward(allocator_, buf_, old_reserved, reserved_, + old_size, old_scratch_size); + } else { + buf_ = Allocate(allocator_, reserved_); + } + cur_ = buf_ + reserved_ - old_size; + scratch_ = buf_ + old_scratch_size; + } +}; + +} // namespace flatbuffers + +#endif // FLATBUFFERS_VECTOR_DOWNWARD_H_ diff --git a/edgeimpulse/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_verifier.h b/edgeimpulse/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_verifier.h new file mode 100644 index 0000000..5f13e27 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_verifier.h @@ -0,0 +1,304 @@ +/* + * Copyright 2021 Google Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FLATBUFFERS_VERIFIER_H_ +#define FLATBUFFERS_VERIFIER_H_ + +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_base.h" +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_vector.h" + +namespace flatbuffers { + +// Helper class to verify the integrity of a FlatBuffer +class Verifier FLATBUFFERS_FINAL_CLASS { + public: + Verifier(const uint8_t *const buf, const size_t buf_len, + const uoffset_t _max_depth = 64, + const uoffset_t _max_tables = 1000000, + const bool _check_alignment = true) + : buf_(buf), + size_(buf_len), + max_depth_(_max_depth), + max_tables_(_max_tables), + check_alignment_(_check_alignment), + upper_bound_(0), + depth_(0), + num_tables_(0), + flex_reuse_tracker_(nullptr) { + FLATBUFFERS_ASSERT(size_ < FLATBUFFERS_MAX_BUFFER_SIZE); + } + + // Central location where any verification failures register. + bool Check(const bool ok) const { + // clang-format off + #ifdef FLATBUFFERS_DEBUG_VERIFICATION_FAILURE + FLATBUFFERS_ASSERT(ok); + #endif + #ifdef FLATBUFFERS_TRACK_VERIFIER_BUFFER_SIZE + if (!ok) + upper_bound_ = 0; + #endif + // clang-format on + return ok; + } + + // Verify any range within the buffer. + bool Verify(const size_t elem, const size_t elem_len) const { + // clang-format off + #ifdef FLATBUFFERS_TRACK_VERIFIER_BUFFER_SIZE + auto upper_bound = elem + elem_len; + if (upper_bound_ < upper_bound) + upper_bound_ = upper_bound; + #endif + // clang-format on + return Check(elem_len < size_ && elem <= size_ - elem_len); + } + + bool VerifyAlignment(const size_t elem, const size_t align) const { + return Check((elem & (align - 1)) == 0 || !check_alignment_); + } + + // Verify a range indicated by sizeof(T). + template bool Verify(const size_t elem) const { + return VerifyAlignment(elem, sizeof(T)) && Verify(elem, sizeof(T)); + } + + bool VerifyFromPointer(const uint8_t *const p, const size_t len) { + return Verify(static_cast(p - buf_), len); + } + + // Verify relative to a known-good base pointer. 
+ bool VerifyFieldStruct(const uint8_t *const base, const voffset_t elem_off, + const size_t elem_len, const size_t align) const { + const auto f = static_cast(base - buf_) + elem_off; + return VerifyAlignment(f, align) && Verify(f, elem_len); + } + + template + bool VerifyField(const uint8_t *const base, const voffset_t elem_off, + const size_t align) const { + const auto f = static_cast(base - buf_) + elem_off; + return VerifyAlignment(f, align) && Verify(f, sizeof(T)); + } + + // Verify a pointer (may be NULL) of a table type. + template bool VerifyTable(const T *const table) { + return !table || table->Verify(*this); + } + + // Verify a pointer (may be NULL) of any vector type. + template bool VerifyVector(const Vector *const vec) const { + return !vec || VerifyVectorOrString(reinterpret_cast(vec), + sizeof(T)); + } + + // Verify a pointer (may be NULL) of a vector to struct. + template + bool VerifyVector(const Vector *const vec) const { + return VerifyVector(reinterpret_cast *>(vec)); + } + + // Verify a pointer (may be NULL) to string. + bool VerifyString(const String *const str) const { + size_t end; + return !str || (VerifyVectorOrString(reinterpret_cast(str), + 1, &end) && + Verify(end, 1) && // Must have terminator + Check(buf_[end] == '\0')); // Terminating byte must be 0. + } + + // Common code between vectors and strings. + bool VerifyVectorOrString(const uint8_t *const vec, const size_t elem_size, + size_t *const end = nullptr) const { + const auto veco = static_cast(vec - buf_); + // Check we can read the size field. + if (!Verify(veco)) return false; + // Check the whole array. If this is a string, the byte past the array + // must be 0. + const auto size = ReadScalar(vec); + const auto max_elems = FLATBUFFERS_MAX_BUFFER_SIZE / elem_size; + if (!Check(size < max_elems)) + return false; // Protect against byte_size overflowing. + const auto byte_size = sizeof(size) + elem_size * size; + if (end) *end = veco + byte_size; + return Verify(veco, byte_size); + } + + // Special case for string contents, after the above has been called. + bool VerifyVectorOfStrings(const Vector> *const vec) const { + if (vec) { + for (uoffset_t i = 0; i < vec->size(); i++) { + if (!VerifyString(vec->Get(i))) return false; + } + } + return true; + } + + // Special case for table contents, after the above has been called. + template + bool VerifyVectorOfTables(const Vector> *const vec) { + if (vec) { + for (uoffset_t i = 0; i < vec->size(); i++) { + if (!vec->Get(i)->Verify(*this)) return false; + } + } + return true; + } + + __supress_ubsan__("unsigned-integer-overflow") bool VerifyTableStart( + const uint8_t *const table) { + // Check the vtable offset. + const auto tableo = static_cast(table - buf_); + if (!Verify(tableo)) return false; + // This offset may be signed, but doing the subtraction unsigned always + // gives the result we want. + const auto vtableo = + tableo - static_cast(ReadScalar(table)); + // Check the vtable size field, then check vtable fits in its entirety. + if (!(VerifyComplexity() && Verify(vtableo) && + VerifyAlignment(ReadScalar(buf_ + vtableo), + sizeof(voffset_t)))) + return false; + const auto vsize = ReadScalar(buf_ + vtableo); + return Check((vsize & 1) == 0) && Verify(vtableo, vsize); + } + + template + bool VerifyBufferFromStart(const char *const identifier, const size_t start) { + // Buffers have to be of some size to be valid. 
The reason it is a runtime + // check instead of static_assert, is that nested flatbuffers go through + // this call and their size is determined at runtime. + if (!Check(size_ >= FLATBUFFERS_MIN_BUFFER_SIZE)) return false; + + // If an identifier is provided, check that we have a buffer + if (identifier && !Check((size_ >= 2 * sizeof(flatbuffers::uoffset_t) && + BufferHasIdentifier(buf_ + start, identifier)))) { + return false; + } + + // Call T::Verify, which must be in the generated code for this type. + const auto o = VerifyOffset(start); + return Check(o != 0) && + reinterpret_cast(buf_ + start + o)->Verify(*this) + // clang-format off + #ifdef FLATBUFFERS_TRACK_VERIFIER_BUFFER_SIZE + && GetComputedSize() + #endif + ; + // clang-format on + } + + template + bool VerifyNestedFlatBuffer(const Vector *const buf, + const char *const identifier) { + // An empty buffer is OK as it indicates not present. + if (!buf) return true; + + // If there is a nested buffer, it must be greater than the min size. + if(!Check(buf->size() >= FLATBUFFERS_MIN_BUFFER_SIZE)) return false; + + Verifier nested_verifier(buf->data(), buf->size()); + return nested_verifier.VerifyBuffer(identifier); + } + + // Verify this whole buffer, starting with root type T. + template bool VerifyBuffer() { return VerifyBuffer(nullptr); } + + template bool VerifyBuffer(const char *const identifier) { + return VerifyBufferFromStart(identifier, 0); + } + + template + bool VerifySizePrefixedBuffer(const char *const identifier) { + return Verify(0U) && + Check(ReadScalar(buf_) == size_ - sizeof(uoffset_t)) && + VerifyBufferFromStart(identifier, sizeof(uoffset_t)); + } + + uoffset_t VerifyOffset(const size_t start) const { + if (!Verify(start)) return 0; + const auto o = ReadScalar(buf_ + start); + // May not point to itself. + if (!Check(o != 0)) return 0; + // Can't wrap around / buffers are max 2GB. + if (!Check(static_cast(o) >= 0)) return 0; + // Must be inside the buffer to create a pointer from it (pointer outside + // buffer is UB). + if (!Verify(start + o, 1)) return 0; + return o; + } + + uoffset_t VerifyOffset(const uint8_t *const base, + const voffset_t start) const { + return VerifyOffset(static_cast(base - buf_) + start); + } + + // Called at the start of a table to increase counters measuring data + // structure depth and amount, and possibly bails out with false if + // limits set by the constructor have been hit. Needs to be balanced + // with EndTable(). + bool VerifyComplexity() { + depth_++; + num_tables_++; + return Check(depth_ <= max_depth_ && num_tables_ <= max_tables_); + } + + // Called at the end of a table to pop the depth count. + bool EndTable() { + depth_--; + return true; + } + + // Returns the message size in bytes + size_t GetComputedSize() const { + // clang-format off + #ifdef FLATBUFFERS_TRACK_VERIFIER_BUFFER_SIZE + uintptr_t size = upper_bound_; + // Align the size to uoffset_t + size = (size - 1 + sizeof(uoffset_t)) & ~(sizeof(uoffset_t) - 1); + return (size > size_) ? 0 : size; + #else + // Must turn on FLATBUFFERS_TRACK_VERIFIER_BUFFER_SIZE for this to work. 
+ (void)upper_bound_; + FLATBUFFERS_ASSERT(false); + return 0; + #endif + // clang-format on + } + + std::vector *GetFlexReuseTracker() { return flex_reuse_tracker_; } + + void SetFlexReuseTracker(std::vector *const rt) { + flex_reuse_tracker_ = rt; + } + + private: + const uint8_t *buf_; + const size_t size_; + const uoffset_t max_depth_; + const uoffset_t max_tables_; + const bool check_alignment_; + + mutable size_t upper_bound_; + + uoffset_t depth_; + uoffset_t num_tables_; + std::vector *flex_reuse_tracker_; +}; + +} // namespace flatbuffers + +#endif // FLATBUFFERS_VERIFIER_H_ diff --git a/edgeimpulse/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/flatbuffers.h b/edgeimpulse/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/flatbuffers.h new file mode 100644 index 0000000..051a0ed --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/flatbuffers.h @@ -0,0 +1,270 @@ +/* + * Copyright 2014 Google Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FLATBUFFERS_H_ +#define FLATBUFFERS_H_ + +// TODO: These includes are for mitigating the pains of users editing their +// source because they relied on flatbuffers.h to include everything for them. +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_array.h" +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_base.h" +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_buffer.h" +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_buffer_ref.h" +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_detached_buffer.h" +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_flatbuffer_builder.h" +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_stl_emulation.h" +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_string.h" +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_struct.h" +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_table.h" +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_vector.h" +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_vector_downward.h" +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_verifier.h" + +namespace flatbuffers { + +/// @brief This can compute the start of a FlatBuffer from a root pointer, i.e. +/// it is the opposite transformation of GetRoot(). +/// This may be useful if you want to pass on a root and have the recipient +/// delete the buffer afterwards. +inline const uint8_t *GetBufferStartFromRootPointer(const void *root) { + auto table = reinterpret_cast(root); + auto vtable = table->GetVTable(); + // Either the vtable is before the root or after the root. + auto start = (std::min)(vtable, reinterpret_cast(root)); + // Align to at least sizeof(uoffset_t). 
+ start = reinterpret_cast(reinterpret_cast(start) & + ~(sizeof(uoffset_t) - 1)); + // Additionally, there may be a file_identifier in the buffer, and the root + // offset. The buffer may have been aligned to any size between + // sizeof(uoffset_t) and FLATBUFFERS_MAX_ALIGNMENT (see "force_align"). + // Sadly, the exact alignment is only known when constructing the buffer, + // since it depends on the presence of values with said alignment properties. + // So instead, we simply look at the next uoffset_t values (root, + // file_identifier, and alignment padding) to see which points to the root. + // None of the other values can "impersonate" the root since they will either + // be 0 or four ASCII characters. + static_assert(flatbuffers::kFileIdentifierLength == sizeof(uoffset_t), + "file_identifier is assumed to be the same size as uoffset_t"); + for (auto possible_roots = FLATBUFFERS_MAX_ALIGNMENT / sizeof(uoffset_t) + 1; + possible_roots; possible_roots--) { + start -= sizeof(uoffset_t); + if (ReadScalar(start) + start == + reinterpret_cast(root)) + return start; + } + // We didn't find the root, either the "root" passed isn't really a root, + // or the buffer is corrupt. + // Assert, because calling this function with bad data may cause reads + // outside of buffer boundaries. + FLATBUFFERS_ASSERT(false); + return nullptr; +} + +/// @brief This return the prefixed size of a FlatBuffer. +inline uoffset_t GetPrefixedSize(const uint8_t *buf) { + return ReadScalar(buf); +} + +// Base class for native objects (FlatBuffer data de-serialized into native +// C++ data structures). +// Contains no functionality, purely documentative. +struct NativeTable {}; + +/// @brief Function types to be used with resolving hashes into objects and +/// back again. The resolver gets a pointer to a field inside an object API +/// object that is of the type specified in the schema using the attribute +/// `cpp_type` (it is thus important whatever you write to this address +/// matches that type). The value of this field is initially null, so you +/// may choose to implement a delayed binding lookup using this function +/// if you wish. The resolver does the opposite lookup, for when the object +/// is being serialized again. +typedef uint64_t hash_value_t; +typedef std::function + resolver_function_t; +typedef std::function rehasher_function_t; + +// Helper function to test if a field is present, using any of the field +// enums in the generated code. +// `table` must be a generated table type. Since this is a template parameter, +// this is not typechecked to be a subclass of Table, so beware! +// Note: this function will return false for fields equal to the default +// value, since they're not stored in the buffer (unless force_defaults was +// used). +template +bool IsFieldPresent(const T *table, typename T::FlatBuffersVTableOffset field) { + // Cast, since Table is a private baseclass of any table types. + return reinterpret_cast(table)->CheckField( + static_cast(field)); +} + +// Utility function for reverse lookups on the EnumNames*() functions +// (in the generated C++ code) +// names must be NULL terminated. +inline int LookupEnum(const char **names, const char *name) { + for (const char **p = names; *p; p++) + if (!strcmp(*p, name)) return static_cast(p - names); + return -1; +} + +// These macros allow us to layout a struct with a guarantee that they'll end +// up looking the same on different compilers and platforms. 
+// It does this by disallowing the compiler to do any padding, and then +// does padding itself by inserting extra padding fields that make every +// element aligned to its own size. +// Additionally, it manually sets the alignment of the struct as a whole, +// which is typically its largest element, or a custom size set in the schema +// by the force_align attribute. +// These are used in the generated code only. + +// clang-format off +#if defined(_MSC_VER) + #define FLATBUFFERS_MANUALLY_ALIGNED_STRUCT(alignment) \ + __pragma(pack(1)) \ + struct __declspec(align(alignment)) + #define FLATBUFFERS_STRUCT_END(name, size) \ + __pragma(pack()) \ + static_assert(sizeof(name) == size, "compiler breaks packing rules") +#elif defined(__GNUC__) || defined(__clang__) || defined(__ICCARM__) + #define FLATBUFFERS_MANUALLY_ALIGNED_STRUCT(alignment) \ + _Pragma("pack(1)") \ + struct __attribute__((aligned(alignment))) + #define FLATBUFFERS_STRUCT_END(name, size) \ + _Pragma("pack()") \ + static_assert(sizeof(name) == size, "compiler breaks packing rules") +#else + #error Unknown compiler, please define structure alignment macros +#endif +// clang-format on + +// Minimal reflection via code generation. +// Besides full-fat reflection (see reflection.h) and parsing/printing by +// loading schemas (see idl.h), we can also have code generation for minimal +// reflection data which allows pretty-printing and other uses without needing +// a schema or a parser. +// Generate code with --reflect-types (types only) or --reflect-names (names +// also) to enable. +// See minireflect.h for utilities using this functionality. + +// These types are organized slightly differently as the ones in idl.h. +enum SequenceType { ST_TABLE, ST_STRUCT, ST_UNION, ST_ENUM }; + +// Scalars have the same order as in idl.h +// clang-format off +#define FLATBUFFERS_GEN_ELEMENTARY_TYPES(ET) \ + ET(ET_UTYPE) \ + ET(ET_BOOL) \ + ET(ET_CHAR) \ + ET(ET_UCHAR) \ + ET(ET_SHORT) \ + ET(ET_USHORT) \ + ET(ET_INT) \ + ET(ET_UINT) \ + ET(ET_LONG) \ + ET(ET_ULONG) \ + ET(ET_FLOAT) \ + ET(ET_DOUBLE) \ + ET(ET_STRING) \ + ET(ET_SEQUENCE) // See SequenceType. + +enum ElementaryType { + #define FLATBUFFERS_ET(E) E, + FLATBUFFERS_GEN_ELEMENTARY_TYPES(FLATBUFFERS_ET) + #undef FLATBUFFERS_ET +}; + +inline const char * const *ElementaryTypeNames() { + static const char * const names[] = { + #define FLATBUFFERS_ET(E) #E, + FLATBUFFERS_GEN_ELEMENTARY_TYPES(FLATBUFFERS_ET) + #undef FLATBUFFERS_ET + }; + return names; +} +// clang-format on + +// Basic type info cost just 16bits per field! +// We're explicitly defining the signedness since the signedness of integer +// bitfields is otherwise implementation-defined and causes warnings on older +// GCC compilers. +struct TypeCode { + // ElementaryType + unsigned short base_type : 4; + // Either vector (in table) or array (in struct) + unsigned short is_repeating : 1; + // Index into type_refs below, or -1 for none. + signed short sequence_ref : 11; +}; + +static_assert(sizeof(TypeCode) == 2, "TypeCode"); + +struct TypeTable; + +// Signature of the static method present in each type. +typedef const TypeTable *(*TypeFunction)(); + +struct TypeTable { + SequenceType st; + size_t num_elems; // of type_codes, values, names (but not type_refs). + const TypeCode *type_codes; // num_elems count + const TypeFunction *type_refs; // less than num_elems entries (see TypeCode). + const int16_t *array_sizes; // less than num_elems entries (see TypeCode). 
+ const int64_t *values; // Only set for non-consecutive enum/union or structs. + const char *const *names; // Only set if compiled with --reflect-names. +}; + +// String which identifies the current version of FlatBuffers. +inline const char *flatbuffers_version_string() { + return "FlatBuffers " FLATBUFFERS_STRING(FLATBUFFERS_VERSION_MAJOR) "." + FLATBUFFERS_STRING(FLATBUFFERS_VERSION_MINOR) "." + FLATBUFFERS_STRING(FLATBUFFERS_VERSION_REVISION); +} + +// clang-format off +#define FLATBUFFERS_DEFINE_BITMASK_OPERATORS(E, T)\ + inline E operator | (E lhs, E rhs){\ + return E(T(lhs) | T(rhs));\ + }\ + inline E operator & (E lhs, E rhs){\ + return E(T(lhs) & T(rhs));\ + }\ + inline E operator ^ (E lhs, E rhs){\ + return E(T(lhs) ^ T(rhs));\ + }\ + inline E operator ~ (E lhs){\ + return E(~T(lhs));\ + }\ + inline E operator |= (E &lhs, E rhs){\ + lhs = lhs | rhs;\ + return lhs;\ + }\ + inline E operator &= (E &lhs, E rhs){\ + lhs = lhs & rhs;\ + return lhs;\ + }\ + inline E operator ^= (E &lhs, E rhs){\ + lhs = lhs ^ rhs;\ + return lhs;\ + }\ + inline bool operator !(E rhs) \ + {\ + return !bool(T(rhs)); \ + } +/// @endcond +} // namespace flatbuffers + +// clang-format on + +#endif // FLATBUFFERS_H_ diff --git a/edgeimpulse/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/flexbuffers.h b/edgeimpulse/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/flexbuffers.h new file mode 100644 index 0000000..7930949 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/flexbuffers.h @@ -0,0 +1,1903 @@ +/* + * Copyright 2017 Google Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FLATBUFFERS_FLEXBUFFERS_H_ +#define FLATBUFFERS_FLEXBUFFERS_H_ + +#include +// Used to select STL variant. +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_base.h" +// We use the basic binary writing functions from the regular FlatBuffers. +#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_util.h" + +#ifdef _MSC_VER +# include +#endif + +#if defined(_MSC_VER) +# pragma warning(push) +# pragma warning(disable : 4127) // C4127: conditional expression is constant +#endif + +namespace flexbuffers { + +class Reference; +class Map; + +// These are used in the lower 2 bits of a type field to determine the size of +// the elements (and or size field) of the item pointed to (e.g. vector). +enum BitWidth { + BIT_WIDTH_8 = 0, + BIT_WIDTH_16 = 1, + BIT_WIDTH_32 = 2, + BIT_WIDTH_64 = 3, +}; + +// These are used as the upper 6 bits of a type field to indicate the actual +// type. +enum Type { + FBT_NULL = 0, + FBT_INT = 1, + FBT_UINT = 2, + FBT_FLOAT = 3, + // Types above stored inline, types below (except FBT_BOOL) store an offset. + FBT_KEY = 4, + FBT_STRING = 5, + FBT_INDIRECT_INT = 6, + FBT_INDIRECT_UINT = 7, + FBT_INDIRECT_FLOAT = 8, + FBT_MAP = 9, + FBT_VECTOR = 10, // Untyped. + FBT_VECTOR_INT = 11, // Typed any size (stores no type table). 
+ FBT_VECTOR_UINT = 12, + FBT_VECTOR_FLOAT = 13, + FBT_VECTOR_KEY = 14, + // DEPRECATED, use FBT_VECTOR or FBT_VECTOR_KEY instead. + // Read test.cpp/FlexBuffersDeprecatedTest() for details on why. + FBT_VECTOR_STRING_DEPRECATED = 15, + FBT_VECTOR_INT2 = 16, // Typed tuple (no type table, no size field). + FBT_VECTOR_UINT2 = 17, + FBT_VECTOR_FLOAT2 = 18, + FBT_VECTOR_INT3 = 19, // Typed triple (no type table, no size field). + FBT_VECTOR_UINT3 = 20, + FBT_VECTOR_FLOAT3 = 21, + FBT_VECTOR_INT4 = 22, // Typed quad (no type table, no size field). + FBT_VECTOR_UINT4 = 23, + FBT_VECTOR_FLOAT4 = 24, + FBT_BLOB = 25, + FBT_BOOL = 26, + FBT_VECTOR_BOOL = + 36, // To Allow the same type of conversion of type to vector type + + FBT_MAX_TYPE = 37 +}; + +inline bool IsInline(Type t) { return t <= FBT_FLOAT || t == FBT_BOOL; } + +inline bool IsTypedVectorElementType(Type t) { + return (t >= FBT_INT && t <= FBT_STRING) || t == FBT_BOOL; +} + +inline bool IsTypedVector(Type t) { + return (t >= FBT_VECTOR_INT && t <= FBT_VECTOR_STRING_DEPRECATED) || + t == FBT_VECTOR_BOOL; +} + +inline bool IsFixedTypedVector(Type t) { + return t >= FBT_VECTOR_INT2 && t <= FBT_VECTOR_FLOAT4; +} + +inline Type ToTypedVector(Type t, size_t fixed_len = 0) { + FLATBUFFERS_ASSERT(IsTypedVectorElementType(t)); + switch (fixed_len) { + case 0: return static_cast(t - FBT_INT + FBT_VECTOR_INT); + case 2: return static_cast(t - FBT_INT + FBT_VECTOR_INT2); + case 3: return static_cast(t - FBT_INT + FBT_VECTOR_INT3); + case 4: return static_cast(t - FBT_INT + FBT_VECTOR_INT4); + default: FLATBUFFERS_ASSERT(0); return FBT_NULL; + } +} + +inline Type ToTypedVectorElementType(Type t) { + FLATBUFFERS_ASSERT(IsTypedVector(t)); + return static_cast(t - FBT_VECTOR_INT + FBT_INT); +} + +inline Type ToFixedTypedVectorElementType(Type t, uint8_t *len) { + FLATBUFFERS_ASSERT(IsFixedTypedVector(t)); + auto fixed_type = t - FBT_VECTOR_INT2; + *len = static_cast(fixed_type / 3 + + 2); // 3 types each, starting from length 2. + return static_cast(fixed_type % 3 + FBT_INT); +} + +// TODO: implement proper support for 8/16bit floats, or decide not to +// support them. +typedef int16_t half; +typedef int8_t quarter; + +// TODO: can we do this without conditionals using intrinsics or inline asm +// on some platforms? Given branch prediction the method below should be +// decently quick, but it is the most frequently executed function. +// We could do an (unaligned) 64-bit read if we ifdef out the platforms for +// which that doesn't work (or where we'd read into un-owned memory). +template +R ReadSizedScalar(const uint8_t *data, uint8_t byte_width) { + return byte_width < 4 + ? (byte_width < 2 + ? static_cast(flatbuffers::ReadScalar(data)) + : static_cast(flatbuffers::ReadScalar(data))) + : (byte_width < 8 + ? static_cast(flatbuffers::ReadScalar(data)) + : static_cast(flatbuffers::ReadScalar(data))); +} + +inline int64_t ReadInt64(const uint8_t *data, uint8_t byte_width) { + return ReadSizedScalar( + data, byte_width); +} + +inline uint64_t ReadUInt64(const uint8_t *data, uint8_t byte_width) { + // This is the "hottest" function (all offset lookups use this), so worth + // optimizing if possible. + // TODO: GCC apparently replaces memcpy by a rep movsb, but only if count is a + // constant, which here it isn't. Test if memcpy is still faster than + // the conditionals in ReadSizedScalar. Can also use inline asm. 
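+
+  // Illustrative spot checks (a sketch added for clarity, not upstream
+  // FlexBuffers code) of the typed-vector tag arithmetic defined above:
+  //
+  //   Type vt = ToTypedVector(FBT_FLOAT, 3);
+  //   // vt == FBT_VECTOR_FLOAT3, since 3 (FBT_FLOAT) - 1 (FBT_INT) + 19 (FBT_VECTOR_INT3) == 21.
+  //   uint8_t len = 0;
+  //   Type et = ToFixedTypedVectorElementType(FBT_VECTOR_FLOAT3, &len);
+  //   // et == FBT_FLOAT and len == 3: (21 - 16) / 3 + 2 == 3 and (21 - 16) % 3 + 1 == 3.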
+ + // clang-format off + #if defined(_MSC_VER) && defined(_M_X64) && !defined(_M_ARM64EC) + // This is 64-bit Windows only, __movsb does not work on 32-bit Windows. + uint64_t u = 0; + __movsb(reinterpret_cast(&u), + reinterpret_cast(data), byte_width); + return flatbuffers::EndianScalar(u); + #else + return ReadSizedScalar( + data, byte_width); + #endif + // clang-format on +} + +inline double ReadDouble(const uint8_t *data, uint8_t byte_width) { + return ReadSizedScalar(data, + byte_width); +} + +inline const uint8_t *Indirect(const uint8_t *offset, uint8_t byte_width) { + return offset - ReadUInt64(offset, byte_width); +} + +template const uint8_t *Indirect(const uint8_t *offset) { + return offset - flatbuffers::ReadScalar(offset); +} + +inline BitWidth WidthU(uint64_t u) { +#define FLATBUFFERS_GET_FIELD_BIT_WIDTH(value, width) \ + { \ + if (!((u) & ~((1ULL << (width)) - 1ULL))) return BIT_WIDTH_##width; \ + } + FLATBUFFERS_GET_FIELD_BIT_WIDTH(u, 8); + FLATBUFFERS_GET_FIELD_BIT_WIDTH(u, 16); + FLATBUFFERS_GET_FIELD_BIT_WIDTH(u, 32); +#undef FLATBUFFERS_GET_FIELD_BIT_WIDTH + return BIT_WIDTH_64; +} + +inline BitWidth WidthI(int64_t i) { + auto u = static_cast(i) << 1; + return WidthU(i >= 0 ? u : ~u); +} + +inline BitWidth WidthF(double f) { + return static_cast(static_cast(f)) == f ? BIT_WIDTH_32 + : BIT_WIDTH_64; +} + +// Base class of all types below. +// Points into the data buffer and allows access to one type. +class Object { + public: + Object(const uint8_t *data, uint8_t byte_width) + : data_(data), byte_width_(byte_width) {} + + protected: + const uint8_t *data_; + uint8_t byte_width_; +}; + +// Object that has a size, obtained either from size prefix, or elsewhere. +class Sized : public Object { + public: + // Size prefix. + Sized(const uint8_t *data, uint8_t byte_width) + : Object(data, byte_width), size_(read_size()) {} + // Manual size. + Sized(const uint8_t *data, uint8_t byte_width, size_t sz) + : Object(data, byte_width), size_(sz) {} + size_t size() const { return size_; } + // Access size stored in `byte_width_` bytes before data_ pointer. + size_t read_size() const { + return static_cast(ReadUInt64(data_ - byte_width_, byte_width_)); + } + + protected: + size_t size_; +}; + +class String : public Sized { + public: + // Size prefix. + String(const uint8_t *data, uint8_t byte_width) : Sized(data, byte_width) {} + // Manual size. 
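+
+  // Illustrative spot checks (a sketch added for clarity, not upstream code)
+  // for the Width* helpers defined earlier in this header:
+  //
+  //   WidthU(255)  == BIT_WIDTH_8    // fits in one byte
+  //   WidthU(256)  == BIT_WIDTH_16   // needs two bytes
+  //   WidthI(-1)   == BIT_WIDTH_8    // WidthI() applies a zig-zag style test to signed values
+  //   WidthI(-129) == BIT_WIDTH_16   // just outside the int8_t range
+  //   WidthF(1.5)  == BIT_WIDTH_32   // exactly representable as a float
+  //   WidthF(0.1)  == BIT_WIDTH_64   // not exactly representable as a float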
+ String(const uint8_t *data, uint8_t byte_width, size_t sz) + : Sized(data, byte_width, sz) {} + + size_t length() const { return size(); } + const char *c_str() const { return reinterpret_cast(data_); } + std::string str() const { return std::string(c_str(), size()); } + + static String EmptyString() { + static const char *empty_string = ""; + return String(reinterpret_cast(empty_string), 1, 0); + } + bool IsTheEmptyString() const { return data_ == EmptyString().data_; } +}; + +class Blob : public Sized { + public: + Blob(const uint8_t *data_buf, uint8_t byte_width) + : Sized(data_buf, byte_width) {} + + static Blob EmptyBlob() { + static const uint8_t empty_blob[] = { 0 /*len*/ }; + return Blob(empty_blob + 1, 1); + } + bool IsTheEmptyBlob() const { return data_ == EmptyBlob().data_; } + const uint8_t *data() const { return data_; } +}; + +class Vector : public Sized { + public: + Vector(const uint8_t *data, uint8_t byte_width) : Sized(data, byte_width) {} + + Reference operator[](size_t i) const; + + static Vector EmptyVector() { + static const uint8_t empty_vector[] = { 0 /*len*/ }; + return Vector(empty_vector + 1, 1); + } + bool IsTheEmptyVector() const { return data_ == EmptyVector().data_; } +}; + +class TypedVector : public Sized { + public: + TypedVector(const uint8_t *data, uint8_t byte_width, Type element_type) + : Sized(data, byte_width), type_(element_type) {} + + Reference operator[](size_t i) const; + + static TypedVector EmptyTypedVector() { + static const uint8_t empty_typed_vector[] = { 0 /*len*/ }; + return TypedVector(empty_typed_vector + 1, 1, FBT_INT); + } + bool IsTheEmptyVector() const { + return data_ == TypedVector::EmptyTypedVector().data_; + } + + Type ElementType() { return type_; } + + friend Reference; + + private: + Type type_; + + friend Map; +}; + +class FixedTypedVector : public Object { + public: + FixedTypedVector(const uint8_t *data, uint8_t byte_width, Type element_type, + uint8_t len) + : Object(data, byte_width), type_(element_type), len_(len) {} + + Reference operator[](size_t i) const; + + static FixedTypedVector EmptyFixedTypedVector() { + static const uint8_t fixed_empty_vector[] = { 0 /* unused */ }; + return FixedTypedVector(fixed_empty_vector, 1, FBT_INT, 0); + } + bool IsTheEmptyFixedTypedVector() const { + return data_ == FixedTypedVector::EmptyFixedTypedVector().data_; + } + + Type ElementType() const { return type_; } + uint8_t size() const { return len_; } + + private: + Type type_; + uint8_t len_; +}; + +class Map : public Vector { + public: + Map(const uint8_t *data, uint8_t byte_width) : Vector(data, byte_width) {} + + Reference operator[](const char *key) const; + Reference operator[](const std::string &key) const; + + Vector Values() const { return Vector(data_, byte_width_); } + + TypedVector Keys() const { + const size_t num_prefixed_fields = 3; + auto keys_offset = data_ - byte_width_ * num_prefixed_fields; + return TypedVector(Indirect(keys_offset, byte_width_), + static_cast( + ReadUInt64(keys_offset + byte_width_, byte_width_)), + FBT_KEY); + } + + static Map EmptyMap() { + static const uint8_t empty_map[] = { + 0 /*keys_len*/, 0 /*keys_offset*/, 1 /*keys_width*/, 0 /*len*/ + }; + return Map(empty_map + 4, 1); + } + + bool IsTheEmptyMap() const { return data_ == EmptyMap().data_; } +}; + +template +void AppendToString(std::string &s, T &&v, bool keys_quoted) { + s += "[ "; + for (size_t i = 0; i < v.size(); i++) { + if (i) s += ", "; + v[i].ToString(true, keys_quoted, s); + } + s += " ]"; +} + +class Reference { + public: + 
Reference() + : data_(nullptr), parent_width_(0), byte_width_(0), type_(FBT_NULL) {} + + Reference(const uint8_t *data, uint8_t parent_width, uint8_t byte_width, + Type type) + : data_(data), + parent_width_(parent_width), + byte_width_(byte_width), + type_(type) {} + + Reference(const uint8_t *data, uint8_t parent_width, uint8_t packed_type) + : data_(data), parent_width_(parent_width) { + byte_width_ = 1U << static_cast(packed_type & 3); + type_ = static_cast(packed_type >> 2); + } + + Type GetType() const { return type_; } + + bool IsNull() const { return type_ == FBT_NULL; } + bool IsBool() const { return type_ == FBT_BOOL; } + bool IsInt() const { return type_ == FBT_INT || type_ == FBT_INDIRECT_INT; } + bool IsUInt() const { + return type_ == FBT_UINT || type_ == FBT_INDIRECT_UINT; + } + bool IsIntOrUint() const { return IsInt() || IsUInt(); } + bool IsFloat() const { + return type_ == FBT_FLOAT || type_ == FBT_INDIRECT_FLOAT; + } + bool IsNumeric() const { return IsIntOrUint() || IsFloat(); } + bool IsString() const { return type_ == FBT_STRING; } + bool IsKey() const { return type_ == FBT_KEY; } + bool IsVector() const { return type_ == FBT_VECTOR || type_ == FBT_MAP; } + bool IsUntypedVector() const { return type_ == FBT_VECTOR; } + bool IsTypedVector() const { return flexbuffers::IsTypedVector(type_); } + bool IsFixedTypedVector() const { + return flexbuffers::IsFixedTypedVector(type_); + } + bool IsAnyVector() const { + return (IsTypedVector() || IsFixedTypedVector() || IsVector()); + } + bool IsMap() const { return type_ == FBT_MAP; } + bool IsBlob() const { return type_ == FBT_BLOB; } + bool AsBool() const { + return (type_ == FBT_BOOL ? ReadUInt64(data_, parent_width_) + : AsUInt64()) != 0; + } + + // Reads any type as a int64_t. Never fails, does most sensible conversion. + // Truncates floats, strings are attempted to be parsed for a number, + // vectors/maps return their size. Returns 0 if all else fails. + int64_t AsInt64() const { + if (type_ == FBT_INT) { + // A fast path for the common case. + return ReadInt64(data_, parent_width_); + } else + switch (type_) { + case FBT_INDIRECT_INT: return ReadInt64(Indirect(), byte_width_); + case FBT_UINT: return ReadUInt64(data_, parent_width_); + case FBT_INDIRECT_UINT: return ReadUInt64(Indirect(), byte_width_); + case FBT_FLOAT: + return static_cast(ReadDouble(data_, parent_width_)); + case FBT_INDIRECT_FLOAT: + return static_cast(ReadDouble(Indirect(), byte_width_)); + case FBT_NULL: return 0; + case FBT_STRING: return flatbuffers::StringToInt(AsString().c_str()); + case FBT_VECTOR: return static_cast(AsVector().size()); + case FBT_BOOL: return ReadInt64(data_, parent_width_); + default: + // Convert other things to int. + return 0; + } + } + + // TODO: could specialize these to not use AsInt64() if that saves + // extension ops in generated code, and use a faster op than ReadInt64. + int32_t AsInt32() const { return static_cast(AsInt64()); } + int16_t AsInt16() const { return static_cast(AsInt64()); } + int8_t AsInt8() const { return static_cast(AsInt64()); } + + uint64_t AsUInt64() const { + if (type_ == FBT_UINT) { + // A fast path for the common case. 
+ return ReadUInt64(data_, parent_width_); + } else + switch (type_) { + case FBT_INDIRECT_UINT: return ReadUInt64(Indirect(), byte_width_); + case FBT_INT: return ReadInt64(data_, parent_width_); + case FBT_INDIRECT_INT: return ReadInt64(Indirect(), byte_width_); + case FBT_FLOAT: + return static_cast(ReadDouble(data_, parent_width_)); + case FBT_INDIRECT_FLOAT: + return static_cast(ReadDouble(Indirect(), byte_width_)); + case FBT_NULL: return 0; + case FBT_STRING: return flatbuffers::StringToUInt(AsString().c_str()); + case FBT_VECTOR: return static_cast(AsVector().size()); + case FBT_BOOL: return ReadUInt64(data_, parent_width_); + default: + // Convert other things to uint. + return 0; + } + } + + uint32_t AsUInt32() const { return static_cast(AsUInt64()); } + uint16_t AsUInt16() const { return static_cast(AsUInt64()); } + uint8_t AsUInt8() const { return static_cast(AsUInt64()); } + + double AsDouble() const { + if (type_ == FBT_FLOAT) { + // A fast path for the common case. + return ReadDouble(data_, parent_width_); + } else + switch (type_) { + case FBT_INDIRECT_FLOAT: return ReadDouble(Indirect(), byte_width_); + case FBT_INT: + return static_cast(ReadInt64(data_, parent_width_)); + case FBT_UINT: + return static_cast(ReadUInt64(data_, parent_width_)); + case FBT_INDIRECT_INT: + return static_cast(ReadInt64(Indirect(), byte_width_)); + case FBT_INDIRECT_UINT: + return static_cast(ReadUInt64(Indirect(), byte_width_)); + case FBT_NULL: return 0.0; + case FBT_STRING: { +#if 1 +#if !defined( _MSC_VER) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wnull-dereference" +#endif + // See b/173239141 for additional context. Patched via + // micro/tools/make/flexbuffers_download.sh + // Introduce a segfault for an unsupported code path for TFLM. + return *(static_cast(nullptr)); +#if !defined( _MSC_VER) +#pragma GCC diagnostic pop +#endif +#else + // This is the original code + double d; + flatbuffers::StringToNumber(AsString().c_str(), &d); + return d; +#endif + } + case FBT_VECTOR: return static_cast(AsVector().size()); + case FBT_BOOL: + return static_cast(ReadUInt64(data_, parent_width_)); + default: + // Convert strings and other things to float. + return 0; + } + } + + float AsFloat() const { return static_cast(AsDouble()); } + + const char *AsKey() const { + if (type_ == FBT_KEY || type_ == FBT_STRING) { + return reinterpret_cast(Indirect()); + } else { + return ""; + } + } + + // This function returns the empty string if you try to read something that + // is not a string or key. + String AsString() const { + if (type_ == FBT_STRING) { + return String(Indirect(), byte_width_); + } else if (type_ == FBT_KEY) { + auto key = Indirect(); + return String(key, byte_width_, + strlen(reinterpret_cast(key))); + } else { + return String::EmptyString(); + } + } + + // Unlike AsString(), this will convert any type to a std::string. + std::string ToString() const { + std::string s; + ToString(false, false, s); + return s; + } + + // Convert any type to a JSON-like string. strings_quoted determines if + // string values at the top level receive "" quotes (inside other values + // they always do). keys_quoted determines if keys are quoted, at any level. + // TODO(wvo): add further options to have indentation/newlines. 
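+
+  // Minimal usage sketch (added for clarity, not upstream code). In this
+  // Edge Impulse / TFLM copy, AsDouble() deliberately dereferences null for
+  // FBT_STRING (see the b/173239141 patch above), so check the type first:
+  //
+  //   flexbuffers::Reference r = ...;  // obtained from a Map or Vector
+  //   double d = 0.0;
+  //   if (r.IsNumeric()) {
+  //     d = r.AsDouble();              // safe: int/uint/float only
+  //   } else if (r.IsString()) {
+  //     // Do NOT call AsDouble() here; convert the string yourself if needed.
+  //     d = strtod(r.AsString().c_str(), nullptr);
+  //   }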
+ void ToString(bool strings_quoted, bool keys_quoted, std::string &s) const { + if (type_ == FBT_STRING) { + String str(Indirect(), byte_width_); + if (strings_quoted) { + flatbuffers::EscapeString(str.c_str(), str.length(), &s, true, false); + } else { + s.append(str.c_str(), str.length()); + } + } else if (IsKey()) { + auto str = AsKey(); + if (keys_quoted) { + flatbuffers::EscapeString(str, strlen(str), &s, true, false); + } else { + s += str; + } + } else if (IsInt()) { + s += flatbuffers::NumToString(AsInt64()); + } else if (IsUInt()) { + s += flatbuffers::NumToString(AsUInt64()); + } else if (IsFloat()) { + s += flatbuffers::NumToString(AsDouble()); + } else if (IsNull()) { + s += "null"; + } else if (IsBool()) { + s += AsBool() ? "true" : "false"; + } else if (IsMap()) { + s += "{ "; + auto m = AsMap(); + auto keys = m.Keys(); + auto vals = m.Values(); + for (size_t i = 0; i < keys.size(); i++) { + bool kq = keys_quoted; + if (!kq) { + // FlexBuffers keys may contain arbitrary characters, only allow + // unquoted if it looks like an "identifier": + const char *p = keys[i].AsKey(); + if (!flatbuffers::is_alpha(*p) && *p != '_') { + kq = true; + } else { + while (*++p) { + if (!flatbuffers::is_alnum(*p) && *p != '_') { + kq = true; + break; + } + } + } + } + keys[i].ToString(true, kq, s); + s += ": "; + vals[i].ToString(true, keys_quoted, s); + if (i < keys.size() - 1) s += ", "; + } + s += " }"; + } else if (IsVector()) { + AppendToString(s, AsVector(), keys_quoted); + } else if (IsTypedVector()) { + AppendToString(s, AsTypedVector(), keys_quoted); + } else if (IsFixedTypedVector()) { + AppendToString(s, AsFixedTypedVector(), keys_quoted); + } else if (IsBlob()) { + auto blob = AsBlob(); + flatbuffers::EscapeString(reinterpret_cast(blob.data()), + blob.size(), &s, true, false); + } else { + s += "(?)"; + } + } + + // This function returns the empty blob if you try to read a not-blob. + // Strings can be viewed as blobs too. + Blob AsBlob() const { + if (type_ == FBT_BLOB || type_ == FBT_STRING) { + return Blob(Indirect(), byte_width_); + } else { + return Blob::EmptyBlob(); + } + } + + // This function returns the empty vector if you try to read a not-vector. + // Maps can be viewed as vectors too. + Vector AsVector() const { + if (type_ == FBT_VECTOR || type_ == FBT_MAP) { + return Vector(Indirect(), byte_width_); + } else { + return Vector::EmptyVector(); + } + } + + TypedVector AsTypedVector() const { + if (IsTypedVector()) { + auto tv = + TypedVector(Indirect(), byte_width_, ToTypedVectorElementType(type_)); + if (tv.type_ == FBT_STRING) { + // These can't be accessed as strings, since we don't know the bit-width + // of the size field, see the declaration of + // FBT_VECTOR_STRING_DEPRECATED above for details. + // We change the type here to be keys, which are a subtype of strings, + // and will ignore the size field. This will truncate strings with + // embedded nulls. + tv.type_ = FBT_KEY; + } + return tv; + } else { + return TypedVector::EmptyTypedVector(); + } + } + + FixedTypedVector AsFixedTypedVector() const { + if (IsFixedTypedVector()) { + uint8_t len = 0; + auto vtype = ToFixedTypedVectorElementType(type_, &len); + return FixedTypedVector(Indirect(), byte_width_, vtype, len); + } else { + return FixedTypedVector::EmptyFixedTypedVector(); + } + } + + Map AsMap() const { + if (type_ == FBT_MAP) { + return Map(Indirect(), byte_width_); + } else { + return Map::EmptyMap(); + } + } + + template T As() const; + + // Experimental: Mutation functions. 
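+
+  // Minimal sketch (added for clarity, not upstream code) of what the
+  // ToString() conversion above produces for a small map:
+  //
+  //   flexbuffers::Builder fbb;
+  //   fbb.Map([&]() {
+  //     fbb.Int("a", 42);
+  //     fbb.Vector("b", [&]() { fbb.Int(1); fbb.Int(2); });
+  //   });
+  //   fbb.Finish();
+  //   std::string s = flexbuffers::GetRoot(fbb.GetBuffer()).ToString();
+  //   // Identifier-like keys stay unquoted, so s is roughly:
+  //   //   { a: 42, b: [ 1, 2 ] }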
+ // These allow scalars in an already created buffer to be updated in-place. + // Since by default scalars are stored in the smallest possible space, + // the new value may not fit, in which case these functions return false. + // To avoid this, you can construct the values you intend to mutate using + // Builder::ForceMinimumBitWidth. + bool MutateInt(int64_t i) { + if (type_ == FBT_INT) { + return Mutate(data_, i, parent_width_, WidthI(i)); + } else if (type_ == FBT_INDIRECT_INT) { + return Mutate(Indirect(), i, byte_width_, WidthI(i)); + } else if (type_ == FBT_UINT) { + auto u = static_cast(i); + return Mutate(data_, u, parent_width_, WidthU(u)); + } else if (type_ == FBT_INDIRECT_UINT) { + auto u = static_cast(i); + return Mutate(Indirect(), u, byte_width_, WidthU(u)); + } else { + return false; + } + } + + bool MutateBool(bool b) { + return type_ == FBT_BOOL && Mutate(data_, b, parent_width_, BIT_WIDTH_8); + } + + bool MutateUInt(uint64_t u) { + if (type_ == FBT_UINT) { + return Mutate(data_, u, parent_width_, WidthU(u)); + } else if (type_ == FBT_INDIRECT_UINT) { + return Mutate(Indirect(), u, byte_width_, WidthU(u)); + } else if (type_ == FBT_INT) { + auto i = static_cast(u); + return Mutate(data_, i, parent_width_, WidthI(i)); + } else if (type_ == FBT_INDIRECT_INT) { + auto i = static_cast(u); + return Mutate(Indirect(), i, byte_width_, WidthI(i)); + } else { + return false; + } + } + + bool MutateFloat(float f) { + if (type_ == FBT_FLOAT) { + return MutateF(data_, f, parent_width_, BIT_WIDTH_32); + } else if (type_ == FBT_INDIRECT_FLOAT) { + return MutateF(Indirect(), f, byte_width_, BIT_WIDTH_32); + } else { + return false; + } + } + + bool MutateFloat(double d) { + if (type_ == FBT_FLOAT) { + return MutateF(data_, d, parent_width_, WidthF(d)); + } else if (type_ == FBT_INDIRECT_FLOAT) { + return MutateF(Indirect(), d, byte_width_, WidthF(d)); + } else { + return false; + } + } + + bool MutateString(const char *str, size_t len) { + auto s = AsString(); + if (s.IsTheEmptyString()) return false; + // This is very strict, could allow shorter strings, but that creates + // garbage. + if (s.length() != len) return false; + memcpy(const_cast(s.c_str()), str, len); + return true; + } + bool MutateString(const char *str) { return MutateString(str, strlen(str)); } + bool MutateString(const std::string &str) { + return MutateString(str.data(), str.length()); + } + + private: + const uint8_t *Indirect() const { + return flexbuffers::Indirect(data_, parent_width_); + } + + template + bool Mutate(const uint8_t *dest, T t, size_t byte_width, + BitWidth value_width) { + auto fits = static_cast(static_cast(1U) << value_width) <= + byte_width; + if (fits) { + t = flatbuffers::EndianScalar(t); + memcpy(const_cast(dest), &t, byte_width); + } + return fits; + } + + template + bool MutateF(const uint8_t *dest, T t, size_t byte_width, + BitWidth value_width) { + if (byte_width == sizeof(double)) + return Mutate(dest, static_cast(t), byte_width, value_width); + if (byte_width == sizeof(float)) + return Mutate(dest, static_cast(t), byte_width, value_width); + FLATBUFFERS_ASSERT(false); + return false; + } + + friend class Verifier; + + const uint8_t *data_; + uint8_t parent_width_; + uint8_t byte_width_; + Type type_; +}; + +// Template specialization for As(). 
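+
+// Minimal sketch (added for clarity, not upstream code) of the in-place
+// mutation API declared in Reference above. A mutation only succeeds if the
+// new value fits the bit width used at serialization time, so writers that
+// intend to mutate later typically pin the width first:
+//
+//   flexbuffers::Builder fbb;
+//   fbb.ForceMinimumBitWidth(flexbuffers::BIT_WIDTH_64);   // reserve room
+//   fbb.Map([&]() { fbb.Int("counter", 0); });
+//   fbb.Finish();
+//
+//   std::vector<uint8_t> buf = fbb.GetBuffer();             // mutable copy
+//   auto counter = flexbuffers::GetRoot(buf).AsMap()["counter"];
+//   bool ok = counter.MutateInt(1234567890123LL);           // true: fits in 64 bits
+//   // MutateString() additionally requires the replacement to have the same length.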
+template<> inline bool Reference::As() const { return AsBool(); } + +template<> inline int8_t Reference::As() const { return AsInt8(); } +template<> inline int16_t Reference::As() const { return AsInt16(); } +template<> inline int32_t Reference::As() const { return AsInt32(); } +template<> inline int64_t Reference::As() const { return AsInt64(); } + +template<> inline uint8_t Reference::As() const { return AsUInt8(); } +template<> inline uint16_t Reference::As() const { + return AsUInt16(); +} +template<> inline uint32_t Reference::As() const { + return AsUInt32(); +} +template<> inline uint64_t Reference::As() const { + return AsUInt64(); +} + +template<> inline double Reference::As() const { return AsDouble(); } +template<> inline float Reference::As() const { return AsFloat(); } + +template<> inline String Reference::As() const { return AsString(); } +template<> inline std::string Reference::As() const { + return AsString().str(); +} + +template<> inline Blob Reference::As() const { return AsBlob(); } +template<> inline Vector Reference::As() const { return AsVector(); } +template<> inline TypedVector Reference::As() const { + return AsTypedVector(); +} +template<> inline FixedTypedVector Reference::As() const { + return AsFixedTypedVector(); +} +template<> inline Map Reference::As() const { return AsMap(); } + +inline uint8_t PackedType(BitWidth bit_width, Type type) { + return static_cast(bit_width | (type << 2)); +} + +inline uint8_t NullPackedType() { return PackedType(BIT_WIDTH_8, FBT_NULL); } + +// Vector accessors. +// Note: if you try to access outside of bounds, you get a Null value back +// instead. Normally this would be an assert, but since this is "dynamically +// typed" data, you may not want that (someone sends you a 2d vector and you +// wanted 3d). +// The Null converts seamlessly into a default value for any other type. +// TODO(wvo): Could introduce an #ifdef that makes this into an assert? +inline Reference Vector::operator[](size_t i) const { + auto len = size(); + if (i >= len) return Reference(nullptr, 1, NullPackedType()); + auto packed_type = (data_ + len * byte_width_)[i]; + auto elem = data_ + i * byte_width_; + return Reference(elem, byte_width_, packed_type); +} + +inline Reference TypedVector::operator[](size_t i) const { + auto len = size(); + if (i >= len) return Reference(nullptr, 1, NullPackedType()); + auto elem = data_ + i * byte_width_; + return Reference(elem, byte_width_, 1, type_); +} + +inline Reference FixedTypedVector::operator[](size_t i) const { + if (i >= len_) return Reference(nullptr, 1, NullPackedType()); + auto elem = data_ + i * byte_width_; + return Reference(elem, byte_width_, 1, type_); +} + +template int KeyCompare(const void *key, const void *elem) { + auto str_elem = reinterpret_cast( + Indirect(reinterpret_cast(elem))); + auto skey = reinterpret_cast(key); + return strcmp(skey, str_elem); +} + +inline Reference Map::operator[](const char *key) const { + auto keys = Keys(); + // We can't pass keys.byte_width_ to the comparison function, so we have + // to pick the right one ahead of time. 
+ int (*comp)(const void *, const void *) = nullptr; + switch (keys.byte_width_) { + case 1: comp = KeyCompare; break; + case 2: comp = KeyCompare; break; + case 4: comp = KeyCompare; break; + case 8: comp = KeyCompare; break; + default: FLATBUFFERS_ASSERT(false); return Reference(); + } + auto res = std::bsearch(key, keys.data_, keys.size(), keys.byte_width_, comp); + if (!res) return Reference(nullptr, 1, NullPackedType()); + auto i = (reinterpret_cast(res) - keys.data_) / keys.byte_width_; + return (*static_cast(this))[i]; +} + +inline Reference Map::operator[](const std::string &key) const { + return (*this)[key.c_str()]; +} + +inline Reference GetRoot(const uint8_t *buffer, size_t size) { + // See Finish() below for the serialization counterpart of this. + // The root starts at the end of the buffer, so we parse backwards from there. + auto end = buffer + size; + auto byte_width = *--end; + auto packed_type = *--end; + end -= byte_width; // The root data item. + return Reference(end, byte_width, packed_type); +} + +inline Reference GetRoot(const std::vector &buffer) { + return GetRoot(buffer.data(), buffer.size()); +} + +// Flags that configure how the Builder behaves. +// The "Share" flags determine if the Builder automatically tries to pool +// this type. Pooling can reduce the size of serialized data if there are +// multiple maps of the same kind, at the expense of slightly slower +// serialization (the cost of lookups) and more memory use (std::set). +// By default this is on for keys, but off for strings. +// Turn keys off if you have e.g. only one map. +// Turn strings on if you expect many non-unique string values. +// Additionally, sharing key vectors can save space if you have maps with +// identical field populations. +enum BuilderFlag { + BUILDER_FLAG_NONE = 0, + BUILDER_FLAG_SHARE_KEYS = 1, + BUILDER_FLAG_SHARE_STRINGS = 2, + BUILDER_FLAG_SHARE_KEYS_AND_STRINGS = 3, + BUILDER_FLAG_SHARE_KEY_VECTORS = 4, + BUILDER_FLAG_SHARE_ALL = 7, +}; + +class Builder FLATBUFFERS_FINAL_CLASS { + public: + Builder(size_t initial_size = 256, + BuilderFlag flags = BUILDER_FLAG_SHARE_KEYS) + : buf_(initial_size), + finished_(false), + has_duplicate_keys_(false), + flags_(flags), + force_min_bit_width_(BIT_WIDTH_8), + key_pool(KeyOffsetCompare(buf_)), + string_pool(StringOffsetCompare(buf_)) { + buf_.clear(); + } + +#ifdef FLATBUFFERS_DEFAULT_DECLARATION + Builder(Builder &&) = default; + Builder &operator=(Builder &&) = default; +#endif + + /// @brief Get the serialized buffer (after you call `Finish()`). + /// @return Returns a vector owned by this class. + const std::vector &GetBuffer() const { + Finished(); + return buf_; + } + + // Size of the buffer. Does not include unfinished values. + size_t GetSize() const { return buf_.size(); } + + // Reset all state so we can re-use the buffer. + void Clear() { + buf_.clear(); + stack_.clear(); + finished_ = false; + // flags_ remains as-is; + force_min_bit_width_ = BIT_WIDTH_8; + key_pool.clear(); + string_pool.clear(); + } + + // All value constructing functions below have two versions: one that + // takes a key (for placement inside a map) and one that doesn't (for inside + // vectors and elsewhere). 
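+
+  // Minimal sketch (added for clarity, not upstream code) showing the two
+  // flavors in practice:
+  //
+  //   flexbuffers::Builder fbb;
+  //   fbb.Map([&]() {
+  //     fbb.Int("answer", 42);      // keyed: we are inside a map
+  //     fbb.Vector("data", [&]() {
+  //       fbb.Int(1);               // unkeyed: we are inside a vector
+  //       fbb.Int(2);
+  //     });
+  //   });
+  //   fbb.Finish();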
+ + void Null() { stack_.push_back(Value()); } + void Null(const char *key) { + Key(key); + Null(); + } + + void Int(int64_t i) { stack_.push_back(Value(i, FBT_INT, WidthI(i))); } + void Int(const char *key, int64_t i) { + Key(key); + Int(i); + } + + void UInt(uint64_t u) { stack_.push_back(Value(u, FBT_UINT, WidthU(u))); } + void UInt(const char *key, uint64_t u) { + Key(key); + UInt(u); + } + + void Float(float f) { stack_.push_back(Value(f)); } + void Float(const char *key, float f) { + Key(key); + Float(f); + } + + void Double(double f) { stack_.push_back(Value(f)); } + void Double(const char *key, double d) { + Key(key); + Double(d); + } + + void Bool(bool b) { stack_.push_back(Value(b)); } + void Bool(const char *key, bool b) { + Key(key); + Bool(b); + } + + void IndirectInt(int64_t i) { PushIndirect(i, FBT_INDIRECT_INT, WidthI(i)); } + void IndirectInt(const char *key, int64_t i) { + Key(key); + IndirectInt(i); + } + + void IndirectUInt(uint64_t u) { + PushIndirect(u, FBT_INDIRECT_UINT, WidthU(u)); + } + void IndirectUInt(const char *key, uint64_t u) { + Key(key); + IndirectUInt(u); + } + + void IndirectFloat(float f) { + PushIndirect(f, FBT_INDIRECT_FLOAT, BIT_WIDTH_32); + } + void IndirectFloat(const char *key, float f) { + Key(key); + IndirectFloat(f); + } + + void IndirectDouble(double f) { + PushIndirect(f, FBT_INDIRECT_FLOAT, WidthF(f)); + } + void IndirectDouble(const char *key, double d) { + Key(key); + IndirectDouble(d); + } + + size_t Key(const char *str, size_t len) { + auto sloc = buf_.size(); + WriteBytes(str, len + 1); + if (flags_ & BUILDER_FLAG_SHARE_KEYS) { + auto it = key_pool.find(sloc); + if (it != key_pool.end()) { + // Already in the buffer. Remove key we just serialized, and use + // existing offset instead. + buf_.resize(sloc); + sloc = *it; + } else { + key_pool.insert(sloc); + } + } + stack_.push_back(Value(static_cast(sloc), FBT_KEY, BIT_WIDTH_8)); + return sloc; + } + + size_t Key(const char *str) { return Key(str, strlen(str)); } + size_t Key(const std::string &str) { return Key(str.c_str(), str.size()); } + + size_t String(const char *str, size_t len) { + auto reset_to = buf_.size(); + auto sloc = CreateBlob(str, len, 1, FBT_STRING); + if (flags_ & BUILDER_FLAG_SHARE_STRINGS) { + StringOffset so(sloc, len); + auto it = string_pool.find(so); + if (it != string_pool.end()) { + // Already in the buffer. Remove string we just serialized, and use + // existing offset instead. 
+ buf_.resize(reset_to); + sloc = it->first; + stack_.back().u_ = sloc; + } else { + string_pool.insert(so); + } + } + return sloc; + } + size_t String(const char *str) { return String(str, strlen(str)); } + size_t String(const std::string &str) { + return String(str.c_str(), str.size()); + } + void String(const flexbuffers::String &str) { + String(str.c_str(), str.length()); + } + + void String(const char *key, const char *str) { + Key(key); + String(str); + } + void String(const char *key, const std::string &str) { + Key(key); + String(str); + } + void String(const char *key, const flexbuffers::String &str) { + Key(key); + String(str); + } + + size_t Blob(const void *data, size_t len) { + return CreateBlob(data, len, 0, FBT_BLOB); + } + size_t Blob(const std::vector &v) { + return CreateBlob(v.data(), v.size(), 0, FBT_BLOB); + } + + void Blob(const char *key, const void *data, size_t len) { + Key(key); + Blob(data, len); + } + void Blob(const char *key, const std::vector &v) { + Key(key); + Blob(v); + } + + // TODO(wvo): support all the FlexBuffer types (like flexbuffers::String), + // e.g. Vector etc. Also in overloaded versions. + // Also some FlatBuffers types? + + size_t StartVector() { return stack_.size(); } + size_t StartVector(const char *key) { + Key(key); + return stack_.size(); + } + size_t StartMap() { return stack_.size(); } + size_t StartMap(const char *key) { + Key(key); + return stack_.size(); + } + + // TODO(wvo): allow this to specify an alignment greater than the natural + // alignment. + size_t EndVector(size_t start, bool typed, bool fixed) { + auto vec = CreateVector(start, stack_.size() - start, 1, typed, fixed); + // Remove temp elements and return vector. + stack_.resize(start); + stack_.push_back(vec); + return static_cast(vec.u_); + } + + size_t EndMap(size_t start) { + // We should have interleaved keys and values on the stack. + // Make sure it is an even number: + auto len = stack_.size() - start; + FLATBUFFERS_ASSERT(!(len & 1)); + len /= 2; + // Make sure keys are all strings: + for (auto key = start; key < stack_.size(); key += 2) { + FLATBUFFERS_ASSERT(stack_[key].type_ == FBT_KEY); + } + // Now sort values, so later we can do a binary search lookup. + // We want to sort 2 array elements at a time. + struct TwoValue { + Value key; + Value val; + }; + // TODO(wvo): strict aliasing? + // TODO(wvo): allow the caller to indicate the data is already sorted + // for maximum efficiency? With an assert to check sortedness to make sure + // we're not breaking binary search. + // Or, we can track if the map is sorted as keys are added which would be + // be quite cheap (cheaper than checking it here), so we can skip this + // step automatically when appliccable, and encourage people to write in + // sorted fashion. + // std::sort is typically already a lot faster on sorted data though. + auto dict = reinterpret_cast(stack_.data() + start); + std::sort( + dict, dict + len, [&](const TwoValue &a, const TwoValue &b) -> bool { + auto as = reinterpret_cast(buf_.data() + a.key.u_); + auto bs = reinterpret_cast(buf_.data() + b.key.u_); + auto comp = strcmp(as, bs); + // We want to disallow duplicate keys, since this results in a + // map where values cannot be found. + // But we can't assert here (since we don't want to fail on + // random JSON input) or have an error mechanism. + // Instead, we set has_duplicate_keys_ in the builder to + // signal this. 
+ // TODO: Have to check for pointer equality, as some sort + // implementation apparently call this function with the same + // element?? Why? + if (!comp && &a != &b) has_duplicate_keys_ = true; + return comp < 0; + }); + // First create a vector out of all keys. + // TODO(wvo): if kBuilderFlagShareKeyVectors is true, see if we can share + // the first vector. + auto keys = CreateVector(start, len, 2, true, false); + auto vec = CreateVector(start + 1, len, 2, false, false, &keys); + // Remove temp elements and return map. + stack_.resize(start); + stack_.push_back(vec); + return static_cast(vec.u_); + } + + // Call this after EndMap to see if the map had any duplicate keys. + // Any map with such keys won't be able to retrieve all values. + bool HasDuplicateKeys() const { return has_duplicate_keys_; } + + template size_t Vector(F f) { + auto start = StartVector(); + f(); + return EndVector(start, false, false); + } + template size_t Vector(F f, T &state) { + auto start = StartVector(); + f(state); + return EndVector(start, false, false); + } + template size_t Vector(const char *key, F f) { + auto start = StartVector(key); + f(); + return EndVector(start, false, false); + } + template + size_t Vector(const char *key, F f, T &state) { + auto start = StartVector(key); + f(state); + return EndVector(start, false, false); + } + + template void Vector(const T *elems, size_t len) { + if (flatbuffers::is_scalar::value) { + // This path should be a lot quicker and use less space. + ScalarVector(elems, len, false); + } else { + auto start = StartVector(); + for (size_t i = 0; i < len; i++) Add(elems[i]); + EndVector(start, false, false); + } + } + template + void Vector(const char *key, const T *elems, size_t len) { + Key(key); + Vector(elems, len); + } + template void Vector(const std::vector &vec) { + Vector(vec.data(), vec.size()); + } + + template size_t TypedVector(F f) { + auto start = StartVector(); + f(); + return EndVector(start, true, false); + } + template size_t TypedVector(F f, T &state) { + auto start = StartVector(); + f(state); + return EndVector(start, true, false); + } + template size_t TypedVector(const char *key, F f) { + auto start = StartVector(key); + f(); + return EndVector(start, true, false); + } + template + size_t TypedVector(const char *key, F f, T &state) { + auto start = StartVector(key); + f(state); + return EndVector(start, true, false); + } + + template size_t FixedTypedVector(const T *elems, size_t len) { + // We only support a few fixed vector lengths. Anything bigger use a + // regular typed vector. + FLATBUFFERS_ASSERT(len >= 2 && len <= 4); + // And only scalar values. 
+ static_assert(flatbuffers::is_scalar::value, "Unrelated types"); + return ScalarVector(elems, len, true); + } + + template + size_t FixedTypedVector(const char *key, const T *elems, size_t len) { + Key(key); + return FixedTypedVector(elems, len); + } + + template size_t Map(F f) { + auto start = StartMap(); + f(); + return EndMap(start); + } + template size_t Map(F f, T &state) { + auto start = StartMap(); + f(state); + return EndMap(start); + } + template size_t Map(const char *key, F f) { + auto start = StartMap(key); + f(); + return EndMap(start); + } + template size_t Map(const char *key, F f, T &state) { + auto start = StartMap(key); + f(state); + return EndMap(start); + } + template void Map(const std::map &map) { + auto start = StartMap(); + for (auto it = map.begin(); it != map.end(); ++it) + Add(it->first.c_str(), it->second); + EndMap(start); + } + + // If you wish to share a value explicitly (a value not shared automatically + // through one of the BUILDER_FLAG_SHARE_* flags) you can do so with these + // functions. Or if you wish to turn those flags off for performance reasons + // and still do some explicit sharing. For example: + // builder.IndirectDouble(M_PI); + // auto id = builder.LastValue(); // Remember where we stored it. + // .. more code goes here .. + // builder.ReuseValue(id); // Refers to same double by offset. + // LastValue works regardless of whether the value has a key or not. + // Works on any data type. + struct Value; + Value LastValue() { return stack_.back(); } + void ReuseValue(Value v) { stack_.push_back(v); } + void ReuseValue(const char *key, Value v) { + Key(key); + ReuseValue(v); + } + + // Overloaded Add that tries to call the correct function above. + void Add(int8_t i) { Int(i); } + void Add(int16_t i) { Int(i); } + void Add(int32_t i) { Int(i); } + void Add(int64_t i) { Int(i); } + void Add(uint8_t u) { UInt(u); } + void Add(uint16_t u) { UInt(u); } + void Add(uint32_t u) { UInt(u); } + void Add(uint64_t u) { UInt(u); } + void Add(float f) { Float(f); } + void Add(double d) { Double(d); } + void Add(bool b) { Bool(b); } + void Add(const char *str) { String(str); } + void Add(const std::string &str) { String(str); } + void Add(const flexbuffers::String &str) { String(str); } + + template void Add(const std::vector &vec) { Vector(vec); } + + template void Add(const char *key, const T &t) { + Key(key); + Add(t); + } + + template void Add(const std::map &map) { + Map(map); + } + + template void operator+=(const T &t) { Add(t); } + + // This function is useful in combination with the Mutate* functions above. + // It forces elements of vectors and maps to have a minimum size, such that + // they can later be updated without failing. + // Call with no arguments to reset. + void ForceMinimumBitWidth(BitWidth bw = BIT_WIDTH_8) { + force_min_bit_width_ = bw; + } + + void Finish() { + // If you hit this assert, you likely have objects that were never included + // in a parent. You need to have exactly one root to finish a buffer. + // Check your Start/End calls are matched, and all objects are inside + // some other object. + FLATBUFFERS_ASSERT(stack_.size() == 1); + + // Write root value. + auto byte_width = Align(stack_[0].ElemWidth(buf_.size(), 0)); + WriteAny(stack_[0], byte_width); + // Write root type. + Write(stack_[0].StoredPackedType(), 1); + // Write root size. 
Normally determined by parent, but root has no parent :) + Write(byte_width, 1); + + finished_ = true; + } + + private: + void Finished() const { + // If you get this assert, you're attempting to get access a buffer + // which hasn't been finished yet. Be sure to call + // Builder::Finish with your root object. + FLATBUFFERS_ASSERT(finished_); + } + + // Align to prepare for writing a scalar with a certain size. + uint8_t Align(BitWidth alignment) { + auto byte_width = 1U << alignment; + buf_.insert(buf_.end(), flatbuffers::PaddingBytes(buf_.size(), byte_width), + 0); + return static_cast(byte_width); + } + + void WriteBytes(const void *val, size_t size) { + buf_.insert(buf_.end(), reinterpret_cast(val), + reinterpret_cast(val) + size); + } + + template void Write(T val, size_t byte_width) { + FLATBUFFERS_ASSERT(sizeof(T) >= byte_width); + val = flatbuffers::EndianScalar(val); + WriteBytes(&val, byte_width); + } + + void WriteDouble(double f, uint8_t byte_width) { + switch (byte_width) { + case 8: Write(f, byte_width); break; + case 4: Write(static_cast(f), byte_width); break; + // case 2: Write(static_cast(f), byte_width); break; + // case 1: Write(static_cast(f), byte_width); break; + default: FLATBUFFERS_ASSERT(0); + } + } + + void WriteOffset(uint64_t o, uint8_t byte_width) { + auto reloff = buf_.size() - o; + FLATBUFFERS_ASSERT(byte_width == 8 || reloff < 1ULL << (byte_width * 8)); + Write(reloff, byte_width); + } + + template void PushIndirect(T val, Type type, BitWidth bit_width) { + auto byte_width = Align(bit_width); + auto iloc = buf_.size(); + Write(val, byte_width); + stack_.push_back(Value(static_cast(iloc), type, bit_width)); + } + + static BitWidth WidthB(size_t byte_width) { + switch (byte_width) { + case 1: return BIT_WIDTH_8; + case 2: return BIT_WIDTH_16; + case 4: return BIT_WIDTH_32; + case 8: return BIT_WIDTH_64; + default: FLATBUFFERS_ASSERT(false); return BIT_WIDTH_64; + } + } + + template static Type GetScalarType() { + static_assert(flatbuffers::is_scalar::value, "Unrelated types"); + return flatbuffers::is_floating_point::value + ? FBT_FLOAT + : flatbuffers::is_same::value + ? FBT_BOOL + : (flatbuffers::is_unsigned::value ? FBT_UINT + : FBT_INT); + } + + public: + // This was really intended to be private, except for LastValue/ReuseValue. + struct Value { + union { + int64_t i_; + uint64_t u_; + double f_; + }; + + Type type_; + + // For scalars: of itself, for vector: of its elements, for string: length. + BitWidth min_bit_width_; + + Value() : i_(0), type_(FBT_NULL), min_bit_width_(BIT_WIDTH_8) {} + + Value(bool b) + : u_(static_cast(b)), + type_(FBT_BOOL), + min_bit_width_(BIT_WIDTH_8) {} + + Value(int64_t i, Type t, BitWidth bw) + : i_(i), type_(t), min_bit_width_(bw) {} + Value(uint64_t u, Type t, BitWidth bw) + : u_(u), type_(t), min_bit_width_(bw) {} + + Value(float f) + : f_(static_cast(f)), + type_(FBT_FLOAT), + min_bit_width_(BIT_WIDTH_32) {} + Value(double f) : f_(f), type_(FBT_FLOAT), min_bit_width_(WidthF(f)) {} + + uint8_t StoredPackedType(BitWidth parent_bit_width_ = BIT_WIDTH_8) const { + return PackedType(StoredWidth(parent_bit_width_), type_); + } + + BitWidth ElemWidth(size_t buf_size, size_t elem_index) const { + if (IsInline(type_)) { + return min_bit_width_; + } else { + // We have an absolute offset, but want to store a relative offset + // elem_index elements beyond the current buffer end. 
Since whether + // the relative offset fits in a certain byte_width depends on + // the size of the elements before it (and their alignment), we have + // to test for each size in turn. + for (size_t byte_width = 1; + byte_width <= sizeof(flatbuffers::largest_scalar_t); + byte_width *= 2) { + // Where are we going to write this offset? + auto offset_loc = buf_size + + flatbuffers::PaddingBytes(buf_size, byte_width) + + elem_index * byte_width; + // Compute relative offset. + auto offset = offset_loc - u_; + // Does it fit? + auto bit_width = WidthU(offset); + if (static_cast(static_cast(1U) << bit_width) == + byte_width) + return bit_width; + } + FLATBUFFERS_ASSERT(false); // Must match one of the sizes above. + return BIT_WIDTH_64; + } + } + + BitWidth StoredWidth(BitWidth parent_bit_width_ = BIT_WIDTH_8) const { + if (IsInline(type_)) { + return (std::max)(min_bit_width_, parent_bit_width_); + } else { + return min_bit_width_; + } + } + }; + + private: + void WriteAny(const Value &val, uint8_t byte_width) { + switch (val.type_) { + case FBT_NULL: + case FBT_INT: Write(val.i_, byte_width); break; + case FBT_BOOL: + case FBT_UINT: Write(val.u_, byte_width); break; + case FBT_FLOAT: WriteDouble(val.f_, byte_width); break; + default: WriteOffset(val.u_, byte_width); break; + } + } + + size_t CreateBlob(const void *data, size_t len, size_t trailing, Type type) { + auto bit_width = WidthU(len); + auto byte_width = Align(bit_width); + Write(len, byte_width); + auto sloc = buf_.size(); + WriteBytes(data, len + trailing); + stack_.push_back(Value(static_cast(sloc), type, bit_width)); + return sloc; + } + + template + size_t ScalarVector(const T *elems, size_t len, bool fixed) { + auto vector_type = GetScalarType(); + auto byte_width = sizeof(T); + auto bit_width = WidthB(byte_width); + // If you get this assert, you're trying to write a vector with a size + // field that is bigger than the scalars you're trying to write (e.g. a + // byte vector > 255 elements). For such types, write a "blob" instead. + // TODO: instead of asserting, could write vector with larger elements + // instead, though that would be wasteful. + FLATBUFFERS_ASSERT(WidthU(len) <= bit_width); + Align(bit_width); + if (!fixed) Write(len, byte_width); + auto vloc = buf_.size(); + for (size_t i = 0; i < len; i++) Write(elems[i], byte_width); + stack_.push_back(Value(static_cast(vloc), + ToTypedVector(vector_type, fixed ? len : 0), + bit_width)); + return vloc; + } + + Value CreateVector(size_t start, size_t vec_len, size_t step, bool typed, + bool fixed, const Value *keys = nullptr) { + FLATBUFFERS_ASSERT( + !fixed || + typed); // typed=false, fixed=true combination is not supported. + // Figure out smallest bit width we can store this vector with. + auto bit_width = (std::max)(force_min_bit_width_, WidthU(vec_len)); + auto prefix_elems = 1; + if (keys) { + // If this vector is part of a map, we will pre-fix an offset to the keys + // to this vector. + bit_width = (std::max)(bit_width, keys->ElemWidth(buf_.size(), 0)); + prefix_elems += 2; + } + Type vector_type = FBT_KEY; + // Check bit widths and types for all elements. + for (size_t i = start; i < stack_.size(); i += step) { + auto elem_width = + stack_[i].ElemWidth(buf_.size(), i - start + prefix_elems); + bit_width = (std::max)(bit_width, elem_width); + if (typed) { + if (i == start) { + vector_type = stack_[i].type_; + } else { + // If you get this assert, you are writing a typed vector with + // elements that are not all the same type. 
+ FLATBUFFERS_ASSERT(vector_type == stack_[i].type_); + } + } + } + // If you get this assert, your typed types are not one of: + // Int / UInt / Float / Key. + FLATBUFFERS_ASSERT(!typed || IsTypedVectorElementType(vector_type)); + auto byte_width = Align(bit_width); + // Write vector. First the keys width/offset if available, and size. + if (keys) { + WriteOffset(keys->u_, byte_width); + Write(1ULL << keys->min_bit_width_, byte_width); + } + if (!fixed) Write(vec_len, byte_width); + // Then the actual data. + auto vloc = buf_.size(); + for (size_t i = start; i < stack_.size(); i += step) { + WriteAny(stack_[i], byte_width); + } + // Then the types. + if (!typed) { + for (size_t i = start; i < stack_.size(); i += step) { + buf_.push_back(stack_[i].StoredPackedType(bit_width)); + } + } + return Value(static_cast(vloc), + keys ? FBT_MAP + : (typed ? ToTypedVector(vector_type, fixed ? vec_len : 0) + : FBT_VECTOR), + bit_width); + } + + // You shouldn't really be copying instances of this class. + Builder(const Builder &); + Builder &operator=(const Builder &); + + std::vector buf_; + std::vector stack_; + + bool finished_; + bool has_duplicate_keys_; + + BuilderFlag flags_; + + BitWidth force_min_bit_width_; + + struct KeyOffsetCompare { + explicit KeyOffsetCompare(const std::vector &buf) : buf_(&buf) {} + bool operator()(size_t a, size_t b) const { + auto stra = reinterpret_cast(buf_->data() + a); + auto strb = reinterpret_cast(buf_->data() + b); + return strcmp(stra, strb) < 0; + } + const std::vector *buf_; + }; + + typedef std::pair StringOffset; + struct StringOffsetCompare { + explicit StringOffsetCompare(const std::vector &buf) + : buf_(&buf) {} + bool operator()(const StringOffset &a, const StringOffset &b) const { + auto stra = buf_->data() + a.first; + auto strb = buf_->data() + b.first; + auto cr = memcmp(stra, strb, (std::min)(a.second, b.second) + 1); + return cr < 0 || (cr == 0 && a.second < b.second); + } + const std::vector *buf_; + }; + + typedef std::set KeyOffsetMap; + typedef std::set StringOffsetMap; + + KeyOffsetMap key_pool; + StringOffsetMap string_pool; + + friend class Verifier; +}; + +// Helper class to verify the integrity of a FlexBuffer +class Verifier FLATBUFFERS_FINAL_CLASS { + public: + Verifier(const uint8_t *buf, size_t buf_len, + // Supplying this vector likely results in faster verification + // of larger buffers with many shared keys/strings, but + // comes at the cost of using additional memory the same size of + // the buffer being verified, so it is by default off. + std::vector *reuse_tracker = nullptr, + bool _check_alignment = true, size_t max_depth = 64) + : buf_(buf), + size_(buf_len), + depth_(0), + max_depth_(max_depth), + num_vectors_(0), + max_vectors_(buf_len), + check_alignment_(_check_alignment), + reuse_tracker_(reuse_tracker) { + FLATBUFFERS_ASSERT(size_ < FLATBUFFERS_MAX_BUFFER_SIZE); + if (reuse_tracker_) { + reuse_tracker_->clear(); + reuse_tracker_->resize(size_, PackedType(BIT_WIDTH_8, FBT_NULL)); + } + } + + private: + // Central location where any verification failures register. + bool Check(bool ok) const { + // clang-format off + #ifdef FLATBUFFERS_DEBUG_VERIFICATION_FAILURE + FLATBUFFERS_ASSERT(ok); + #endif + // clang-format on + return ok; + } + + // Verify any range within the buffer. 
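+
+  // Minimal construction sketch (added for clarity, not upstream code) for
+  // the options documented on the constructor above, assuming a finished
+  // FlexBuffer in a hypothetical std::vector<uint8_t> named buf:
+  //
+  //   std::vector<uint8_t> reuse_tracker;  // optional scratch, one byte per buffer byte
+  //   flexbuffers::Verifier verifier(buf.data(), buf.size(), &reuse_tracker);
+  //   bool ok = verifier.VerifyBuffer();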
+ bool VerifyFrom(size_t elem, size_t elem_len) const { + return Check(elem_len < size_ && elem <= size_ - elem_len); + } + bool VerifyBefore(size_t elem, size_t elem_len) const { + return Check(elem_len <= elem); + } + + bool VerifyFromPointer(const uint8_t *p, size_t len) { + auto o = static_cast(p - buf_); + return VerifyFrom(o, len); + } + bool VerifyBeforePointer(const uint8_t *p, size_t len) { + auto o = static_cast(p - buf_); + return VerifyBefore(o, len); + } + + bool VerifyByteWidth(size_t width) { + return Check(width == 1 || width == 2 || width == 4 || width == 8); + } + + bool VerifyType(int type) { return Check(type >= 0 && type < FBT_MAX_TYPE); } + + bool VerifyOffset(uint64_t off, const uint8_t *p) { + return Check(off <= static_cast(size_)) && + off <= static_cast(p - buf_); + } + + bool VerifyAlignment(const uint8_t *p, size_t size) const { + auto o = static_cast(p - buf_); + return Check((o & (size - 1)) == 0 || !check_alignment_); + } + +// Macro, since we want to escape from parent function & use lazy args. +#define FLEX_CHECK_VERIFIED(P, PACKED_TYPE) \ + if (reuse_tracker_) { \ + auto packed_type = PACKED_TYPE; \ + auto existing = (*reuse_tracker_)[P - buf_]; \ + if (existing == packed_type) return true; \ + /* Fail verification if already set with different type! */ \ + if (!Check(existing == 0)) return false; \ + (*reuse_tracker_)[P - buf_] = packed_type; \ + } + + bool VerifyVector(Reference r, const uint8_t *p, Type elem_type) { + // Any kind of nesting goes thru this function, so guard against that + // here, both with simple nesting checks, and the reuse tracker if on. + depth_++; + num_vectors_++; + if (!Check(depth_ <= max_depth_ && num_vectors_ <= max_vectors_)) + return false; + auto size_byte_width = r.byte_width_; + if (!VerifyBeforePointer(p, size_byte_width)) return false; + FLEX_CHECK_VERIFIED(p - size_byte_width, + PackedType(Builder::WidthB(size_byte_width), r.type_)); + auto sized = Sized(p, size_byte_width); + auto num_elems = sized.size(); + auto elem_byte_width = r.type_ == FBT_STRING || r.type_ == FBT_BLOB + ? uint8_t(1) + : r.byte_width_; + auto max_elems = SIZE_MAX / elem_byte_width; + if (!Check(num_elems < max_elems)) + return false; // Protect against byte_size overflowing. + auto byte_size = num_elems * elem_byte_width; + if (!VerifyFromPointer(p, byte_size)) return false; + if (elem_type == FBT_NULL) { + // Verify type bytes after the vector. + if (!VerifyFromPointer(p + byte_size, num_elems)) return false; + auto v = Vector(p, size_byte_width); + for (size_t i = 0; i < num_elems; i++) + if (!VerifyRef(v[i])) return false; + } else if (elem_type == FBT_KEY) { + auto v = TypedVector(p, elem_byte_width, FBT_KEY); + for (size_t i = 0; i < num_elems; i++) + if (!VerifyRef(v[i])) return false; + } else { + FLATBUFFERS_ASSERT(IsInline(elem_type)); + } + depth_--; + return true; + } + + bool VerifyKeys(const uint8_t *p, uint8_t byte_width) { + // The vector part of the map has already been verified. 
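+    // A map's element vector is prefixed by three fields: the offset to its
+    // keys vector, the byte width of that keys vector, and the map's size
+    // field, which is why three prefixed fields are stepped over here.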
+ const size_t num_prefixed_fields = 3; + if (!VerifyBeforePointer(p, byte_width * num_prefixed_fields)) return false; + p -= byte_width * num_prefixed_fields; + auto off = ReadUInt64(p, byte_width); + if (!VerifyOffset(off, p)) return false; + auto key_byte_with = + static_cast(ReadUInt64(p + byte_width, byte_width)); + if (!VerifyByteWidth(key_byte_with)) return false; + return VerifyVector(Reference(p, byte_width, key_byte_with, FBT_VECTOR_KEY), + p - off, FBT_KEY); + } + + bool VerifyKey(const uint8_t *p) { + FLEX_CHECK_VERIFIED(p, PackedType(BIT_WIDTH_8, FBT_KEY)); + while (p < buf_ + size_) + if (*p++) return true; + return false; + } + +#undef FLEX_CHECK_VERIFIED + + bool VerifyTerminator(const String &s) { + return VerifyFromPointer(reinterpret_cast(s.c_str()), + s.size() + 1); + } + + bool VerifyRef(Reference r) { + // r.parent_width_ and r.data_ already verified. + if (!VerifyByteWidth(r.byte_width_) || !VerifyType(r.type_)) { + return false; + } + if (IsInline(r.type_)) { + // Inline scalars, don't require further verification. + return true; + } + // All remaining types are an offset. + auto off = ReadUInt64(r.data_, r.parent_width_); + if (!VerifyOffset(off, r.data_)) return false; + auto p = r.Indirect(); + if (!VerifyAlignment(p, r.byte_width_)) return false; + switch (r.type_) { + case FBT_INDIRECT_INT: + case FBT_INDIRECT_UINT: + case FBT_INDIRECT_FLOAT: return VerifyFromPointer(p, r.byte_width_); + case FBT_KEY: return VerifyKey(p); + case FBT_MAP: + return VerifyVector(r, p, FBT_NULL) && VerifyKeys(p, r.byte_width_); + case FBT_VECTOR: return VerifyVector(r, p, FBT_NULL); + case FBT_VECTOR_INT: return VerifyVector(r, p, FBT_INT); + case FBT_VECTOR_BOOL: + case FBT_VECTOR_UINT: return VerifyVector(r, p, FBT_UINT); + case FBT_VECTOR_FLOAT: return VerifyVector(r, p, FBT_FLOAT); + case FBT_VECTOR_KEY: return VerifyVector(r, p, FBT_KEY); + case FBT_VECTOR_STRING_DEPRECATED: + // Use of FBT_KEY here intentional, see elsewhere. + return VerifyVector(r, p, FBT_KEY); + case FBT_BLOB: return VerifyVector(r, p, FBT_UINT); + case FBT_STRING: + return VerifyVector(r, p, FBT_UINT) && + VerifyTerminator(String(p, r.byte_width_)); + case FBT_VECTOR_INT2: + case FBT_VECTOR_UINT2: + case FBT_VECTOR_FLOAT2: + case FBT_VECTOR_INT3: + case FBT_VECTOR_UINT3: + case FBT_VECTOR_FLOAT3: + case FBT_VECTOR_INT4: + case FBT_VECTOR_UINT4: + case FBT_VECTOR_FLOAT4: { + uint8_t len = 0; + auto vtype = ToFixedTypedVectorElementType(r.type_, &len); + if (!VerifyType(vtype)) return false; + return VerifyFromPointer(p, r.byte_width_ * len); + } + default: return false; + } + } + + public: + bool VerifyBuffer() { + if (!Check(size_ >= 3)) return false; + auto end = buf_ + size_; + auto byte_width = *--end; + auto packed_type = *--end; + return VerifyByteWidth(byte_width) && Check(end - buf_ >= byte_width) && + VerifyRef(Reference(end - byte_width, byte_width, packed_type)); + } + + private: + const uint8_t *buf_; + size_t size_; + size_t depth_; + const size_t max_depth_; + size_t num_vectors_; + const size_t max_vectors_; + bool check_alignment_; + std::vector *reuse_tracker_; +}; + +// Utility function that contructs the Verifier for you, see above for +// parameters. 
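+// Illustrative call (buffer and variable names below are hypothetical):
+//   std::vector<uint8_t> reuse;
+//   bool ok = flexbuffers::VerifyBuffer(my_buf.data(), my_buf.size(), &reuse);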
+inline bool VerifyBuffer(const uint8_t *buf, size_t buf_len, + std::vector *reuse_tracker = nullptr) { + Verifier verifier(buf, buf_len, reuse_tracker); + return verifier.VerifyBuffer(); +} + +} // namespace flexbuffers + +#if defined(_MSC_VER) +# pragma warning(pop) +#endif + +#endif // FLATBUFFERS_FLEXBUFFERS_H_ diff --git a/edgeimpulse/edge-impulse-sdk/third_party/gemmlowp/LICENSE b/edgeimpulse/edge-impulse-sdk/third_party/gemmlowp/LICENSE new file mode 100644 index 0000000..d645695 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/third_party/gemmlowp/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. 
The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/edgeimpulse/edge-impulse-sdk/third_party/gemmlowp/fixedpoint/fixedpoint.h b/edgeimpulse/edge-impulse-sdk/third_party/gemmlowp/fixedpoint/fixedpoint.h new file mode 100644 index 0000000..51b5aff --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/third_party/gemmlowp/fixedpoint/fixedpoint.h @@ -0,0 +1,900 @@ +// Copyright 2015 The Gemmlowp Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// fixedpoint.h: fixed-point arithmetic, with basic operations and +// a few math functions such as tanh. + +#ifndef GEMMLOWP_INTERNAL_FIXEDPOINT_H_ +#define GEMMLOWP_INTERNAL_FIXEDPOINT_H_ + +#include +#include +#include +#include +#include + +#include "../internal/detect_platform.h" + +namespace gemmlowp { + +// Part 1: Low-level integer-arithmetic primitives. +// The implementations here are generic implementations valid for +// scalar types (e.g. std::int32_t). Architecture-specific SIMD types +// (e.g. NEON int32x4_t) may be supported by providing +// specializations for them in separate files. +// +// The purpose of these primitives is two-fold: +// - They will be used to implement higher-level fixed-point +// abstractions, namely the FixedPoint class and its arithmetic +// operators. +// - They will be directly used to implement some more involved +// fixed-point computations, e.g. the fixed-point implementation +// of math functions such as tanh. + +// Some compile-time traits around raw types to handle SIMD aspects: +// number of lanes, underlying scalar type. 
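+// For example, FixedPointRawTypeTraits<std::int32_t> below reports kLanes == 1,
+// while the NEON specialization for int32x4_t in fixedpoint_neon.h reports
+// kLanes == 4 with the same std::int32_t ScalarRawType.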
+template <typename tRawType>
+struct FixedPointRawTypeTraits {};
+
+template <>
+struct FixedPointRawTypeTraits<std::int32_t> {
+  typedef std::int32_t ScalarRawType;
+  static constexpr int kLanes = 1;
+};
+
+template <>
+struct FixedPointRawTypeTraits<std::int16_t> {
+  typedef std::int16_t ScalarRawType;
+  static constexpr int kLanes = 1;
+};
+
+// Returns a SIMD value duplicating a scalar value across all lanes.
+template <typename tRawType>
+tRawType Dup(typename FixedPointRawTypeTraits<tRawType>::ScalarRawType x) {
+  return x;
+}
+
+// Plain bit-wise AND
+template <typename tIntegerType>
+tIntegerType BitAnd(tIntegerType a, tIntegerType b) {
+  return a & b;
+}
+
+// Plain bit-wise OR
+template <typename tIntegerType>
+tIntegerType BitOr(tIntegerType a, tIntegerType b) {
+  return a | b;
+}
+
+// Plain bit-wise XOR
+template <typename tIntegerType>
+tIntegerType BitXor(tIntegerType a, tIntegerType b) {
+  return a ^ b;
+}
+
+// Plain bit-wise NOT
+template <typename tIntegerType>
+tIntegerType BitNot(tIntegerType a) {
+  return ~a;
+}
+
+// Integer addition. Not saturating. Overflow is undefined behavior.
+template <typename tIntegerType>
+tIntegerType Add(tIntegerType a, tIntegerType b) {
+  return a + b;
+}
+
+// Integer multiplication. Not saturating. Overflow is undefined behavior.
+template <typename tIntegerType>
+tIntegerType Mul(tIntegerType a, tIntegerType b) {
+  return a * b;
+}
+
+// Integer subtraction. Not saturating. Overflow is undefined behavior.
+template <typename tIntegerType>
+tIntegerType Sub(tIntegerType a, tIntegerType b) {
+  return a - b;
+}
+
+// Integer unary negative. Not saturating. Overflow is undefined behavior.
+template <typename tIntegerType>
+tIntegerType Neg(tIntegerType a) {
+  return -a;
+}
+
+// Integer arithmetic left-shift, equivalent to multiplying with a power of two.
+// Negative values are OK. In case of overflow, no Undefined
+// Behavior, but the results are implementation-defined (in practice,
+// they currently are saturated, but we make no commitment to that). The idea
+// is that the caller will want to implement the overflowing cases with
+// saturation with compare-and-mask, so we don't care about the results
+// in the overflow case, we just want to avoid undefined behavior.
+//
+// tIntegerType may be int32 or any narrower signed type.
+template <typename tIntegerType>
+tIntegerType ShiftLeft(tIntegerType a, int offset) {
+  const std::int64_t wide_a = static_cast<std::int64_t>(a);
+  const std::int64_t wide_shifted = wide_a * (1 << offset);
+  const auto min = std::numeric_limits<tIntegerType>::min();
+  const auto max = std::numeric_limits<tIntegerType>::max();
+  return wide_shifted < min
+             ? min
+             : wide_shifted > max ? max
+                                  : static_cast<tIntegerType>(wide_shifted);
+}
+
+// Integer arithmetic right-shift. Not rounding.
+// Relying on implementation-defined, but in-practice-consistent,
+// C++ compiler behavior.
+template <typename tIntegerType>
+tIntegerType ShiftRight(tIntegerType a, int offset) {
+  return a >> offset;
+}
+
+// Each bit of the result is set to the corresponding bit of either then_val or
+// else_val depending on whether the corresponding bit of if_mask is set.
+// Equivalent to the VBSL instruction in ARM NEON.
+template <typename tIntegerType>
+tIntegerType SelectUsingMask(tIntegerType if_mask, tIntegerType then_val,
+                             tIntegerType else_val) {
+  return BitXor(BitAnd(if_mask, then_val), BitAnd(BitNot(if_mask), else_val));
+}
+
+// For each input scalar, the corresponding bits of the result are set if the
+// input scalar is non-zero.
+template <typename tIntegerType>
+tIntegerType MaskIfNonZero(tIntegerType a) {
+  static constexpr tIntegerType zero = 0;
+  return a ? BitNot(zero) : zero;
+}
+
+// For each input scalar, the corresponding bits of the result are set if the
+// input scalar is zero.
+template <typename tIntegerType>
+tIntegerType MaskIfZero(tIntegerType a) {
+  return MaskIfNonZero<tIntegerType>(!a);
+}
+
+// For each pair of input scalars, the corresponding bits of the result are
+// set if the input scalars are equal.
+template +tIntegerType MaskIfEqual(tIntegerType a, tIntegerType b) { + return MaskIfNonZero(a == b); +} + +// For each pair of input scalars, the corresponding bits of the result are +// set if the input scalars are not equal. +template +tIntegerType MaskIfNotEqual(tIntegerType a, tIntegerType b) { + return MaskIfNonZero(a != b); +} + +// For each pair of input scalars, the corresponding bits of the result are +// set if the input scalars a, b satisfy a > b. +template +tIntegerType MaskIfGreaterThan(tIntegerType a, tIntegerType b) { + return MaskIfNonZero(a > b); +} + +// For each pair of input scalars, the corresponding bits of the result are +// set if the input scalars a, b satisfy a >= b. +template +tIntegerType MaskIfGreaterThanOrEqual(tIntegerType a, tIntegerType b) { + return MaskIfNonZero(a >= b); +} + +// For each pair of input scalars, the corresponding bits of the result are +// set if the input scalars a, b satisfy a < b. +template +tIntegerType MaskIfLessThan(tIntegerType a, tIntegerType b) { + return MaskIfNonZero(a < b); +} + +// For each pair of input scalars, the corresponding bits of the result are +// set if the input scalars a, b satisfy a <= b. +template +tIntegerType MaskIfLessThanOrEqual(tIntegerType a, tIntegerType b) { + return MaskIfNonZero(a <= b); +} + +// Returns true if all of the input scalars are nonzero. +// This function may currently assume that each of the input scalars has either +// all or none of its bits set. Otherwise, its behavior is currently undefined. +template +bool All(tIntegerType a) { + return a; +} + +// Returns true if any of the input scalars are nonzero. +// This function may currently assume that each of the input scalars has either +// all or none of its bits set. Otherwise, its behavior is currently undefined. +template +bool Any(tIntegerType a) { + return a; +} + +// Returns (a+b)/2, rounded to the nearest integer. +// Equivalent to VRHADD in the ARM NEON instruction set. +template +IntegerType RoundingHalfSum(IntegerType a, IntegerType b) { + static_assert(std::is_same::value, "unimplemented"); + (void)b; + return a; +} + +template <> +inline std::int32_t RoundingHalfSum(std::int32_t a, std::int32_t b) { + std::int64_t a64 = a; + std::int64_t b64 = b; + std::int64_t sum = a64 + b64; + std::int64_t sign = sum >= 0 ? 1 : -1; + return static_cast((sum + sign) / 2); +} + +template <> +inline std::int16_t RoundingHalfSum(std::int16_t a, std::int16_t b) { + std::int32_t a32 = a; + std::int32_t b32 = b; + std::int32_t sum = a32 + b32; + std::int32_t sign = sum >= 0 ? 1 : -1; + return static_cast((sum + sign) / 2); +} + +template +IntegerType SaturatingAdd(IntegerType a, IntegerType b) { + static_assert(std::is_same::value, "unimplemented"); + (void)b; + return a; +} + +// So far this is only needed for int16. +template <> +inline std::int16_t SaturatingAdd(std::int16_t a, std::int16_t b) { + std::int32_t a32 = a; + std::int32_t b32 = b; + std::int32_t sum = a32 + b32; + return static_cast( + std::min(static_cast(32767), + std::max(static_cast(-32768), sum))); +} + +// Returns a+b, saturating if the integers are 16bit or narrower, +// otherwise just a plain addition. 
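+// For example, AddSaturatingIf16Bit(std::int16_t(30000), std::int16_t(10000))
+// yields 32767, whereas with std::int32_t arguments it is just Add.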
+template +struct AddSaturatingIf16BitImpl { + static IntegerType Run(IntegerType a, IntegerType b) { return Add(a, b); } +}; +template +struct AddSaturatingIf16BitImpl { + static IntegerType Run(IntegerType a, IntegerType b) { + return SaturatingAdd(a, b); + } +}; +template +IntegerType AddSaturatingIf16Bit(IntegerType a, IntegerType b) { + using ScalarType = + typename FixedPointRawTypeTraits::ScalarRawType; + return AddSaturatingIf16BitImpl::Run(a, + b); +} + +// Returns the integer that represents the product of two fixed-point +// numbers, interpreting all integers as fixed-point values in the +// interval [-1, 1), rounding to the nearest value, and saturating +// -1 * -1 to the maximum value (since 1 is not in the half-open +// interval [-1, 1)). +// +// [The explanation below specializes to std::int32_t for example purpose.] +// +// The mapping between IntegerType and the interval [-1, 1) is unique and +// implied by IntegerType, which is assumed to be signed. For example, +// for IntegerType==std::int32_t, the mapping is +// real_value = integer_value / 2^31. +// So in this case, and leaving aside rounding and saturating, this +// function computes ((a / 2^31) * (b / 2^31)) * 2^31, which simplifies to +// (a * b) / 2^31. +// +// The 'doubling' part in the name of this function comes from the fact that +// this operation is very close to a "multiply-high" operation, keeping only +// the top half bits, except that that would be effectively computing +// (a * b) / 2^32, +// so here we are computing 2x that, since +// 1/2^31 = 2 * 1/2^32. +// The idea is to use all of the available 32 bits in the destination int32 +// value. +// +// [End of the explanation specializing to int32.] +// +// This is equivalent to the VQRDMULH instruction in ARM NEON. +template +IntegerType SaturatingRoundingDoublingHighMul(IntegerType a, IntegerType b) { + static_assert(std::is_same::value, "unimplemented"); + (void)b; + return a; +} + +// This function implements the same computation as the ARMv7 NEON VQRDMULH +// instruction. +template <> +inline std::int32_t SaturatingRoundingDoublingHighMul(std::int32_t a, + std::int32_t b) { + bool overflow = a == b && a == std::numeric_limits::min(); + std::int64_t a_64(a); + std::int64_t b_64(b); + std::int64_t ab_64 = a_64 * b_64; + std::int32_t nudge = ab_64 >= 0 ? (1 << 30) : (1 - (1 << 30)); + std::int32_t ab_x2_high32 = + static_cast((ab_64 + nudge) / (1ll << 31)); + return overflow ? std::numeric_limits::max() : ab_x2_high32; +} + +template <> +inline std::int16_t SaturatingRoundingDoublingHighMul(std::int16_t a, + std::int16_t b) { + bool overflow = a == b && a == std::numeric_limits::min(); + std::int32_t a_32(a); + std::int32_t b_32(b); + std::int32_t ab_32 = a_32 * b_32; + std::int16_t nudge = ab_32 >= 0 ? (1 << 14) : (1 - (1 << 14)); + std::int16_t ab_x2_high16 = + static_cast((ab_32 + nudge) / (1 << 15)); + return overflow ? std::numeric_limits::max() : ab_x2_high16; +} + +// Correctly-rounded-to-nearest division by a power-of-two. +// Also known as a rounding arithmetic right shift. 
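+// For example, RoundingDivideByPOT(127, 4) == 8 (127 / 16 = 7.9375) and
+// RoundingDivideByPOT(-127, 4) == -8; ties round away from zero, so
+// RoundingDivideByPOT(3, 1) == 2 and RoundingDivideByPOT(-3, 1) == -2.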
+template +inline IntegerType RoundingDivideByPOT(IntegerType x, int exponent) { + assert(exponent >= 0); + assert(exponent <= 31); + const IntegerType mask = Dup((1ll << exponent) - 1); + const IntegerType zero = Dup(0); + const IntegerType one = Dup(1); + const IntegerType remainder = BitAnd(x, mask); + const IntegerType threshold = + Add(ShiftRight(mask, 1), BitAnd(MaskIfLessThan(x, zero), one)); + return Add(ShiftRight(x, exponent), + BitAnd(MaskIfGreaterThan(remainder, threshold), one)); +} + +// Returns the product of a run-time integer value by a compile-time power +// of two, with either a positive exponent (equivalent to an arithmetic +// left shift, saturating) or a negative exponent (equivalent to an arithmetic +// right shift, rounding to nearest). +template 0 ? 1 : Exponent < 0 ? -1 : 0)> +struct ImplSaturatingRoundingMultiplyByPOT {}; + +template +struct ImplSaturatingRoundingMultiplyByPOT { + static IntegerType eval(IntegerType x) { return x; } +}; + +template +struct ImplSaturatingRoundingMultiplyByPOT { + static IntegerType eval(IntegerType x) { + using ScalarIntegerType = + typename FixedPointRawTypeTraits::ScalarRawType; + const IntegerType min = + Dup(std::numeric_limits::min()); + const IntegerType max = + Dup(std::numeric_limits::max()); + const int ScalarIntegerTypeBits = 8 * sizeof(ScalarIntegerType); + + const std::int32_t threshold = + ((1 << (ScalarIntegerTypeBits - 1 - Exponent)) - 1); + const IntegerType positive_mask = + MaskIfGreaterThan(x, Dup(threshold)); + const IntegerType negative_mask = + MaskIfLessThan(x, Dup(-threshold)); + + IntegerType result = ShiftLeft(x, Exponent); + result = SelectUsingMask(positive_mask, max, result); + result = SelectUsingMask(negative_mask, min, result); + return result; + } +}; + +template +struct ImplSaturatingRoundingMultiplyByPOT { + static IntegerType eval(IntegerType x) { + return RoundingDivideByPOT(x, -Exponent); + } +}; + +template +IntegerType SaturatingRoundingMultiplyByPOT(IntegerType x) { + return ImplSaturatingRoundingMultiplyByPOT::eval(x); +} + +// Part 2: the FixedPoint class. + +// A FixedPoint object represents a fixed-point value stored in the underlying +// integer type tRawType, if tRawType is a plain scalar integer type. +// Alternatively, tRawType may be a SIMD type (e.g. NEON int32x4_t) in which +// case a FixedPoint object represents a corresponding SIMD vector of fixed +// point values. +// +// tIntegerBits describes the range of the fixed-point format: if +// tIntegerBits == m then the range of representable values is the half-open +// interval [-2^m; 2^m) where the open boundary on the right side means that +// 2^m is not representable (how close the maximum representable value is to +// it, depends on bit-depth of tRawType). +// +// In "Q format notation", +// https://en.wikipedia.org/wiki/Q_(number_format) +// we are describing the format +// Qm.n +// where +// m = tIntegerBits +// and +// n = NumberOfBits(tRawType) - (m + 1) +// Note that the (m + 1) in the above line is because we adopt the convention +// that we count the integer bits exclusively of the sign bit; so (m + 1) is +// the total number of integer bits inclusive of the sign bit. +// +// Accordingly, the number of integral representable values in our range +// [-2^m ; 2^m) +// is equal to 2^(m+1). 
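+//
+// For example, FixedPoint<std::int32_t, 0> is Q0.31: the raw value 1 << 30
+// represents 0.5 and the representable range is [-1, 1). With
+// FixedPoint<std::int32_t, 5> (Q5.26) the same real value 0.5 is stored as
+// the raw value 1 << 25, and the range widens to [-32, 32).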
+template +class FixedPoint { + public: + typedef tRawType RawType; + + typedef FixedPointRawTypeTraits RawTypeTraits; + typedef typename RawTypeTraits::ScalarRawType ScalarRawType; + + static constexpr int kTotalBits = 8 * sizeof(ScalarRawType); + static constexpr int kIntegerBits = tIntegerBits; + static constexpr int kFractionalBits = kTotalBits - 1 - kIntegerBits; + static_assert(kIntegerBits >= 0 && kIntegerBits < kTotalBits, + "bad IntegerBits"); + + typedef FixedPoint ScalarFixedPointType; + + static const ScalarRawType ScalarRawMin() { + return std::numeric_limits::min(); + } + + static const ScalarRawType ScalarRawMax() { + return std::numeric_limits::max(); + } + + static const ScalarRawType RawMin() { + return VectorFromScalar(ScalarRawMin()); + } + + static const ScalarRawType RawMax() { + return VectorFromScalar(ScalarRawMax()); + } + + static FixedPoint FromRaw(RawType x) { + FixedPoint retval; + retval.raw() = x; + return retval; + } + + static FixedPoint FromScalarRaw(ScalarRawType x) { + FixedPoint retval; + retval.raw() = Dup(x); + return retval; + } + + static FixedPoint FromScalarFixedPoint(ScalarFixedPointType x) { + return FromScalarRaw(x.raw()); + } + + template + static FixedPoint ConstantPOT() { + static constexpr int kOffset = kFractionalBits + Exponent; + static_assert( + kOffset < 31, + "Constant not exactly representable in this fixed-point format"); + return FromScalarRaw(ScalarRawType(1) << kOffset); + } + + static FixedPoint Zero() { return FromScalarRaw(0); } + + static FixedPoint One() { + return FromScalarRaw( + kIntegerBits == 0 + ? ScalarRawMax() + : (ScalarRawType(1) << (kIntegerBits == 0 ? 0 : kFractionalBits))); + } + + static FixedPoint FromDouble(double x) { + const double min_bound = static_cast(ScalarRawMin()); + const double max_bound = static_cast(ScalarRawMax()); + return FromScalarRaw(static_cast(std::min( + std::max(round(x * static_cast(1ll << kFractionalBits)), + min_bound), + max_bound))); + } + + RawType raw() const { return i_; } + RawType& raw() { return i_; } + + private: + RawType i_; +}; + +// Part 3: implementation of arithmetic operators for the +// FixedPoint class, and a few related functions. + +// A FixedPoint multiplication is just a +// SaturatingRoundingDoublingHighMul operation on the underlying +// raw integer values. The IntegerBits simply add up, as is obvious +// from the fact that the range is [-2^IntegerBits, 2^IntegerBits). +template +FixedPoint operator*( + FixedPoint a, + FixedPoint b) { + FixedPoint c; + c.raw() = SaturatingRoundingDoublingHighMul(a.raw(), b.raw()); + return c; +} + +// Tweaking IntegerBits gives exact multiplication by a power of two. +template +FixedPoint ExactMulByPot( + FixedPoint a) { + FixedPoint c; + c.raw() = a.raw(); + return c; +} + +// If we want to leave IntegerBits fixed, then multiplication +// by a power of two has to be saturating/rounding, not exact anymore. +template +FixedPoint SaturatingRoundingMultiplyByPOT( + FixedPoint a) { + return FixedPoint::FromRaw( + SaturatingRoundingMultiplyByPOT(a.raw())); +} + +// Generic arithmetic operators. 
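+// The macros below forward each operator to the raw-integer primitive of the
+// same name from Part 1. Multiplication, defined above, is the special case:
+// multiplying two FixedPoint<std::int32_t, 0> values that each represent 0.5
+// (raw 1 << 30) yields raw 1 << 29, i.e. 0.25, via
+// SaturatingRoundingDoublingHighMul.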
+ +#define MAKE_FIXEDPOINT_UNARY_FUNC(FuncName, ImplFuncName) \ + template \ + FixedPoint FuncName( \ + FixedPoint a) { \ + return FixedPoint::FromRaw(ImplFuncName(a.raw())); \ + } + +#define MAKE_FIXEDPOINT_BINARY_FUNC(FuncName, ImplFuncName) \ + template \ + FixedPoint FuncName( \ + FixedPoint a, \ + FixedPoint b) { \ + return FixedPoint::FromRaw( \ + ImplFuncName(a.raw(), b.raw())); \ + } + +MAKE_FIXEDPOINT_UNARY_FUNC(operator-, Neg) +MAKE_FIXEDPOINT_UNARY_FUNC(operator~, BitNot) +MAKE_FIXEDPOINT_BINARY_FUNC(operator+, Add) +MAKE_FIXEDPOINT_BINARY_FUNC(operator-, Sub) +MAKE_FIXEDPOINT_BINARY_FUNC(operator&, BitAnd) +MAKE_FIXEDPOINT_BINARY_FUNC(operator^, BitXor) +MAKE_FIXEDPOINT_BINARY_FUNC(operator|, BitOr) +MAKE_FIXEDPOINT_BINARY_FUNC(RoundingHalfSum, RoundingHalfSum) + +#undef MAKE_FIXEDPOINT_UNARY_FUNC +#undef MAKE_FIXEDPOINT_BINARY_FUNC + +#define MAKE_FIXEDPOINT_UNARY_FUNC_RETURNING_RAW(FuncName) \ + template \ + tRawType FuncName(FixedPoint a) { \ + return FuncName(a.raw()); \ + } + +#define MAKE_FIXEDPOINT_BINARY_FUNC_RETURNING_RAW(FuncName) \ + template \ + tRawType FuncName(FixedPoint a, \ + FixedPoint b) { \ + return FuncName(a.raw(), b.raw()); \ + } + +MAKE_FIXEDPOINT_UNARY_FUNC_RETURNING_RAW(MaskIfZero) +MAKE_FIXEDPOINT_UNARY_FUNC_RETURNING_RAW(MaskIfNonZero) +MAKE_FIXEDPOINT_BINARY_FUNC_RETURNING_RAW(MaskIfEqual) +MAKE_FIXEDPOINT_BINARY_FUNC_RETURNING_RAW(MaskIfNotEqual) +MAKE_FIXEDPOINT_BINARY_FUNC_RETURNING_RAW(MaskIfGreaterThan) +MAKE_FIXEDPOINT_BINARY_FUNC_RETURNING_RAW(MaskIfGreaterThanOrEqual) +MAKE_FIXEDPOINT_BINARY_FUNC_RETURNING_RAW(MaskIfLessThan) +MAKE_FIXEDPOINT_BINARY_FUNC_RETURNING_RAW(MaskIfLessThanOrEqual) + +#undef MAKE_FIXEDPOINT_UNARY_FUNC_RETURNING_RAW +#undef MAKE_FIXEDPOINT_BINARY_FUNC_RETURNING_RAW + +template +FixedPoint SelectUsingMask( + tRawType if_mask, FixedPoint then_val, + FixedPoint else_val) { + return FixedPoint::FromRaw( + SelectUsingMask(if_mask, then_val.raw(), else_val.raw())); +} + +template +bool operator==(FixedPoint a, + FixedPoint b) { + return All(MaskIfEqual(a.raw(), b.raw())); +} + +template +bool operator!=(FixedPoint a, + FixedPoint b) { + return !(a == b); +} + +template +FixedPoint SaturatingAdd( + FixedPoint a, + FixedPoint b) { + return FixedPoint::FromRaw( + SaturatingAdd(a.raw(), b.raw())); +} + +template +FixedPoint AddSaturatingIf16Bit( + FixedPoint a, + FixedPoint b) { + return FixedPoint::FromRaw( + AddSaturatingIf16Bit(a.raw(), b.raw())); +} + +// Conversion to floating-point. +template +double ToDouble(FixedPoint x) { + static_assert(FixedPointRawTypeTraits::kLanes == 1, + "not applicable to SIMD types"); + typedef FixedPoint F; + return x.raw() / static_cast(1ll << F::kFractionalBits); +} + +// Rescale changes the number of IntegerBits and updates the underlying +// raw integer value accordingly. +template +FixedPoint Rescale( + FixedPoint x) { + static constexpr int kExponent = tIntegerBitsSrc - tIntegerBitsDst; + FixedPoint result; + result.raw() = SaturatingRoundingMultiplyByPOT(x.raw()); + return result; +} + +// CheckedFixedPointConstant allows to specify fixed-point constants +// initialized as real numbers, in a way that does not compile floating-point +// arithmetic in production code, yet still checks agreement with the +// floating-point expressions when asserts are enabled. +// +// The raw integer value provided is always a int32, encoding a 32-bit +// fixed-point value, regardless of the actual Scalar type. 
This allows +// writing generic code that applies just as well to the 32-bit and 16-bit +// cases. In the 16-bit case, the raw integer value is internally +// rounding-shifted by 16 bits to the right. +template +inline typename FixedPointType::ScalarRawType RescaleConstantInitializer( + std::int32_t int32_value) { + typedef typename FixedPointType::ScalarRawType ScalarRawType; + static constexpr int ScalarTypeBits = 8 * sizeof(ScalarRawType); + return static_cast( + RoundingDivideByPOT(int32_value, 32 - ScalarTypeBits)); +} +#ifdef GEMMLOWP_ENABLE_FIXEDPOINT_CONSTANTS_CHECKS +template +FixedPointType CheckedFixedPointConstant(std::int32_t raw_value, + double double_value) { + const FixedPointType result = FixedPointType::FromScalarRaw(raw_value); + assert(result == FixedPointType::FromDouble(double_value)); + return result; +} +#define GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(FixedPointType, \ + ScalarRawInt32Value, DoubleValue) \ + (gemmlowp::CheckedFixedPointConstant( \ + gemmlowp::RescaleConstantInitializer( \ + ScalarRawInt32Value), \ + DoubleValue)) + +#else +#define GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(FixedPointType, \ + ScalarRawInt32Value, DoubleValue) \ + (FixedPointType::FromScalarRaw( \ + gemmlowp::RescaleConstantInitializer( \ + ScalarRawInt32Value))) +#endif + +// Implementation of exponential function. + +// Returns exp(x) for x in [-1/4, 0). +template +FixedPoint exp_on_interval_between_negative_one_quarter_and_0_excl( + FixedPoint a) { + typedef FixedPoint F; + const F constant_term = + GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(F, 1895147668, std::exp(-1.0 / 8.0)); + const F constant_1_over_3 = + GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(F, 715827883, 1.0 / 3.0); + // We're evaluating a Taylor expansion around -1/8, so we do the change of + // variable: x = a + 1/8. + // In fixed-point with 0 integer bits, 1/8 is represented by 1 << 28. + F x = a + F::template ConstantPOT<-3>(); + F x2 = x * x; + F x3 = x2 * x; + F x4 = x2 * x2; + F x4_over_4 = SaturatingRoundingMultiplyByPOT<-2>(x4); + F x4_over_24_plus_x3_over_6_plus_x2_over_2 = + SaturatingRoundingMultiplyByPOT<-1>( + ((x4_over_4 + x3) * constant_1_over_3) + x2); + return AddSaturatingIf16Bit( + constant_term, + constant_term * (x + x4_over_24_plus_x3_over_6_plus_x2_over_2)); +} + +// Returns exp(x) for x < 0. +template +FixedPoint exp_on_negative_values( + FixedPoint a) { + typedef FixedPoint InputF; + typedef FixedPoint ResultF; + static constexpr int kFractionalBits = InputF::kFractionalBits; + static constexpr int kIntegerBits = InputF::kIntegerBits; + const InputF kOneQuarter = InputF::template ConstantPOT<-2>(); + InputF mask = kOneQuarter - InputF::FromScalarRaw(1); + InputF a_mod_quarter_minus_one_quarter = (a & mask) - kOneQuarter; + ResultF result = exp_on_interval_between_negative_one_quarter_and_0_excl( + Rescale<0>(a_mod_quarter_minus_one_quarter)); + tRawType remainder = (a_mod_quarter_minus_one_quarter - a).raw(); + +#define GEMMLOWP_EXP_BARREL_SHIFTER(Exponent, FixedPointMultiplier) \ + if (kIntegerBits > Exponent) { \ + const ResultF kMultiplier = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT( \ + ResultF, FixedPointMultiplier, std::exp(-std::pow(2.0, Exponent))); \ + static constexpr int kShiftAmount = \ + kIntegerBits > Exponent ? 
kFractionalBits + Exponent : 0; \ + result = SelectUsingMask( \ + MaskIfNonZero(BitAnd(remainder, Dup(1 << kShiftAmount))), \ + result * kMultiplier, result); \ + } + + GEMMLOWP_EXP_BARREL_SHIFTER(-2, 1672461947); + GEMMLOWP_EXP_BARREL_SHIFTER(-1, 1302514674); + GEMMLOWP_EXP_BARREL_SHIFTER(+0, 790015084); + GEMMLOWP_EXP_BARREL_SHIFTER(+1, 290630308); + GEMMLOWP_EXP_BARREL_SHIFTER(+2, 39332535); + GEMMLOWP_EXP_BARREL_SHIFTER(+3, 720401); + GEMMLOWP_EXP_BARREL_SHIFTER(+4, 242); + +#undef GEMMLOWP_EXP_BARREL_SHIFTER + + static constexpr int clampB = kIntegerBits > 5 ? 36 - kIntegerBits : 0; + if (kIntegerBits > 5) { + const InputF clamp = + GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(InputF, -(1 << clampB), -32.0); + result = SelectUsingMask(MaskIfLessThan(a, clamp), ResultF::Zero(), result); + } + + result = SelectUsingMask(MaskIfZero(a), ResultF::One(), result); + return result; +} + +// Implementation of tanh: (1 - exp(-2x)) / (1 + exp(-2x)). + +// Returns (1 - x) / (1 + x) for x in (0, 1). +template +FixedPoint one_minus_x_over_one_plus_x_for_x_in_0_1( + FixedPoint a) { + typedef FixedPoint F0; + typedef FixedPoint F2; + F0 half_denominator = RoundingHalfSum(a, F0::One()); + // Newton-Raphson division + // https://en.wikipedia.org/wiki/Division_algorithm#Newton.E2.80.93Raphson_division + // Refer to that page for the logic behind the 48/17 and 32/17 constants. + const F2 constant_48_over_17 = + GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(F2, 1515870810, 48.0 / 17.0); + const F2 constant_neg_32_over_17 = + GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(F2, -1010580540, -32.0 / 17.0); + F2 x = constant_48_over_17 + half_denominator * constant_neg_32_over_17; + for (int i = 0; i < 3; i++) { + F2 half_denominator_times_x = half_denominator * x; + F2 one_minus_half_denominator_times_x = + F2::One() - half_denominator_times_x; + x = x + Rescale<2>(x * one_minus_half_denominator_times_x); + } + return Rescale<0>(x - F2::One()); +} + +// Returns -tanh(x) for x < 0. +template +FixedPoint neg_tanh_on_negative_values( + FixedPoint a) { + return one_minus_x_over_one_plus_x_for_x_in_0_1( + exp_on_negative_values(ExactMulByPot<1>(a))); +} + +// Returns tanh(x) for any x. +template +FixedPoint tanh(FixedPoint a) { + typedef FixedPoint InputF; + typedef FixedPoint ResultF; + tRawType mask_if_negative = MaskIfLessThan(a, InputF::Zero()); + tRawType mask_if_zero = MaskIfZero(a); + InputF n = SelectUsingMask(mask_if_negative, a, -a); + ResultF t = neg_tanh_on_negative_values(n); + return SelectUsingMask(mask_if_zero, ResultF::Zero(), + SelectUsingMask(mask_if_negative, -t, t)); +} + +// Implementation of logistic function. + +// Returns 1 / (1 + x) for x in (0, 1). +template +FixedPoint one_over_one_plus_x_for_x_in_0_1( + FixedPoint a) { + typedef FixedPoint F0; + typedef FixedPoint F2; + F0 half_denominator = RoundingHalfSum(a, F0::One()); + // Newton-Raphson division + // https://en.wikipedia.org/wiki/Division_algorithm#Newton.E2.80.93Raphson_division + // Refer to that page for the logic behind the 48/17 and 32/17 constants. 
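+  // In short: x is seeded with the linear estimate 48/17 - 32/17 * D for
+  // D = half_denominator in [1/2, 1), and each x += x * (1 - D * x) step
+  // roughly squares the error, so the three iterations below give about
+  // 30 bits of precision.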
+ const F2 constant_48_over_17 = + GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(F2, 1515870810, 48.0 / 17.0); + const F2 constant_neg_32_over_17 = + GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(F2, -1010580540, -32.0 / 17.0); + F2 x = constant_48_over_17 + half_denominator * constant_neg_32_over_17; + for (int i = 0; i < 3; i++) { + F2 half_denominator_times_x = half_denominator * x; + F2 one_minus_half_denominator_times_x = + F2::One() - half_denominator_times_x; + x = x + Rescale<2>(x * one_minus_half_denominator_times_x); + } + return Rescale<0>(ExactMulByPot<-1>(x)); +} + +// Returns logistic(x) = 1 / (1 + exp(-x)) for x > 0. +template +FixedPoint logistic_on_positive_values( + FixedPoint a) { + return one_over_one_plus_x_for_x_in_0_1(exp_on_negative_values(-a)); +} + +// Returns logistic(x) = 1 / (1 + exp(-x)) for any x. +template +FixedPoint logistic(FixedPoint a) { + typedef FixedPoint InputF; + typedef FixedPoint ResultF; + tRawType mask_if_positive = MaskIfGreaterThan(a, InputF::Zero()); + tRawType mask_if_zero = MaskIfZero(a); + InputF abs_input = SelectUsingMask(mask_if_positive, a, -a); + ResultF result_if_positive = logistic_on_positive_values(abs_input); + ResultF result_if_negative = ResultF::One() - result_if_positive; + const ResultF one_half = + GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(ResultF, 1 << 30, 0.5); + return SelectUsingMask(mask_if_zero, one_half, + SelectUsingMask(mask_if_positive, result_if_positive, + result_if_negative)); +} + +} // end namespace gemmlowp + +#ifdef GEMMLOWP_NEON +#include "./fixedpoint_neon.h" +#elif defined(GEMMLOWP_AVX2) +#include "./fixedpoint_avx.h" +#elif defined(GEMMLOWP_SSE4) +#include "./fixedpoint_sse.h" +#elif defined(GEMMLOWP_MSA) +#include "./fixedpoint_msa.h" +#endif + +#endif // GEMMLOWP_INTERNAL_FIXEDPOINT_H_ diff --git a/edgeimpulse/edge-impulse-sdk/third_party/gemmlowp/fixedpoint/fixedpoint_neon.h b/edgeimpulse/edge-impulse-sdk/third_party/gemmlowp/fixedpoint/fixedpoint_neon.h new file mode 100644 index 0000000..646c590 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/third_party/gemmlowp/fixedpoint/fixedpoint_neon.h @@ -0,0 +1,331 @@ +// Copyright 2015 The Gemmlowp Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// fixedpoint_neon.h: optimized NEON specializations of the templates +// in fixedpoint.h. 
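+// This header is pulled in at the bottom of fixedpoint.h when GEMMLOWP_NEON
+// is defined; it specializes the scalar primitives for int32x4_t (4 lanes)
+// and int16x8_t (8 lanes).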
+ +#ifndef GEMMLOWP_INTERNAL_FIXEDPOINT_NEON_H_ +#define GEMMLOWP_INTERNAL_FIXEDPOINT_NEON_H_ + +#include + +namespace gemmlowp { + +template <> +struct FixedPointRawTypeTraits { + typedef std::int32_t ScalarRawType; + static constexpr int kLanes = 4; +}; + +template <> +struct FixedPointRawTypeTraits { + typedef std::int16_t ScalarRawType; + static constexpr int kLanes = 8; +}; + +template <> +inline int32x4_t BitAnd(int32x4_t a, int32x4_t b) { + return vandq_s32(a, b); +} + +template <> +inline int16x8_t BitAnd(int16x8_t a, int16x8_t b) { + return vandq_s16(a, b); +} + +template <> +inline int32x4_t BitOr(int32x4_t a, int32x4_t b) { + return vorrq_s32(a, b); +} + +template <> +inline int16x8_t BitOr(int16x8_t a, int16x8_t b) { + return vorrq_s16(a, b); +} + +template <> +inline int32x4_t BitXor(int32x4_t a, int32x4_t b) { + return veorq_s32(a, b); +} + +template <> +inline int16x8_t BitXor(int16x8_t a, int16x8_t b) { + return veorq_s16(a, b); +} + +template <> +inline int32x4_t BitNot(int32x4_t a) { + return veorq_s32(a, vdupq_n_s32(-1)); +} + +template <> +inline int16x8_t BitNot(int16x8_t a) { + return veorq_s16(a, vdupq_n_s16(-1)); +} + +template <> +inline int32x4_t Add(int32x4_t a, int32x4_t b) { + return vaddq_s32(a, b); +} + +template <> +inline int16x8_t Add(int16x8_t a, int16x8_t b) { + return vaddq_s16(a, b); +} + +template <> +inline int32x4_t Sub(int32x4_t a, int32x4_t b) { + return vsubq_s32(a, b); +} + +template <> +inline int16x8_t Sub(int16x8_t a, int16x8_t b) { + return vsubq_s16(a, b); +} + +template <> +inline int32x4_t Neg(int32x4_t a) { + return vnegq_s32(a); +} + +template <> +inline int16x8_t Neg(int16x8_t a) { + return vnegq_s16(a); +} + +template <> +inline int32x4_t ShiftLeft(int32x4_t a, int offset) { + return vshlq_s32(a, vdupq_n_s32(offset)); +} + +template <> +inline int16x8_t ShiftLeft(int16x8_t a, int offset) { + return vshlq_s16(a, vdupq_n_s16(offset)); +} + +template <> +inline int32x4_t ShiftRight(int32x4_t a, int offset) { + return vshlq_s32(a, vdupq_n_s32(-offset)); +} + +template <> +inline int16x8_t ShiftRight(int16x8_t a, int offset) { + return vshlq_s16(a, vdupq_n_s16(-offset)); +} + +template <> +inline int32x4_t SelectUsingMask(int32x4_t if_mask, int32x4_t then_val, + int32x4_t else_val) { + return vbslq_s32(vreinterpretq_u32_s32(if_mask), then_val, else_val); +} + +template <> +inline int16x8_t SelectUsingMask(int16x8_t if_mask, int16x8_t then_val, + int16x8_t else_val) { + return vbslq_s16(vreinterpretq_u16_s16(if_mask), then_val, else_val); +} + +template <> +inline int32x4_t MaskIfEqual(int32x4_t a, int32x4_t b) { + return vreinterpretq_s32_u32(vceqq_s32(a, b)); +} + +template <> +inline int16x8_t MaskIfEqual(int16x8_t a, int16x8_t b) { + return vreinterpretq_s16_u16(vceqq_s16(a, b)); +} + +template <> +inline int32x4_t MaskIfNotEqual(int32x4_t a, int32x4_t b) { + return BitNot(MaskIfEqual(a, b)); +} + +template <> +inline int16x8_t MaskIfNotEqual(int16x8_t a, int16x8_t b) { + return BitNot(MaskIfEqual(a, b)); +} + +template <> +inline int32x4_t MaskIfZero(int32x4_t a) { + return MaskIfEqual(a, vdupq_n_s32(0)); +} + +template <> +inline int16x8_t MaskIfZero(int16x8_t a) { + return MaskIfEqual(a, vdupq_n_s16(0)); +} + +template <> +inline int32x4_t MaskIfNonZero(int32x4_t a) { + return vreinterpretq_s32_u32(vtstq_s32(a, a)); +} + +template <> +inline int16x8_t MaskIfNonZero(int16x8_t a) { + return vreinterpretq_s16_u16(vtstq_s16(a, a)); +} + +template <> +inline int32x4_t MaskIfGreaterThan(int32x4_t a, int32x4_t b) { + return 
vreinterpretq_s32_u32(vcgtq_s32(a, b)); +} + +template <> +inline int16x8_t MaskIfGreaterThan(int16x8_t a, int16x8_t b) { + return vreinterpretq_s16_u16(vcgtq_s16(a, b)); +} + +template <> +inline int32x4_t MaskIfGreaterThanOrEqual(int32x4_t a, int32x4_t b) { + return vreinterpretq_s32_u32(vcgeq_s32(a, b)); +} + +template <> +inline int16x8_t MaskIfGreaterThanOrEqual(int16x8_t a, int16x8_t b) { + return vreinterpretq_s16_u16(vcgeq_s16(a, b)); +} + +template <> +inline int32x4_t MaskIfLessThan(int32x4_t a, int32x4_t b) { + return vreinterpretq_s32_u32(vcltq_s32(a, b)); +} + +template <> +inline int16x8_t MaskIfLessThan(int16x8_t a, int16x8_t b) { + return vreinterpretq_s16_u16(vcltq_s16(a, b)); +} + +template <> +inline int32x4_t MaskIfLessThanOrEqual(int32x4_t a, int32x4_t b) { + return vreinterpretq_s32_u32(vcleq_s32(a, b)); +} + +template <> +inline int16x8_t MaskIfLessThanOrEqual(int16x8_t a, int16x8_t b) { + return vreinterpretq_s16_u16(vcleq_s16(a, b)); +} + +template <> +inline bool All(int32x4_t a) { + a = vandq_s32(a, vextq_s32(a, a, 1)); + a = vandq_s32(a, vextq_s32(a, a, 2)); + return vgetq_lane_s32(a, 0); +} + +template <> +inline bool All(int16x8_t a) { + a = vandq_s16(a, vextq_s16(a, a, 1)); + a = vandq_s16(a, vextq_s16(a, a, 2)); + a = vandq_s16(a, vextq_s16(a, a, 4)); + return vgetq_lane_s16(a, 0); +} + +template <> +inline bool Any(int32x4_t a) { + a = vorrq_s32(a, vextq_s32(a, a, 1)); + a = vorrq_s32(a, vextq_s32(a, a, 2)); + return vgetq_lane_s32(a, 0); +} + +template <> +inline bool Any(int16x8_t a) { + a = vorrq_s16(a, vextq_s16(a, a, 1)); + a = vorrq_s16(a, vextq_s16(a, a, 2)); + a = vorrq_s16(a, vextq_s16(a, a, 4)); + return vgetq_lane_s16(a, 0); +} + +template <> +inline int32x4_t RoundingHalfSum(int32x4_t a, int32x4_t b) { + return vrhaddq_s32(a, b); +} + +template <> +inline int16x8_t RoundingHalfSum(int16x8_t a, int16x8_t b) { + return vrhaddq_s16(a, b); +} + +template <> +inline int32x4_t SaturatingRoundingDoublingHighMul(int32x4_t a, int32x4_t b) { + return vqrdmulhq_s32(a, b); +} + +template <> +inline int16x8_t SaturatingRoundingDoublingHighMul(int16x8_t a, int16x8_t b) { + return vqrdmulhq_s16(a, b); +} + +template <> +inline int32x4_t RoundingDivideByPOT(int32x4_t x, int exponent) { + const int32x4_t shift_vec = vdupq_n_s32(-exponent); + const int32x4_t fixup = vshrq_n_s32(vandq_s32(x, shift_vec), 31); + const int32x4_t fixed_up_x = vqaddq_s32(x, fixup); + return vrshlq_s32(fixed_up_x, shift_vec); +} + +template <> +inline int16x8_t RoundingDivideByPOT(int16x8_t x, int exponent) { + const int16x8_t shift_vec = vdupq_n_s16(-exponent); + const int16x8_t fixup = vshrq_n_s16(vandq_s16(x, shift_vec), 15); + const int16x8_t fixed_up_x = vqaddq_s16(x, fixup); + return vrshlq_s16(fixed_up_x, shift_vec); +} + +template +struct ImplSaturatingRoundingMultiplyByPOT { + static int32x4_t eval(int32x4_t x) { return vqshlq_n_s32(x, Exponent); } +}; + +template +struct ImplSaturatingRoundingMultiplyByPOT { + static int32x4_t eval(int32x4_t x) { + const int32x4_t fixup = vshrq_n_s32(x, 31); + const int32x4_t fixed_up_x = vqaddq_s32(x, fixup); + return vrshrq_n_s32(fixed_up_x, -Exponent); + } +}; + +template +struct ImplSaturatingRoundingMultiplyByPOT { + static int16x8_t eval(int16x8_t x) { return vqshlq_n_s16(x, Exponent); } +}; + +template +struct ImplSaturatingRoundingMultiplyByPOT { + static int16x8_t eval(int16x8_t x) { + const int16x8_t fixup = vshrq_n_s16(x, 15); + const int16x8_t fixed_up_x = vqaddq_s16(x, fixup); + return vrshrq_n_s16(fixed_up_x, -Exponent); + } +}; 
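+
+// Rough usage sketch of the NEON path (illustrative only; the variable names
+// below are hypothetical and not part of this header):
+//
+//   using F0 = gemmlowp::FixedPoint<int32x4_t, 0>;  // Q0.31, four lanes
+//   int32x4_t raw_in = vdupq_n_s32(1 << 30);        // 0.5 in every lane
+//   F0 half = F0::FromRaw(raw_in);
+//   F0 t = gemmlowp::tanh(half);                    // ~0.4621 in every lane
+//   int32x4_t raw_out = t.raw();                    // ~0.4621 * 2^31 per lane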
+
+template <>
+inline int32x4_t Dup<int32x4_t>(std::int32_t x) {
+  return vdupq_n_s32(x);
+}
+
+template <>
+inline int16x8_t Dup<int16x8_t>(std::int16_t x) {
+  return vdupq_n_s16(x);
+}
+
+// So far this is only needed for int16.
+template <>
+inline int16x8_t SaturatingAdd(int16x8_t a, int16x8_t b) {
+  return vqaddq_s16(a, b);
+}
+
+} // end namespace gemmlowp
+
+#endif // GEMMLOWP_INTERNAL_FIXEDPOINT_NEON_H_
diff --git a/edgeimpulse/edge-impulse-sdk/third_party/gemmlowp/fixedpoint/fixedpoint_sse.h b/edgeimpulse/edge-impulse-sdk/third_party/gemmlowp/fixedpoint/fixedpoint_sse.h
new file mode 100644
index 0000000..a1fae32
--- /dev/null
+++ b/edgeimpulse/edge-impulse-sdk/third_party/gemmlowp/fixedpoint/fixedpoint_sse.h
@@ -0,0 +1,384 @@
+// Copyright 2015 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// fixedpoint_SSE.h: optimized SSE specializations of the templates
+// in fixedpoint.h.
+
+#ifndef GEMMLOWP_INTERNAL_FIXEDPOINT_SSE_H_
+#define GEMMLOWP_INTERNAL_FIXEDPOINT_SSE_H_
+
+#include <smmintrin.h>
+#include "fixedpoint.h"
+
+namespace gemmlowp {
+
+// SSE intrinsics are not finely typed: there is a single __m128i vector
+// type that does not distinguish between "int32x4" and "int16x8" use
+// cases, unlike the NEON equivalents. Because we had initially focused
+// on int32x4, we did not pay attention and specialized these fixedpoint
+// templates directly for __m128i hardcoding the int32x4 semantics,
+// not leaving room for int16x8 semantics. Amending that by adding a separate
+// data type, int16x8_m128i, that wraps __m128i while being a separate
+// type.
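+// Each int16x8 specialization below unwraps the .v member, applies the
+// corresponding _epi16 intrinsic, and re-wraps the result, so overload
+// resolution can distinguish 16-bit from 32-bit semantics even though both
+// are backed by __m128i.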
+struct int16x8_m128i { + int16x8_m128i() {} + explicit int16x8_m128i(__m128i w) : v(w) {} + ~int16x8_m128i() {} + + __m128i v; +}; + +template <> +struct FixedPointRawTypeTraits<__m128i> { + typedef std::int32_t ScalarRawType; + static constexpr int kLanes = 4; +}; + +template <> +struct FixedPointRawTypeTraits { + typedef std::int16_t ScalarRawType; + static constexpr int kLanes = 8; +}; + +template <> +inline __m128i BitAnd(__m128i a, __m128i b) { + return _mm_and_si128(a, b); +} + +template <> +inline int16x8_m128i BitAnd(int16x8_m128i a, int16x8_m128i b) { + return int16x8_m128i(_mm_and_si128(a.v, b.v)); +} + +template <> +inline __m128i BitOr(__m128i a, __m128i b) { + return _mm_or_si128(a, b); +} + +template <> +inline int16x8_m128i BitOr(int16x8_m128i a, int16x8_m128i b) { + return int16x8_m128i(_mm_or_si128(a.v, b.v)); +} + +template <> +inline __m128i BitXor(__m128i a, __m128i b) { + return _mm_xor_si128(a, b); +} + +template <> +inline int16x8_m128i BitXor(int16x8_m128i a, int16x8_m128i b) { + return int16x8_m128i(_mm_xor_si128(a.v, b.v)); +} + +template <> +inline __m128i BitNot(__m128i a) { + return _mm_andnot_si128(a, _mm_set1_epi32(-1)); +} + +template <> +inline int16x8_m128i BitNot(int16x8_m128i a) { + return int16x8_m128i(_mm_andnot_si128(a.v, _mm_set1_epi16(-1))); +} + +template <> +inline __m128i Add(__m128i a, __m128i b) { + return _mm_add_epi32(a, b); +} + +template <> +inline int16x8_m128i Add(int16x8_m128i a, int16x8_m128i b) { + return int16x8_m128i(_mm_add_epi16(a.v, b.v)); +} + +template <> +inline __m128i Mul(__m128i a, __m128i b) { + return _mm_mullo_epi32(a, b); +} + +template <> +inline int16x8_m128i Mul(int16x8_m128i a, int16x8_m128i b) { + return int16x8_m128i(_mm_mullo_epi16(a.v, b.v)); +} + +template <> +inline __m128i Sub(__m128i a, __m128i b) { + return _mm_sub_epi32(a, b); +} + +template <> +inline int16x8_m128i Sub(int16x8_m128i a, int16x8_m128i b) { + return int16x8_m128i(_mm_sub_epi16(a.v, b.v)); +} + +template <> +inline __m128i Neg(__m128i a) { + return _mm_sign_epi32(a, _mm_set1_epi32(-1)); +} + +template <> +inline int16x8_m128i Neg(int16x8_m128i a) { + return int16x8_m128i(_mm_sign_epi16(a.v, _mm_set1_epi16(-1))); +} + +template <> +inline __m128i ShiftLeft(__m128i a, int offset) { + return _mm_slli_epi32(a, offset); +} + +template <> +inline int16x8_m128i ShiftLeft(int16x8_m128i a, int offset) { + return int16x8_m128i(_mm_slli_epi16(a.v, offset)); +} + +template <> +inline __m128i ShiftRight(__m128i a, int offset) { + return _mm_srai_epi32(a, offset); +} + +template <> +inline int16x8_m128i ShiftRight(int16x8_m128i a, int offset) { + return int16x8_m128i(_mm_srai_epi16(a.v, offset)); +} + +template <> +inline __m128i SelectUsingMask(__m128i if_mask, __m128i then_val, + __m128i else_val) { + // borrowed from Intel's arm_neon_sse.h header. + return _mm_or_si128(_mm_and_si128(if_mask, then_val), + _mm_andnot_si128(if_mask, else_val)); +} + +template <> +inline int16x8_m128i SelectUsingMask(int16x8_m128i if_mask, + int16x8_m128i then_val, + int16x8_m128i else_val) { + // borrowed from Intel's arm_neon_sse.h header. 
+ return int16x8_m128i(SelectUsingMask(if_mask.v, then_val.v, else_val.v)); +} + +template <> +inline __m128i MaskIfEqual(__m128i a, __m128i b) { + return _mm_cmpeq_epi32(a, b); +} + +template <> +inline int16x8_m128i MaskIfEqual(int16x8_m128i a, int16x8_m128i b) { + return int16x8_m128i(_mm_cmpeq_epi16(a.v, b.v)); +} + +template <> +inline __m128i MaskIfNotEqual(__m128i a, __m128i b) { + return BitNot(MaskIfEqual(a, b)); +} + +template <> +inline int16x8_m128i MaskIfNotEqual(int16x8_m128i a, int16x8_m128i b) { + return BitNot(MaskIfEqual(a, b)); +} + +template <> +inline __m128i MaskIfZero(__m128i a) { + return MaskIfEqual(a, _mm_set1_epi32(0)); +} + +template <> +inline int16x8_m128i MaskIfZero(int16x8_m128i a) { + return MaskIfEqual(a, int16x8_m128i(_mm_set1_epi16(0))); +} + +template <> +inline __m128i MaskIfNonZero(__m128i a) { + return MaskIfNotEqual(a, _mm_set1_epi32(0)); +} + +template <> +inline int16x8_m128i MaskIfNonZero(int16x8_m128i a) { + return MaskIfNotEqual(a, int16x8_m128i(_mm_set1_epi16(0))); +} + +template <> +inline __m128i MaskIfGreaterThan(__m128i a, __m128i b) { + return _mm_cmpgt_epi32(a, b); +} + +template <> +inline int16x8_m128i MaskIfGreaterThan(int16x8_m128i a, int16x8_m128i b) { + return int16x8_m128i(_mm_cmpgt_epi16(a.v, b.v)); +} + +template <> +inline __m128i MaskIfLessThan(__m128i a, __m128i b) { + return _mm_cmplt_epi32(a, b); +} + +template <> +inline int16x8_m128i MaskIfLessThan(int16x8_m128i a, int16x8_m128i b) { + return int16x8_m128i(_mm_cmplt_epi16(a.v, b.v)); +} + +template <> +inline __m128i MaskIfGreaterThanOrEqual(__m128i a, __m128i b) { + return BitNot(MaskIfLessThan(a, b)); +} + +template <> +inline int16x8_m128i MaskIfGreaterThanOrEqual(int16x8_m128i a, + int16x8_m128i b) { + return BitNot(MaskIfLessThan(a, b)); +} + +template <> +inline __m128i MaskIfLessThanOrEqual(__m128i a, __m128i b) { + return BitNot(MaskIfGreaterThan(a, b)); +} + +template <> +inline int16x8_m128i MaskIfLessThanOrEqual(int16x8_m128i a, int16x8_m128i b) { + return BitNot(MaskIfGreaterThan(a, b)); +} + +/* Assumptions: + - All and Any are used on masks. + - masks are all_ones for true lanes, all_zeroes otherwise. +Hence, All means all 128bits set, and Any means any bit set. 
+*/ + +template <> +inline bool All(__m128i a) { + return _mm_testc_si128(a, a); +} + +template <> +inline bool All(int16x8_m128i a) { + return _mm_testc_si128(a.v, a.v); +} + +template <> +inline bool Any(__m128i a) { + return !_mm_testz_si128(a, a); +} + +template <> +inline bool Any(int16x8_m128i a) { + return !_mm_testz_si128(a.v, a.v); +} + +template <> +inline __m128i RoundingHalfSum(__m128i a, __m128i b) { + /* __m128i round_bit_mask, a_over_2, b_over_2, round_bit, sum; */ + /* We divide the inputs before the add to avoid the overflow and costly test + */ + /* of checking if an overflow occured on signed add */ + /* round_bit_mask = _mm_set1_epi32(1); */ + /* a_over_2 = _mm_srai_epi32(a, 1); */ + /* b_over_2 = _mm_srai_epi32(b, 1); */ + /* sum = Add(a_over_2, b_over_2); */ + /* round_bit = _mm_sign_epi32(BitAnd(BitOr(a,b), round_bit_mask), sum); */ + /* return Add(sum, round_bit); */ + + /* Other possibility detecting overflow and xor the sign if an overflow + * happened*/ + __m128i one, sign_bit_mask, sum, rounded_half_sum, overflow, result; + one = _mm_set1_epi32(1); + sign_bit_mask = _mm_set1_epi32(0x80000000); + sum = Add(a, b); + rounded_half_sum = _mm_srai_epi32(Add(sum, one), 1); + overflow = + BitAnd(BitAnd(BitXor(a, rounded_half_sum), BitXor(b, rounded_half_sum)), + sign_bit_mask); + result = BitXor(rounded_half_sum, overflow); + return result; +} + +template <> +inline int16x8_m128i RoundingHalfSum(int16x8_m128i a, int16x8_m128i b) { + // Idea: go to unsigned to use _mm_avg_epu16, + // borrowed from Intel's arm_neon_sse.h header. + __m128i constant_neg_32768 = _mm_set1_epi16(-32768); + __m128i a_unsigned = _mm_sub_epi16(a.v, constant_neg_32768); + __m128i b_unsigned = _mm_sub_epi16(b.v, constant_neg_32768); + __m128i avg_unsigned = _mm_avg_epu16(a_unsigned, b_unsigned); + __m128i avg = _mm_add_epi16(avg_unsigned, constant_neg_32768); + return int16x8_m128i(avg); +} + +template <> +inline __m128i SaturatingRoundingDoublingHighMul(__m128i a, __m128i b) { + __m128i min, saturation_mask, a0_a2, a1_a3, b0_b2, b1_b3; + __m128i a0b0_a2b2, a1b1_a3b3, a0b0_a2b2_rounded, a1b1_a3b3_rounded; + __m128i a0b0_a2b2_rounded_2x, a1b1_a3b3_rounded_2x, result; + __m128i nudge; + + // saturation only happen if a == b == INT_MIN + min = _mm_set1_epi32(std::numeric_limits::min()); + saturation_mask = BitAnd(MaskIfEqual(a, b), MaskIfEqual(a, min)); + + // a = a0 | a1 | a2 | a3 + // b = b0 | b1 | b2 | b3 + a0_a2 = a; + a1_a3 = _mm_srli_si128(a, 4); + b0_b2 = b; + b1_b3 = _mm_srli_si128(b, 4); + + a0b0_a2b2 = _mm_mul_epi32(a0_a2, b0_b2); + a1b1_a3b3 = _mm_mul_epi32(a1_a3, b1_b3); + + // do the rounding and take into account that it will be doubled + nudge = _mm_set1_epi64x(1 << 30); + a0b0_a2b2_rounded = _mm_add_epi64(a0b0_a2b2, nudge); + a1b1_a3b3_rounded = _mm_add_epi64(a1b1_a3b3, nudge); + + // do the doubling + a0b0_a2b2_rounded_2x = _mm_slli_epi64(a0b0_a2b2_rounded, 1); + a1b1_a3b3_rounded_2x = _mm_slli_epi64(a1b1_a3b3_rounded, 1); + + // get the high part of the products + result = _mm_blend_epi16(_mm_srli_si128(a0b0_a2b2_rounded_2x, 4), + a1b1_a3b3_rounded_2x, 0xcc); + + // saturate those which overflowed + return SelectUsingMask(saturation_mask, min, result); +} + +template <> +inline int16x8_m128i SaturatingRoundingDoublingHighMul(int16x8_m128i a, + int16x8_m128i b) { + // Idea: use _mm_mulhrs_epi16 then saturate with a bit-operation, + // borrowed from Intel's arm_neon_sse.h header. 
+ __m128i result_unsaturated = _mm_mulhrs_epi16(a.v, b.v); + __m128i saturation_mask = + _mm_cmpeq_epi16(result_unsaturated, _mm_set1_epi16(0x8000)); + __m128i result = _mm_xor_si128(result_unsaturated, saturation_mask); + return int16x8_m128i(result); +} + +template <> +inline __m128i Dup<__m128i>(std::int32_t x) { + return _mm_set1_epi32(x); +} + +template <> +inline int16x8_m128i Dup(std::int16_t x) { + return int16x8_m128i(_mm_set1_epi16(x)); +} + +// So far this is only needed for int16. +template <> +inline int16x8_m128i SaturatingAdd(int16x8_m128i a, int16x8_m128i b) { + return int16x8_m128i(_mm_adds_epi16(a.v, b.v)); +} + +} // end namespace gemmlowp + +#endif // GEMMLOWP_INTERNAL_FIXEDPOINT_SSE_H_ diff --git a/edgeimpulse/edge-impulse-sdk/third_party/gemmlowp/internal/detect_platform.h b/edgeimpulse/edge-impulse-sdk/third_party/gemmlowp/internal/detect_platform.h new file mode 100644 index 0000000..6f06d19 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/third_party/gemmlowp/internal/detect_platform.h @@ -0,0 +1,166 @@ +// Copyright 2018 The Gemmlowp Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// detect_platform.h: Sets up macros that control architecture-specific +// features of gemmlowp's implementation. + +#ifndef GEMMLOWP_INTERNAL_DETECT_PLATFORM_H_ +#define GEMMLOWP_INTERNAL_DETECT_PLATFORM_H_ + +// Our inline assembly path assume GCC/Clang syntax. +// Native Client doesn't seem to support inline assembly(?). +#if defined(__GNUC__) && !defined(__native_client__) +#define GEMMLOWP_ALLOW_INLINE_ASM +#endif + +// Define macro statement that avoids inlining for GCC. +// For non-GCC, define as empty macro. +#if defined(__GNUC__) +#define GEMMLOWP_NOINLINE __attribute__((noinline)) +#else +#define GEMMLOWP_NOINLINE +#endif + +// Detect ARM, 32-bit or 64-bit +#ifdef __arm__ +#define GEMMLOWP_ARM_32 +#endif + +#ifdef __aarch64__ +#define GEMMLOWP_ARM_64 +#endif + +#if defined(GEMMLOWP_ARM_32) || defined(GEMMLOWP_ARM_64) +#define GEMMLOWP_ARM +#endif + +// Detect MIPS, 32-bit or 64-bit +#if defined(__mips) && !defined(__LP64__) +#define GEMMLOWP_MIPS_32 +#endif + +#if defined(__mips) && defined(__LP64__) +#define GEMMLOWP_MIPS_64 +#endif + +#if defined(GEMMLOWP_MIPS_32) || defined(GEMMLOWP_MIPS_64) +#define GEMMLOWP_MIPS +#endif + +// Detect x86, 32-bit or 64-bit +#if defined(__i386__) || defined(_M_IX86) || defined(_X86_) || defined(__i386) +#define GEMMLOWP_X86_32 +#endif + +#if defined(__x86_64__) || defined(_M_X64) || defined(__amd64) +#define GEMMLOWP_X86_64 +#endif + +#if defined(GEMMLOWP_X86_32) || defined(GEMMLOWP_X86_64) +#define GEMMLOWP_X86 +#endif + +// Some of our optimized paths use inline assembly and for +// now we don't bother enabling some other optimized paths using intrinddics +// where we can't use inline assembly paths. +#ifdef GEMMLOWP_ALLOW_INLINE_ASM + +// Detect NEON. It's important to check for both tokens. 
+#if (defined __ARM_NEON) || (defined __ARM_NEON__) +#define GEMMLOWP_NEON +#endif + +// Convenience NEON tokens for 32-bit or 64-bit +#if defined(GEMMLOWP_NEON) && defined(GEMMLOWP_ARM_32) +#define GEMMLOWP_NEON_32 +#endif + +#if defined(GEMMLOWP_NEON) && defined(GEMMLOWP_ARM_64) +#define GEMMLOWP_NEON_64 +#endif + +// Detect MIPS MSA. +// Limit MSA optimizations to little-endian CPUs for now. +// TODO: Perhaps, eventually support MSA optimizations on big-endian CPUs? +#if defined(GEMMLOWP_MIPS) && (__mips_isa_rev >= 5) && defined(__mips_msa) && \ + defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) +#define GEMMLOWP_MSA +#endif + +// Convenience MIPS MSA tokens for 32-bit or 64-bit. +#if defined(GEMMLOWP_MSA) && defined(GEMMLOWP_MIPS_32) +#define GEMMLOWP_MSA_32 +#endif + +#if defined(GEMMLOWP_MSA) && defined(GEMMLOWP_MIPS_64) +#define GEMMLOWP_MSA_64 +#endif + +// compiler define for AVX2 -D GEMMLOWP_ENABLE_AVX2 +// Detect AVX2 +#if defined(__AVX2__) && defined(GEMMLOWP_ENABLE_AVX2) +#define GEMMLOWP_AVX2 +// Detect SSE4. +// MSVC does not have __SSE4_1__ macro, but will enable SSE4 +// when AVX is turned on. +#elif defined(__SSE4_1__) || (defined(_MSC_VER) && defined(__AVX__)) +#define GEMMLOWP_SSE4 +// Detect SSE3. +#elif defined(__SSE3__) +#define GEMMLOWP_SSE3 +#endif + +// Convenience SSE4 tokens for 32-bit or 64-bit +#if defined(GEMMLOWP_SSE4) && defined(GEMMLOWP_X86_32) && \ + !defined(GEMMLOWP_DISABLE_SSE4) +#define GEMMLOWP_SSE4_32 +#endif + +#if defined(GEMMLOWP_SSE3) && defined(GEMMLOWP_X86_32) +#define GEMMLOWP_SSE3_32 +#endif + +#if defined(GEMMLOWP_SSE4) && defined(GEMMLOWP_X86_64) && \ + !defined(GEMMLOWP_DISABLE_SSE4) +#define GEMMLOWP_SSE4_64 +#endif + +#if defined(GEMMLOWP_SSE3) && defined(GEMMLOWP_X86_64) +#define GEMMLOWP_SSE3_64 +#endif + +#if defined(GEMMLOWP_AVX2) && defined(GEMMLOWP_X86_64) +#define GEMMLOWP_AVX2_64 +#endif + +#if defined(__has_feature) +#if __has_feature(memory_sanitizer) +#include +#define GEMMLOWP_MARK_MEMORY_AS_INITIALIZED __msan_unpoison +#elif __has_feature(address_sanitizer) +#include +#define GEMMLOWP_MARK_MEMORY_AS_INITIALIZED __asan_unpoison_memory_region +#endif +#endif + +#endif // GEMMLOWP_ALLOW_INLINE_ASM + +// Detect Android. Don't conflate with ARM - we care about tuning +// for non-ARM Android devices too. This can be used in conjunction +// with x86 to tune differently for mobile x86 CPUs (Atom) vs. desktop x86 CPUs. +#if defined(__ANDROID__) || defined(ANDROID) +#define GEMMLOWP_ANDROID +#endif + +#endif // GEMMLOWP_INTERNAL_DETECT_PLATFORM_H_ diff --git a/edgeimpulse/edge-impulse-sdk/third_party/ruy/ruy/profiler/instrumentation.h b/edgeimpulse/edge-impulse-sdk/third_party/ruy/ruy/profiler/instrumentation.h new file mode 100644 index 0000000..c4df1e6 --- /dev/null +++ b/edgeimpulse/edge-impulse-sdk/third_party/ruy/ruy/profiler/instrumentation.h @@ -0,0 +1,203 @@ +/* Copyright 2020 Google LLC. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/
+
+#ifndef RUY_RUY_PROFILER_INSTRUMENTATION_H_
+#define RUY_RUY_PROFILER_INSTRUMENTATION_H_
+
+#ifdef RUY_PROFILER
+#include <cstdio>
+#include <mutex>
+#include <vector>
+#endif
+
+namespace ruy {
+namespace profiler {
+
+#ifdef RUY_PROFILER
+
+// A label is how a code scope is annotated to appear in profiles.
+// The stacks that are sampled by the profiler are stacks of such labels.
+// A label consists of a literal string, plus optional integer arguments.
+class Label {
+ public:
+  Label() {}
+  template <typename... Args>
+  explicit Label(Args... args) {
+    Set(args...);
+  }
+  void Set(const char* format) {
+    format_ = format;
+    args_count_ = 0;
+  }
+  template <typename... Args>
+  void Set(const char* format, Args... args) {
+    format_ = format;
+    args_count_ = sizeof...(args);
+    SetArgs(0, args...);
+  }
+
+  void operator=(const Label& other);
+
+  bool operator==(const Label& other) const;
+
+  std::string Formatted() const;
+  const char* format() const { return format_; }
+
+ private:
+  void SetArgs(int position, int arg0) { args_[position] = arg0; }
+
+  template <typename... Args>
+  void SetArgs(int position, int arg0, Args... args) {
+    SetArgs(position, arg0);
+    SetArgs(position + 1, args...);
+  }
+
+  static constexpr int kMaxArgs = 4;
+  const char* format_ = nullptr;
+  int args_count_ = 0;
+  int args_[kMaxArgs];
+};
+
+namespace detail {
+
+// Forward-declaration, see class ThreadStack below.
+class ThreadStack;
+
+bool& GlobalIsProfilerRunning();
+
+// Returns the global vector of pointers to all stacks, there being one stack
+// per thread executing instrumented code.
+std::vector<ThreadStack*>* GlobalAllThreadStacks();
+
+// Returns the mutex to be locked around any access to GlobalAllThreadStacks().
+std::mutex* GlobalsMutex();
+
+// Returns the thread-local stack, specific to the current thread.
+ThreadStack* ThreadLocalThreadStack();
+
+// This 'stack' is what may be more appropriately called a 'pseudostack':
+// It contains Label entries that are 'manually' entered by instrumentation
+// code. It's unrelated to real call stacks.
+struct Stack {
+  std::uint32_t id = 0;
+  static constexpr int kMaxSize = 64;
+  int size = 0;
+  Label labels[kMaxSize];
+};
+
+// Returns the buffer byte size required by CopyToSample.
+int GetBufferSize(const Stack& stack);
+
+// Copies this Stack into a byte buffer, called a 'sample'.
+void CopyToBuffer(const Stack& stack, char* dst);
+
+// Populates this Stack from an existing sample buffer, typically
+// produced by CopyToSample.
+void ReadFromBuffer(const char* src, Stack* stack);
+
+// ThreadStack is meant to be used as a thread-local singleton, assigning to
+// each thread a Stack object holding its pseudo-stack of profile labels,
+// plus a mutex allowing to synchronize accesses to this pseudo-stack between
+// this thread and a possible profiler thread sampling it.
+class ThreadStack {
+ public:
+  ThreadStack();
+  ~ThreadStack();
+
+  const Stack& stack() const { return stack_; }
+
+  // Returns the mutex to lock around any access to this stack. Each stack is
+  // accessed by potentially two threads: the thread that it belongs to
+  // (which calls Push and Pop) and the profiler thread during profiling
+  // (which calls CopyToSample).
+  std::mutex& Mutex() const { return mutex_; }
+
+  // Pushes a new label on the top of this Stack.
+  template <typename... Args>
+  void Push(Args... args) {
+    // This mutex locking is needed to guard against race conditions as both
+    // the current thread and the profiler thread may be concurrently accessing
+    // this stack. In addition to that, this mutex locking also serves the other
+    // purpose of acting as a barrier (of compiler code reordering, of runtime
+    // CPU instruction reordering, and of memory access reordering), which
+    // gives a measure of correctness to this profiler. The downside is some
+    // latency. As this lock will be uncontended most of the time, the cost
+    // should be roughly that of a sequentially-consistent atomic access,
+    // comparable to an access to the level of CPU data cache that is shared
+    // among all cores, typically 60 cycles on current ARM CPUs, plus side
+    // effects from barrier instructions.
+    std::lock_guard<std::mutex> lock(mutex_);
+    // Avoid overrunning the stack, even in 'release' builds. This profiling
+    // instrumentation code should not ship in release builds anyway, the
+    // overhead of this check is negligible, and overrunning a stack array
+    // would be bad.
+    if (stack_.size >= Stack::kMaxSize) {
+      abort();
+    }
+    stack_.labels[stack_.size++].Set(args...);
+  }
+
+  // Pops the top-most label from this Stack.
+  void Pop() {
+    // See the comment in Push about this lock. While it would be tempting to
+    // try to remove this lock and just atomically decrement size_ with a
+    // store-release, that would not necessarily be a substitute for all of the
+    // purposes that this lock serves, or if it was done carefully to serve all
+    // of the same purposes, then that wouldn't be faster than this (mostly
+    // uncontended) lock.
+    std::lock_guard<std::mutex> lock(mutex_);
+    stack_.size--;
+  }
+
+ private:
+  mutable std::mutex mutex_;
+  Stack stack_;
+};
+
+}  // namespace detail
+
+// RAII user-facing way to construct Labels associated with their life scope
+// and get them pushed to / popped from the current thread stack.
+class ScopeLabel {
+ public:
+  template <typename... Args>
+  ScopeLabel(Args... args) : thread_stack_(detail::ThreadLocalThreadStack()) {
+    thread_stack_->Push(args...);
+  }
+
+  ~ScopeLabel() { thread_stack_->Pop(); }
+
+ private:
+  detail::ThreadStack* thread_stack_;
+};
+
+#else  // no RUY_PROFILER
+
+class ScopeLabel {
+ public:
+  template <typename... Args>
+  explicit ScopeLabel(Args...) {}
+
+  // This destructor is needed to consistently silence clang's
+  // -Wunused-variable which seems to trigger semi-randomly.
+  ~ScopeLabel() {}
+};
+
+#endif
+
+}  // namespace profiler
+}  // namespace ruy
+
+#endif  // RUY_RUY_PROFILER_INSTRUMENTATION_H_
diff --git a/gen_pack.sh b/gen_pack.sh
new file mode 100644
index 0000000..b756ed4
--- /dev/null
+++ b/gen_pack.sh
@@ -0,0 +1,146 @@
+#!/usr/bin/env bash
+# Version: 2.7
+# Date: 2023-05-22
+# This bash script generates a CMSIS Software Pack:
+#
+
+set -o pipefail
+
+# Set version of gen pack library
+# For available versions see https://github.com/Open-CMSIS-Pack/gen-pack/tags.
+# Use the tag name without the prefix "v", e.g., 0.7.0
+REQUIRED_GEN_PACK_LIB="0.8.7"
+
+# Set default command line arguments
+DEFAULT_ARGS=()
+
+# Pack warehouse directory - destination
+# Default: ./output
+#
+# PACK_OUTPUT=./output
+
+# Temporary pack build directory,
+# Default: ./build
+#
+# PACK_BUILD=./build
+
+# Specify directory names to be added to pack base directory
+# An empty list defaults to all folders next to this script.
+# Default: empty (all folders) +# +PACK_DIRS=" + edgeimpulse +" + +# Specify file names to be added to pack base directory +# Default: empty +# +PACK_BASE_FILES=" + LICENSE-apache-2.0.txt +" + +# Specify file names to be deleted from pack build directory +# Default: empty +# +PACK_DELETE_FILES=" + gen_pack.sh +" + +# Specify patches to be applied +# Default: empty +# +# PACK_PATCH_FILES=" +# +# " + +# Specify addition argument to packchk +# Default: empty +# +#PACKCHK_ARGS=() + +# Specify additional dependencies for packchk +# Default: empty +# +PACKCHK_DEPS=" + ARM.CMSIS.pdsc + ARM.CMSIS-DSP.pdsc + ARM.CMSIS-NN.pdsc +" + +# Optional: restrict fallback modes for changelog generation +# Default: full +# Values: +# - full Tag annotations, release descriptions, or commit messages (in order) +# - release Tag annotations, or release descriptions (in order) +# - tag Tag annotations only +# +# PACK_CHANGELOG_MODE="" +PACK_CHANGELOG_MODE="tag" + +# +# custom pre-processing steps +# +# usage: preprocess +# The build folder +# +function preprocess() { + # add custom steps here to be executed + # before populating the pack build folder + return 0 +} + +# +# custom post-processing steps +# +# usage: postprocess +# The build folder +# +function postprocess() { + # add custom steps here to be executed + # after populating the pack build folder + # but before archiving the pack into output folder + return 0 +} + +############ DO NOT EDIT BELOW ########### + +function install_lib() { + local URL="https://github.com/Open-CMSIS-Pack/gen-pack/archive/refs/tags/v$1.tar.gz" + local STATUS=$(curl -sLI "${URL}" | grep "^HTTP" | tail -n 1 | cut -d' ' -f2 || echo "$((600+$?))") + if [[ $STATUS -ge 400 ]]; then + echo "Wrong/unavailable gen-pack lib version '$1'!" >&2 + echo "Check REQUIRED_GEN_PACK_LIB variable." >&2 + echo "For available versions see https://github.com/Open-CMSIS-Pack/gen-pack/tags." >&2 + exit 1 + fi + echo "Downloading gen-pack lib version '$1' to '$2' ..." + mkdir -p "$2" + curl -L "${URL}" -s | tar -xzf - --strip-components 1 -C "$2" || exit 1 +} + +function load_lib() { + if [[ -d ${GEN_PACK_LIB} ]]; then + . "${GEN_PACK_LIB}/gen-pack" + return 0 + fi + local GLOBAL_LIB="/usr/local/share/gen-pack/${REQUIRED_GEN_PACK_LIB}" + local USER_LIB="${HOME}/.local/share/gen-pack/${REQUIRED_GEN_PACK_LIB}" + if [[ ! -d "${GLOBAL_LIB}" && ! -d "${USER_LIB}" ]]; then + echo "Required gen_pack lib not found!" >&2 + install_lib "${REQUIRED_GEN_PACK_LIB}" "${USER_LIB}" + fi + + if [[ -d "${GLOBAL_LIB}" ]]; then + . "${GLOBAL_LIB}/gen-pack" + elif [[ -d "${USER_LIB}" ]]; then + . "${USER_LIB}/gen-pack" + else + echo "Required gen-pack lib is not installed!" >&2 + exit 1 + fi +} + +load_lib +gen_pack "${DEFAULT_ARGS[@]}" "$@" + +exit 0
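The ruy instrumentation header added above is only active when RUY_PROFILER is defined; otherwise ScopeLabel collapses to an empty object, so annotations can stay in application code at no cost. A minimal usage sketch follows, assuming the pack's edge-impulse-sdk directory is on the include path; the RunInference function and the label strings are hypothetical, for illustration only.

#include "edge-impulse-sdk/third_party/ruy/ruy/profiler/instrumentation.h"

// Hypothetical function used only to illustrate scope annotation.
void RunInference(const float* features, int feature_count) {
  // Pushes "RunInference" onto this thread's pseudo-stack for the lifetime
  // of the enclosing scope (a no-op unless RUY_PROFILER is defined).
  ruy::profiler::ScopeLabel function_label("RunInference");

  {
    // A label is a literal format string plus up to four integer arguments.
    ruy::profiler::ScopeLabel preprocess_label("preprocess, n=%d",
                                               feature_count);
    // ... feature extraction would run here ...
  }

  // ... the classifier would run here ...
  (void)features;
}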