Commit
Merge branch 'master' of https://github.com/AI-performance/embedded-a…
Showing 4 changed files with 261 additions and 3 deletions.
@@ -0,0 +1,250 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/tools/benchmark/benchmark_model.h"

#include <iostream>
#include <sstream>

#include "tensorflow/lite/profiling/memory_info.h"
#include "tensorflow/lite/profiling/time.h"
#include "tensorflow/lite/tools/benchmark/benchmark_utils.h"
#include "tensorflow/lite/tools/logging.h"

namespace tflite {
namespace benchmark {
using tensorflow::Stat;

BenchmarkParams BenchmarkModel::DefaultParams() {
  BenchmarkParams params;
  params.AddParam("num_runs", BenchmarkParam::Create<int32_t>(50));
  params.AddParam("min_secs", BenchmarkParam::Create<float>(1.0f));
  params.AddParam("max_secs", BenchmarkParam::Create<float>(150.0f));
  params.AddParam("run_delay", BenchmarkParam::Create<float>(-1.0f));
  params.AddParam("num_threads", BenchmarkParam::Create<int32_t>(1));
  params.AddParam("use_caching", BenchmarkParam::Create<bool>(false));
  params.AddParam("benchmark_name", BenchmarkParam::Create<std::string>(""));
  params.AddParam("output_prefix", BenchmarkParam::Create<std::string>(""));
  params.AddParam("warmup_runs", BenchmarkParam::Create<int32_t>(1));
  params.AddParam("warmup_min_secs", BenchmarkParam::Create<float>(0.5f));
  params.AddParam("verbose", BenchmarkParam::Create<bool>(false));
  return params;
}

BenchmarkModel::BenchmarkModel() : params_(DefaultParams()) {}

void BenchmarkLoggingListener::OnBenchmarkEnd(const BenchmarkResults& results) {
  auto inference_us = results.inference_time_us();
  auto init_us = results.startup_latency_us();
  auto warmup_us = results.warmup_time_us();
  auto init_mem_usage = results.init_mem_usage();
  auto overall_mem_usage = results.overall_mem_usage();

  // Console summary of per-run inference latency (min/max/avg/std dev, in ms).
  std::cout << "time cost: min=" << inference_us.min() / 1000. << "ms"
            << " max=" << inference_us.max() / 1000. << "ms"
            << " avg=" << inference_us.avg() / 1000. << "ms"
            << " std_dev=" << inference_us.std_deviation() / 1000. << "ms"
            << std::endl;
  TFLITE_LOG(INFO) << "Inference timings in us: "
                   << "Init: " << init_us << ", "
                   << "First inference: " << warmup_us.first() << ", "
                   << "Warmup (avg): " << warmup_us.avg() << ", "
                   << "Inference (avg): " << inference_us.avg();

  if (!init_mem_usage.IsSupported()) return;
  TFLITE_LOG(INFO)
      << "Note: as the benchmark tool itself affects memory footprint, the "
         "following is only APPROXIMATE to the actual memory footprint of the "
         "model at runtime. Take the information at your discretion.";
  TFLITE_LOG(INFO) << "Peak memory footprint (MB): init="
                   << init_mem_usage.max_rss_kb / 1024.0
                   << " overall=" << overall_mem_usage.max_rss_kb / 1024.0;
}

std::vector<Flag> BenchmarkModel::GetFlags() {
  return {
      CreateFlag<int32_t>(
          "num_runs", &params_,
          "expected number of runs, see also min_secs, max_secs"),
      CreateFlag<float>(
          "min_secs", &params_,
          "minimum number of seconds to rerun for, potentially making the "
          "actual number of runs to be greater than num_runs"),
      CreateFlag<float>(
          "max_secs", &params_,
          "maximum number of seconds to rerun for, potentially making the "
          "actual number of runs to be less than num_runs. Note if --max-secs "
          "is exceeded in the middle of a run, the benchmark will continue to "
          "the end of the run but will not start the next run."),
      CreateFlag<float>("run_delay", &params_, "delay between runs in seconds"),
      CreateFlag<int32_t>("num_threads", &params_, "number of threads"),
      CreateFlag<bool>(
          "use_caching", &params_,
          "Enable caching of prepacked weights matrices in matrix "
          "multiplication routines. Currently implies the use of the Ruy "
          "library."),
      CreateFlag<std::string>("benchmark_name", &params_, "benchmark name"),
      CreateFlag<std::string>("output_prefix", &params_,
                              "benchmark output prefix"),
      CreateFlag<int32_t>(
          "warmup_runs", &params_,
          "minimum number of runs performed on initialization, to "
          "allow performance characteristics to settle, see also "
          "warmup_min_secs"),
      CreateFlag<float>(
          "warmup_min_secs", &params_,
          "minimum number of seconds to rerun for, potentially making the "
          "actual number of warm-up runs to be greater than warmup_runs"),
      CreateFlag<bool>("verbose", &params_,
                       "Whether to log parameters whose values are not set. "
                       "By default, only log those parameters that are set by "
                       "parsing their values from the commandline flags."),
  };
}

void BenchmarkModel::LogParams() {
  const bool verbose = params_.Get<bool>("verbose");
  TFLITE_LOG(INFO) << "Log parameter values verbosely: [" << verbose << "]";

  LOG_BENCHMARK_PARAM(int32_t, "num_runs", "Min num runs", verbose);
  LOG_BENCHMARK_PARAM(float, "min_secs", "Min runs duration (seconds)",
                      verbose);
  LOG_BENCHMARK_PARAM(float, "max_secs", "Max runs duration (seconds)",
                      verbose);
  LOG_BENCHMARK_PARAM(float, "run_delay", "Inter-run delay (seconds)", verbose);
  LOG_BENCHMARK_PARAM(int32_t, "num_threads", "Num threads", verbose);
  LOG_BENCHMARK_PARAM(bool, "use_caching", "Use caching", verbose);
  LOG_BENCHMARK_PARAM(std::string, "benchmark_name", "Benchmark name", verbose);
  LOG_BENCHMARK_PARAM(std::string, "output_prefix", "Output prefix", verbose);
  LOG_BENCHMARK_PARAM(int32_t, "warmup_runs", "Min warmup runs", verbose);
  LOG_BENCHMARK_PARAM(float, "warmup_min_secs",
                      "Min warmup runs duration (seconds)", verbose);
}

TfLiteStatus BenchmarkModel::PrepareInputData() { return kTfLiteOk; }

TfLiteStatus BenchmarkModel::ResetInputsAndOutputs() { return kTfLiteOk; }

Stat<int64_t> BenchmarkModel::Run(int min_num_times, float min_secs,
                                  float max_secs, RunType run_type,
                                  TfLiteStatus* invoke_status) {
  Stat<int64_t> run_stats;
  TFLITE_LOG(INFO) << "Running benchmark for at least " << min_num_times
                   << " iterations and at least " << min_secs << " seconds but"
                   << " terminate if exceeding " << max_secs << " seconds.";
  int64_t now_us = profiling::time::NowMicros();
  int64_t min_finish_us = now_us + static_cast<int64_t>(min_secs * 1.e6f);
  int64_t max_finish_us = now_us + static_cast<int64_t>(max_secs * 1.e6f);

  *invoke_status = kTfLiteOk;
  // Keep iterating until both the minimum run count and the minimum duration
  // have been reached, but stop once the maximum duration is exceeded.
  for (int run = 0; (run < min_num_times || now_us < min_finish_us) &&
                    now_us <= max_finish_us;
       run++) {
    ResetInputsAndOutputs();
    listeners_.OnSingleRunStart(run_type);
    int64_t start_us = profiling::time::NowMicros();
    TfLiteStatus status = RunImpl();
    int64_t end_us = profiling::time::NowMicros();
    listeners_.OnSingleRunEnd();

    run_stats.UpdateStat(end_us - start_us);
    util::SleepForSeconds(params_.Get<float>("run_delay"));
    now_us = profiling::time::NowMicros();

    if (status != kTfLiteOk) {
      *invoke_status = status;
    }
  }

  std::stringstream stream;
  run_stats.OutputToStream(&stream);
  TFLITE_LOG(INFO) << stream.str() << std::endl;

  return run_stats;
}

TfLiteStatus BenchmarkModel::ValidateParams() { return kTfLiteOk; }

TfLiteStatus BenchmarkModel::Run(int argc, char** argv) {
  TF_LITE_ENSURE_STATUS(ParseFlags(argc, argv));
  return Run();
}

TfLiteStatus BenchmarkModel::Run() {
  TF_LITE_ENSURE_STATUS(ValidateParams());

  LogParams();

  const double model_size_mb = MayGetModelFileSize() / 1e6;
  // Measure initialization latency and the memory growth around Init().
  const auto start_mem_usage = profiling::memory::GetMemoryUsage();
  int64_t initialization_start_us = profiling::time::NowMicros();
  TF_LITE_ENSURE_STATUS(Init());
  const auto init_end_mem_usage = profiling::memory::GetMemoryUsage();
  int64_t initialization_end_us = profiling::time::NowMicros();
  int64_t startup_latency_us = initialization_end_us - initialization_start_us;
  const auto init_mem_usage = init_end_mem_usage - start_mem_usage;

  if (model_size_mb > 0) {
    TFLITE_LOG(INFO) << "The input model file size (MB): " << model_size_mb;
  }
  TFLITE_LOG(INFO) << "Initialized session in " << startup_latency_us / 1e3
                   << "ms.";

  TF_LITE_ENSURE_STATUS(PrepareInputData());

  // Warm-up runs are timed separately from the regular measurement runs.
  TfLiteStatus status = kTfLiteOk;
  uint64_t input_bytes = ComputeInputBytes();
  listeners_.OnBenchmarkStart(params_);
  Stat<int64_t> warmup_time_us =
      Run(params_.Get<int32_t>("warmup_runs"),
          params_.Get<float>("warmup_min_secs"), params_.Get<float>("max_secs"),
          WARMUP, &status);
  if (status != kTfLiteOk) {
    return status;
  }

  Stat<int64_t> inference_time_us =
      Run(params_.Get<int32_t>("num_runs"), params_.Get<float>("min_secs"),
          params_.Get<float>("max_secs"), REGULAR, &status);
  const auto overall_mem_usage =
      profiling::memory::GetMemoryUsage() - start_mem_usage;

  listeners_.OnBenchmarkEnd({model_size_mb, startup_latency_us, input_bytes,
                             warmup_time_us, inference_time_us, init_mem_usage,
                             overall_mem_usage});
  return status;
}

TfLiteStatus BenchmarkModel::ParseFlags(int* argc, char** argv) {
  auto flag_list = GetFlags();
  const bool parse_result =
      Flags::Parse(argc, const_cast<const char**>(argv), flag_list);
  if (!parse_result) {
    std::string usage = Flags::Usage(argv[0], flag_list);
    TFLITE_LOG(ERROR) << usage;
    return kTfLiteError;
  }

  std::string unconsumed_args =
      Flags::ArgsToString(*argc, const_cast<const char**>(argv));
  if (!unconsumed_args.empty()) {
    TFLITE_LOG(WARN) << "Unconsumed cmdline flags: " << unconsumed_args;
  }

  return kTfLiteOk;
}

}  // namespace benchmark
}  // namespace tflite
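
For context, BenchmarkModel is meant to be subclassed and driven from a small main(). The sketch below is illustrative only and is not part of this commit: DummyBenchmark is a hypothetical subclass, and the set of methods it overrides (Init, RunImpl, ComputeInputBytes, MayGetModelFileSize) plus the AddListener hook are assumed from the calls made in this file and from the upstream TF Lite benchmark_model.h, which is not shown here.

// Illustrative sketch only (not part of this commit). DummyBenchmark is a
// hypothetical stand-in for a real, model-specific benchmark.
#include "tensorflow/lite/tools/benchmark/benchmark_model.h"

namespace tflite {
namespace benchmark {

class DummyBenchmark : public BenchmarkModel {
 public:
  TfLiteStatus Init() override { return kTfLiteOk; }      // build/load the model here
  TfLiteStatus RunImpl() override { return kTfLiteOk; }   // one timed inference
  uint64_t ComputeInputBytes() override { return 0; }     // total input tensor bytes
  int64_t MayGetModelFileSize() override { return 0; }    // model file size, if known
};

}  // namespace benchmark
}  // namespace tflite

int main(int argc, char** argv) {
  tflite::benchmark::DummyBenchmark benchmark;
  // Attach the listener defined above so the summary is printed/logged at the end.
  tflite::benchmark::BenchmarkLoggingListener listener;
  benchmark.AddListener(&listener);
  // Parses flags such as --num_runs, --min_secs, --max_secs, --warmup_runs
  // (see DefaultParams()/GetFlags() above), then runs warmup and regular runs.
  return benchmark.Run(argc, argv) == kTfLiteOk ? 0 : 1;
}

Built this way, an invocation like ./dummy_benchmark --num_runs=100 --min_secs=2 exercises the flag parsing in ParseFlags(); any flags the parser does not consume are reported via the "Unconsumed cmdline flags" warning above.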