From 2570764cfd0073b78eb61e208dfd9c585a72fc4f Mon Sep 17 00:00:00 2001
From: ysh329
Date: Sun, 6 Sep 2020 15:16:46 +0800
Subject: [PATCH 1/7] Update README.md

---
 models/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/models/README.md b/models/README.md
index f64f2e2..92b29b5 100644
--- a/models/README.md
+++ b/models/README.md
@@ -18,5 +18,5 @@ Original model stored for embedded-ai.bench.
 |vgg16| fp32 |caffe| [prototxt](https://gist.githubusercontent.com/ksimonyan/211839e770f7b538e2d8/raw/0067c9b32f60362c74f4c445a080beed06b07eb3/VGG_ILSVRC_16_layers_deploy.prototxt), [caffemodel](http://www.robots.ox.ac.uk/~vgg/software/very_deep/caffe/VGG_ILSVRC_16_layers.caffemodel) | [ksimonyan](https://gist.github.com/ksimonyan/211839e770f7b538e2d8/) |
 |mobilenetv1| fp32 |tensorflow| [tflite&pb](https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_2018_02_22/mobilenet_v1_1.0_224.tgz) | [hosted_models](https://tensorflow.google.cn/lite/guide/hosted_models) |
 |mobilenetv2| fp32 |tensorflow| [tflite&pb](https://storage.googleapis.com/download.tensorflow.org/models/tflite_11_05_08/mobilenet_v2_1.0_224.tgz) | [hosted_models](https://tensorflow.google.cn/lite/guide/hosted_models) |
-|squeezenetv1.1| fp32 |tensorflow| no provided | no provided |
+|squeezenetv1.1| fp32 |tensorflow| [tflite&pb](https://storage.googleapis.com/download.tensorflow.org/models/tflite/model_zoo/upload_20180427/squeezenet_2018_04_27.tgz) | [hosted_models](https://tensorflow.google.cn/lite/guide/hosted_models) |
 |vgg16| fp32 |tensorflow| [vgg_16_2016_08_28.tar.gz](http://download.tensorflow.org/models/vgg_16_2016_08_28.tar.gz) | [tensorflow/models: Pre-trained Models](https://github.com/tensorflow/models/tree/master/research/slim#Pretrained) |

From cd3a26feb4177a32cf4b12a0173fc04a9c9da6f7 Mon Sep 17 00:00:00 2001
From: ysh329
Date: Sun, 6 Sep 2020 15:18:54 +0800
Subject: [PATCH 2/7] Update download.sh

---
 models/download.sh | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/models/download.sh b/models/download.sh
index 9c59667..f8ca9c9 100755
--- a/models/download.sh
+++ b/models/download.sh
@@ -107,6 +107,8 @@ function tensorflow_model_urls() {
         "https://storage.googleapis.com/download.tensorflow.org/models/tflite_11_05_08/mobilenet_v2_1.0_224.tgz" \
         \ # vgg16
         "http://download.tensorflow.org/models/vgg_16_2016_08_28.tar.gz" \
+        \ # squeezenet
+        "https://storage.googleapis.com/download.tensorflow.org/models/tflite/model_zoo/upload_20180427/squeezenet_2018_04_27.tgz" \
     )
     echo ${links_for_tensorflow[*]}
 }
@@ -131,6 +133,11 @@ function rename_tensorflow_models() {
     tar -zxvf ./vgg_16_2016_08_28.tar.gz
     mkdir tf_vgg16
     mv vgg_16.ckpt ./tf_vgg16/tf_vgg16.ckpt
+
+    # squeezenet
+    tar -zxvf squeezenet_2018_04_27.tgz
+    mkdir tf_squeezenet_v1.1
+    mv squeeze* ./tf_squeezenet_v1.1
 }
 
 function prepare_tensorflow_models() {
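Note: with the two patches above applied, models/download.sh both fetches the squeezenet archive and unpacks it into tf_squeezenet_v1.1/. A minimal sketch of exercising that path; it assumes download.sh is run from models/ and that everything in the tgz matches the squeeze* glob the rename step relies on (the archive's exact contents are not shown in the patch):

    # Sketch: fetch and unpack the squeezenet model, then check the result.
    cd models
    bash ./download.sh
    # rename_tensorflow_models should have moved squeeze* here:
    ls -lh tf_squeezenet_v1.1/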
From 7e77f9856af20b723c8daa874c15251a06c56d3b Mon Sep 17 00:00:00 2001
From: "ysh329@sina.com" <ysh329@sina.com>
Date: Sun, 6 Sep 2020 08:58:05 +0000
Subject: [PATCH 3/7] add benchmark.cc for tflite

---
 tflite/benchmark_model.cc | 258 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 258 insertions(+)
 create mode 100644 tflite/benchmark_model.cc

diff --git a/tflite/benchmark_model.cc b/tflite/benchmark_model.cc
new file mode 100644
index 0000000..43391fa
--- /dev/null
+++ b/tflite/benchmark_model.cc
@@ -0,0 +1,258 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/lite/tools/benchmark/benchmark_model.h"
+
+#include <cmath>
+#include <iostream>
+
+#include "tensorflow/lite/profiling/memory_info.h"
+#include "tensorflow/lite/profiling/time.h"
+#include "tensorflow/lite/tools/benchmark/benchmark_utils.h"
+#include "tensorflow/lite/tools/logging.h"
+
+namespace tflite {
+namespace benchmark {
+using tensorflow::Stat;
+
+BenchmarkParams BenchmarkModel::DefaultParams() {
+  BenchmarkParams params;
+  params.AddParam("num_runs", BenchmarkParam::Create<int32_t>(50));
+  params.AddParam("min_secs", BenchmarkParam::Create<float>(1.0f));
+  params.AddParam("max_secs", BenchmarkParam::Create<float>(150.0f));
+  params.AddParam("run_delay", BenchmarkParam::Create<float>(-1.0f));
+  params.AddParam("num_threads", BenchmarkParam::Create<int32_t>(1));
+  params.AddParam("use_caching", BenchmarkParam::Create<bool>(false));
+  params.AddParam("benchmark_name", BenchmarkParam::Create<std::string>(""));
+  params.AddParam("output_prefix", BenchmarkParam::Create<std::string>(""));
+  params.AddParam("warmup_runs", BenchmarkParam::Create<int32_t>(1));
+  params.AddParam("warmup_min_secs", BenchmarkParam::Create<float>(0.5f));
+  params.AddParam("verbose", BenchmarkParam::Create<bool>(false));
+  return params;
+}
+
+BenchmarkModel::BenchmarkModel() : params_(DefaultParams()) {}
+
+void BenchmarkLoggingListener::OnBenchmarkEnd(const BenchmarkResults& results) {
+  auto inference_us = results.inference_time_us();
+  auto init_us = results.startup_latency_us();
+  auto warmup_us = results.warmup_time_us();
+  auto init_mem_usage = results.init_mem_usage();
+  auto overall_mem_usage = results.overall_mem_usage();
+
+  double time_std_dev_ms = 0.;
+  double time_avg_ms = inference_us.avg() / 1000.;
+  for (size_t i = 0; i < inference_us.count(); ++i) {
+    time_std_dev_ms += pow((inference_us[i] / 1000. - time_avg_ms), 2);
+  }
+  time_std_dev_ms = sqrt(time_std_dev_ms / inference_us.count());
+
+  std::cout << params_.Get<std::string>("output_prefix")
+            << " min=" << inference_us.min() / 1000.
+            << " max=" << inference_us.max() / 1000.
+            << " avg=" << inference_us.avg() / 1000.
+            << " std_dev=" << time_std_dev_ms
+            << std::endl;
+  TFLITE_LOG(INFO) << "Inference timings in us: "
+                   << "Init: " << init_us << ", "
+                   << "First inference: " << warmup_us.first() << ", "
+                   << "Warmup (avg): " << warmup_us.avg() << ", "
+                   << "Inference (avg): " << inference_us.avg();
+
+  if (!init_mem_usage.IsSupported()) return;
+  TFLITE_LOG(INFO)
+      << "Note: as the benchmark tool itself affects memory footprint, the "
+         "following is only APPROXIMATE to the actual memory footprint of the "
+         "model at runtime. Take the information at your discretion.";
+  TFLITE_LOG(INFO) << "Peak memory footprint (MB): init="
+                   << init_mem_usage.max_rss_kb / 1024.0
+                   << " overall=" << overall_mem_usage.max_rss_kb / 1024.0;
+}
+
+std::vector<Flag> BenchmarkModel::GetFlags() {
+  return {
+      CreateFlag<int32_t>(
+          "num_runs", &params_,
+          "expected number of runs, see also min_secs, max_secs"),
+      CreateFlag<float>(
+          "min_secs", &params_,
+          "minimum number of seconds to rerun for, potentially making the "
+          "actual number of runs to be greater than num_runs"),
+      CreateFlag<float>(
+          "max_secs", &params_,
+          "maximum number of seconds to rerun for, potentially making the "
+          "actual number of runs to be less than num_runs. Note if --max-secs "
+          "is exceeded in the middle of a run, the benchmark will continue to "
+          "the end of the run but will not start the next run."),
+      CreateFlag<float>("run_delay", &params_, "delay between runs in seconds"),
+      CreateFlag<int32_t>("num_threads", &params_, "number of threads"),
+      CreateFlag<bool>(
+          "use_caching", &params_,
+          "Enable caching of prepacked weights matrices in matrix "
+          "multiplication routines. Currently implies the use of the Ruy "
+          "library."),
+      CreateFlag<std::string>("benchmark_name", &params_, "benchmark name"),
+      CreateFlag<std::string>("output_prefix", &params_,
+                              "benchmark output prefix"),
+      CreateFlag<int32_t>(
+          "warmup_runs", &params_,
+          "minimum number of runs performed on initialization, to "
+          "allow performance characteristics to settle, see also "
+          "warmup_min_secs"),
+      CreateFlag<float>(
+          "warmup_min_secs", &params_,
+          "minimum number of seconds to rerun for, potentially making the "
+          "actual number of warm-up runs to be greater than warmup_runs"),
+      CreateFlag<bool>("verbose", &params_,
+                       "Whether to log parameters whose values are not set. "
+                       "By default, only log those parameters that are set by "
+                       "parsing their values from the commandline flags."),
+  };
+}
+
+void BenchmarkModel::LogParams() {
+  const bool verbose = params_.Get<bool>("verbose");
+  TFLITE_LOG(INFO) << "Log parameter values verbosely: [" << verbose << "]";
+
+  LOG_BENCHMARK_PARAM(int32_t, "num_runs", "Min num runs", verbose);
+  LOG_BENCHMARK_PARAM(float, "min_secs", "Min runs duration (seconds)",
+                      verbose);
+  LOG_BENCHMARK_PARAM(float, "max_secs", "Max runs duration (seconds)",
+                      verbose);
+  LOG_BENCHMARK_PARAM(float, "run_delay", "Inter-run delay (seconds)", verbose);
+  LOG_BENCHMARK_PARAM(int32_t, "num_threads", "Num threads", verbose);
+  LOG_BENCHMARK_PARAM(bool, "use_caching", "Use caching", verbose);
+  LOG_BENCHMARK_PARAM(std::string, "benchmark_name", "Benchmark name", verbose);
+  LOG_BENCHMARK_PARAM(std::string, "output_prefix", "Output prefix", verbose);
+  LOG_BENCHMARK_PARAM(int32_t, "warmup_runs", "Min warmup runs", verbose);
+  LOG_BENCHMARK_PARAM(float, "warmup_min_secs",
+                      "Min warmup runs duration (seconds)", verbose);
+}
+
+TfLiteStatus BenchmarkModel::PrepareInputData() { return kTfLiteOk; }
+
+TfLiteStatus BenchmarkModel::ResetInputsAndOutputs() { return kTfLiteOk; }
+
+Stat<int64_t> BenchmarkModel::Run(int min_num_times, float min_secs,
+                                  float max_secs, RunType run_type,
+                                  TfLiteStatus* invoke_status) {
+  Stat<int64_t> run_stats;
+  TFLITE_LOG(INFO) << "Running benchmark for at least " << min_num_times
+                   << " iterations and at least " << min_secs << " seconds but"
+                   << " terminate if exceeding " << max_secs << " seconds.";
+  int64_t now_us = profiling::time::NowMicros();
+  int64_t min_finish_us = now_us + static_cast<int64_t>(min_secs * 1.e6f);
+  int64_t max_finish_us = now_us + static_cast<int64_t>(max_secs * 1.e6f);
+
+  *invoke_status = kTfLiteOk;
+  for (int run = 0; (run < min_num_times || now_us < min_finish_us) &&
+                    now_us <= max_finish_us;
+       run++) {
+    ResetInputsAndOutputs();
+    listeners_.OnSingleRunStart(run_type);
+    int64_t start_us = profiling::time::NowMicros();
+    TfLiteStatus status = RunImpl();
+    int64_t end_us = profiling::time::NowMicros();
+    listeners_.OnSingleRunEnd();
+
+    run_stats.UpdateStat(end_us - start_us);
+    util::SleepForSeconds(params_.Get<float>("run_delay"));
+    now_us = profiling::time::NowMicros();
+
+    if (status != kTfLiteOk) {
+      *invoke_status = status;
+    }
+  }
+
+  std::stringstream stream;
+  run_stats.OutputToStream(&stream);
+  TFLITE_LOG(INFO) << stream.str() << std::endl;
+
+  return run_stats;
+}
+
+TfLiteStatus BenchmarkModel::ValidateParams() { return kTfLiteOk; }
+
+TfLiteStatus BenchmarkModel::Run(int argc, char** argv) {
+  TF_LITE_ENSURE_STATUS(ParseFlags(&argc, argv));
+  return Run();
+}
+
+TfLiteStatus BenchmarkModel::Run() {
+  TF_LITE_ENSURE_STATUS(ValidateParams());
+
+  LogParams();
+
+  const double model_size_mb = MayGetModelFileSize() / 1e6;
+  const auto start_mem_usage = profiling::memory::GetMemoryUsage();
+  int64_t initialization_start_us = profiling::time::NowMicros();
+  TF_LITE_ENSURE_STATUS(Init());
+  const auto init_end_mem_usage = profiling::memory::GetMemoryUsage();
+  int64_t initialization_end_us = profiling::time::NowMicros();
+  int64_t startup_latency_us = initialization_end_us - initialization_start_us;
+  const auto init_mem_usage = init_end_mem_usage - start_mem_usage;
+
+  if (model_size_mb > 0) {
+    TFLITE_LOG(INFO) << "The input model file size (MB): " << model_size_mb;
+  }
+  TFLITE_LOG(INFO) << "Initialized session in " << startup_latency_us / 1e3
+                   << "ms.";
+
+  TF_LITE_ENSURE_STATUS(PrepareInputData());
+
+  TfLiteStatus status = kTfLiteOk;
+  uint64_t input_bytes = ComputeInputBytes();
+  listeners_.OnBenchmarkStart(params_);
+  Stat<int64_t> warmup_time_us =
+      Run(params_.Get<int32_t>("warmup_runs"),
+          params_.Get<float>("warmup_min_secs"), params_.Get<float>("max_secs"),
+          WARMUP, &status);
+  if (status != kTfLiteOk) {
+    return status;
+  }
+
+  Stat<int64_t> inference_time_us =
+      Run(params_.Get<int32_t>("num_runs"), params_.Get<float>("min_secs"),
+          params_.Get<float>("max_secs"), REGULAR, &status);
+  const auto overall_mem_usage =
+      profiling::memory::GetMemoryUsage() - start_mem_usage;
+
+  listeners_.OnBenchmarkEnd({model_size_mb, startup_latency_us, input_bytes,
+                             warmup_time_us, inference_time_us, init_mem_usage,
+                             overall_mem_usage});
+  return status;
+}
+
+TfLiteStatus BenchmarkModel::ParseFlags(int* argc, char** argv) {
+  auto flag_list = GetFlags();
+  const bool parse_result =
+      Flags::Parse(argc, const_cast<const char**>(argv), flag_list);
+  if (!parse_result) {
+    std::string usage = Flags::Usage(argv[0], flag_list);
+    TFLITE_LOG(ERROR) << usage;
+    return kTfLiteError;
+  }
+
+  std::string unconsumed_args =
+      Flags::ArgsToString(*argc, const_cast<const char**>(argv));
+  if (!unconsumed_args.empty()) {
+    TFLITE_LOG(WARN) << "Unconsumed cmdline flags: " << unconsumed_args;
+  }
+
+  return kTfLiteOk;
+}
+
+}  // namespace benchmark
+}  // namespace tflite
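Note: the std_dev block added to OnBenchmarkEnd above computes the population standard deviation of per-run latency, converted to milliseconds. With t_i the i-th inference time in microseconds and n = inference_us.count(), the loop implements:

    \bar{t}_{\mathrm{ms}} = \frac{1}{n}\sum_{i=1}^{n}\frac{t_i}{1000},
    \qquad
    \sigma_{\mathrm{ms}} = \sqrt{\frac{1}{n}\sum_{i=1}^{n}
        \Bigl(\frac{t_i}{1000}-\bar{t}_{\mathrm{ms}}\Bigr)^{2}}

Patch 5 below replaces this hand-rolled loop with the Stat accumulator's own std_deviation().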
From 71457a4d6b214570a5acde98fe26d8442ebbbd39 Mon Sep 17 00:00:00 2001
From: "ysh329@sina.com" <ysh329@sina.com>
Date: Sun, 6 Sep 2020 09:02:56 +0000
Subject: [PATCH 4/7] add benchmark_model.cc copy for build tflite

---
 tflite/build_tflite_android.sh | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tflite/build_tflite_android.sh b/tflite/build_tflite_android.sh
index 854efdb..5a43026 100755
--- a/tflite/build_tflite_android.sh
+++ b/tflite/build_tflite_android.sh
@@ -22,8 +22,8 @@ if [ ! -d ${ANDROID_SDK_HOME} ]; then
 fi
 
 # ref: https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/android
-git clone https://gitee.com/mirrors/tensorflow.git tflite
-#git clone https://github.com/TensorFlow/Tensorflow.git tflite
+#git clone https://gitee.com/mirrors/tensorflow.git tflite
+git clone https://github.com/TensorFlow/Tensorflow.git tflite
 cd tflite
 
 sudo apt update
@@ -42,6 +42,7 @@
 cd -
 
 cp ${TFLITE_DIR}/configure.py.bench ${TFLITE_DIR}/tflite/configure.py
+cp ${TFLITE_DIR}/benchmark_model.cc ${TFLITE_DIR}/tflite/tensorflow/lite/tools/benchmark/benchmark_model.cc
 cd ${TFLITE_DIR}/tflite
 ./configure
 # android: y
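Note: patch 4 swaps the clone source back to GitHub and, more importantly, overwrites the upstream benchmark_model.cc with the customized copy before configuring the build. A condensed sketch of the resulting flow; the bazel target and config flag are the standard ones for the TFLite benchmark tool and are assumed here, since the build invocation itself lies outside this hunk:

    # Sketch of the override-then-build flow this patch sets up.
    # TFLITE_DIR is the repo's tflite/ directory, per build_tflite_android.sh;
    # the bazel flags below are an assumption, not taken from this diff.
    cp ${TFLITE_DIR}/benchmark_model.cc \
       ${TFLITE_DIR}/tflite/tensorflow/lite/tools/benchmark/benchmark_model.cc
    cd ${TFLITE_DIR}/tflite
    ./configure
    bazel build -c opt --config=android_arm64 \
        //tensorflow/lite/tools/benchmark:benchmark_model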
From 514ca6f7f672363e7e20cd6bb9ce3002a7b3df82 Mon Sep 17 00:00:00 2001
From: "ysh329@sina.com" <ysh329@sina.com>
Date: Sun, 6 Sep 2020 10:35:32 +0000
Subject: [PATCH 5/7] fix miscs in tflite benchmark_model.cc

---
 tflite/benchmark_model.cc | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/tflite/benchmark_model.cc b/tflite/benchmark_model.cc
index 43391fa..28a18e0 100644
--- a/tflite/benchmark_model.cc
+++ b/tflite/benchmark_model.cc
@@ -52,18 +52,10 @@ void BenchmarkLoggingListener::OnBenchmarkEnd(const BenchmarkResults& results) {
   auto init_mem_usage = results.init_mem_usage();
   auto overall_mem_usage = results.overall_mem_usage();
 
-  double time_std_dev_ms = 0.;
-  double time_avg_ms = inference_us.avg() / 1000.;
-  for (size_t i = 0; i < inference_us.count(); ++i) {
-    time_std_dev_ms += pow((inference_us[i] / 1000. - time_avg_ms), 2);
-  }
-  time_std_dev_ms = sqrt(time_std_dev_ms / inference_us.count());
-
-  std::cout << params_.Get<std::string>("output_prefix")
-            << " min=" << inference_us.min() / 1000.
+  std::cout << " min=" << inference_us.min() / 1000.
             << " max=" << inference_us.max() / 1000.
             << " avg=" << inference_us.avg() / 1000.
-            << " std_dev=" << time_std_dev_ms
+            << " std_dev=" << inference_us.std_deviation() / 1000.
             << std::endl;
   TFLITE_LOG(INFO) << "Inference timings in us: "
                    << "Init: " << init_us << ", "
                    << "First inference: " << warmup_us.first() << ", "
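Note: patch 5 drops the hand-rolled loop, which indexed inference_us[i] and read params_ from the listener; tensorflow::Stat keeps only running sums rather than individual samples, and BenchmarkLoggingListener has no params_ member, so the original block most likely did not compile — presumably the "miscs" being fixed. Calling std_deviation() on the microsecond samples and dividing by 1000 yields the same millisecond figure, assuming Stat uses the usual running-moments formula from tensorflow/core/util/stats_calculator.h:

    \sigma_{\mu\mathrm{s}} = \sqrt{\mathrm{E}[t^{2}] - (\mathrm{E}[t])^{2}},
    \qquad
    \sigma_{\mathrm{ms}} = \sigma_{\mu\mathrm{s}} / 1000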
From 1b2cd0de9824e39c885fb293b311e5bdadb89496 Mon Sep 17 00:00:00 2001
From: "ysh329@sina.com" <ysh329@sina.com>
Date: Sun, 6 Sep 2020 10:42:44 +0000
Subject: [PATCH 6/7] fix benchmark models.cc for tflite

---
 tflite/benchmark_model.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tflite/benchmark_model.cc b/tflite/benchmark_model.cc
index 28a18e0..6b9b510 100644
--- a/tflite/benchmark_model.cc
+++ b/tflite/benchmark_model.cc
@@ -52,7 +52,7 @@ void BenchmarkLoggingListener::OnBenchmarkEnd(const BenchmarkResults& results) {
   auto init_mem_usage = results.init_mem_usage();
   auto overall_mem_usage = results.overall_mem_usage();
 
-  std::cout << " min=" << inference_us.min() / 1000.
+  std::cout << "time cost: min=" << inference_us.min() / 1000.
             << " max=" << inference_us.max() / 1000.
             << " avg=" << inference_us.avg() / 1000.
             << " std_dev=" << inference_us.std_deviation() / 1000.

From 52423da3d02b21ce80c7bb35a7a56bb307ec7034 Mon Sep 17 00:00:00 2001
From: "ysh329@sina.com" <ysh329@sina.com>
Date: Sun, 6 Sep 2020 11:15:49 +0000
Subject: [PATCH 7/7] fix benchmark_model.cc

---
 tflite/benchmark_model.cc | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tflite/benchmark_model.cc b/tflite/benchmark_model.cc
index 6b9b510..c8be3bb 100644
--- a/tflite/benchmark_model.cc
+++ b/tflite/benchmark_model.cc
@@ -52,10 +52,10 @@ void BenchmarkLoggingListener::OnBenchmarkEnd(const BenchmarkResults& results) {
   auto init_mem_usage = results.init_mem_usage();
   auto overall_mem_usage = results.overall_mem_usage();
 
-  std::cout << "time cost: min=" << inference_us.min() / 1000.
-            << " max=" << inference_us.max() / 1000.
-            << " avg=" << inference_us.avg() / 1000.
-            << " std_dev=" << inference_us.std_deviation() / 1000.
+  std::cout << "time cost: min=" << inference_us.min() / 1000. << "ms"
+            << " max=" << inference_us.max() / 1000. << "ms"
+            << " avg=" << inference_us.avg() / 1000. << "ms"
+            << " std_dev=" << inference_us.std_deviation() / 1000. << "ms"
             << std::endl;
   TFLITE_LOG(INFO) << "Inference timings in us: "
                    << "Init: " << init_us << ", "
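Note: after patch 7 the one-line summary carries explicit ms units. A sketch of running the patched binary on an Android device; the binary path follows bazel's output layout, --graph comes from the tool's TFLite-model subclass rather than the flags defined in benchmark_model.cc, and the model file and printed numbers are hypothetical:

    # Push the benchmark binary and a model to the device, then run it.
    adb push bazel-bin/tensorflow/lite/tools/benchmark/benchmark_model /data/local/tmp/
    adb push mobilenet_v1_1.0_224.tflite /data/local/tmp/
    adb shell /data/local/tmp/benchmark_model \
        --graph=/data/local/tmp/mobilenet_v1_1.0_224.tflite \
        --num_threads=1 --num_runs=50 --warmup_runs=1
    # Expected summary line (format per patches 6 and 7; values hypothetical):
    # time cost: min=31.2ms max=35.8ms avg=32.5ms std_dev=0.9ms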