From 88482350fe617746fa5729452a2383f9e4537af8 Mon Sep 17 00:00:00 2001
From: Jiyoung Giuliana Yun
Date: Wed, 18 Oct 2023 15:37:53 +0900
Subject: [PATCH] [tools/onert_train] Implement memory profiling functions
 (#11731)

This commit implements memory profiling functions. It also updates the
output result format.

ONE-DCO-1.0-Signed-off-by: Jiyoung Yun
---
 tests/tools/onert_train/src/measure.h      | 209 +++++++++++++++++++--
 tests/tools/onert_train/src/onert_train.cc |  26 ++-
 2 files changed, 204 insertions(+), 31 deletions(-)

diff --git a/tests/tools/onert_train/src/measure.h b/tests/tools/onert_train/src/measure.h
index f7c8610d0b2..ab9be905233 100644
--- a/tests/tools/onert_train/src/measure.h
+++ b/tests/tools/onert_train/src/measure.h
@@ -17,8 +17,14 @@
 #ifndef __ONERT_TRAIN_MEASURE_H__
 #define __ONERT_TRAIN_MEASURE_H__
 
+#include "benchmark/MemoryInfo.h"
+#include "benchmark/MemoryPoller.h"
+
 #include <algorithm>
 #include <chrono>
+#include <iomanip>
+#include <iostream>
+#include <memory>
 #include <vector>
 
 namespace
@@ -34,55 +40,226 @@ uint64_t nowMicros()
 namespace onert_train
 {
 
+enum PhaseType
+{
+  MODEL_LOAD,
+  PREPARE,
+  EXECUTE,
+  END_OF_PHASE
+};
+
+const std::string getPhaseTypeStr(PhaseType type)
+{
+  switch (type)
+  {
+    case MODEL_LOAD:
+      return "MODEL_LOAD";
+    case PREPARE:
+      return "PREPARE";
+    case EXECUTE:
+      return "EXECUTE";
+    default:
+      throw std::runtime_error("Invalid phase type");
+  }
+}
+
+benchmark::PhaseEnum convertToPhaseEnum(PhaseType type)
+{
+  switch (type)
+  {
+    case MODEL_LOAD:
+      return benchmark::PhaseEnum::MODEL_LOAD;
+    case PREPARE:
+      return benchmark::PhaseEnum::PREPARE;
+    case EXECUTE:
+      return benchmark::PhaseEnum::EXECUTE;
+    default:
+      throw std::runtime_error("Invalid phase type");
+  }
+}
+
+enum AggregateType
+{
+  AVERAGE,
+  SUM,
+  END_OF_AGGREGATE_TYPE
+};
+
+enum MemoryType
+{
+  RSS,
+  HWM,
+  PSS,
+  END_OF_MEM_TYPE
+};
+
+const std::string getMemoryTypeStr(MemoryType type)
+{
+  switch (type)
+  {
+    case RSS:
+      return "RSS";
+    case HWM:
+      return "HWM";
+    case PSS:
+      return "PSS";
+    default:
+      throw std::runtime_error("Invalid memory type");
+  }
+}
+
 struct Step
 {
   uint64_t time; // us
-  // TODO Support memory usage
+};
+
+struct Phase
+{
+  uint64_t time;                                // us
+  uint32_t memory[MemoryType::END_OF_MEM_TYPE]; // kB
 };
 
 class Measure
 {
 public:
-  Measure() = default;
+  Measure(bool check_mem_poll) : _check_mem_poll(check_mem_poll)
+  {
+    if (_check_mem_poll)
+    {
+      assert(benchmark::prepareVmRSS());
+      _mem_poll = std::make_unique<benchmark::MemoryPoller>(std::chrono::milliseconds(100), false);
+    }
+  }
 
   void set(const int epoch, const int step)
   {
-    _results.clear();
-    _results.resize(epoch);
-    std::for_each(_results.begin(), _results.end(), [step](auto &v) { v.resize(step); });
+    _step_results.clear();
+    _step_results.resize(epoch);
+    std::for_each(_step_results.begin(), _step_results.end(), [step](auto &v) { v.resize(step); });
+  }
+
+  void run(const PhaseType phaseType, const std::function<void()> &func)
+  {
+    auto phaseEnum = convertToPhaseEnum(phaseType);
+
+    if (_check_mem_poll)
+    {
+      _mem_poll->start(phaseEnum);
+    }
+    _phase_results[phaseType].time = nowMicros();
+
+    func();
+
+    _phase_results[phaseType].time = nowMicros() - _phase_results[phaseType].time;
+    if (_check_mem_poll)
+    {
+      _mem_poll->end(phaseEnum);
+
+      _phase_results[phaseType].memory[MemoryType::RSS] = _mem_poll->getRssMap().at(phaseEnum);
+      _phase_results[phaseType].memory[MemoryType::HWM] = _mem_poll->getHwmMap().at(phaseEnum);
+      _phase_results[phaseType].memory[MemoryType::PSS] = _mem_poll->getPssMap().at(phaseEnum);
+    }
   }
 
   void run(const int epoch, const int step, const std::function<void()> &func)
   {
-    if (_results.empty() || _results.size() <= epoch || _results[epoch].size() <= step)
+    if (_step_results.empty() || _step_results.size() <= epoch ||
+        _step_results[epoch].size() <= step)
     {
       throw std::runtime_error("Please set the number of epochs and steps first");
     }
-    _results[epoch][step].time = nowMicros();
-    func();
-    _results[epoch][step].time = nowMicros() - _results[epoch][step].time;
+    _step_results[epoch][step].time = nowMicros();
+    func();
+    _step_results[epoch][step].time = nowMicros() - _step_results[epoch][step].time;
+  }
+
+  double sumTimeMicro(const int epoch)
+  {
+    double sum = 0u;
+    std::for_each(_step_results[epoch].begin(), _step_results[epoch].end(),
+                  [&sum](auto &v) { sum += v.time; });
+    return sum;
   }
 
-  double timeMicros(const int epoch)
+  double timeMicros(const int epoch, const AggregateType aggType)
   {
-    if (_results.empty() || _results.size() <= epoch)
+    if (_step_results.empty() || _step_results.size() <= epoch)
     {
       throw std::runtime_error("Invalid epoch");
     }
-    double sum = 0u;
-    std::for_each(_results[epoch].begin(), _results[epoch].end(),
-                  [&sum](auto &v) { sum += v.time; });
-    return sum / _results[epoch].size();
+    switch (aggType)
+    {
+      case AVERAGE:
+        return sumTimeMicro(epoch) / _step_results[epoch].size();
+      case SUM:
+        return sumTimeMicro(epoch);
+      default:
+        throw std::runtime_error("Invalid aggregate type");
+    }
   }
 
-  double timeMs(const int epoch) { return timeMicros(epoch) / 1e3; }
+  void printTimeMs(const int epoch, const AggregateType aggType)
+  {
+    std::cout.precision(3);
+    std::cout << " - time: " << timeMicros(epoch, aggType) / 1e3 << "ms/step";
+  }
+
+  void printResultTime()
+  {
+    std::cout << "===================================" << std::endl;
+    for (int i = 0; i < PhaseType::END_OF_PHASE; ++i)
+    {
+      auto type = static_cast<PhaseType>(i);
+      std::cout << std::setw(12) << std::left << getPhaseTypeStr(type) << " takes "
+                << _phase_results[type].time / 1e3 << " ms" << std::endl;
+      if (i == PhaseType::EXECUTE)
+      {
+        for (int j = 0; j < _step_results.size(); ++j)
+        {
+          std::cout << "- "
+                    << "Epoch " << j + 1 << std::setw(12) << std::right << " takes "
+                    << timeMicros(j, AggregateType::SUM) / 1e3 << " ms" << std::endl;
+        }
+      }
+    }
+    std::cout << "===================================" << std::endl;
+  }
+
+  void printResultMemory()
+  {
+    for (int i = 0; i < MemoryType::END_OF_MEM_TYPE; ++i)
+    {
+      auto type = static_cast<MemoryType>(i);
+      std::cout << getMemoryTypeStr(type) << std::endl;
+      for (int j = 0; j < PhaseType::END_OF_PHASE; ++j)
+      {
+        auto phaseType = static_cast<PhaseType>(j);
+        std::cout << "- " << std::setw(12) << std::left << getPhaseTypeStr(phaseType) << " takes "
+                  << _phase_results[phaseType].memory[i] << " kb" << std::endl;
+      }
+      std::cout << "===================================" << std::endl;
+    }
+  }
+
+  void printResult()
+  {
+    printResultTime();
+    if (_check_mem_poll)
+    {
+      printResultMemory();
+    }
+  }
 
 private:
-  std::vector<std::vector<Step>> _results;
+  std::unordered_map<PhaseType, Phase> _phase_results;
+  std::vector<std::vector<Step>> _step_results;
+
+  bool _check_mem_poll;
+  std::unique_ptr<benchmark::MemoryPoller> _mem_poll;
 };
 
 } // namespace onert_train
diff --git a/tests/tools/onert_train/src/onert_train.cc b/tests/tools/onert_train/src/onert_train.cc
index 678d13fc963..bf7a3441d66 100644
--- a/tests/tools/onert_train/src/onert_train.cc
+++ b/tests/tools/onert_train/src/onert_train.cc
@@ -56,13 +56,15 @@ int main(const int argc, char **argv)
 
     // TODO Apply verbose level to phases
     const int verbose = args.getVerboseLevel();
-    benchmark::Phases phases(benchmark::PhaseOption{});
+
+    // prepare measure tool
+    Measure measure(args.getMemoryPoll());
 
     nnfw_session *session = nullptr;
     NNPR_ENSURE_STATUS(nnfw_create_session(&session));
 
     // ModelLoad
-    phases.run("MODEL_LOAD", [&](const benchmark::Phase &, uint32_t) {
+    measure.run(PhaseType::MODEL_LOAD, [&]() {
       if (args.useSingleModel())
         NNPR_ENSURE_STATUS(
           nnfw_load_model_from_modelfile(session, args.getModelFilename().c_str()));
@@ -153,9 +155,8 @@ int main(const int argc, char **argv)
     // prepare execution
     // TODO When nnfw_{prepare|run} are failed, can't catch the time
-    phases.run("PREPARE", [&](const benchmark::Phase &, uint32_t) {
-      NNPR_ENSURE_STATUS(nnfw_train_prepare(session, &tri));
-    });
+    measure.run(PhaseType::PREPARE,
+                [&]() { NNPR_ENSURE_STATUS(nnfw_train_prepare(session, &tri)); });
 
     // prepare input and expected tensor info lists
     std::vector<nnfw_tensorinfo> input_infos;
     std::vector<nnfw_tensorinfo> expected_infos;
@@ -199,9 +200,8 @@ int main(const int argc, char **argv)
       exit(-1);
     }
 
-    Measure measure;
     std::vector<float> losses(num_expecteds);
-    phases.run("EXECUTE", [&](const benchmark::Phase &, uint32_t) {
+    measure.run(PhaseType::EXECUTE, [&]() {
       const int num_step = data_length / tri.batch_size;
       const int num_epoch = args.getEpoch();
      measure.set(num_epoch, num_step);
@@ -242,10 +242,10 @@ int main(const int argc, char **argv)
 
        // print loss
        std::cout << std::fixed;
-        std::cout.precision(3);
-        std::cout << "Epoch " << epoch + 1 << "/" << num_epoch << " - " << measure.timeMs(epoch)
-                  << "ms/step - loss: ";
+        std::cout << "Epoch " << epoch + 1 << "/" << num_epoch;
+        measure.printTimeMs(epoch, AggregateType::AVERAGE);
        std::cout.precision(4);
+        std::cout << " - loss: ";
        for (uint32_t i = 0; i < num_expecteds; ++i)
        {
          std::cout << "[" << i << "] " << losses[i] / num_step;
@@ -256,11 +256,7 @@ int main(const int argc, char **argv)
 
     NNPR_ENSURE_STATUS(nnfw_close_session(session));
 
-    // prepare result
-    benchmark::Result result(phases);
-
-    // to stdout
-    benchmark::printResult(result);
+    measure.printResult();
 
     return 0;
   }
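
Usage note (not part of the patch): the sketch below shows how the reworked Measure API above is meant to be driven, mirroring the onert_train.cc changes. The standalone main(), the hard-coded `true` for memory polling, and the toy epoch/step counts are illustrative assumptions only; Measure, PhaseType and AggregateType come from measure.h in this patch, while benchmark::MemoryPoller and benchmark::prepareVmRSS come from the existing benchmark library.

// Illustrative sketch only -- not applied by this patch.
// Assumptions: a bare main(), memory polling forced on, and toy epoch/step
// counts; the real tool wires these to Args and the nnfw_* session calls.
#include "measure.h"

#include <iostream>

int main()
{
  using namespace onert_train;

  Measure measure(true); // true -> poll RSS/HWM/PSS via benchmark::MemoryPoller

  // Phase-level profiling: wall-clock time plus memory per phase.
  measure.run(PhaseType::MODEL_LOAD, []() { /* load the model here */ });
  measure.run(PhaseType::PREPARE, []() { /* prepare training here */ });

  measure.run(PhaseType::EXECUTE, [&]() {
    const int num_epoch = 2;
    const int num_step = 10;
    measure.set(num_epoch, num_step); // must precede the per-step run() overload
    for (int e = 0; e < num_epoch; ++e)
    {
      for (int s = 0; s < num_step; ++s)
      {
        measure.run(e, s, []() { /* one training step here */ });
      }
      measure.printTimeMs(e, AggregateType::AVERAGE); // " - time: <x>ms/step"
      std::cout << std::endl;
    }
  });

  // Per-phase time table, plus RSS/HWM/PSS in kB because polling is enabled.
  measure.printResult();
  return 0;
}

Keeping both the per-phase and per-step results inside Measure is what lets onert_train.cc drop the benchmark::Phases/benchmark::Result plumbing and finish with a single measure.printResult() call.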