From 9b1658a94b1e65d7fb00d719039e88a622c98ff9 Mon Sep 17 00:00:00 2001 From: Gita Koblents Date: Wed, 10 Apr 2024 22:34:36 -0400 Subject: [PATCH] Implement METHOD STATS verbose output - introduce new verbose tag: "#METHOD STATS" - print "key=value" pairs for each method where "key" is an arbitrary string - values can be aggregated across all methods by a parsing script - this format can be used for arbitrary info about a compiled method - in particular, introduce a function that prints various footprint stats for a method --- compiler/codegen/OMRCodeGenerator.cpp | 112 ++++++++++++++++++++++++ compiler/codegen/OMRCodeGenerator.hpp | 39 +++++++++ compiler/env/VerboseLog.cpp | 1 + compiler/env/VerboseLog.hpp | 1 + compiler/infra/OMRCfg.cpp | 11 +++ compiler/infra/OMRCfg.hpp | 3 + compiler/x/codegen/OMRCodeGenerator.cpp | 32 +++++++ compiler/x/codegen/OMRCodeGenerator.hpp | 3 + 8 files changed, 202 insertions(+) diff --git a/compiler/codegen/OMRCodeGenerator.cpp b/compiler/codegen/OMRCodeGenerator.cpp index b2739b44e15..e542c8e0f6c 100644 --- a/compiler/codegen/OMRCodeGenerator.cpp +++ b/compiler/codegen/OMRCodeGenerator.cpp @@ -3388,3 +3388,115 @@ OMR::CodeGenerator::redoTrampolineReservationIfNecessary(TR::Instruction *callIn self()->fe()->reserveTrampolineIfNecessary(self()->comp(), calleeSymRef, true); } } + +uint32_t +OMR::CodeGenerator::getCodeSnippetsSize() + { + uint32_t codeSnippetsSize = 0; + + TR::list snippetList = self()->getSnippetList(); + + for (auto snippets = snippetList.begin(); snippets != snippetList.end(); ++snippets) + codeSnippetsSize += (*snippets)->getLength(0); + + return codeSnippetsSize; + } + +void +OMR::CodeGenerator::getMethodStats(MethodStats &methodStats) + { + auto &codeSize = methodStats.codeSize; + auto &warmBlocks = methodStats.warmBlocks; + auto &coldBlocks = methodStats.coldBlocks; + auto &prologue = methodStats.prologue; + auto &snippets = methodStats.snippets; + auto &outOfLine = methodStats.outOfLine; + auto &unaccounted = 
methodStats.unaccounted; + auto &blocksInColdCache = methodStats.blocksInColdCache; + auto &overestimateInColdCache = methodStats.overestimateInColdCache; + + // init + codeSize = 0; + warmBlocks = 0; + coldBlocks = 0; + prologue = 0; + snippets = 0; + outOfLine = 0; + unaccounted = 0; + blocksInColdCache = 0; + overestimateInColdCache = 0; // TODO: implement + + uint32_t allBlocks = 0; + uint32_t sizeBeforeFirstBlock = 0; + bool firstBlock = true; + bool insideColdCache = false; + uint32_t cold_frequence_size[NUMBER_BLOCK_FREQUENCIES] = {0}; + + codeSize = (uint32_t)(self()->getCodeEnd() - self()->getCodeStart()); + +#if 0 + // enable when splitting warm and cold blocks is enabled + if (self()->getLastWarmInstruction()) + codeSize = codeSize + self()->getWarmCodeEnd() - self()->getColdCodeStart(); +#endif + + for (TR::TreeTop *tt = self()->comp()->getMethodSymbol()->getFirstTreeTop(); tt ; tt = tt->getNextTreeTop()) + { + TR::Node *node = tt->getNode(); + + TR::Block *block; + uint8_t *startCursor, *endCursor; + + if (node->getOpCodeValue() == TR::BBStart) + { + block = node->getBlock(); + startCursor = block->getFirstInstruction()->getBinaryEncoding(); + endCursor = block->getLastInstruction()->getBinaryEncoding(); + + uint32_t blockSize = static_cast(endCursor - startCursor); + allBlocks += blockSize; + + if (block->isCold()) + { + coldBlocks += blockSize; + int32_t freq = block->getFrequency(); + + if (freq >= 0 && freq < NUMBER_BLOCK_FREQUENCIES) + cold_frequence_size[freq] += blockSize; + } + + if (insideColdCache) + blocksInColdCache += blockSize; + + if (firstBlock) + { + sizeBeforeFirstBlock = (uint32_t)(startCursor - self()->getCodeStart()); + firstBlock = false; + } +#if 0 + /// enable when splitting warm and cold blocks is enabled + if (block->isLastWarmBlock()) + insideColdCache = true; +#endif + } + } + + warmBlocks = allBlocks - coldBlocks; + snippets = self()->getCodeSnippetsSize() + self()->getDataSnippetsSize(); + outOfLine = 
self()->getOutOfLineCodeSize(); + unaccounted = codeSize - allBlocks - sizeBeforeFirstBlock - outOfLine - snippets; + prologue = sizeBeforeFirstBlock; + + if (self()->comp()->getOption(TR_TraceCG)) + { + uint32_t known_cold_blocks = 0; + for (int i = 0; i < NUMBER_BLOCK_FREQUENCIES; i++) + { + traceMsg(self()->comp(), "FOOTPRINT: COLD BLOCK TYPE: %s = %d\n", OMR::CFG::blockFrequencyNames[i], + cold_frequence_size[i]); + + known_cold_blocks += cold_frequence_size[i]; + } + traceMsg(self()->comp(), "FOOTPRINT: COLD BLOCK TYPE: OTHER = %d\n", coldBlocks - known_cold_blocks); + } + } diff --git a/compiler/codegen/OMRCodeGenerator.hpp b/compiler/codegen/OMRCodeGenerator.hpp index 9c9be51d50b..32fb172bcaa 100644 --- a/compiler/codegen/OMRCodeGenerator.hpp +++ b/compiler/codegen/OMRCodeGenerator.hpp @@ -1256,6 +1256,45 @@ class OMR_EXTENSIBLE CodeGenerator TR::list *getSnippetsToBePatchedOnClassRedefinition() { return &_snippetsToBePatchedOnClassRedefinition; } + /** + * \brief Calculates total size of all code snippets + * + * \return total size of all code snippets + */ + uint32_t getCodeSnippetsSize(); + + /** + * \brief Calculates total size of all data snippets + * + * \return total size of all data snippets + */ + uint32_t getDataSnippetsSize() { return 0; } + + /** + * \brief Calculates total size of all out of line code + * + * \return total size of all out of line code + */ + uint32_t getOutOfLineCodeSize() { return 0; } + + struct MethodStats + { + uint32_t codeSize; + uint32_t warmBlocks; + uint32_t coldBlocks; + uint32_t prologue; + uint32_t snippets; + uint32_t outOfLine; + uint32_t unaccounted; + uint32_t blocksInColdCache; + uint32_t overestimateInColdCache; + }; + + /** + * \brief Fills in MethodStats structure with footprint data + */ + void getMethodStats(MethodStats &methodStats); + // -------------------------------------------------------------------------- // Register pressure // diff --git a/compiler/env/VerboseLog.cpp b/compiler/env/VerboseLog.cpp 
index 0d5326a40b3..44e511c2fc7 100644 --- a/compiler/env/VerboseLog.cpp +++ b/compiler/env/VerboseLog.cpp @@ -67,6 +67,7 @@ const char * TR_VerboseLog::_vlogTable[] = "#FSD: ", "#VECTOR API: ", "#CHECKPOINT RESTORE: ", + "#METHOD STATS: " }; void TR_VerboseLog::writeLine(TR_VlogTag tag, const char *format, ...) diff --git a/compiler/env/VerboseLog.hpp b/compiler/env/VerboseLog.hpp index 93533e73a57..d0900e9b84a 100644 --- a/compiler/env/VerboseLog.hpp +++ b/compiler/env/VerboseLog.hpp @@ -75,6 +75,7 @@ enum TR_VlogTag TR_Vlog_FSD, TR_Vlog_VECTOR_API, TR_Vlog_CHECKPOINT_RESTORE, + TR_Vlog_METHOD_STATS, TR_Vlog_numTags }; diff --git a/compiler/infra/OMRCfg.cpp b/compiler/infra/OMRCfg.cpp index 5e89db15d75..c1bb07daf5e 100644 --- a/compiler/infra/OMRCfg.cpp +++ b/compiler/infra/OMRCfg.cpp @@ -67,6 +67,17 @@ OMR::CFG::self() { return static_cast(this); } +const char* +OMR::CFG::blockFrequencyNames[NUMBER_BLOCK_FREQUENCIES] = + { + "UNKNOWN_COLD_BLOCK_COUNT", + "VERSIONED_COLD_BLOCK_COUNT", + "UNRESOLVED_COLD_BLOCK_COUNT", + "CATCH_COLD_BLOCK_COUNT", + "INTERP_CALLEE_COLD_BLOCK_COUNT", + "REVERSE_ARRAYCOPY_COLD_BLOCK_COUNT" + }; + TR::CFGNode * OMR::CFG::addNode(TR::CFGNode *n, TR_RegionStructure *parent, bool isEntryInParent) { diff --git a/compiler/infra/OMRCfg.hpp b/compiler/infra/OMRCfg.hpp index 54d153faf14..1bcad80ee0f 100644 --- a/compiler/infra/OMRCfg.hpp +++ b/compiler/infra/OMRCfg.hpp @@ -73,6 +73,7 @@ template class TR_Array; #define INTERP_CALLEE_COLD_BLOCK_COUNT 4 #define REVERSE_ARRAYCOPY_COLD_BLOCK_COUNT 5 #define MAX_COLD_BLOCK_COUNT 5 +#define NUMBER_BLOCK_FREQUENCIES 6 #define MAX_WARM_BLOCK_COUNT ((MAX_BLOCK_COUNT + MAX_COLD_BLOCK_COUNT)/10) #define MAX_HOT_BLOCK_COUNT (2*MAX_WARM_BLOCK_COUNT) @@ -357,6 +358,8 @@ class CFG IsOrphanedRegion }; + static const char *blockFrequencyNames[]; + protected: TR::Compilation *_compilation; TR::ResolvedMethodSymbol *_method; diff --git a/compiler/x/codegen/OMRCodeGenerator.cpp 
b/compiler/x/codegen/OMRCodeGenerator.cpp index 266ab5ac0d2..aad79571b4a 100644 --- a/compiler/x/codegen/OMRCodeGenerator.cpp +++ b/compiler/x/codegen/OMRCodeGenerator.cpp @@ -2363,6 +2363,19 @@ void OMR::X86::CodeGenerator::emitDataSnippets() } } +uint32_t OMR::X86::CodeGenerator::getDataSnippetsSize() + { + uint32_t length = 0; + + for (auto iterator = _dataSnippetList.begin(); iterator != _dataSnippetList.end(); ++iterator) + { + length += (*iterator)->getLength(0); + } + + return length; + } + + TR::X86ConstantDataSnippet *OMR::X86::CodeGenerator::findOrCreate2ByteConstant(TR::Node * n, int16_t c) { return self()->findOrCreateConstantDataSnippet(n, &c, 2); @@ -3311,3 +3324,22 @@ OMR::X86::CodeGenerator::considerTypeForGRA(TR::SymbolReference *symRef) return true; } } + +uint32_t +OMR::X86::CodeGenerator::getOutOfLineCodeSize() + { + uint32_t totalSize = 0; + + auto oiIterator = self()->getOutlinedInstructionsList().begin(); + while (oiIterator != self()->getOutlinedInstructionsList().end()) + { + auto start = (*oiIterator)->getFirstInstruction()->getBinaryEncoding(); + auto end = (*oiIterator)->getAppendInstruction()->getBinaryEncoding(); + + totalSize += static_cast(end - start); + + ++oiIterator; + } + + return totalSize; + } diff --git a/compiler/x/codegen/OMRCodeGenerator.hpp b/compiler/x/codegen/OMRCodeGenerator.hpp index c8491c9d2ff..cbedb51c096 100644 --- a/compiler/x/codegen/OMRCodeGenerator.hpp +++ b/compiler/x/codegen/OMRCodeGenerator.hpp @@ -483,6 +483,7 @@ class OMR_EXTENSIBLE CodeGenerator : public OMR::CodeGenerator int32_t setEstimatedLocationsForDataSnippetLabels(int32_t estimatedSnippetStart); void emitDataSnippets(); bool hasDataSnippets() { return _dataSnippetList.empty() ? 
false : true; } + uint32_t getDataSnippetsSize(); TR::list &getSpilledIntRegisters() {return _spilledIntRegisters;} @@ -637,6 +638,8 @@ class OMR_EXTENSIBLE CodeGenerator : public OMR::CodeGenerator bool considerTypeForGRA(TR::DataType dt); bool considerTypeForGRA(TR::SymbolReference *symRef); + uint32_t getOutOfLineCodeSize(); + /* * \brief create a data snippet. *