diff --git a/cpucounters.cpp b/cpucounters.cpp index fd499d2b..7141e06a 100644 --- a/cpucounters.cpp +++ b/cpucounters.cpp @@ -757,6 +757,7 @@ void PCM::initRDT() return; } #endif + std::cout << "Initializing RMIDs" << std::endl; unsigned maxRMID; /* Calculate maximum number of RMID supported by socket */ maxRMID = getMaxRMID(); @@ -844,6 +845,7 @@ void PCM::initCStateSupportTables() case CHERRYTRAIL: case APOLLO_LAKE: case DENVERTON: + case SNOWRIDGE: PCM_CSTATE_ARRAY(pkgCStateMsr, PCM_PARAM_PROTECT({0, 0, 0x3F8, 0, 0x3F9, 0, 0x3FA, 0, 0, 0, 0 }) ); case NEHALEM_EP: case NEHALEM: @@ -910,6 +912,7 @@ void PCM::initCStateSupportTables() case APOLLO_LAKE: case DENVERTON: PCM_SKL_PATH_CASES + case SNOWRIDGE: case ICX: PCM_CSTATE_ARRAY(coreCStateMsr, PCM_PARAM_PROTECT({0, 0, 0, 0x3FC, 0, 0, 0x3FD, 0x3FE, 0, 0, 0}) ); case KNL: @@ -1551,6 +1554,7 @@ bool PCM::detectNominalFrequency() || cpu_model == APOLLO_LAKE || cpu_model == DENVERTON || useSKLPath() + || cpu_model == SNOWRIDGE || cpu_model == KNL || cpu_model == SKX || cpu_model == ICX @@ -1699,7 +1703,7 @@ void PCM::initUncoreObjects() #endif } } - if (cpu_model == ICX) + if (cpu_model == ICX || cpu_model == SNOWRIDGE) { initSocket2Ubox0Bus(); for (size_t s = 0; s < (size_t)num_sockets && s < socket2UBOX0bus.size() && s < server_pcicfg_uncore.size(); ++s) @@ -1825,20 +1829,13 @@ void PCM::initUncorePMUsDirect() } } // init IIO addresses - std::vector IIO_units; - IIO_units.push_back((int32)IIO_CBDMA); - IIO_units.push_back((int32)IIO_PCIe0); - IIO_units.push_back((int32)IIO_PCIe1); - IIO_units.push_back((int32)IIO_PCIe2); - IIO_units.push_back((int32)IIO_MCP0); - IIO_units.push_back((int32)IIO_MCP1); if (getCPUModel() == PCM::SKX) { iioPMUs.resize(num_sockets); for (uint32 s = 0; s < (uint32)num_sockets; ++s) { auto & handle = MSR[socketRefCore[s]]; - for (const auto & unit: IIO_units) + for (int unit = 0; unit < SKX_IIO_STACK_COUNT; ++unit) { iioPMUs[s][unit] = UncorePMU( std::make_shared(handle, 
SKX_IIO_CBDMA_UNIT_CTL + SKX_IIO_PM_REG_STEP * unit), @@ -1854,14 +1851,13 @@ void PCM::initUncorePMUsDirect() } } } - // Calculate IIO CTL and CTR MSRs using UNIT CTL base address - if (getCPUModel() == PCM::ICX) + else if (getCPUModel() == PCM::ICX) { iioPMUs.resize(num_sockets); for (uint32 s = 0; s < (uint32)num_sockets; ++s) { auto & handle = MSR[socketRefCore[s]]; - for (const auto & unit: IIO_units) + for (int unit = 0; unit < ICX_IIO_STACK_COUNT; ++unit) { iioPMUs[s][unit] = UncorePMU( std::make_shared(handle, ICX_IIO_UNIT_CTL[unit]), @@ -1877,6 +1873,28 @@ void PCM::initUncorePMUsDirect() } } } + else if (getCPUModel() == PCM::SNOWRIDGE) + { + iioPMUs.resize(num_sockets); + for (uint32 s = 0; s < (uint32)num_sockets; ++s) + { + auto & handle = MSR[socketRefCore[s]]; + for (int unit = 0; unit < SNR_IIO_STACK_COUNT; ++unit) + { + iioPMUs[s][unit] = UncorePMU( + std::make_shared(handle, SNR_IIO_CBDMA_UNIT_CTL + SNR_IIO_PM_REG_STEP * unit), + std::make_shared(handle, SNR_IIO_CBDMA_CTL0 + SNR_IIO_PM_REG_STEP * unit + 0), + std::make_shared(handle, SNR_IIO_CBDMA_CTL0 + SNR_IIO_PM_REG_STEP * unit + 1), + std::make_shared(handle, SNR_IIO_CBDMA_CTL0 + SNR_IIO_PM_REG_STEP * unit + 2), + std::make_shared(handle, SNR_IIO_CBDMA_CTL0 + SNR_IIO_PM_REG_STEP * unit + 3), + std::make_shared(handle, SNR_IIO_CBDMA_CTR0 + SNR_IIO_PM_REG_STEP * unit + 0), + std::make_shared(handle, SNR_IIO_CBDMA_CTR0 + SNR_IIO_PM_REG_STEP * unit + 1), + std::make_shared(handle, SNR_IIO_CBDMA_CTR0 + SNR_IIO_PM_REG_STEP * unit + 2), + std::make_shared(handle, SNR_IIO_CBDMA_CTR0 + SNR_IIO_PM_REG_STEP * unit + 3) + ); + } + } + } if (hasPCICFGUncore() && MSR.size()) { @@ -2178,6 +2196,7 @@ bool PCM::isCPUModelSupported(const int model_) || model_ == WESTMERE_EP || model_ == WESTMERE_EX || isAtom(model_) + || model_ == SNOWRIDGE || model_ == CLARKDALE || model_ == SANDY_BRIDGE || model_ == JAKETOWN @@ -2484,6 +2503,24 @@ PCM::ErrorCode PCM::program(const PCM::ProgramMode mode_, const void * 
parameter } else switch ( cpu_model ) { + case SNOWRIDGE: + coreEventDesc[0].event_number = ARCH_LLC_MISS_EVTNR; + coreEventDesc[0].umask_value = ARCH_LLC_MISS_UMASK; + coreEventDesc[1].event_number = ARCH_LLC_REFERENCE_EVTNR; + coreEventDesc[1].umask_value = ARCH_LLC_REFERENCE_UMASK; + coreEventDesc[2].event_number = SKL_MEM_LOAD_RETIRED_L2_MISS_EVTNR; + coreEventDesc[2].umask_value = SKL_MEM_LOAD_RETIRED_L2_MISS_UMASK; + coreEventDesc[3].event_number = SKL_MEM_LOAD_RETIRED_L2_HIT_EVTNR; + coreEventDesc[3].umask_value = SKL_MEM_LOAD_RETIRED_L2_HIT_UMASK; + L2CacheHitRatioAvailable = true; + L3CacheHitRatioAvailable = true; + L3CacheMissesAvailable = true; + L2CacheMissesAvailable = true; + L2CacheHitsAvailable = true; + L3CacheHitsSnoopAvailable = true; + L3CacheHitsAvailable = true; + core_gen_counter_num_used = 4; + break; PCM_SKL_PATH_CASES case SKX: case ICX: @@ -2701,7 +2738,14 @@ PCM::ErrorCode PCM::programCoreCounters(const int i /* core */, i /* core id */, leader_counter /* group leader */, 0)) <= 0) { std::cerr << "Linux Perf: Error when programming " << eventName << ", error: " << strerror(errno) << "\n"; - if (errno == 24) std::cerr << "try executing 'ulimit -n 10000' to increase the limit on the number of open files.\n"; + if (24 == errno) + { + std::cerr << "try executing 'ulimit -n 10000' to increase the limit on the number of open files.\n"; + } + else + { + std::cerr << "try running with environment variable PCM_NO_PERF=1\n"; + } decrementInstanceSemaphore(); return false; } @@ -3414,6 +3458,8 @@ const char * PCM::getUArchCodename(const int32 cpu_model_param) const return "Apollo Lake"; case DENVERTON: return "Denverton"; + case SNOWRIDGE: + return "Snowridge"; case NEHALEM_EP: case NEHALEM: return "Nehalem/Nehalem-EP"; @@ -4107,12 +4153,12 @@ PCM::ErrorCode PCM::programServerUncorePowerMetrics(int mc_profile, int pcu_prof case 3: PCUCntConf[1] = PCU_MSR_PMON_CTL_EVENT(0x04); // Thermal frequency limit cycles: FREQ_MAX_LIMIT_THERMAL_CYCLES 
PCUCntConf[2] = PCU_MSR_PMON_CTL_EVENT(0x05); // Power frequency limit cycles: FREQ_MAX_POWER_CYCLES - PCUCntConf[3] = PCU_MSR_PMON_CTL_EVENT(0x07); // Clipped frequency limit cycles: FREQ_MAX_CURRENT_CYCLES (not supported on SKX and ICX) + PCUCntConf[3] = PCU_MSR_PMON_CTL_EVENT(0x07); // Clipped frequency limit cycles: FREQ_MAX_CURRENT_CYCLES (not supported on SKX and ICX and SNOWRIDGE) break; - case 4: // not supported on SKX and ICX + case 4: // not supported on SKX and ICX and SNOWRIDGE PCUCntConf[1] = PCU_MSR_PMON_CTL_EVENT(0x06); // OS frequency limit cycles: FREQ_MAX_OS_CYCLES PCUCntConf[2] = PCU_MSR_PMON_CTL_EVENT(0x05); // Power frequency limit cycles: FREQ_MAX_POWER_CYCLES - PCUCntConf[3] = PCU_MSR_PMON_CTL_EVENT(0x07); // Clipped frequency limit cycles: FREQ_MAX_CURRENT_CYCLES (not supported on SKX and ICX) + PCUCntConf[3] = PCU_MSR_PMON_CTL_EVENT(0x07); // Clipped frequency limit cycles: FREQ_MAX_CURRENT_CYCLES (not supported on SKX and ICX and SNOWRIDGE) break; case 5: if(JAKETOWN == cpu_model) @@ -4123,7 +4169,7 @@ PCM::ErrorCode PCM::programServerUncorePowerMetrics(int mc_profile, int pcu_prof { PCUCntConf[1] = PCU_MSR_PMON_CTL_EVENT(0x60) + PCU_MSR_PMON_CTL_EDGE_DET ; // number of frequency transitions PCUCntConf[2] = PCU_MSR_PMON_CTL_EVENT(0x60) ; // cycles spent changing frequency: FREQ_TRANS_CYCLES - } else if (HASWELLX == cpu_model || BDX_DE == cpu_model || BDX == cpu_model || SKX == cpu_model || ICX == cpu_model) + } else if (HASWELLX == cpu_model || BDX_DE == cpu_model || BDX == cpu_model || SKX == cpu_model || ICX == cpu_model || SNOWRIDGE == cpu_model) { PCUCntConf[1] = PCU_MSR_PMON_CTL_EVENT(0x74) + PCU_MSR_PMON_CTL_EDGE_DET ; // number of frequency transitions PCUCntConf[2] = PCU_MSR_PMON_CTL_EVENT(0x74) ; // cycles spent changing frequency: FREQ_TRANS_CYCLES @@ -4142,10 +4188,10 @@ PCM::ErrorCode PCM::programServerUncorePowerMetrics(int mc_profile, int pcu_prof { PCUCntConf[2] = PCU_MSR_PMON_CTL_EVENT(0x2B) + PCU_MSR_PMON_CTL_EDGE_DET ; 
// PC2 transitions PCUCntConf[3] = PCU_MSR_PMON_CTL_EVENT(0x2D) + PCU_MSR_PMON_CTL_EDGE_DET ; // PC6 transitions - } else if (HASWELLX == cpu_model || BDX_DE == cpu_model || BDX == cpu_model || SKX == cpu_model || ICX == cpu_model) + } else if (HASWELLX == cpu_model || BDX_DE == cpu_model || BDX == cpu_model || SKX == cpu_model || ICX == cpu_model || SNOWRIDGE == cpu_model) { - PCUCntConf[0] = PCU_MSR_PMON_CTL_EVENT(0x4E) ; // PC1e residenicies (not supported on SKX and ICX) - PCUCntConf[1] = PCU_MSR_PMON_CTL_EVENT(0x4E) + PCU_MSR_PMON_CTL_EDGE_DET ; // PC1 transitions (not supported on SKX and ICX) + PCUCntConf[0] = PCU_MSR_PMON_CTL_EVENT(0x4E) ; // PC1e residencies (not supported on SKX, ICX and SNOWRIDGE) + PCUCntConf[1] = PCU_MSR_PMON_CTL_EVENT(0x4E) + PCU_MSR_PMON_CTL_EDGE_DET ; // PC1 transitions (not supported on SKX, ICX and SNOWRIDGE) PCUCntConf[2] = PCU_MSR_PMON_CTL_EVENT(0x2B) + PCU_MSR_PMON_CTL_EDGE_DET ; // PC2 transitions PCUCntConf[3] = PCU_MSR_PMON_CTL_EVENT(0x2D) + PCU_MSR_PMON_CTL_EDGE_DET ; // PC6 transitions } else @@ -5435,6 +5481,13 @@ void ServerPCICFGUncore::initRegisterLocations(const PCM * pcm) PCM_PCICFG_EDC_INIT(6, ECLK, KNL) PCM_PCICFG_EDC_INIT(7, ECLK, KNL) } + else if (cpu_model == PCM::SNOWRIDGE) + { + PCM_PCICFG_M2M_INIT(0, SERVER) + PCM_PCICFG_M2M_INIT(1, SERVER) + PCM_PCICFG_M2M_INIT(2, SERVER) + PCM_PCICFG_M2M_INIT(3, SERVER) + } else { std::cerr << "Error: Uncore PMU for processor with model id " << cpu_model << " is not supported.\n"; @@ -5616,7 +5669,7 @@ void ServerPCICFGUncore::initDirect(uint32 socket_, const PCM * pcm) for (auto & handle : m2mHandles) { - if (cpu_model == PCM::ICX) + if (cpu_model == PCM::ICX || cpu_model == PCM::SNOWRIDGE) { m2mPMUs.push_back( UncorePMU( @@ -5651,7 +5704,14 @@ void ServerPCICFGUncore::initDirect(uint32 socket_, const PCM * pcm) } } - if(cpu_model == PCM::ICX) + int numChannels = 0; + + if (cpu_model == PCM::SNOWRIDGE || cpu_model == PCM::ICX) + { + numChannels = 2; + } + + if 
(numChannels > 0) { initSocket2Ubox0Bus(); if (socket_ < socket2UBOX0bus.size()) @@ -5659,7 +5719,6 @@ void ServerPCICFGUncore::initDirect(uint32 socket_, const PCM * pcm) auto memBars = getServerMemBars((uint32)m2mPMUs.size(), socket2UBOX0bus[socket_].first, socket2UBOX0bus[socket_].second); for (auto & memBar : memBars) { - const int numChannels = 2; for (int channel = 0; channel < numChannels; ++channel) { auto handle = std::make_shared(memBar + SERVER_MC_CH_PMON_BASE_ADDR + channel * SERVER_MC_CH_PMON_STEP, SERVER_MC_CH_PMON_SIZE, false); @@ -6181,6 +6240,7 @@ void ServerPCICFGUncore::programServerUncoreMemoryMetrics(const ServerUncoreMemo EDCCntConfig[EventPosition::READ] = MC_CH_PCI_PMON_CTL_EVENT(0x01) + MC_CH_PCI_PMON_CTL_UMASK(1); // monitor reads on counter 0: RPQ EDCCntConfig[EventPosition::WRITE] = MC_CH_PCI_PMON_CTL_EVENT(0x02) + MC_CH_PCI_PMON_CTL_UMASK(1); // monitor reads on counter 1: WPQ break; + case PCM::SNOWRIDGE: case PCM::ICX: if (metrics == PmemMemoryMode) { @@ -6223,6 +6283,7 @@ void ServerPCICFGUncore::programServerUncoreMemoryMetrics(const ServerUncoreMemo MCCntConfig[EventPosition::WRITE_RANK_B] = MC_CH_PCI_PMON_CTL_EVENT((0xb8 + rankB)) + MC_CH_PCI_PMON_CTL_UMASK(16); // WR_CAS_RANK(rankB) all banks break; case PCM::ICX: + case PCM::SNOWRIDGE: MCCntConfig[EventPosition::READ_RANK_A] = MC_CH_PCI_PMON_CTL_EVENT((0xb0 + rankA)) + MC_CH_PCI_PMON_CTL_UMASK(0x28); // RD_CAS_RANK(rankA) all banks MCCntConfig[EventPosition::WRITE_RANK_A] = MC_CH_PCI_PMON_CTL_EVENT((0xb8 + rankA)) + MC_CH_PCI_PMON_CTL_UMASK(0x28); // WR_CAS_RANK(rankA) all banks MCCntConfig[EventPosition::READ_RANK_B] = MC_CH_PCI_PMON_CTL_EVENT((0xb0 + rankB)) + MC_CH_PCI_PMON_CTL_UMASK(0x28); // RD_CAS_RANK(rankB) all banks @@ -6261,6 +6322,7 @@ void ServerPCICFGUncore::program() EDCCntConfig[EventPosition::READ] = MC_CH_PCI_PMON_CTL_EVENT(0x01) + MC_CH_PCI_PMON_CTL_UMASK(1); // monitor reads on counter 0: RPQ EDCCntConfig[EventPosition::WRITE] = MC_CH_PCI_PMON_CTL_EVENT(0x02) 
+ MC_CH_PCI_PMON_CTL_UMASK(1); // monitor reads on counter 1: WPQ break; + case PCM::SNOWRIDGE: case PCM::ICX: MCCntConfig[EventPosition::READ] = MC_CH_PCI_PMON_CTL_EVENT(0x04) + MC_CH_PCI_PMON_CTL_UMASK(0x0f); // monitor reads on counter 0: CAS_COUNT.RD MCCntConfig[EventPosition::WRITE] = MC_CH_PCI_PMON_CTL_EVENT(0x04) + MC_CH_PCI_PMON_CTL_UMASK(0x30); // monitor writes on counter 1: CAS_COUNT.WR @@ -6486,7 +6548,7 @@ void ServerPCICFGUncore::program_power_metrics(int mc_profile) uint32 MCCntConfig[4] = {0,0,0,0}; unsigned int UNC_M_POWER_CKE_CYCLES = 0x83; - if (cpu_model == PCM::ICX) + if (cpu_model == PCM::ICX || cpu_model == PCM::SNOWRIDGE) { UNC_M_POWER_CKE_CYCLES = 0x47; } @@ -6978,7 +7040,7 @@ uint64 PCM::CX_MSR_PMON_CTRY(uint32 Cbo, uint32 Ctr) const { return HSX_C0_MSR_PMON_CTR0 + ((HSX_CBO_MSR_STEP)*Cbo) + Ctr; } - else if (ICX == cpu_model) + else if (ICX == cpu_model || SNOWRIDGE == cpu_model) { return CX_MSR_PMON_BOX_CTL(Cbo) + SERVER_CHA_MSR_PMON_CTR0_OFFSET + Ctr; } @@ -7029,7 +7091,7 @@ uint64 PCM::CX_MSR_PMON_CTLY(uint32 Cbo, uint32 Ctl) const { return HSX_C0_MSR_PMON_CTL0 + ((HSX_CBO_MSR_STEP)*Cbo) + Ctl; } - else if (ICX == cpu_model) + else if (ICX == cpu_model || SNOWRIDGE == cpu_model) { return CX_MSR_PMON_BOX_CTL(Cbo) + SERVER_CHA_MSR_PMON_CTL0_OFFSET + Ctl; } @@ -7053,6 +7115,10 @@ uint64 PCM::CX_MSR_PMON_BOX_CTL(uint32 Cbo) const { return ICX_CHA_MSR_PMON_BOX_CTL[Cbo]; } + else if (SNOWRIDGE == cpu_model) + { + return SNR_CHA_MSR_PMON_BOX_CTL[Cbo]; + } return 0; } @@ -7076,6 +7142,10 @@ uint32 PCM::getMaxNumOfCBoxes() const MSR[refCore]->read(NCUPMONConfig, &val); num = (uint32)(val & 63); } + else if (SNOWRIDGE == cpu_model) + { + num = (uint32)num_phys_cores_per_socket / 4; + } else { /* @@ -7128,12 +7198,24 @@ void PCM::programIIOCounters(uint64 rawEvents[4], int IIOStack) std::vector IIO_units; if (IIOStack == -1) { - IIO_units.push_back((int32)IIO_CBDMA); - IIO_units.push_back((int32)IIO_PCIe0); - 
IIO_units.push_back((int32)IIO_PCIe1); - IIO_units.push_back((int32)IIO_PCIe2); - IIO_units.push_back((int32)IIO_MCP0); - IIO_units.push_back((int32)IIO_MCP1); + int stacks_count; + switch (getCPUModel()) + { + case PCM::ICX: + stacks_count = ICX_IIO_STACK_COUNT; + break; + case PCM::SNOWRIDGE: + stacks_count = SNR_IIO_STACK_COUNT; + break; + case PCM::SKX: + default: + stacks_count = SKX_IIO_STACK_COUNT; + break; + } + IIO_units.reserve(stacks_count); + for (int stack = 0; stack < stacks_count; ++stack) { + IIO_units.push_back(stack); + } } else IIO_units.push_back(IIOStack); @@ -7167,6 +7249,7 @@ void PCM::programPCIeEventGroup(eventGroup_t &eventGroup) switch (cpu_model) { case PCM::ICX: + case PCM::SNOWRIDGE: for (uint32 idx = 0; idx < eventGroup.size(); ++idx) events[idx] = eventGroup[idx]; programCbo(events); @@ -7214,7 +7297,7 @@ void PCM::programCbo(const uint64 * events, const uint32 opCode, const uint32 nc { cboPMUs[i][cbo].initFreeze(UNC_PMON_UNIT_CTL_FRZ_EN); - if (ICX != cpu_model) + if (ICX != cpu_model && SNOWRIDGE != cpu_model) programCboOpcodeFilter(opCode, cboPMUs[i][cbo], nc_, 0, loc, rem); if((HASWELLX == cpu_model || BDX_DE == cpu_model || BDX == cpu_model || SKX == cpu_model) && llc_lookup_tid_filter != 0) @@ -7320,17 +7403,23 @@ void PCM::initLLCReadMissLatencyEvents(uint64 * events, uint32 & opCode) switch (cpu_model) { case ICX: + case SNOWRIDGE: umask = 1ULL; break; case SKX: umask = (uint64)(SKX_CHA_TOR_INSERTS_UMASK_IRQ(1)) + (uint64)(SKX_CHA_TOR_INSERTS_UMASK_MISS(1)); - break; + break; } uint64 umask_ext = 0; - if (ICX == cpu_model) + switch (cpu_model) { - umask_ext = 0xC817FE; + case ICX: + umask_ext = 0xC817FE; + break; + case SNOWRIDGE: + umask_ext = 0xC827FE; + break; } const uint64 all_umasks = CBO_MSR_PMON_CTL_UMASK(umask) + UNC_PMON_CTL_UMASK_EXT(umask_ext); diff --git a/cpucounters.h b/cpucounters.h index c3fbea1d..5721a2c5 100644 --- a/cpucounters.h +++ b/cpucounters.h @@ -732,6 +732,38 @@ class PCM_API PCM IIO_STACK_COUNT = 
6 }; + // Offsets/enumeration of IIO stacks for Skylake server. + enum SkylakeIIOStacks { + SKX_IIO_CBDMA_DMI = 0, + SKX_IIO_PCIe0 = 1, + SKX_IIO_PCIe1 = 2, + SKX_IIO_PCIe2 = 3, + SKX_IIO_MCP0 = 4, + SKX_IIO_MCP1 = 5, + SKX_IIO_STACK_COUNT = 6 + }; + + // Offsets/enumeration of IIO stacks for IceLake server. + enum IcelakeIIOStacks { + ICX_IIO_PCIe0 = 0, + ICX_IIO_PCIe1 = 1, + ICX_IIO_MCP0 = 2, + ICX_IIO_PCIe2 = 3, + ICX_IIO_PCIe3 = 4, + ICX_IIO_CBDMA_DMI = 5, + ICX_IIO_STACK_COUNT = 6 + }; + + // Offsets/enumeration of IIO stacks for Snowridge. + enum SnowridgeIIOStacks { + SNR_IIO_QAT = 0, + SNR_IIO_CBDMA_DMI = 1, + SNR_IIO_NIS = 2, + SNR_IIO_HQM = 3, + SNR_IIO_PCIe0 = 4, + SNR_IIO_STACK_COUNT = 5 + }; + struct SimplePCIeDevInfo { enum PCIeWidthMode width; @@ -1259,6 +1291,7 @@ class PCM_API PCM CHERRYTRAIL = 76, APOLLO_LAKE = 92, DENVERTON = 95, + SNOWRIDGE = 134, CLARKDALE = 37, WESTMERE_EP = 44, NEHALEM_EX = 46, @@ -1414,6 +1447,7 @@ class PCM_API PCM case ICX: case BDX: case KNL: + case SNOWRIDGE: return (server_pcicfg_uncore.size() && server_pcicfg_uncore[0].get()) ? (server_pcicfg_uncore[0]->getNumMCChannels()) : 0; } return 0; @@ -1441,6 +1475,7 @@ class PCM_API PCM case ICX: case BDX: case KNL: + case SNOWRIDGE: return (socket < server_pcicfg_uncore.size() && server_pcicfg_uncore[socket].get()) ? 
(server_pcicfg_uncore[socket]->getNumMCChannels(controller)) : 0; } return 0; @@ -1466,6 +1501,8 @@ class PCM_API PCM if (ICL == cpu_model || TGL == cpu_model) return 5; switch (cpu_model) { + case SNOWRIDGE: + return 4; case DENVERTON: return 3; case NEHALEM_EP: @@ -1512,6 +1549,7 @@ class PCM_API PCM return 1000000000ULL; // 1 GHz case SKX: case ICX: + case SNOWRIDGE: return 1100000000ULL; // 1.1 GHz } return 0; @@ -1533,6 +1571,7 @@ class PCM_API PCM case BDX_DE: case SKX: case ICX: + case SNOWRIDGE: case KNL: return true; default: @@ -1711,6 +1750,7 @@ class PCM_API PCM || cpu_model_ == CHERRYTRAIL || cpu_model_ == APOLLO_LAKE || cpu_model_ == DENVERTON + // || cpu_model_ == SNOWRIDGE do not use Atom code for SNOWRIDGE ; } @@ -1733,6 +1773,7 @@ class PCM_API PCM || cpu_model == PCM::BAYTRAIL || cpu_model == PCM::APOLLO_LAKE || cpu_model == PCM::DENVERTON + || cpu_model == PCM::SNOWRIDGE || cpu_model == PCM::HASWELLX || cpu_model == PCM::BROADWELL || cpu_model == PCM::BDX_DE @@ -1807,10 +1848,8 @@ class PCM_API PCM bool memoryTrafficMetricsAvailable() const { - return !( - isAtom() - || cpu_model == PCM::CLARKDALE - ); + return (!(isAtom() || cpu_model == PCM::CLARKDALE)) + ; } bool MCDRAMmemoryTrafficMetricsAvailable() const @@ -1835,6 +1874,7 @@ class PCM_API PCM return ( cpu_model == PCM::SKX || cpu_model == PCM::ICX + || cpu_model == PCM::SNOWRIDGE ); } @@ -1863,6 +1903,7 @@ class PCM_API PCM isCLX() || isCPX() || cpu_model == PCM::ICX + || cpu_model == PCM::SNOWRIDGE ); } @@ -1880,6 +1921,7 @@ class PCM_API PCM || ((SKX == cpu_model) && (num_sockets == 1)) #endif || ICX == cpu_model + || SNOWRIDGE == cpu_model ); } @@ -1894,6 +1936,7 @@ class PCM_API PCM { return ( cpu_model == PCM::JAKETOWN + || cpu_model == PCM::SNOWRIDGE || cpu_model == PCM::IVYTOWN || cpu_model == PCM::HASWELLX || cpu_model == PCM::BDX_DE @@ -2284,7 +2327,7 @@ uint64 getDRAMClocks(uint32 channel, const CounterStateType & before, const Coun { const auto clk = after.DRAMClocks[channel] - 
before.DRAMClocks[channel]; const auto cpu_model = PCM::getInstance()->getCPUModel(); - if (cpu_model == PCM::ICX) + if (cpu_model == PCM::ICX || cpu_model == PCM::SNOWRIDGE) { return 2 * clk; } @@ -3151,10 +3194,11 @@ uint64 getL2CacheMisses(const CounterStateType & before, const CounterStateType { auto pcm = PCM::getInstance(); if (pcm->isL2CacheMissesAvailable() == false) return 0ULL; - if (pcm->useSkylakeEvents()) { + const auto cpu_model = pcm->getCPUModel(); + if (pcm->useSkylakeEvents() || cpu_model == PCM::SNOWRIDGE) { return after.Event[BasicCounterState::SKLL2MissPos] - before.Event[BasicCounterState::SKLL2MissPos]; } - if (pcm->isAtom() || pcm->getCPUModel() == PCM::KNL) + if (pcm->isAtom() || cpu_model == PCM::KNL) { return after.Event[BasicCounterState::ArchLLCMissPos] - before.Event[BasicCounterState::ArchLLCMissPos]; } @@ -3243,8 +3287,17 @@ uint64 getL3CacheHitsNoSnoop(const CounterStateType & before, const CounterState template uint64 getL3CacheHitsSnoop(const CounterStateType & before, const CounterStateType & after) { - if (!PCM::getInstance()->isL3CacheHitsSnoopAvailable()) return 0; - if (PCM::getInstance()->useSkylakeEvents()) { + auto pcm = PCM::getInstance(); + if (!pcm->isL3CacheHitsSnoopAvailable()) return 0; + const auto cpu_model = pcm->getCPUModel(); + if (cpu_model == PCM::SNOWRIDGE) + { + const int64 misses = getL3CacheMisses(before, after); + const int64 refs = after.Event[BasicCounterState::ArchLLCRefPos] - before.Event[BasicCounterState::ArchLLCRefPos]; + const int64 hits = refs - misses; + return (hits > 0)? 
hits : 0; + } + if (pcm->useSkylakeEvents()) { return after.Event[BasicCounterState::SKLL3HitPos] - before.Event[BasicCounterState::SKLL3HitPos]; } return after.Event[BasicCounterState::L2HitMPos] - before.Event[BasicCounterState::L2HitMPos]; diff --git a/lspci.h b/lspci.h index 89471bd1..eaa6cde2 100644 --- a/lspci.h +++ b/lspci.h @@ -214,6 +214,7 @@ struct bdf { uint8_t busno; uint8_t devno; uint8_t funcno; + bdf () : busno(0), devno(0), funcno(0) {} }; struct pci { @@ -251,6 +252,7 @@ struct pci { }; uint32_t link_info; }; + pci () : exist(false), offset_0(0), header_type(0), offset_18(0), link_info(0) {} }; struct counter { @@ -279,6 +281,34 @@ struct iio_skx { uint32_t socket_id; }; +struct iio_bifurcated_part { + int part_id; + /* single device represent root port */ + struct pci root_pci_dev; + /* Contain child switch and end-point devices */ + std::vector child_pci_devs; +}; + +struct iio_stack { + std::vector parts; + uint32_t iio_unit_id; + std::string stack_name; + std::vector values; + bool flipped = false; + /* holding busno for each IIO stack */ + uint8_t busno; +}; + +bool operator<(const iio_stack& lh, const iio_stack& rh) +{ + return lh.iio_unit_id < rh.iio_unit_id; +} + +struct iio_stacks_on_socket { + std::vector stacks; + uint32_t socket_id; +}; + bool operator < (const bdf &l, const bdf &r) { if (l.busno < r.busno) return true; @@ -325,35 +355,38 @@ void probe_capability_pci_express(struct pci *p, uint32_t cap_ptr) } } -void probe_pci(struct pci *p) +bool probe_pci(struct pci *p) { uint32 value; + p->exist = false; struct bdf *bdf = &p->bdf; if (PciHandleType::exists(0, bdf->busno, bdf->devno, bdf->funcno)) { - p->exist = true; PciHandleType h(0, bdf->busno, bdf->devno, bdf->funcno); - h.read32(0x0, &value); //VID:DID - if (value == (std::numeric_limits::max)()) // invalid VID::DID - { - p->exist = false; - return; - } - p->offset_0 = value; - h.read32(0xc, &value); - p->header_type = (value >> 16) & 0x7f; - if (p->header_type == 0) { - 
h.read32(0x4, &value); //Status register - if (value & 0x100000) {//Capability list == true - h.read32(0x34, &value); //Capability pointer - probe_capability_pci_express(p, value); + // VID:DID + h.read32(0x0, &value); + // Continue only if VID:DID is valid (not the numeric_limits max sentinel) + if (value != (std::numeric_limits::max)()) { + p->offset_0 = value; + h.read32(0xc, &value); + p->header_type = (value >> 16) & 0x7f; + if (p->header_type == 0) { + // Status register + h.read32(0x4, &value); + // Capability list == true + if (value & 0x100000) { + // Capability pointer + h.read32(0x34, &value); + probe_capability_pci_express(p, value); + } + } else if (p->header_type == 1) { + h.read32(0x18, &value); + p->offset_18 = value; } - } else if (p->header_type == 1) { - h.read32(0x18, &value); - p->offset_18 = value; + p->exist = true; } } - else - p->exist = false; + + return p->exist; } /* diff --git a/msr.cpp b/msr.cpp index 916dbd89..7420369a 100644 --- a/msr.cpp +++ b/msr.cpp @@ -23,6 +23,7 @@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND #endif #include "types.h" #include "msr.h" +#include "utils.h" #include #ifdef _MSC_VER @@ -214,6 +215,16 @@ int32 MsrHandle::read(uint64 msr_number, uint64 * value) // here comes a Linux version MsrHandle::MsrHandle(uint32 cpu) : fd(-1), cpu_id(cpu) { + constexpr auto allowWritesPath = "/sys/module/msr/parameters/allow_writes"; + static bool writesEnabled = false; + if (writesEnabled == false) + { + if (readSysFS(allowWritesPath, true).length() > 0) + { + writeSysFS(allowWritesPath, "on", false); + } + writesEnabled = true; + } char * path = new char[200]; snprintf(path, 200, "/dev/cpu/%d/msr", cpu); int handle = ::open(path, O_RDWR); diff --git a/opCode-106.txt b/opCode-106.txt index bc64f6ae..226a8893 100644 --- a/opCode-106.txt +++ b/opCode-106.txt @@ -5,35 +5,19 @@ ctr=0,ev_sel=0x83,umask=0x1,en=1,ch_mask=1,fc_mask=0x7,multiplier=4,divider=1,hn ctr=1,ev_sel=0x83,umask=0x1,en=1,ch_mask=2,fc_mask=0x7,multiplier=4,divider=1,hname=IB 
write,vname=Part1 (2nd x4) ctr=0,ev_sel=0x83,umask=0x1,en=1,ch_mask=4,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part2 (2nd x8/3rd x4) ctr=1,ev_sel=0x83,umask=0x1,en=1,ch_mask=8,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part3 (4th x4) -ctr=0,ev_sel=0x83,umask=0x1,en=1,ch_mask=16,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part4 (1st x16/x8/x4) -ctr=1,ev_sel=0x83,umask=0x1,en=1,ch_mask=32,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part5 (2nd x4) -ctr=0,ev_sel=0x83,umask=0x1,en=1,ch_mask=64,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part6 (2nd x8/3rd x4) -ctr=1,ev_sel=0x83,umask=0x1,en=1,ch_mask=128,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part7 (4th x4) ctr=0,ev_sel=0x83,umask=0x4,en=1,ch_mask=1,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part0 (1st x16/x8/x4) ctr=1,ev_sel=0x83,umask=0x4,en=1,ch_mask=2,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part1 (2nd x4) ctr=0,ev_sel=0x83,umask=0x4,en=1,ch_mask=4,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part2 (2nd x8/3rd x4) ctr=1,ev_sel=0x83,umask=0x4,en=1,ch_mask=8,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part3 (4th x4) -ctr=0,ev_sel=0x83,umask=0x4,en=1,ch_mask=16,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part4 (1st x16/x8/x4) -ctr=1,ev_sel=0x83,umask=0x4,en=1,ch_mask=32,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part5 (2nd x4) -ctr=0,ev_sel=0x83,umask=0x4,en=1,ch_mask=64,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part6 (2nd x8/3rd x4) -ctr=1,ev_sel=0x83,umask=0x4,en=1,ch_mask=128,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part7 (4th x4) # Outbound (CPU MMIO to the PCIe device) payload events ctr=2,ev_sel=0x83,umask=0x80,en=1,ch_mask=1,fc_mask=0x7,multiplier=1,divider=1,hname=OB read,vname=Part0 (1st x16/x8/x4) ctr=3,ev_sel=0x83,umask=0x80,en=1,ch_mask=2,fc_mask=0x7,multiplier=1,divider=1,hname=OB read,vname=Part1 (2nd x4) 
ctr=2,ev_sel=0x83,umask=0x80,en=1,ch_mask=4,fc_mask=0x7,multiplier=1,divider=1,hname=OB read,vname=Part2 (2nd x8/3rd x4) ctr=3,ev_sel=0x83,umask=0x80,en=1,ch_mask=8,fc_mask=0x7,multiplier=1,divider=1,hname=OB read,vname=Part3 (4th x4) -ctr=2,ev_sel=0x83,umask=0x80,en=1,ch_mask=16,fc_mask=0x7,multiplier=1,divider=1,hname=OB read,vname=Part4 (1st x16/x8/x4) -ctr=3,ev_sel=0x83,umask=0x80,en=1,ch_mask=32,fc_mask=0x7,multiplier=1,divider=1,hname=OB read,vname=Part5 (2nd x4) -ctr=2,ev_sel=0x83,umask=0x80,en=1,ch_mask=64,fc_mask=0x7,multiplier=1,divider=1,hname=OB read,vname=Part6 (2nd x8/3rd x4) -ctr=3,ev_sel=0x83,umask=0x80,en=1,ch_mask=128,fc_mask=0x7,multiplier=1,divider=1,hname=OB read,vname=Part7 (4th x4) ctr=2,ev_sel=0xc0,umask=0x1,en=1,ch_mask=1,fc_mask=0x7,multiplier=1,divider=1,hname=OB write,vname=Part0 (1st x16/x8/x4) ctr=3,ev_sel=0xc0,umask=0x1,en=1,ch_mask=2,fc_mask=0x7,multiplier=1,divider=1,hname=OB write,vname=Part1 (2nd x4) ctr=2,ev_sel=0xc0,umask=0x1,en=1,ch_mask=4,fc_mask=0x7,multiplier=1,divider=1,hname=OB write,vname=Part2 (2nd x8/3rd x4) ctr=3,ev_sel=0xc0,umask=0x1,en=1,ch_mask=8,fc_mask=0x7,multiplier=1,divider=1,hname=OB write,vname=Part3 (4th x4) -ctr=2,ev_sel=0xc0,umask=0x1,en=1,ch_mask=16,fc_mask=0x7,multiplier=1,divider=1,hname=OB write,vname=Part4 (1st x16/x8/x4) -ctr=3,ev_sel=0xc0,umask=0x1,en=1,ch_mask=32,fc_mask=0x7,multiplier=1,divider=1,hname=OB write,vname=Part5 (2nd x4) -ctr=2,ev_sel=0xc0,umask=0x1,en=1,ch_mask=64,fc_mask=0x7,multiplier=1,divider=1,hname=OB write,vname=Part6 (2nd x8/3rd x4) -ctr=3,ev_sel=0xc0,umask=0x1,en=1,ch_mask=128,fc_mask=0x7,multiplier=1,divider=1,hname=OB write,vname=Part7 (4th x4) # IOMMU events ctr=0,ev_sel=0x40,umask=0x02,en=1,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=IOTLB Lookup,vname=Total ctr=1,ev_sel=0x40,umask=0x20,en=1,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=IOTLB Miss,vname=Total @@ -42,4 +26,4 @@ ctr=3,ev_sel=0x41,umask=0x10,en=1,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1 
ctr=0,ev_sel=0x41,umask=0x08,en=1,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=1G Cache Hit,vname=Total ctr=1,ev_sel=0x41,umask=0x04,en=1,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=2M Cache Hit,vname=Total ctr=2,ev_sel=0x41,umask=0x02,en=1,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=4K Cache Hit,vname=Total -ctr=3,ev_sel=0x41,umask=0x40,en=1,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=IOMMU Mem Access,vname=Total +ctr=3,ev_sel=0x41,umask=0x40,en=1,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=IOMMU Mem Access,vname=Total \ No newline at end of file diff --git a/opCode-134.txt b/opCode-134.txt new file mode 100644 index 00000000..3fb35757 --- /dev/null +++ b/opCode-134.txt @@ -0,0 +1,45 @@ +#Clockticks +#ctr=0,ev_sel=0x1,umask=0x0,en=1,ch_mask=0,fc_mask=0x0,multiplier=1,divider=1,hname=Clockticks,vname=Total +# Inbound (PCIe device DMA into system) payload events +ctr=0,ev_sel=0x83,umask=0x1,en=1,ch_mask=1,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part0 (1st x16/x8/x4) +ctr=1,ev_sel=0x83,umask=0x1,en=1,ch_mask=2,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part1 (2nd x4) +ctr=0,ev_sel=0x83,umask=0x1,en=1,ch_mask=4,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part2 (2nd x8/3rd x4) +ctr=1,ev_sel=0x83,umask=0x1,en=1,ch_mask=8,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part3 (4th x4) +ctr=0,ev_sel=0x83,umask=0x1,en=1,ch_mask=16,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part4 (1st x16/x8/x4) +ctr=1,ev_sel=0x83,umask=0x1,en=1,ch_mask=32,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part5 (2nd x4) +ctr=0,ev_sel=0x83,umask=0x1,en=1,ch_mask=64,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part6 (2nd x8/3rd x4) +ctr=1,ev_sel=0x83,umask=0x1,en=1,ch_mask=128,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part7 (4th x4) +ctr=0,ev_sel=0x83,umask=0x4,en=1,ch_mask=1,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part0 (1st x16/x8/x4) 
+ctr=1,ev_sel=0x83,umask=0x4,en=1,ch_mask=2,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part1 (2nd x4) +ctr=0,ev_sel=0x83,umask=0x4,en=1,ch_mask=4,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part2 (2nd x8/3rd x4) +ctr=1,ev_sel=0x83,umask=0x4,en=1,ch_mask=8,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part3 (4th x4) +ctr=0,ev_sel=0x83,umask=0x4,en=1,ch_mask=16,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part4 (1st x16/x8/x4) +ctr=1,ev_sel=0x83,umask=0x4,en=1,ch_mask=32,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part5 (2nd x4) +ctr=0,ev_sel=0x83,umask=0x4,en=1,ch_mask=64,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part6 (2nd x8/3rd x4) +ctr=1,ev_sel=0x83,umask=0x4,en=1,ch_mask=128,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part7 (4th x4) +# Outbound (CPU MMIO to the PCIe device) payload events +ctr=2,ev_sel=0x83,umask=0x80,en=1,ch_mask=1,fc_mask=0x7,multiplier=1,divider=1,hname=OB read,vname=Part0 (1st x16/x8/x4) +ctr=3,ev_sel=0x83,umask=0x80,en=1,ch_mask=2,fc_mask=0x7,multiplier=1,divider=1,hname=OB read,vname=Part1 (2nd x4) +ctr=2,ev_sel=0x83,umask=0x80,en=1,ch_mask=4,fc_mask=0x7,multiplier=1,divider=1,hname=OB read,vname=Part2 (2nd x8/3rd x4) +ctr=3,ev_sel=0x83,umask=0x80,en=1,ch_mask=8,fc_mask=0x7,multiplier=1,divider=1,hname=OB read,vname=Part3 (4th x4) +ctr=2,ev_sel=0x83,umask=0x80,en=1,ch_mask=16,fc_mask=0x7,multiplier=1,divider=1,hname=OB read,vname=Part4 (1st x16/x8/x4) +ctr=3,ev_sel=0x83,umask=0x80,en=1,ch_mask=32,fc_mask=0x7,multiplier=1,divider=1,hname=OB read,vname=Part5 (2nd x4) +ctr=2,ev_sel=0x83,umask=0x80,en=1,ch_mask=64,fc_mask=0x7,multiplier=1,divider=1,hname=OB read,vname=Part6 (2nd x8/3rd x4) +ctr=3,ev_sel=0x83,umask=0x80,en=1,ch_mask=128,fc_mask=0x7,multiplier=1,divider=1,hname=OB read,vname=Part7 (4th x4) +ctr=2,ev_sel=0xc0,umask=0x1,en=1,ch_mask=1,fc_mask=0x7,multiplier=1,divider=1,hname=OB write,vname=Part0 (1st x16/x8/x4) 
+ctr=3,ev_sel=0xc0,umask=0x1,en=1,ch_mask=2,fc_mask=0x7,multiplier=1,divider=1,hname=OB write,vname=Part1 (2nd x4) +ctr=2,ev_sel=0xc0,umask=0x1,en=1,ch_mask=4,fc_mask=0x7,multiplier=1,divider=1,hname=OB write,vname=Part2 (2nd x8/3rd x4) +ctr=3,ev_sel=0xc0,umask=0x1,en=1,ch_mask=8,fc_mask=0x7,multiplier=1,divider=1,hname=OB write,vname=Part3 (4th x4) +ctr=2,ev_sel=0xc0,umask=0x1,en=1,ch_mask=16,fc_mask=0x7,multiplier=1,divider=1,hname=OB write,vname=Part4 (1st x16/x8/x4) +ctr=3,ev_sel=0xc0,umask=0x1,en=1,ch_mask=32,fc_mask=0x7,multiplier=1,divider=1,hname=OB write,vname=Part5 (2nd x4) +ctr=2,ev_sel=0xc0,umask=0x1,en=1,ch_mask=64,fc_mask=0x7,multiplier=1,divider=1,hname=OB write,vname=Part6 (2nd x8/3rd x4) +ctr=3,ev_sel=0xc0,umask=0x1,en=1,ch_mask=128,fc_mask=0x7,multiplier=1,divider=1,hname=OB write,vname=Part7 (4th x4) +# IOMMU events +ctr=0,ev_sel=0x40,umask=0x02,en=1,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=IOTLB Lookup,vname=Total +ctr=1,ev_sel=0x40,umask=0x20,en=1,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=IOTLB Miss,vname=Total +ctr=2,ev_sel=0x40,umask=0x80,en=1,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=Ctxt Cache Hit,vname=Total +ctr=3,ev_sel=0x41,umask=0x10,en=1,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=512G Cache Hit,vname=Total +ctr=0,ev_sel=0x41,umask=0x08,en=1,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=1G Cache Hit,vname=Total +ctr=1,ev_sel=0x41,umask=0x04,en=1,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=2M Cache Hit,vname=Total +ctr=2,ev_sel=0x41,umask=0x02,en=1,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=4K Cache Hit,vname=Total +ctr=3,ev_sel=0x41,umask=0x40,en=1,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=IOMMU Mem Access,vname=Total \ No newline at end of file diff --git a/pcm-iio.cpp b/pcm-iio.cpp index 4347d615..c869675b 100644 --- a/pcm-iio.cpp +++ b/pcm-iio.cpp @@ -39,6 +39,21 @@ using namespace pcm; #define PCM_DELAY_DEFAULT 3.0 // in seconds +#define QAT_DID 0x18DA 
+#define NIS_DID 0x18D1 +#define HQM_DID 0x270B + +#define ROOT_BUSES_OFFSET 0xCC +#define ROOT_BUSES_OFFSET_2 0xD0 + +#define SKX_SOCKETID_UBOX_DID 0x2014 +#define SKX_UBOX_DEVICE_NUM 0x08 +#define SKX_UBOX_FUNCTION_NUM 0x02 +#define SKX_BUS_NUM_STRIDE 8 +//the below LNID and GID applies to Skylake Server +#define SKX_UNC_SOCKETID_UBOX_LNID_OFFSET 0xC0 +#define SKX_UNC_SOCKETID_UBOX_GID_OFFSET 0xD4 + const uint8_t max_sockets = 4; static const std::string iio_stack_names[6] = { "IIO Stack 0 - CBDMA/DMI ", @@ -49,18 +64,76 @@ static const std::string iio_stack_names[6] = { "IIO Stack 5 - MCP1 " }; -static const std::string icx_iio_stack_names[6] = { +static const std::string skx_iio_stack_names[6] = { "IIO Stack 0 - CBDMA/DMI ", + "IIO Stack 1 - PCIe0 ", + "IIO Stack 2 - PCIe1 ", + "IIO Stack 3 - PCIe2 ", + "IIO Stack 4 - MCP0 ", + "IIO Stack 5 - MCP1 " +}; + +static const std::string icx_iio_stack_names[6] = { + "IIO Stack 0 - PCIe0 ", "IIO Stack 1 - PCIe1 ", - "IIO Stack 2 - PCIe2 ", - "IIO Stack 3 - PCIe3 ", - "IIO Stack 4 - PCIe4 ", - "IIO Stack 5 - PCIe5 " + "IIO Stack 2 - MCP ", + "IIO Stack 3 - PCIe2 ", + "IIO Stack 4 - PCIe3 ", + "IIO Stack 5 - CBDMA/DMI " +}; + +static const std::string snr_iio_stack_names[5] = { + "IIO Stack 0 - QAT ", + "IIO Stack 1 - CBDMA/DMI ", + "IIO Stack 2 - NIS ", + "IIO Stack 3 - HQM ", + "IIO Stack 4 - PCIe " +}; + +#define ICX_CBDMA_DMI_SAD_ID 0 +#define ICX_MCP_SAD_ID 3 + +#define ICX_PCH_PART_ID 0 +#define ICX_CBDMA_PART_ID 3 + +#define SNR_ICX_SAD_CONTROL_CFG_OFFSET 0x3F4 +#define SNR_ICX_MESH2IIO_MMAP_DID 0x09A2 + +#define ICX_ROOT_PORT_A_DID 0x347A +#define ICX_VMD_PCI_DEVNO 0x00 +#define ICX_VMD_PCI_FUNCNO 0x05 + +static const std::map icx_sad_to_pmu_id_mapping = { + { ICX_CBDMA_DMI_SAD_ID, 5 }, + { 1, 0 }, + { 2, 1 }, + { ICX_MCP_SAD_ID, 2 }, + { 4, 3 }, + { 5, 4 } +}; + +#define SNR_ACCELERATOR_PART_ID 4 + +#define SNR_ROOT_PORT_A_DID 0x334A + +#define SNR_CBDMA_DMI_SAD_ID 0 +#define SNR_PCIE_GEN3_SAD_ID 1 +#define 
SNR_HQM_SAD_ID 2 +#define SNR_NIS_SAD_ID 3 +#define SNR_QAT_SAD_ID 4 + +static const std::map snr_sad_to_pmu_id_mapping = { + { SNR_CBDMA_DMI_SAD_ID, 1 }, + { SNR_PCIE_GEN3_SAD_ID, 4 }, + { SNR_HQM_SAD_ID , 3 }, + { SNR_NIS_SAD_ID , 2 }, + { SNR_QAT_SAD_ID , 0 } }; map opcodeFieldMap; //TODO: add description for this nameMap map>> nameMap; +//TODO: remove binding to stacks amount result_content results(max_sockets, stack_content(6, ctr_data())); struct data{ @@ -195,20 +268,18 @@ string build_pci_header(const PCIDB & pciDB, uint32_t column_width, struct pci p return s; } -vector build_display(vector iio_skx_v, vector &ctrs, vector skt_list, vector stack_list, const PCIDB & pciDB) +vector build_display(vector& iios, vector& ctrs, const PCIDB& pciDB) { vector buffer; vector headers; vector data; uint64_t header_width; string row; - - for (vector::const_iterator skt_unit = skt_list.begin(); skt_unit != skt_list.end(); ++skt_unit) { - buffer.push_back("Socket" + std::to_string(*skt_unit)); - struct iio_skx iio_skx = iio_skx_v[*skt_unit]; - for (vector::const_iterator stack_unit = stack_list.begin(); stack_unit != stack_list.end(); ++stack_unit) { - uint32_t s = *stack_unit; - headers = combine_stack_name_and_counter_names(iio_skx.stacks[s].stack_name); + for (auto socket = iios.cbegin(); socket != iios.cend(); ++socket) { + buffer.push_back("Socket" + std::to_string(socket->socket_id)); + for (auto stack = socket->stacks.cbegin(); stack != socket->stacks.cend(); ++stack) { + auto stack_id = stack->iio_unit_id; + headers = combine_stack_name_and_counter_names(stack->stack_name); //Print first row row = std::accumulate(headers.begin(), headers.end(), string(" "), a_header_footer); header_width = row.size(); @@ -222,17 +293,17 @@ vector build_display(vector iio_skx_v, vector> v_sort; //re-organize data collection to be row wise - for (vector::iterator cunit = ctrs.begin(); cunit != ctrs.end(); ++cunit) { - v_sort[cunit->v_id][cunit->h_id] = &(*cunit); + for 
(std::vector::iterator counter = ctrs.begin(); counter != ctrs.end(); ++counter) { + v_sort[counter->v_id][counter->h_id] = &(*counter); } - for (map>::const_iterator vunit = v_sort.begin(); vunit != v_sort.end(); ++vunit) { + for (std::map>::const_iterator vunit = v_sort.cbegin(); vunit != v_sort.cend(); ++vunit) { map h_array = vunit->second; uint32_t vv_id = vunit->first; vector h_data; string v_name = h_array[0]->v_event_name; - for (map::const_iterator hunit = h_array.begin(); hunit != h_array.end(); ++hunit) { + for (map::const_iterator hunit = h_array.cbegin(); hunit != h_array.cend(); ++hunit) { uint32_t hh_id = hunit->first; - uint64_t raw_data = hunit->second->data[0][*skt_unit][s][std::pair(hh_id,vv_id)]; + uint64_t raw_data = hunit->second->data[0][socket->socket_id][stack_id][std::pair(hh_id,vv_id)]; h_data.push_back(raw_data); } data = prepare_data(h_data, headers); @@ -241,19 +312,16 @@ vector build_display(vector iio_skx_v, vector pp = iio_skx.stacks[s].parts[p].child_pci_devs; + for (const auto& part : stack->parts) { uint8_t level = 1; - for (std::vector::const_iterator iunit = pp.begin(); iunit != pp.end(); ++iunit) - { - row = build_pci_header(pciDB, (uint32_t)header_width, *iunit, -1, level); + for (const auto& pci_device : part.child_pci_devs) { + row = build_pci_header(pciDB, (uint32_t)header_width, pci_device, -1, level); buffer.push_back(row); - if (iunit->header_type == 1) + if (pci_device.header_type == 1) level += 1; } } @@ -272,109 +340,459 @@ void display(const vector &buff) std::cout << std::flush; } -void discover_pci_tree(const vector & busno, uint8_t socket_id, vector &v_iio_skx, PCM * m) +class IPlatformMapping { +private: +public: + virtual ~IPlatformMapping() {}; + static IPlatformMapping* getPlatformMapping(int cpu_model); + virtual bool pciTreeDiscover(std::vector& iios, uint32_t sockets_count) = 0; +}; + +// Mapping for SkyLake Server. 
+class PurleyPlatformMapping: public IPlatformMapping { +private: + void getUboxBusNumbers(std::vector& ubox); +public: + PurleyPlatformMapping() = default; + ~PurleyPlatformMapping() = default; + bool pciTreeDiscover(std::vector& iios, uint32_t sockets_count) override; +}; + +void PurleyPlatformMapping::getUboxBusNumbers(std::vector& ubox) +{ + for (uint16_t bus = 0; bus < 256; bus++) { + for (uint8_t device = 0; device < 32; device++) { + for (uint8_t function = 0; function < 8; function++) { + struct pci pci_dev; + pci_dev.bdf.busno = bus; + pci_dev.bdf.devno = device; + pci_dev.bdf.funcno = function; + if (probe_pci(&pci_dev)) { + if ((pci_dev.vendor_id == PCM_INTEL_PCI_VENDOR_ID) && (pci_dev.device_id == SKX_SOCKETID_UBOX_DID)) { + ubox.push_back(bus); + } + } + } + } + } +} + +bool PurleyPlatformMapping::pciTreeDiscover(std::vector& iios, uint32_t sockets_count) { - struct iio_skx iio_skx; - uint32 cpubusno = 0; - - if (PciHandleType::exists(0, (uint32)busno[socket_id], 8, 2)) { - //14nm - iio_skx.socket_id = socket_id; - PciHandleType h(0, busno[socket_id], 8, 2); - h.read32(0xcc, &cpubusno); // CPUBUSNO register - iio_skx.stacks[0].busno = cpubusno & 0xff; - iio_skx.stacks[1].busno = (cpubusno >> 8) & 0xff; - iio_skx.stacks[2].busno = (cpubusno >> 16) & 0xff; - iio_skx.stacks[3].busno = (cpubusno >> 24) & 0xff; - h.read32(0xd0, &cpubusno); // CPUBUSNO1 register - iio_skx.stacks[4].busno = cpubusno & 0xff; - iio_skx.stacks[5].busno = (cpubusno >> 8) & 0xff; - - for (uint8_t stack = 0; stack < 6; stack++) { - uint8_t busno = iio_skx.stacks[stack].busno; - iio_skx.stacks[stack].stack_name = iio_stack_names[stack]; - //std::cout << "stack" << unsigned(stack) << std::hex << ":0x" << unsigned(busno) << std::dec << ",(" << unsigned(busno) << ")\n"; - for (uint8_t part = 0; part < 4; part++) { - struct pci *pci = &iio_skx.stacks[stack].parts[part].root_pci_dev; + std::vector ubox; + getUboxBusNumbers(ubox); + if (ubox.empty()) { + cerr << "UBOXs were not found! 
Program aborted" << endl; + return false; + } + + for (uint32_t socket_id = 0; socket_id < sockets_count; socket_id++) { + if (!PciHandleType::exists(0, ubox[socket_id], SKX_UBOX_DEVICE_NUM, SKX_UBOX_FUNCTION_NUM)) { + cerr << "No access to PCICFG\n" << endl; + return false; + } + uint64 cpubusno = 0; + struct iio_stacks_on_socket iio_on_socket; + iio_on_socket.socket_id = socket_id; + PciHandleType h(0, ubox[socket_id], SKX_UBOX_DEVICE_NUM, SKX_UBOX_FUNCTION_NUM); + h.read64(ROOT_BUSES_OFFSET, &cpubusno); + + iio_on_socket.stacks.reserve(6); + for (int stack_id = 0; stack_id < 6; stack_id++) { + struct iio_stack stack; + stack.iio_unit_id = stack_id; + stack.busno = (uint8_t)(cpubusno >> (stack_id * SKX_BUS_NUM_STRIDE)); + stack.stack_name = skx_iio_stack_names[stack_id]; + for (uint8_t part_id = 0; part_id < 4; part_id++) { + struct iio_bifurcated_part part; + part.part_id = part_id; + struct pci *pci = &part.root_pci_dev; struct bdf *bdf = &pci->bdf; - bdf->busno = busno; - bdf->devno = part; + bdf->busno = stack.busno; + bdf->devno = part_id; bdf->funcno = 0; - if (stack != 0 && busno == 0) /* This is a workaround to catch some IIO stack does not exist */ + /* This is a workaround to catch some IIO stack does not exist */ + if (stack_id != 0 && stack.busno == 0) { pci->exist = false; - else - probe_pci(pci); + } + else if (probe_pci(pci)) { + /* FIXME: for 0:0.0, we may need to scan from secondary switch down */ + for (uint8_t bus = pci->secondary_bus_number; bus <= pci->subordinate_bus_number; bus++) { + for (uint8_t device = 0; device < 32; device++) { + for (uint8_t function = 0; function < 8; function++) { + struct pci child_pci_dev; + child_pci_dev.bdf.busno = bus; + child_pci_dev.bdf.devno = device; + child_pci_dev.bdf.funcno = function; + if (probe_pci(&child_pci_dev)) { + part.child_pci_devs.push_back(child_pci_dev); + } + } + } + } + } + stack.parts.push_back(part); } + + iio_on_socket.stacks.push_back(stack); } + iios.push_back(iio_on_socket); } - 
else if (PciHandleType::exists(0, (uint32)busno[socket_id], 2, 0)) { - //10nm wave1 - uint32 busvalid = 0; - iio_skx.socket_id = socket_id; - PciHandleType h(0, busno[socket_id], 2, 0); - h.read32(0x104, &cpubusno); // CPUBUSNO register - iio_skx.stacks[0].busno = cpubusno & 0xff; - iio_skx.stacks[1].busno = (cpubusno >> 8) & 0xff; - iio_skx.stacks[2].busno = (cpubusno >> 16) & 0xff; - iio_skx.stacks[3].busno = (cpubusno >> 24) & 0xff; - h.read32(0x108, &cpubusno); // CPUBUSNO1 register - iio_skx.stacks[4].busno = cpubusno & 0xff; - iio_skx.stacks[5].busno = (cpubusno >> 8) & 0xff; - h.read32(0x110, &busvalid); - - for (uint8_t stack = 0; stack < 6; stack++) { - if (m->getCPUModel() == PCM::ICX) - iio_skx.stacks[stack].stack_name = icx_iio_stack_names[stack]; - for (uint8_t part = 0; part < 4; part++) { - if (((busvalid & (1 << stack))) == 0) { - iio_skx.stacks[stack].parts[part].root_pci_dev.exist = false; - } - else { - if (PciHandleType::exists(0, (uint32)iio_skx.stacks[stack].busno, 0, 0)) { - uint32 reg = 0; - PciHandleType h(0, (uint32)iio_skx.stacks[stack].busno, 0, 0); - h.read32(0x802, ®); - iio_skx.stacks[stack].flipped = (reg & 0x4)?true:false; + + return true; +} + +class IPlatformMapping10Nm: public IPlatformMapping { +private: +public: + bool getSadIdRootBusMap(uint32_t socket_id, std::map& sad_id_bus_map); +}; + +bool IPlatformMapping10Nm::getSadIdRootBusMap(uint32_t socket_id, std::map& sad_id_bus_map) +{ + for (uint16_t bus = 0; bus < 256; bus++) { + for (uint8_t device = 0; device < 32; device++) { + for (uint8_t function = 0; function < 8; function++) { + struct pci pci_dev; + pci_dev.bdf.busno = bus; + pci_dev.bdf.devno = device; + pci_dev.bdf.funcno = function; + if (probe_pci(&pci_dev) && (pci_dev.vendor_id == PCM_INTEL_PCI_VENDOR_ID) + && (pci_dev.device_id == SNR_ICX_MESH2IIO_MMAP_DID)) { + + PciHandleType h(0, bus, device, function); + std::uint32_t sad_ctrl_cfg; + h.read32(SNR_ICX_SAD_CONTROL_CFG_OFFSET, &sad_ctrl_cfg); + if (sad_ctrl_cfg 
== (std::numeric_limits::max)()) { + cerr << "Could not read SAD_CONTROL_CFG" << endl; + return false; } - if (m->getCPUModel() == PCM::ICX) { - struct pci *pci = &iio_skx.stacks[stack].parts[part].root_pci_dev; - struct bdf *bdf = &pci->bdf; - bdf->busno = iio_skx.stacks[stack].busno; - bdf->devno = (part + 2); - bdf->funcno = 0; - probe_pci(pci); + + if ((sad_ctrl_cfg & 0xf) == socket_id) { + uint8_t sid = (sad_ctrl_cfg >> 4) & 0x7; + sad_id_bus_map.insert(std::pair(sid, (uint8_t)bus)); } } } } } - else { - cerr << "No access to PCICFG" << endl; - exit(EXIT_FAILURE); + + if (sad_id_bus_map.empty()) { + cerr << "Could not find Root Port bus numbers" << endl; + return false; } - for (uint8_t stack = 0; stack < 6; stack++) { - for (uint8_t part = 0; part < 4; part++) { - struct pci p = iio_skx.stacks[stack].parts[part].root_pci_dev; - if (!p.exist) + return true; +} + +// Mapping for IceLake Server. +class WhitleyPlatformMapping: public IPlatformMapping10Nm { +private: +public: + WhitleyPlatformMapping() = default; + ~WhitleyPlatformMapping() = default; + bool pciTreeDiscover(std::vector& iios, uint32_t sockets_count) override; +}; + +bool WhitleyPlatformMapping::pciTreeDiscover(std::vector& iios, uint32_t sockets_count) +{ + for (uint32_t socket = 0; socket < sockets_count; socket++) { + struct iio_stacks_on_socket iio_on_socket; + iio_on_socket.socket_id = socket; + std::map sad_id_bus_map; + if (!getSadIdRootBusMap(socket, sad_id_bus_map)) { + return false; + } + + { + struct iio_stack stack; + stack.iio_unit_id = icx_sad_to_pmu_id_mapping.at(ICX_MCP_SAD_ID); + stack.stack_name = icx_iio_stack_names[stack.iio_unit_id]; + iio_on_socket.stacks.push_back(stack); + } + + for (auto sad_id_bus_pair = sad_id_bus_map.cbegin(); sad_id_bus_pair != sad_id_bus_map.cend(); ++sad_id_bus_pair) { + int sad_id = sad_id_bus_pair->first; + if (icx_sad_to_pmu_id_mapping.find(sad_id) == + icx_sad_to_pmu_id_mapping.end()) { + cerr << "Unknown SAD ID: " << sad_id << endl; + return 
false; + } + + if (sad_id == ICX_MCP_SAD_ID) { + continue; + } + + struct iio_stack stack; + int root_bus = sad_id_bus_pair->second; + if (sad_id == ICX_CBDMA_DMI_SAD_ID) { + // There is one DMA Controller on each socket + stack.iio_unit_id = icx_sad_to_pmu_id_mapping.at(sad_id); + stack.busno = root_bus; + stack.stack_name = icx_iio_stack_names[stack.iio_unit_id]; + + // PCH is on socket 0 only + if (socket == 0) { + struct iio_bifurcated_part pch_part; + struct pci *pci = &pch_part.root_pci_dev; + struct bdf *bdf = &pci->bdf; + pch_part.part_id = ICX_PCH_PART_ID; + bdf->busno = root_bus; + bdf->devno = 0x00; + bdf->funcno = 0x00; + probe_pci(pci); + // Probe child devices only under PCH part. + for (uint8_t bus = pci->secondary_bus_number; bus <= pci->subordinate_bus_number; bus++) { + for (uint8_t device = 0; device < 32; device++) { + for (uint8_t function = 0; function < 8; function++) { + struct pci child_pci_dev; + child_pci_dev.bdf.busno = bus; + child_pci_dev.bdf.devno = device; + child_pci_dev.bdf.funcno = function; + if (probe_pci(&child_pci_dev)) { + pch_part.child_pci_devs.push_back(child_pci_dev); + } + } + } + } + stack.parts.push_back(pch_part); + } + + struct iio_bifurcated_part part; + part.part_id = ICX_CBDMA_PART_ID; + struct pci *pci = &part.root_pci_dev; + struct bdf *bdf = &pci->bdf; + bdf->busno = root_bus; + bdf->devno = 0x01; + bdf->funcno = 0x00; + probe_pci(pci); + stack.parts.push_back(part); + + iio_on_socket.stacks.push_back(stack); continue; - for (uint8_t b = p.secondary_bus_number; b <= p.subordinate_bus_number; b++) { /* FIXME: for 0:0.0, we may need to scan from secondary switch down */ - for (uint8_t d = 0; d < 32; d++) { - for (uint8_t f = 0; f < 8; f++) { - struct pci pci; - pci.exist = false; - pci.bdf.busno = b; - pci.bdf.devno = d; - pci.bdf.funcno = f; - probe_pci(&pci); - if (pci.exist) - iio_skx.stacks[stack].parts[part].child_pci_devs.push_back(pci); + } + stack.busno = root_bus; + stack.iio_unit_id = 
icx_sad_to_pmu_id_mapping.at(sad_id); + stack.stack_name = icx_iio_stack_names[stack.iio_unit_id]; + for (int slot = 2; slot < 6; slot++) { + struct pci pci; + pci.bdf.busno = root_bus; + pci.bdf.devno = slot; + pci.bdf.funcno = 0x00; + if (!probe_pci(&pci)) { + continue; + } + int part_id = pci.device_id - ICX_ROOT_PORT_A_DID; + if ((part_id < 0) || (part_id > 4)) { + cerr << "Invalid part ID " << part_id << endl; + return false; + } + struct iio_bifurcated_part part; + part.part_id = part_id; + part.root_pci_dev = pci; + + for (uint8_t bus = pci.secondary_bus_number; bus <= pci.subordinate_bus_number; bus++) { + for (uint8_t device = 0; device < 32; device++) { + for (uint8_t function = 0; function < 8; function++) { + struct pci child_pci_dev; + child_pci_dev.bdf.busno = bus; + child_pci_dev.bdf.devno = device; + child_pci_dev.bdf.funcno = function; + if (probe_pci(&child_pci_dev)) { + part.child_pci_devs.push_back(child_pci_dev); + } + } + } + } + stack.parts.push_back(part); + } + iio_on_socket.stacks.push_back(stack); + } + std::sort(iio_on_socket.stacks.begin(), iio_on_socket.stacks.end()); + iios.push_back(iio_on_socket); + } + return true; +} + +// Mapping for Snowridge. 
+class JacobsvillePlatformMapping: public IPlatformMapping10Nm { +private: +public: + JacobsvillePlatformMapping() = default; + ~JacobsvillePlatformMapping() = default; + bool pciTreeDiscover(std::vector& iios, uint32_t sockets_count) override; + bool JacobsvilleAccelerators(const std::pair& sad_id_bus_pair, struct iio_stack& stack); +}; + +bool JacobsvillePlatformMapping::JacobsvilleAccelerators(const std::pair& sad_id_bus_pair, struct iio_stack& stack) +{ + uint16_t expected_dev_id; + auto sad_id = sad_id_bus_pair.first; + switch (sad_id) { + case SNR_HQM_SAD_ID: + expected_dev_id = HQM_DID; + break; + case SNR_NIS_SAD_ID: + expected_dev_id = NIS_DID; + break; + case SNR_QAT_SAD_ID: + expected_dev_id = QAT_DID; + break; + default: + return false; + } + stack.iio_unit_id = snr_sad_to_pmu_id_mapping.at(sad_id); + stack.stack_name = snr_iio_stack_names[stack.iio_unit_id]; + for (uint16_t bus = sad_id_bus_pair.second; bus < 256; bus++) { + for (uint8_t device = 0; device < 32; device++) { + for (uint8_t function = 0; function < 8; function++) { + struct pci pci_dev; + pci_dev.bdf.busno = bus; + pci_dev.bdf.devno = device; + pci_dev.bdf.funcno = function; + if (probe_pci(&pci_dev)) { + if (expected_dev_id == pci_dev.device_id) { + struct iio_bifurcated_part part; + part.part_id = SNR_ACCELERATOR_PART_ID; + part.root_pci_dev = pci_dev; + stack.busno = bus; + stack.parts.push_back(part); + return true; } } } } } - v_iio_skx.push_back(iio_skx); + return false; +} + +bool JacobsvillePlatformMapping::pciTreeDiscover(std::vector& iios, uint32_t sockets_count) +{ + std::map sad_id_bus_map; + PCM_UNUSED(sockets_count); + if (!getSadIdRootBusMap(0, sad_id_bus_map)) { + return false; + } + struct iio_stacks_on_socket iio_on_socket; + iio_on_socket.socket_id = 0; + if (sad_id_bus_map.size() != snr_sad_to_pmu_id_mapping.size()) { + cerr << "Found unexpected number of stacks: " << sad_id_bus_map.size() << ", expected: " << snr_sad_to_pmu_id_mapping.size() << endl; + return false; 
+ } + + for (auto sad_id_bus_pair = sad_id_bus_map.cbegin(); sad_id_bus_pair != sad_id_bus_map.cend(); ++sad_id_bus_pair) { + int sad_id = sad_id_bus_pair->first; + struct iio_stack stack; + switch (sad_id) { + case SNR_CBDMA_DMI_SAD_ID: + { + int root_bus = sad_id_bus_pair->second; + stack.iio_unit_id = snr_sad_to_pmu_id_mapping.at(sad_id); + stack.stack_name = snr_iio_stack_names[stack.iio_unit_id]; + stack.busno = root_bus; + // DMA Controller + struct iio_bifurcated_part part; + part.part_id = 0; + struct pci pci_dev; + pci_dev.bdf.busno = root_bus; + pci_dev.bdf.devno = 0x01; + pci_dev.bdf.funcno = 0x00; + probe_pci(&pci_dev); + part.root_pci_dev = pci_dev; + stack.parts.push_back(part); + + part.part_id = 4; + pci_dev.bdf.busno = root_bus; + pci_dev.bdf.devno = 0x00; + pci_dev.bdf.funcno = 0x00; + probe_pci(&pci_dev); + for (uint8_t bus = pci_dev.secondary_bus_number; bus <= pci_dev.subordinate_bus_number; bus++) { + for (uint8_t device = 0; device < 32; device++) { + for (uint8_t function = 0; function < 8; function++) { + struct pci child_pci_dev; + child_pci_dev.bdf.busno = bus; + child_pci_dev.bdf.devno = device; + child_pci_dev.bdf.funcno = function; + if (probe_pci(&child_pci_dev)) { + part.child_pci_devs.push_back(child_pci_dev); + } + } + } + } + part.root_pci_dev = pci_dev; + stack.parts.push_back(part); + } + break; + case SNR_PCIE_GEN3_SAD_ID: + { + int root_bus = sad_id_bus_pair->second; + stack.busno = root_bus; + stack.iio_unit_id = snr_sad_to_pmu_id_mapping.at(sad_id); + stack.stack_name = snr_iio_stack_names[stack.iio_unit_id]; + for (int slot = 4; slot < 8; slot++) { + struct pci pci_dev; + pci_dev.bdf.busno = root_bus; + pci_dev.bdf.devno = slot; + pci_dev.bdf.funcno = 0x00; + if (!probe_pci(&pci_dev)) { + continue; + } + int part_id = 4 + pci_dev.device_id - SNR_ROOT_PORT_A_DID; + if ((part_id < 0) || (part_id > 4)) { + cerr << "Invalid part ID " << part_id << endl; + return false; + } + struct iio_bifurcated_part part; + part.part_id = 
part_id; + part.root_pci_dev = pci_dev; + for (uint8_t bus = pci_dev.secondary_bus_number; bus <= pci_dev.subordinate_bus_number; bus++) { + for (uint8_t device = 0; device < 32; device++) { + for (uint8_t function = 0; function < 8; function++) { + struct pci child_pci_dev; + child_pci_dev.bdf.busno = bus; + child_pci_dev.bdf.devno = device; + child_pci_dev.bdf.funcno = function; + if (probe_pci(&child_pci_dev)) { + part.child_pci_devs.push_back(child_pci_dev); + } + } + } + } + stack.parts.push_back(part); + } + } + break; + case SNR_HQM_SAD_ID: + case SNR_NIS_SAD_ID: + case SNR_QAT_SAD_ID: + JacobsvilleAccelerators(*sad_id_bus_pair, stack); + break; + default: + cerr << "Unknown SAD ID: " << sad_id << endl; + return false; + } + iio_on_socket.stacks.push_back(stack); + } + + std::sort(iio_on_socket.stacks.begin(), iio_on_socket.stacks.end()); + + iios.push_back(iio_on_socket); + + return true; +} + +IPlatformMapping* IPlatformMapping::getPlatformMapping(int cpu_model) +{ + switch (cpu_model) { + case PCM::SKX: + return new PurleyPlatformMapping(); + case PCM::ICX: + return new WhitleyPlatformMapping(); + case PCM::SNOWRIDGE: + return new JacobsvillePlatformMapping(); + default: + return nullptr; + } } std::string dos2unix(std::string in) @@ -393,6 +811,7 @@ ccr* get_ccr(PCM* m, uint64_t& ccr) case PCM::SKX: return new skx_ccr(ccr); case PCM::ICX: + case PCM::SNOWRIDGE: return new icx_ccr(ccr); default: cerr << "Skylake Server CPU is required for this tool! 
Program aborted" << endl; @@ -530,40 +949,33 @@ vector load_events(PCM * m, const char* fn) return v; } -result_content get_IIO_Samples(PCM *m, vector iio_skx_v, struct counter ctr, uint32_t delay_ms) +result_content get_IIO_Samples(PCM *m, const std::vector& iios, struct counter ctr, uint32_t delay_ms) { IIOCounterState *before, *after; uint64 rawEvents[4] = {0}; std::unique_ptr pccr(get_ccr(m, ctr.ccr)); - std::vector IIO_units; - IIO_units.push_back((int32)PCM::IIO_CBDMA); - IIO_units.push_back((int32)PCM::IIO_PCIe0); - IIO_units.push_back((int32)PCM::IIO_PCIe1); - IIO_units.push_back((int32)PCM::IIO_PCIe2); - IIO_units.push_back((int32)PCM::IIO_MCP0); - IIO_units.push_back((int32)PCM::IIO_MCP1); rawEvents[ctr.idx] = pccr->get_ccr_value(); - before = new IIOCounterState[iio_skx_v.size() * IIO_units.size()]; - after = new IIOCounterState[iio_skx_v.size() * IIO_units.size()]; - - m->programIIOCounters(rawEvents, -1); - for (vector::const_iterator socket = iio_skx_v.begin(); socket != iio_skx_v.end(); ++socket) { - for (vector::const_iterator stack = IIO_units.begin(); stack != IIO_units.end(); ++stack) { - uint32_t idx = (uint32_t)IIO_units.size()*socket->socket_id + *stack; - before[idx] = m->getIIOCounterState(socket->socket_id, *stack, ctr.idx); + int stacks_count = iios[0].stacks.size(); + before = new IIOCounterState[iios.size() * stacks_count]; + after = new IIOCounterState[iios.size() * stacks_count]; + + m->programIIOCounters(rawEvents); + for (auto socket = iios.cbegin(); socket != iios.cend(); ++socket) { + for (auto stack = socket->stacks.cbegin(); stack != socket->stacks.cend(); ++stack) { + auto iio_unit_id = stack->iio_unit_id; + uint32_t idx = (uint32_t)stacks_count * socket->socket_id + iio_unit_id; + before[idx] = m->getIIOCounterState(socket->socket_id, iio_unit_id, ctr.idx); } } MySleepMs(delay_ms); - for (vector::const_iterator socket = iio_skx_v.begin(); socket != iio_skx_v.end(); ++socket) { - struct iio_skx iio_skx = *socket; - 
//iio_skx.stacks[*stack].values.clear(); - for (vector::const_iterator stack = IIO_units.begin(); stack != IIO_units.end(); ++stack) { - uint32_t idx = (uint32_t)IIO_units.size()*socket->socket_id + *stack; - after[idx] = m->getIIOCounterState(socket->socket_id, *stack, ctr.idx); + for (auto socket = iios.cbegin(); socket != iios.cend(); ++socket) { + for (auto stack = socket->stacks.cbegin(); stack != socket->stacks.cend(); ++stack) { + auto iio_unit_id = stack->iio_unit_id; + uint32_t idx = (uint32_t)stacks_count * socket->socket_id + iio_unit_id; + after[idx] = m->getIIOCounterState(socket->socket_id, iio_unit_id, ctr.idx); uint64_t raw_result = getNumberOfEvents(before[idx], after[idx]); uint64_t trans_result = uint64_t (raw_result * ctr.multiplier / (double) ctr.divider * (1000 / (double) delay_ms)); - results[iio_skx.socket_id][*stack][std::pair(ctr.h_id,ctr.v_id)] = trans_result; - //cout << "skt:" << iio_skx.socket_id << " stack:" << *stack << " h_id:" << ctr.h_id << " v_id:" << ctr.v_id << " res:" << raw_result << " trans:" << trans_result << "\n"; + results[socket->socket_id][iio_unit_id][std::pair(ctr.h_id,ctr.v_id)] = trans_result; } } delete[] before; @@ -571,39 +983,38 @@ result_content get_IIO_Samples(PCM *m, vector iio_skx_v, struct return results; } -void collect_data(PCM *m, vector iio_skx_v, vector &ctrs) +void collect_data(PCM *m, vector& iios, vector& ctrs) { - result_content s; uint32_t delay_ms = (uint32_t)(PCM_DELAY_DEFAULT / ctrs.size() * 1000); - //cout << "delay_ms:" << delay_ms << "\n"; - for (vector::iterator cunit = ctrs.begin(); cunit != ctrs.end(); ++cunit) { - cunit->data.clear(); - s = get_IIO_Samples(m, iio_skx_v, *cunit, delay_ms); - cunit->data.push_back(s); + for (auto counter = ctrs.begin(); counter != ctrs.end(); ++counter) { + counter->data.clear(); + result_content sample = get_IIO_Samples(m, iios, *counter, delay_ms); + counter->data.push_back(sample); } } /** * For debug only */ -void print_PCIeMapping(vector iio_skx_, 
const PCIDB & pciDB) +void print_PCIeMapping(const std::vector& iios, const PCIDB & pciDB) { - for (auto it = iio_skx_.begin(); it != iio_skx_.end(); ++it) - { + for (auto it = iios.begin(); it != iios.end(); ++it) { printf("Socket %d\n", (*it).socket_id); for (int stack = 0; stack < 6; stack++) { - printf("\t%s root bus: 0x%x", (*it).stacks[stack].stack_name.c_str(), (*it).stacks[stack].busno); - printf((*it).stacks[stack].flipped ? "\tflipped: true\n" : "\tflipped: false\n"); - for (uint32_t p = 0; p < 4; p++) { - vector pp = (*it).stacks[stack].parts[p].child_pci_devs; - uint8_t level = 1; - for (std::vector::const_iterator iunit = pp.begin(); iunit != pp.end(); ++iunit) - { - uint64_t header_width = 100; - string row = build_pci_header(pciDB, (uint32_t)header_width, *iunit, -1, level); - printf("\t\t%s\n", row.c_str()); - if (iunit->header_type == 1) - level += 1; + for (auto stack : it->stacks) { + printf("\t%s root bus: 0x%x", stack.stack_name.c_str(), stack.busno); + printf(stack.flipped ? 
"\tflipped: true\n" : "\tflipped: false\n"); + for (auto part : stack.parts) { + vector pp = part.child_pci_devs; + uint8_t level = 1; + for (std::vector::const_iterator iunit = pp.begin(); iunit != pp.end(); ++iunit) + { + uint64_t header_width = 100; + string row = build_pci_header(pciDB, (uint32_t)header_width, *iunit, -1, level); + printf("\t\t%s\n", row.c_str()); + if (iunit->header_type == 1) + level += 1; + } } } } @@ -616,9 +1027,6 @@ int main() std::cout << "\n Processor Counter Monitor " << PCM_VERSION << "\n"; std::cout << "\n This utility measures Skylake-SP IIO information\n\n"; - vector skt_list; - vector stack_list; - vector iio_skx_v; vector counters; vector display_buffer; PCIDB pciDB; @@ -627,23 +1035,17 @@ int main() PCM * m = PCM::getInstance(); print_cpu_details(); string ev_file_name; - if ( m->getCPUModel() == PCM::SKX || - m->getCPUModel() == PCM::ICX) + if (m->IIOEventsAvailable()) { ev_file_name = "opCode-" + std::to_string(m->getCPUModel()) + ".txt"; } else { - cerr << "Skylake Server CPU is required for this tool! Program aborted\n"; - exit(EXIT_FAILURE); - } - if(m->getNumSockets() > max_sockets) - { - cerr << "Only systems with up to " << (int)max_sockets << " sockets are supported! Program aborted\n"; + cerr << "This CPU is not supported by PCM IIO tool! Program aborted\n"; exit(EXIT_FAILURE); } - opcodeFieldMap["opcode"] =PCM::OPCODE; + opcodeFieldMap["opcode"] = PCM::OPCODE; opcodeFieldMap["ev_sel"] = PCM::EVENT_SELECT; opcodeFieldMap["umask"] = PCM::UMASK; opcodeFieldMap["reset"] = PCM::RESET; @@ -665,50 +1067,31 @@ int main() //print_nameMap(); //TODO: Taking from cli - vector busno; + //TODO: remove binding to max sockets count. + if (m->getNumSockets() > max_sockets) { + cerr << "Only systems with up to " << max_sockets << " sockets are supported! 
Program aborted\n"; + exit(EXIT_FAILURE); + } - switch(m->getNumSockets()) - { - case 1: - case 2: - { // TODO: do a proper bus scan - vector _{0x0, 0x80}; - busno = _; - } - break; - case 4: - { - vector _{0x0, 0x40, 0x80, 0xc0}; - busno = _; - } - break; - default: - cerr << "Only systems with " <getNumSockets()<< " sockets are not supported! Program aborted\n"; - exit(EXIT_FAILURE); + auto mapping = IPlatformMapping::getPlatformMapping(m->getCPUModel()); + if (!mapping) { + cerr << "Failed to discover pci tree: unknown platform" << endl; + exit(EXIT_FAILURE); } - for(uint32 s=0; s < m->getNumSockets();++s) { - skt_list.push_back(s); - discover_pci_tree(busno, s, iio_skx_v, m); + + std::vector iios; + if (!mapping->pciTreeDiscover(iios, m->getNumSockets())) { + exit(EXIT_FAILURE); } + /* Debug only: - print_PCIeMapping(iio_skx_v, pciDB); + print_PCIeMapping(iios, pciDB); return 0; */ - stack_list.push_back(PCM::IIO_CBDMA); - stack_list.push_back(PCM::IIO_PCIe0); - stack_list.push_back(PCM::IIO_PCIe1); - stack_list.push_back(PCM::IIO_PCIe2); - stack_list.push_back(PCM::IIO_MCP0); - stack_list.push_back(PCM::IIO_MCP1); - while (1) { - collect_data(m, iio_skx_v, counters); - display_buffer = build_display(iio_skx_v, counters, skt_list, stack_list, pciDB); + collect_data(m, iios, counters); + display_buffer = build_display(iios, counters, pciDB); display(display_buffer); - if (m->getCPUModel() == PCM::ICX) - { - std::cerr << "WARNING: The IIO stack <-> device mapping might not be incorrect. Will be addressed in a later version." 
<< std::endl; - } }; } diff --git a/pcm-pcie.cpp b/pcm-pcie.cpp index c5e5366d..094ea25a 100644 --- a/pcm-pcie.cpp +++ b/pcm-pcie.cpp @@ -94,6 +94,7 @@ IPlatform *IPlatform::getPlatform(PCM *m, bool csv, bool print_bandwidth, bool p { switch (m->getCPUModel()) { case PCM::ICX: + case PCM::SNOWRIDGE: return new WhitleyPlatform(m, csv, print_bandwidth, print_additional_info, delay); case PCM::SKX: return new PurleyPlatform(m, csv, print_bandwidth, print_additional_info, delay); diff --git a/pcm-power.cpp b/pcm-power.cpp index d7d501c6..f2ae0de8 100644 --- a/pcm-power.cpp +++ b/pcm-power.cpp @@ -404,13 +404,13 @@ int main(int argc, char * argv[]) << "; PCUClocks: " << getPCUClocks(BeforeState[socket], AfterState[socket]) << "; Thermal freq limit cycles: " << getNormalizedPCUCounter(1, BeforeState[socket], AfterState[socket]) * 100. << " %" << "; Power freq limit cycles:" << getNormalizedPCUCounter(2, BeforeState[socket], AfterState[socket]) * 100. << " %"; - if(cpu_model != PCM::SKX && cpu_model != PCM::ICX) + if(cpu_model != PCM::SKX && cpu_model != PCM::ICX && cpu_model != PCM::SNOWRIDGE) cout << "; Clipped freq limit cycles:" << getNormalizedPCUCounter(3, BeforeState[socket], AfterState[socket]) * 100. << " %"; cout << "\n"; break; case 4: - if (cpu_model == PCM::SKX || cpu_model == PCM::ICX) + if (cpu_model == PCM::SKX || cpu_model == PCM::ICX || cpu_model == PCM::SNOWRIDGE) { cout << "This PCU profile is not supported on your processor\n"; break; @@ -440,7 +440,7 @@ int main(int argc, char * argv[]) cout << "; PC1e+ residency: " << getNormalizedPCUCounter(0, BeforeState[socket], AfterState[socket], m) * 100. 
<< " %" "; PC1e+ transition count: " << getPCUCounter(1, BeforeState[socket], AfterState[socket]) << " "; - if (cpu_model == PCM::IVYTOWN || cpu_model == PCM::HASWELLX || PCM::BDX_DE == cpu_model || PCM::SKX == cpu_model || PCM::ICX == cpu_model) + if (cpu_model == PCM::IVYTOWN || cpu_model == PCM::HASWELLX || PCM::BDX_DE == cpu_model || PCM::SKX == cpu_model || PCM::ICX == cpu_model || cpu_model == PCM::SNOWRIDGE) { cout << "; PC2 residency: " << getPackageCStateResidency(2, BeforeState[socket], AfterState[socket]) * 100. << " %"; cout << "; PC2 transitions: " << getPCUCounter(2, BeforeState[socket], AfterState[socket]) << " "; diff --git a/pcm.cpp b/pcm.cpp index 033c4cc1..3442d60e 100644 --- a/pcm.cpp +++ b/pcm.cpp @@ -1354,26 +1354,6 @@ int main(int argc, char * argv[]) print_output(m, cstates1, cstates2, sktstate1, sktstate2, ycores, sstate1, sstate2, cpu_model, show_core_output, show_partial_core_output, show_socket_output, show_system_output); - // sanity checks - if (m->isAtom() || cpu_model == PCM::KNL) - { - assert(getNumberOfCustomEvents(0, sstate1, sstate2) == getL2CacheMisses(sstate1, sstate2)); - assert(getNumberOfCustomEvents(1, sstate1, sstate2) == getL2CacheMisses(sstate1, sstate2) + getL2CacheHits(sstate1, sstate2)); - } - else - { - assert(getNumberOfCustomEvents(0, sstate1, sstate2) == getL3CacheMisses(sstate1, sstate2)); - if (m->useSkylakeEvents()) { - assert(getNumberOfCustomEvents(1, sstate1, sstate2) == getL3CacheHits(sstate1, sstate2)); - assert(getNumberOfCustomEvents(2, sstate1, sstate2) == getL2CacheMisses(sstate1, sstate2)); - } - else { - assert(getNumberOfCustomEvents(1, sstate1, sstate2) == getL3CacheHitsNoSnoop(sstate1, sstate2)); - assert(getNumberOfCustomEvents(2, sstate1, sstate2) == getL3CacheHitsSnoop(sstate1, sstate2)); - } - assert(getNumberOfCustomEvents(3, sstate1, sstate2) == getL2CacheHits(sstate1, sstate2)); - } - std::swap(sstate1, sstate2); std::swap(sktstate1, sktstate2); std::swap(cstates1, cstates2); diff --git 
a/types.h b/types.h index 1eeca9b0..a82a4e10 100644 --- a/types.h +++ b/types.h @@ -994,6 +994,10 @@ static const uint32 ICX_CHA_MSR_PMON_BOX_CTL[] = { 0x0B7C, 0x0B8A, 0x0B98, 0x0BA6, 0x0BB4, 0x0BC2 }; +static const uint32 SNR_CHA_MSR_PMON_BOX_CTL[] = { + 0x1C00, 0x1C10, 0x1C20, 0x1C30, 0x1C40, 0x1C50 +}; + #define SERVER_CHA_MSR_PMON_CTL0_OFFSET (1) /* #define SERVER_CHA_MSR_PMON_CTL1_OFFSET (2) @@ -1055,11 +1059,25 @@ static const uint32 ICX_CHA_MSR_PMON_BOX_CTL[] = { #define ICX_IIO_CBDMA_UNIT_STATUS (0x0A57) #define ICX_IIO_CTL_REG_OFFSET (0x0008) #define ICX_IIO_CTR_REG_OFFSET (0x0001) -//Adding array for ICX IIO +/* + * M2IOSF MSRs in order: + * M2IOSF0 - PCIe0 stack + * M2IOSF1 - PCIe1 stack + * M2IOSF2 - MCP stack + * M2IOSF3 - PCIe2 stack + * M2IOSF4 - PCIe3 stack + * M2IOSF5 - CBDMA/DMI stack + */ static const uint32 ICX_IIO_UNIT_CTL[] = { - 0x0B20, 0x0A50, 0x0A70, 0x0A90, 0x0AE0, 0x0B00 + 0x0A50, 0x0A70, 0x0A90, 0x0AE0, 0x0B00, 0x0B20 }; +#define SNR_IIO_CBDMA_UNIT_STATUS (0x1E07) +#define SNR_IIO_CBDMA_UNIT_CTL (0x1E00) +#define SNR_IIO_CBDMA_CTR0 (0x1E01) +#define SNR_IIO_CBDMA_CTL0 (0x1E08) +#define SNR_IIO_PM_REG_STEP (0x0010) + #define IIO_MSR_PMON_CTL_EVENT(x) ((x) << 0) #define IIO_MSR_PMON_CTL_UMASK(x) ((x) << 8) #define IIO_MSR_PMON_CTL_RST (1 << 17)