Skip to content

Commit

Permalink
Merge pull request #296 from opcm/push-2021-05-15
Browse files Browse the repository at this point in the history
Push 2021 05 15
  • Loading branch information
opcm authored May 15, 2021
2 parents 442f410 + 79df99f commit 09bc9f2
Show file tree
Hide file tree
Showing 11 changed files with 906 additions and 309 deletions.
161 changes: 125 additions & 36 deletions cpucounters.cpp

Large diffs are not rendered by default.

71 changes: 62 additions & 9 deletions cpucounters.h
Original file line number Diff line number Diff line change
Expand Up @@ -732,6 +732,38 @@ class PCM_API PCM
IIO_STACK_COUNT = 6
};

// Offsets/enumeration of IIO stacks for Skylake server.
// Each enumerator is the zero-based index of one stack; SKX_IIO_STACK_COUNT
// is the number of stacks listed here (one past the last index).
enum SkylakeIIOStacks {
SKX_IIO_CBDMA_DMI = 0,
SKX_IIO_PCIe0 = 1,
SKX_IIO_PCIe1 = 2,
SKX_IIO_PCIe2 = 3,
SKX_IIO_MCP0 = 4,
SKX_IIO_MCP1 = 5,
SKX_IIO_STACK_COUNT = 6
};

// Offsets/enumeration of IIO stacks for IceLake server.
// Each enumerator is the zero-based index of one stack; ICX_IIO_STACK_COUNT
// is the number of stacks listed here (one past the last index).
// Note that the ordering differs from SkylakeIIOStacks: here CBDMA/DMI is the
// last stack (5) and MCP0 sits between PCIe1 and PCIe2.
enum IcelakeIIOStacks {
ICX_IIO_PCIe0 = 0,
ICX_IIO_PCIe1 = 1,
ICX_IIO_MCP0 = 2,
ICX_IIO_PCIe2 = 3,
ICX_IIO_PCIe3 = 4,
ICX_IIO_CBDMA_DMI = 5,
ICX_IIO_STACK_COUNT = 6
};

// Offsets/enumeration of IIO stacks for Snowridge.
// Each enumerator is the zero-based index of one stack; SNR_IIO_STACK_COUNT
// is the number of stacks listed here (one past the last index). Unlike the
// Skylake and IceLake enumerations, only five stacks are listed.
enum SnowridgeIIOStacks {
SNR_IIO_QAT = 0,
SNR_IIO_CBDMA_DMI = 1,
SNR_IIO_NIS = 2,
SNR_IIO_HQM = 3,
SNR_IIO_PCIe0 = 4,
SNR_IIO_STACK_COUNT = 5
};

struct SimplePCIeDevInfo
{
enum PCIeWidthMode width;
Expand Down Expand Up @@ -1259,6 +1291,7 @@ class PCM_API PCM
CHERRYTRAIL = 76,
APOLLO_LAKE = 92,
DENVERTON = 95,
SNOWRIDGE = 134,
CLARKDALE = 37,
WESTMERE_EP = 44,
NEHALEM_EX = 46,
Expand Down Expand Up @@ -1414,6 +1447,7 @@ class PCM_API PCM
case ICX:
case BDX:
case KNL:
case SNOWRIDGE:
return (server_pcicfg_uncore.size() && server_pcicfg_uncore[0].get()) ? (server_pcicfg_uncore[0]->getNumMCChannels()) : 0;
}
return 0;
Expand Down Expand Up @@ -1441,6 +1475,7 @@ class PCM_API PCM
case ICX:
case BDX:
case KNL:
case SNOWRIDGE:
return (socket < server_pcicfg_uncore.size() && server_pcicfg_uncore[socket].get()) ? (server_pcicfg_uncore[socket]->getNumMCChannels(controller)) : 0;
}
return 0;
Expand All @@ -1466,6 +1501,8 @@ class PCM_API PCM
if (ICL == cpu_model || TGL == cpu_model) return 5;
switch (cpu_model)
{
case SNOWRIDGE:
return 4;
case DENVERTON:
return 3;
case NEHALEM_EP:
Expand Down Expand Up @@ -1512,6 +1549,7 @@ class PCM_API PCM
return 1000000000ULL; // 1 GHz
case SKX:
case ICX:
case SNOWRIDGE:
return 1100000000ULL; // 1.1 GHz
}
return 0;
Expand All @@ -1533,6 +1571,7 @@ class PCM_API PCM
case BDX_DE:
case SKX:
case ICX:
case SNOWRIDGE:
case KNL:
return true;
default:
Expand Down Expand Up @@ -1711,6 +1750,7 @@ class PCM_API PCM
|| cpu_model_ == CHERRYTRAIL
|| cpu_model_ == APOLLO_LAKE
|| cpu_model_ == DENVERTON
// || cpu_model_ == SNOWRIDGE do not use Atom code for SNOWRIDGE
;
}

Expand All @@ -1733,6 +1773,7 @@ class PCM_API PCM
|| cpu_model == PCM::BAYTRAIL
|| cpu_model == PCM::APOLLO_LAKE
|| cpu_model == PCM::DENVERTON
|| cpu_model == PCM::SNOWRIDGE
|| cpu_model == PCM::HASWELLX
|| cpu_model == PCM::BROADWELL
|| cpu_model == PCM::BDX_DE
Expand Down Expand Up @@ -1807,10 +1848,8 @@ class PCM_API PCM

bool memoryTrafficMetricsAvailable() const
{
return !(
isAtom()
|| cpu_model == PCM::CLARKDALE
);
return (!(isAtom() || cpu_model == PCM::CLARKDALE))
;
}

bool MCDRAMmemoryTrafficMetricsAvailable() const
Expand All @@ -1835,6 +1874,7 @@ class PCM_API PCM
return (
cpu_model == PCM::SKX
|| cpu_model == PCM::ICX
|| cpu_model == PCM::SNOWRIDGE
);
}

Expand Down Expand Up @@ -1863,6 +1903,7 @@ class PCM_API PCM
isCLX()
|| isCPX()
|| cpu_model == PCM::ICX
|| cpu_model == PCM::SNOWRIDGE
);
}

Expand All @@ -1880,6 +1921,7 @@ class PCM_API PCM
|| ((SKX == cpu_model) && (num_sockets == 1))
#endif
|| ICX == cpu_model
|| SNOWRIDGE == cpu_model
);
}

Expand All @@ -1894,6 +1936,7 @@ class PCM_API PCM
{
return (
cpu_model == PCM::JAKETOWN
|| cpu_model == PCM::SNOWRIDGE
|| cpu_model == PCM::IVYTOWN
|| cpu_model == PCM::HASWELLX
|| cpu_model == PCM::BDX_DE
Expand Down Expand Up @@ -2284,7 +2327,7 @@ uint64 getDRAMClocks(uint32 channel, const CounterStateType & before, const Coun
{
const auto clk = after.DRAMClocks[channel] - before.DRAMClocks[channel];
const auto cpu_model = PCM::getInstance()->getCPUModel();
if (cpu_model == PCM::ICX)
if (cpu_model == PCM::ICX || cpu_model == PCM::SNOWRIDGE)
{
return 2 * clk;
}
Expand Down Expand Up @@ -3151,10 +3194,11 @@ uint64 getL2CacheMisses(const CounterStateType & before, const CounterStateType
{
auto pcm = PCM::getInstance();
if (pcm->isL2CacheMissesAvailable() == false) return 0ULL;
if (pcm->useSkylakeEvents()) {
const auto cpu_model = pcm->getCPUModel();
if (pcm->useSkylakeEvents() || cpu_model == PCM::SNOWRIDGE) {
return after.Event[BasicCounterState::SKLL2MissPos] - before.Event[BasicCounterState::SKLL2MissPos];
}
if (pcm->isAtom() || pcm->getCPUModel() == PCM::KNL)
if (pcm->isAtom() || cpu_model == PCM::KNL)
{
return after.Event[BasicCounterState::ArchLLCMissPos] - before.Event[BasicCounterState::ArchLLCMissPos];
}
Expand Down Expand Up @@ -3243,8 +3287,17 @@ uint64 getL3CacheHitsNoSnoop(const CounterStateType & before, const CounterState
template <class CounterStateType>
uint64 getL3CacheHitsSnoop(const CounterStateType & before, const CounterStateType & after)
{
if (!PCM::getInstance()->isL3CacheHitsSnoopAvailable()) return 0;
if (PCM::getInstance()->useSkylakeEvents()) {
auto pcm = PCM::getInstance();
if (!pcm->isL3CacheHitsSnoopAvailable()) return 0;
const auto cpu_model = pcm->getCPUModel();
if (cpu_model == PCM::SNOWRIDGE)
{
const int64 misses = getL3CacheMisses(before, after);
const int64 refs = after.Event[BasicCounterState::ArchLLCRefPos] - before.Event[BasicCounterState::ArchLLCRefPos];
const int64 hits = refs - misses;
return (hits > 0)? hits : 0;
}
if (pcm->useSkylakeEvents()) {
return after.Event[BasicCounterState::SKLL3HitPos] - before.Event[BasicCounterState::SKLL3HitPos];
}
return after.Event[BasicCounterState::L2HitMPos] - before.Event[BasicCounterState::L2HitMPos];
Expand Down
75 changes: 54 additions & 21 deletions lspci.h
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,7 @@ struct bdf {
uint8_t busno;
uint8_t devno;
uint8_t funcno;
bdf () : busno(0), devno(0), funcno(0) {}
};

struct pci {
Expand Down Expand Up @@ -251,6 +252,7 @@ struct pci {
};
uint32_t link_info;
};
pci () : exist(false), offset_0(0), header_type(0), offset_18(0), link_info(0) {}
};

struct counter {
Expand Down Expand Up @@ -279,6 +281,34 @@ struct iio_skx {
uint32_t socket_id;
};

/* One bifurcated part of an IIO stack: a root port plus the PCI devices
   found below it. */
struct iio_bifurcated_part {
/* Identifier of this part (NOTE(review): presumably its index within the
   owning stack -- confirm against the code that fills it in) */
int part_id;
/* Single device representing the root port */
struct pci root_pci_dev;
/* Contains the child switch and end-point devices */
std::vector<struct pci> child_pci_devs;
};

/* Description of a single IIO stack: the bifurcated parts it consists of,
   its identity (unit id, name, bus number) and the values gathered for it.
   All scalar members carry default initializers so that a default-constructed
   iio_stack never exposes indeterminate values (iio_unit_id in particular is
   read by operator< when stacks are ordered). */
struct iio_stack {
    /* Bifurcated parts (root port + children) that belong to this stack */
    std::vector<struct iio_bifurcated_part> parts;
    /* Unit id of the stack; this is the key used when ordering stacks */
    uint32_t iio_unit_id = 0;
    /* Human-readable name of the stack */
    std::string stack_name;
    /* Values recorded for this stack (NOTE(review): presumably counter
       readings -- confirm against the code that fills them in) */
    std::vector<uint64_t> values;
    /* NOTE(review): meaning of "flipped" is not visible here -- confirm */
    bool flipped = false;
    /* holding busno for each IIO stack */
    uint8_t busno = 0;
};

/* Orders IIO stacks by ascending iio_unit_id.
   Marked inline because this function is defined in a header: without it,
   including the header from more than one translation unit produces
   multiple-definition errors at link time. */
inline bool operator<(const iio_stack& lh, const iio_stack& rh)
{
    return lh.iio_unit_id < rh.iio_unit_id;
}

/* The collection of IIO stacks that belong to one socket. */
struct iio_stacks_on_socket {
/* All IIO stacks discovered on this socket */
std::vector<struct iio_stack> stacks;
/* Identifier of the socket the stacks belong to */
uint32_t socket_id;
};

bool operator < (const bdf &l, const bdf &r) {
if (l.busno < r.busno)
return true;
Expand Down Expand Up @@ -325,35 +355,38 @@ void probe_capability_pci_express(struct pci *p, uint32_t cap_ptr)
}
}

void probe_pci(struct pci *p)
bool probe_pci(struct pci *p)
{
uint32 value;
p->exist = false;
struct bdf *bdf = &p->bdf;
if (PciHandleType::exists(0, bdf->busno, bdf->devno, bdf->funcno)) {
p->exist = true;
PciHandleType h(0, bdf->busno, bdf->devno, bdf->funcno);
h.read32(0x0, &value); //VID:DID
if (value == (std::numeric_limits<unsigned int>::max)()) // invalid VID::DID
{
p->exist = false;
return;
}
p->offset_0 = value;
h.read32(0xc, &value);
p->header_type = (value >> 16) & 0x7f;
if (p->header_type == 0) {
h.read32(0x4, &value); //Status register
if (value & 0x100000) {//Capability list == true
h.read32(0x34, &value); //Capability pointer
probe_capability_pci_express(p, value);
// VID:DID
h.read32(0x0, &value);
// All-ones means an invalid VID:DID; continue only when the ID is valid
if (value != (std::numeric_limits<unsigned int>::max)()) {
p->offset_0 = value;
h.read32(0xc, &value);
p->header_type = (value >> 16) & 0x7f;
if (p->header_type == 0) {
// Status register
h.read32(0x4, &value);
// Capability list == true
if (value & 0x100000) {
// Capability pointer
h.read32(0x34, &value);
probe_capability_pci_express(p, value);
}
} else if (p->header_type == 1) {
h.read32(0x18, &value);
p->offset_18 = value;
}
} else if (p->header_type == 1) {
h.read32(0x18, &value);
p->offset_18 = value;
p->exist = true;
}
}
else
p->exist = false;

return p->exist;
}

/*
Expand Down
11 changes: 11 additions & 0 deletions msr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
#endif
#include "types.h"
#include "msr.h"
#include "utils.h"
#include <assert.h>

#ifdef _MSC_VER
Expand Down Expand Up @@ -214,6 +215,16 @@ int32 MsrHandle::read(uint64 msr_number, uint64 * value)
// here comes a Linux version
MsrHandle::MsrHandle(uint32 cpu) : fd(-1), cpu_id(cpu)
{
constexpr auto allowWritesPath = "/sys/module/msr/parameters/allow_writes";
static bool writesEnabled = false;
if (writesEnabled == false)
{
if (readSysFS(allowWritesPath, true).length() > 0)
{
writeSysFS(allowWritesPath, "on", false);
}
writesEnabled = true;
}
char * path = new char[200];
snprintf(path, 200, "/dev/cpu/%d/msr", cpu);
int handle = ::open(path, O_RDWR);
Expand Down
18 changes: 1 addition & 17 deletions opCode-106.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,35 +5,19 @@ ctr=0,ev_sel=0x83,umask=0x1,en=1,ch_mask=1,fc_mask=0x7,multiplier=4,divider=1,hn
ctr=1,ev_sel=0x83,umask=0x1,en=1,ch_mask=2,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part1 (2nd x4)
ctr=0,ev_sel=0x83,umask=0x1,en=1,ch_mask=4,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part2 (2nd x8/3rd x4)
ctr=1,ev_sel=0x83,umask=0x1,en=1,ch_mask=8,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part3 (4th x4)
ctr=0,ev_sel=0x83,umask=0x1,en=1,ch_mask=16,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part4 (1st x16/x8/x4)
ctr=1,ev_sel=0x83,umask=0x1,en=1,ch_mask=32,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part5 (2nd x4)
ctr=0,ev_sel=0x83,umask=0x1,en=1,ch_mask=64,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part6 (2nd x8/3rd x4)
ctr=1,ev_sel=0x83,umask=0x1,en=1,ch_mask=128,fc_mask=0x7,multiplier=4,divider=1,hname=IB write,vname=Part7 (4th x4)
ctr=0,ev_sel=0x83,umask=0x4,en=1,ch_mask=1,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part0 (1st x16/x8/x4)
ctr=1,ev_sel=0x83,umask=0x4,en=1,ch_mask=2,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part1 (2nd x4)
ctr=0,ev_sel=0x83,umask=0x4,en=1,ch_mask=4,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part2 (2nd x8/3rd x4)
ctr=1,ev_sel=0x83,umask=0x4,en=1,ch_mask=8,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part3 (4th x4)
ctr=0,ev_sel=0x83,umask=0x4,en=1,ch_mask=16,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part4 (1st x16/x8/x4)
ctr=1,ev_sel=0x83,umask=0x4,en=1,ch_mask=32,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part5 (2nd x4)
ctr=0,ev_sel=0x83,umask=0x4,en=1,ch_mask=64,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part6 (2nd x8/3rd x4)
ctr=1,ev_sel=0x83,umask=0x4,en=1,ch_mask=128,fc_mask=0x7,multiplier=4,divider=1,hname=IB read,vname=Part7 (4th x4)
# Outbound (CPU MMIO to the PCIe device) payload events
ctr=2,ev_sel=0x83,umask=0x80,en=1,ch_mask=1,fc_mask=0x7,multiplier=1,divider=1,hname=OB read,vname=Part0 (1st x16/x8/x4)
ctr=3,ev_sel=0x83,umask=0x80,en=1,ch_mask=2,fc_mask=0x7,multiplier=1,divider=1,hname=OB read,vname=Part1 (2nd x4)
ctr=2,ev_sel=0x83,umask=0x80,en=1,ch_mask=4,fc_mask=0x7,multiplier=1,divider=1,hname=OB read,vname=Part2 (2nd x8/3rd x4)
ctr=3,ev_sel=0x83,umask=0x80,en=1,ch_mask=8,fc_mask=0x7,multiplier=1,divider=1,hname=OB read,vname=Part3 (4th x4)
ctr=2,ev_sel=0x83,umask=0x80,en=1,ch_mask=16,fc_mask=0x7,multiplier=1,divider=1,hname=OB read,vname=Part4 (1st x16/x8/x4)
ctr=3,ev_sel=0x83,umask=0x80,en=1,ch_mask=32,fc_mask=0x7,multiplier=1,divider=1,hname=OB read,vname=Part5 (2nd x4)
ctr=2,ev_sel=0x83,umask=0x80,en=1,ch_mask=64,fc_mask=0x7,multiplier=1,divider=1,hname=OB read,vname=Part6 (2nd x8/3rd x4)
ctr=3,ev_sel=0x83,umask=0x80,en=1,ch_mask=128,fc_mask=0x7,multiplier=1,divider=1,hname=OB read,vname=Part7 (4th x4)
ctr=2,ev_sel=0xc0,umask=0x1,en=1,ch_mask=1,fc_mask=0x7,multiplier=1,divider=1,hname=OB write,vname=Part0 (1st x16/x8/x4)
ctr=3,ev_sel=0xc0,umask=0x1,en=1,ch_mask=2,fc_mask=0x7,multiplier=1,divider=1,hname=OB write,vname=Part1 (2nd x4)
ctr=2,ev_sel=0xc0,umask=0x1,en=1,ch_mask=4,fc_mask=0x7,multiplier=1,divider=1,hname=OB write,vname=Part2 (2nd x8/3rd x4)
ctr=3,ev_sel=0xc0,umask=0x1,en=1,ch_mask=8,fc_mask=0x7,multiplier=1,divider=1,hname=OB write,vname=Part3 (4th x4)
ctr=2,ev_sel=0xc0,umask=0x1,en=1,ch_mask=16,fc_mask=0x7,multiplier=1,divider=1,hname=OB write,vname=Part4 (1st x16/x8/x4)
ctr=3,ev_sel=0xc0,umask=0x1,en=1,ch_mask=32,fc_mask=0x7,multiplier=1,divider=1,hname=OB write,vname=Part5 (2nd x4)
ctr=2,ev_sel=0xc0,umask=0x1,en=1,ch_mask=64,fc_mask=0x7,multiplier=1,divider=1,hname=OB write,vname=Part6 (2nd x8/3rd x4)
ctr=3,ev_sel=0xc0,umask=0x1,en=1,ch_mask=128,fc_mask=0x7,multiplier=1,divider=1,hname=OB write,vname=Part7 (4th x4)
# IOMMU events
ctr=0,ev_sel=0x40,umask=0x02,en=1,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=IOTLB Lookup,vname=Total
ctr=1,ev_sel=0x40,umask=0x20,en=1,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=IOTLB Miss,vname=Total
Expand All @@ -42,4 +26,4 @@ ctr=3,ev_sel=0x41,umask=0x10,en=1,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1
ctr=0,ev_sel=0x41,umask=0x08,en=1,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=1G Cache Hit,vname=Total
ctr=1,ev_sel=0x41,umask=0x04,en=1,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=2M Cache Hit,vname=Total
ctr=2,ev_sel=0x41,umask=0x02,en=1,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=4K Cache Hit,vname=Total
ctr=3,ev_sel=0x41,umask=0x40,en=1,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=IOMMU Mem Access,vname=Total
ctr=3,ev_sel=0x41,umask=0x40,en=1,ch_mask=0x0,fc_mask=0x0,multiplier=1,divider=1,hname=IOMMU Mem Access,vname=Total
Loading

0 comments on commit 09bc9f2

Please sign in to comment.