Skip to content

Commit

Permalink
Add more timing histogram and gauge metrics to the Neon extension (ne…
Browse files Browse the repository at this point in the history
…ondatabase#9116)

We now also track:

- Number of pageserver (PS) IOs in flight
- Number of pages cached by smgr prefetch implementation
- IO timing histograms for LFC reads and writes, per IO issued

## Problem

There's little insight into the timing metrics of the LFC, or into the
prefetch state of each backend.

This changes that, by measuring (and subsequently exposing) these data
points.

## Summary of changes

- Extract IOHistogram as a separate type, rather than a collection of
fields on NeonMetrics
- Other changes: see the items listed above.

Part of neondatabase#8926
  • Loading branch information
MMeent authored Oct 14, 2024
1 parent f54e3e9 commit dab96a6
Show file tree
Hide file tree
Showing 4 changed files with 205 additions and 73 deletions.
27 changes: 25 additions & 2 deletions pgxn/neon/file_cache.c
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
#include "hll.h"
#include "bitmap.h"
#include "neon.h"
#include "neon_perf_counters.h"

/*
 * Check cond and, when it evaluates to false, report the stringified
 * expression together with the source file and line via elog(PANIC).
 * Wrapped in do { ... } while (0) so it can be used as a single statement.
 */
#define CriticalAssert(cond) \
	do { \
		if (!(cond)) \
			elog(PANIC, "Assertion %s failed at %s:%d: ", #cond, __FILE__, __LINE__); \
	} while (0)

Expand Down Expand Up @@ -114,7 +115,9 @@ typedef struct FileCacheControl
uint32 limit; /* shared copy of lfc_size_limit */
uint64 hits;
uint64 misses;
uint64 writes;
uint64 writes; /* number of writes issued */
uint64 time_read; /* time spent reading (us) */
uint64 time_write; /* time spent writing (us) */
dlist_head lru; /* double linked list for LRU replacement
* algorithm */
dlist_head holes; /* double linked list of punched holes */
Expand Down Expand Up @@ -270,6 +273,8 @@ lfc_shmem_startup(void)
lfc_ctl->hits = 0;
lfc_ctl->misses = 0;
lfc_ctl->writes = 0;
lfc_ctl->time_read = 0;
lfc_ctl->time_write = 0;
dlist_init(&lfc_ctl->lru);
dlist_init(&lfc_ctl->holes);

Expand Down Expand Up @@ -701,6 +706,7 @@ lfc_readv_select(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
int blocks_in_chunk = Min(nblocks, BLOCKS_PER_CHUNK - (blkno % BLOCKS_PER_CHUNK));
int iteration_hits = 0;
int iteration_misses = 0;
uint64 io_time_us = 0;
Assert(blocks_in_chunk > 0);

for (int i = 0; i < blocks_in_chunk; i++)
Expand Down Expand Up @@ -795,6 +801,13 @@ lfc_readv_select(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
lfc_ctl->misses += iteration_misses;
pgBufferUsage.file_cache.hits += iteration_hits;
pgBufferUsage.file_cache.misses += iteration_misses;

if (iteration_hits)
{
lfc_ctl->time_read += io_time_us;
inc_page_cache_read_wait(io_time_us);
}

CriticalAssert(entry->access_count > 0);
if (--entry->access_count == 0)
dlist_push_tail(&lfc_ctl->lru, &entry->list_node);
Expand Down Expand Up @@ -859,6 +872,7 @@ lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
struct iovec iov[PG_IOV_MAX];
int chunk_offs = blkno & (BLOCKS_PER_CHUNK - 1);
int blocks_in_chunk = Min(nblocks, BLOCKS_PER_CHUNK - (blkno % BLOCKS_PER_CHUNK));
instr_time io_start, io_end;
Assert(blocks_in_chunk > 0);

for (int i = 0; i < blocks_in_chunk; i++)
Expand Down Expand Up @@ -947,12 +961,13 @@ lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,

generation = lfc_ctl->generation;
entry_offset = entry->offset;
lfc_ctl->writes += blocks_in_chunk;
LWLockRelease(lfc_lock);

pgstat_report_wait_start(WAIT_EVENT_NEON_LFC_WRITE);
INSTR_TIME_SET_CURRENT(io_start);
rc = pwritev(lfc_desc, iov, blocks_in_chunk,
((off_t) entry_offset * BLOCKS_PER_CHUNK + chunk_offs) * BLCKSZ);
INSTR_TIME_SET_CURRENT(io_end);
pgstat_report_wait_end();

if (rc != BLCKSZ * blocks_in_chunk)
Expand All @@ -965,9 +980,17 @@ lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,

if (lfc_ctl->generation == generation)
{
uint64 time_spent_us;
CriticalAssert(LFC_ENABLED());
/* Place entry to the head of LRU list */
CriticalAssert(entry->access_count > 0);

lfc_ctl->writes += blocks_in_chunk;
INSTR_TIME_SUBTRACT(io_start, io_end);
time_spent_us = INSTR_TIME_GET_MICROSEC(io_start);
lfc_ctl->time_write += time_spent_us;
inc_page_cache_write_wait(time_spent_us);

if (--entry->access_count == 0)
dlist_push_tail(&lfc_ctl->lru, &entry->list_node);

Expand Down
174 changes: 111 additions & 63 deletions pgxn/neon/neon_perf_counters.c
Original file line number Diff line number Diff line change
Expand Up @@ -50,28 +50,52 @@ NeonPerfCountersShmemInit(void)
}
}

/*
* Count a GetPage wait operation.
*/
void
inc_getpage_wait(uint64 latency_us)
static inline void
inc_iohist(IOHistogram hist, uint64 latency_us)
{
int lo = 0;
int hi = NUM_GETPAGE_WAIT_BUCKETS - 1;
int hi = NUM_IO_WAIT_BUCKETS - 1;

/* Find the right bucket with binary search */
while (lo < hi)
{
int mid = (lo + hi) / 2;

if (latency_us < getpage_wait_bucket_thresholds[mid])
if (latency_us < io_wait_bucket_thresholds[mid])
hi = mid;
else
lo = mid + 1;
}
MyNeonCounters->getpage_wait_us_bucket[lo]++;
MyNeonCounters->getpage_wait_us_sum += latency_us;
MyNeonCounters->getpage_wait_us_count++;
hist->wait_us_bucket[lo]++;
hist->wait_us_sum += latency_us;
hist->wait_us_count++;
}

/*
 * Record one GetPage wait in the getpage histogram of MyNeonCounters.
 *
 * latency is forwarded unchanged to inc_iohist(), which treats it as a
 * duration in microseconds.
 */
void
inc_getpage_wait(uint64 latency)
{
	inc_iohist(&(MyNeonCounters->getpage_hist), latency);
}

/*
 * Record one LFC read wait in the file_cache_read histogram of
 * MyNeonCounters.
 *
 * latency is forwarded unchanged to inc_iohist(), which treats it as a
 * duration in microseconds.
 */
void
inc_page_cache_read_wait(uint64 latency)
{
	inc_iohist(&(MyNeonCounters->file_cache_read_hist), latency);
}

/*
 * Record one LFC write wait in the file_cache_write histogram of
 * MyNeonCounters.
 *
 * latency is forwarded unchanged to inc_iohist(), which treats it as a
 * duration in microseconds.
 */
void
inc_page_cache_write_wait(uint64 latency)
{
	inc_iohist(&(MyNeonCounters->file_cache_write_hist), latency);
}

/*
Expand All @@ -81,77 +105,91 @@ inc_getpage_wait(uint64 latency_us)

typedef struct
{
char *name;
const char *name;
bool is_bucket;
double bucket_le;
double value;
} metric_t;

static metric_t *
neon_perf_counters_to_metrics(neon_per_backend_counters *counters)
static int
histogram_to_metrics(IOHistogram histogram,
metric_t *metrics,
const char *count,
const char *sum,
const char *bucket)
{
#define NUM_METRICS (2 + NUM_GETPAGE_WAIT_BUCKETS + 8)
metric_t *metrics = palloc((NUM_METRICS + 1) * sizeof(metric_t));
uint64 bucket_accum;
int i = 0;
int i = 0;
uint64 bucket_accum = 0;

metrics[i].name = "getpage_wait_seconds_count";
metrics[i].name = count;
metrics[i].is_bucket = false;
metrics[i].value = (double) counters->getpage_wait_us_count;
metrics[i].value = (double) histogram->wait_us_count;
i++;
metrics[i].name = "getpage_wait_seconds_sum";
metrics[i].name = sum;
metrics[i].is_bucket = false;
metrics[i].value = ((double) counters->getpage_wait_us_sum) / 1000000.0;
metrics[i].value = (double) histogram->wait_us_sum / 1000000.0;
i++;

bucket_accum = 0;
for (int bucketno = 0; bucketno < NUM_GETPAGE_WAIT_BUCKETS; bucketno++)
for (int bucketno = 0; bucketno < NUM_IO_WAIT_BUCKETS; bucketno++)
{
uint64 threshold = getpage_wait_bucket_thresholds[bucketno];
uint64 threshold = io_wait_bucket_thresholds[bucketno];

bucket_accum += counters->getpage_wait_us_bucket[bucketno];
bucket_accum += histogram->wait_us_bucket[bucketno];

metrics[i].name = "getpage_wait_seconds_bucket";
metrics[i].name = bucket;
metrics[i].is_bucket = true;
metrics[i].bucket_le = (threshold == UINT64_MAX) ? INFINITY : ((double) threshold) / 1000000.0;
metrics[i].value = (double) bucket_accum;
i++;
}
metrics[i].name = "getpage_prefetch_requests_total";
metrics[i].is_bucket = false;
metrics[i].value = (double) counters->getpage_prefetch_requests_total;
i++;
metrics[i].name = "getpage_sync_requests_total";
metrics[i].is_bucket = false;
metrics[i].value = (double) counters->getpage_sync_requests_total;
i++;
metrics[i].name = "getpage_prefetch_misses_total";
metrics[i].is_bucket = false;
metrics[i].value = (double) counters->getpage_prefetch_misses_total;
i++;
metrics[i].name = "getpage_prefetch_discards_total";
metrics[i].is_bucket = false;
metrics[i].value = (double) counters->getpage_prefetch_discards_total;
i++;
metrics[i].name = "pageserver_requests_sent_total";
metrics[i].is_bucket = false;
metrics[i].value = (double) counters->pageserver_requests_sent_total;
i++;
metrics[i].name = "pageserver_disconnects_total";
metrics[i].is_bucket = false;
metrics[i].value = (double) counters->pageserver_disconnects_total;
i++;
metrics[i].name = "pageserver_send_flushes_total";
metrics[i].is_bucket = false;
metrics[i].value = (double) counters->pageserver_send_flushes_total;
i++;
metrics[i].name = "file_cache_hits_total";
metrics[i].is_bucket = false;
metrics[i].value = (double) counters->file_cache_hits_total;
i++;

return i;
}

static metric_t *
neon_perf_counters_to_metrics(neon_per_backend_counters *counters)
{
#define NUM_METRICS ((2 + NUM_IO_WAIT_BUCKETS) * 3 + 10)
metric_t *metrics = palloc((NUM_METRICS + 1) * sizeof(metric_t));
int i = 0;

#define APPEND_METRIC(_name) do { \
metrics[i].name = #_name; \
metrics[i].is_bucket = false; \
metrics[i].value = (double) counters->_name; \
i++; \
} while (false)

i += histogram_to_metrics(&counters->getpage_hist, &metrics[i],
"getpage_wait_seconds_count",
"getpage_wait_seconds_sum",
"getpage_wait_seconds_bucket");

APPEND_METRIC(getpage_prefetch_requests_total);
APPEND_METRIC(getpage_sync_requests_total);
APPEND_METRIC(getpage_prefetch_misses_total);
APPEND_METRIC(getpage_prefetch_discards_total);
APPEND_METRIC(pageserver_requests_sent_total);
APPEND_METRIC(pageserver_disconnects_total);
APPEND_METRIC(pageserver_send_flushes_total);
APPEND_METRIC(pageserver_open_requests);
APPEND_METRIC(getpage_prefetches_buffered);

APPEND_METRIC(file_cache_hits_total);

i += histogram_to_metrics(&counters->file_cache_read_hist, &metrics[i],
"file_cache_read_wait_seconds_count",
"file_cache_read_wait_seconds_sum",
"file_cache_read_wait_seconds_bucket");
i += histogram_to_metrics(&counters->file_cache_write_hist, &metrics[i],
"file_cache_write_wait_seconds_count",
"file_cache_write_wait_seconds_sum",
"file_cache_write_wait_seconds_bucket");

Assert(i == NUM_METRICS);

#undef APPEND_METRIC
#undef NUM_METRICS

/* NULL entry marks end of array */
metrics[i].name = NULL;
metrics[i].value = 0;
Expand Down Expand Up @@ -216,6 +254,15 @@ neon_get_backend_perf_counters(PG_FUNCTION_ARGS)
return (Datum) 0;
}

/*
 * Accumulate one IO histogram into another.
 *
 * Every per-bucket counter of "from" is added to the matching bucket of
 * "into", followed by the summed wait time and the observation count.
 */
static inline void
histogram_merge_into(IOHistogram into, IOHistogram from)
{
	int			idx = 0;

	while (idx < NUM_IO_WAIT_BUCKETS)
	{
		into->wait_us_bucket[idx] += from->wait_us_bucket[idx];
		idx++;
	}

	into->wait_us_sum += from->wait_us_sum;
	into->wait_us_count += from->wait_us_count;
}

PG_FUNCTION_INFO_V1(neon_get_perf_counters);
Datum
neon_get_perf_counters(PG_FUNCTION_ARGS)
Expand All @@ -234,18 +281,19 @@ neon_get_perf_counters(PG_FUNCTION_ARGS)
{
neon_per_backend_counters *counters = &neon_per_backend_counters_shared[procno];

totals.getpage_wait_us_count += counters->getpage_wait_us_count;
totals.getpage_wait_us_sum += counters->getpage_wait_us_sum;
for (int bucketno = 0; bucketno < NUM_GETPAGE_WAIT_BUCKETS; bucketno++)
totals.getpage_wait_us_bucket[bucketno] += counters->getpage_wait_us_bucket[bucketno];
histogram_merge_into(&totals.getpage_hist, &counters->getpage_hist);
totals.getpage_prefetch_requests_total += counters->getpage_prefetch_requests_total;
totals.getpage_sync_requests_total += counters->getpage_sync_requests_total;
totals.getpage_prefetch_misses_total += counters->getpage_prefetch_misses_total;
totals.getpage_prefetch_discards_total += counters->getpage_prefetch_discards_total;
totals.pageserver_requests_sent_total += counters->pageserver_requests_sent_total;
totals.pageserver_disconnects_total += counters->pageserver_disconnects_total;
totals.pageserver_send_flushes_total += counters->pageserver_send_flushes_total;
totals.pageserver_open_requests += counters->pageserver_open_requests;
totals.getpage_prefetches_buffered += counters->getpage_prefetches_buffered;
totals.file_cache_hits_total += counters->file_cache_hits_total;
histogram_merge_into(&totals.file_cache_read_hist, &counters->file_cache_read_hist);
histogram_merge_into(&totals.file_cache_write_hist, &counters->file_cache_write_hist);
}

metrics = neon_perf_counters_to_metrics(&totals);
Expand Down
Loading

0 comments on commit dab96a6

Please sign in to comment.