Add more timing histogram and gauge metrics to the Neon extension (ne…

…ondatabase#9116) We now also track: - Number of PS IOs in-flight - Number of pages cached by smgr prefetch implementation - IO timing histograms for LFC reads and writes, per IO issued ## Problem There's little insight into the timing metrics of LFC, and what the prefetch state of each backend is. This changes that, by measuring (and subsequently exposing) these data points. ## Summary of changes - Extract IOHistogram as separate type, rather than a collection of fields on NeonMetrics - others, see items above. Part of neondatabase#8926
lyang24 · Oct 14, 2024 · dab96a6 · dab96a6
1 parent f54e3e9
commit dab96a6
Show file tree

Hide file tree

Showing 4 changed files with 205 additions and 73 deletions.
diff --git a/pgxn/neon/file_cache.c b/pgxn/neon/file_cache.c
@@ -43,6 +43,7 @@
 #include "hll.h"
 #include "bitmap.h"
 #include "neon.h"
+#include "neon_perf_counters.h"
 
 #define CriticalAssert(cond) do if (!(cond)) elog(PANIC, "Assertion %s failed at %s:%d: ", #cond, __FILE__, __LINE__); while (0)
 
@@ -114,7 +115,9 @@ typedef struct FileCacheControl
 	uint32		limit;			/* shared copy of lfc_size_limit */
 	uint64		hits;
 	uint64		misses;
-	uint64		writes;
+	uint64		writes;			/* number of writes issued */
+	uint64		time_read;		/* time spent reading (us) */
+	uint64		time_write;		/* time spent writing (us) */
 	dlist_head	lru;			/* double linked list for LRU replacement
 								 * algorithm */
 	dlist_head  holes;          /* double linked list of punched holes */
@@ -270,6 +273,8 @@ lfc_shmem_startup(void)
 		lfc_ctl->hits = 0;
 		lfc_ctl->misses = 0;
 		lfc_ctl->writes = 0;
+		lfc_ctl->time_read = 0;
+		lfc_ctl->time_write = 0;
 		dlist_init(&lfc_ctl->lru);
 		dlist_init(&lfc_ctl->holes);
 
@@ -701,6 +706,7 @@ lfc_readv_select(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
 		int		blocks_in_chunk = Min(nblocks, BLOCKS_PER_CHUNK - (blkno % BLOCKS_PER_CHUNK));
 		int		iteration_hits = 0;
 		int		iteration_misses = 0;
+		uint64	io_time_us = 0;
 		Assert(blocks_in_chunk > 0);
 
 		for (int i = 0; i < blocks_in_chunk; i++)
@@ -795,6 +801,13 @@ lfc_readv_select(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
 			lfc_ctl->misses += iteration_misses;
 			pgBufferUsage.file_cache.hits += iteration_hits;
 			pgBufferUsage.file_cache.misses += iteration_misses;
+
+			if (iteration_hits)
+			{
+				lfc_ctl->time_read += io_time_us;
+				inc_page_cache_read_wait(io_time_us);
+			}
+
 			CriticalAssert(entry->access_count > 0);
 			if (--entry->access_count == 0)
 				dlist_push_tail(&lfc_ctl->lru, &entry->list_node);
@@ -859,6 +872,7 @@ lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
 		struct iovec iov[PG_IOV_MAX];
 		int		chunk_offs = blkno & (BLOCKS_PER_CHUNK - 1);
 		int		blocks_in_chunk = Min(nblocks, BLOCKS_PER_CHUNK - (blkno % BLOCKS_PER_CHUNK));
+		instr_time io_start, io_end;
 		Assert(blocks_in_chunk > 0);
 
 		for (int i = 0; i < blocks_in_chunk; i++)
@@ -947,12 +961,13 @@ lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
 
 		generation = lfc_ctl->generation;
 		entry_offset = entry->offset;
-		lfc_ctl->writes += blocks_in_chunk;
 		LWLockRelease(lfc_lock);
 
 		pgstat_report_wait_start(WAIT_EVENT_NEON_LFC_WRITE);
+		INSTR_TIME_SET_CURRENT(io_start);
 		rc = pwritev(lfc_desc, iov, blocks_in_chunk,
 					 ((off_t) entry_offset * BLOCKS_PER_CHUNK + chunk_offs) * BLCKSZ);
+		INSTR_TIME_SET_CURRENT(io_end);
 		pgstat_report_wait_end();
 
 		if (rc != BLCKSZ * blocks_in_chunk)
@@ -965,9 +980,17 @@ lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
 
 			if (lfc_ctl->generation == generation)
 			{
+				uint64	time_spent_us;
 				CriticalAssert(LFC_ENABLED());
 				/* Place entry to the head of LRU list */
 				CriticalAssert(entry->access_count > 0);
+
+				lfc_ctl->writes += blocks_in_chunk;
+				INSTR_TIME_SUBTRACT(io_start, io_end);
+				time_spent_us = INSTR_TIME_GET_MICROSEC(io_start);
+				lfc_ctl->time_write += time_spent_us;
+				inc_page_cache_write_wait(time_spent_us);
+
 				if (--entry->access_count == 0)
 					dlist_push_tail(&lfc_ctl->lru, &entry->list_node);
 

diff --git a/pgxn/neon/neon_perf_counters.c b/pgxn/neon/neon_perf_counters.c
@@ -50,28 +50,52 @@ NeonPerfCountersShmemInit(void)
 	}
 }
 
-/*
- * Count a GetPage wait operation.
- */
-void
-inc_getpage_wait(uint64 latency_us)
+static inline void
+inc_iohist(IOHistogram hist, uint64 latency_us)
 {
 	int			lo = 0;
-	int			hi = NUM_GETPAGE_WAIT_BUCKETS - 1;
+	int			hi = NUM_IO_WAIT_BUCKETS - 1;
 
 	/* Find the right bucket with binary search */
 	while (lo < hi)
 	{
 		int			mid = (lo + hi) / 2;
 
-		if (latency_us < getpage_wait_bucket_thresholds[mid])
+		if (latency_us < io_wait_bucket_thresholds[mid])
 			hi = mid;
 		else
 			lo = mid + 1;
 	}
-	MyNeonCounters->getpage_wait_us_bucket[lo]++;
-	MyNeonCounters->getpage_wait_us_sum += latency_us;
-	MyNeonCounters->getpage_wait_us_count++;
+	hist->wait_us_bucket[lo]++;
+	hist->wait_us_sum += latency_us;
+	hist->wait_us_count++;
+}
+
+/*
+ * Count a GetPage wait operation.
+ */
+void
+inc_getpage_wait(uint64 latency)
+{
+	inc_iohist(&MyNeonCounters->getpage_hist, latency);
+}
+
+/*
+ * Count an LFC read wait operation.
+ */
+void
+inc_page_cache_read_wait(uint64 latency)
+{
+	inc_iohist(&MyNeonCounters->file_cache_read_hist, latency);
+}
+
+/*
+ * Count an LFC write wait operation.
+ */
+void
+inc_page_cache_write_wait(uint64 latency)
+{
+	inc_iohist(&MyNeonCounters->file_cache_write_hist, latency);
 }
 
 /*
@@ -81,77 +105,91 @@ inc_getpage_wait(uint64 latency_us)
 
 typedef struct
 {
-	char	   *name;
+	const char *name;
 	bool		is_bucket;
 	double		bucket_le;
 	double		value;
 } metric_t;
 
-static metric_t *
-neon_perf_counters_to_metrics(neon_per_backend_counters *counters)
+static int
+histogram_to_metrics(IOHistogram histogram,
+					 metric_t *metrics,
+					 const char *count,
+					 const char *sum,
+					 const char *bucket)
 {
-#define NUM_METRICS (2 + NUM_GETPAGE_WAIT_BUCKETS + 8)
-	metric_t   *metrics = palloc((NUM_METRICS + 1) * sizeof(metric_t));
-	uint64		bucket_accum;
-	int			i = 0;
+	int		i = 0;
+	uint64	bucket_accum = 0;
 
-	metrics[i].name = "getpage_wait_seconds_count";
+	metrics[i].name = count;
 	metrics[i].is_bucket = false;
-	metrics[i].value = (double) counters->getpage_wait_us_count;
+	metrics[i].value = (double) histogram->wait_us_count;
 	i++;
-	metrics[i].name = "getpage_wait_seconds_sum";
+	metrics[i].name = sum;
 	metrics[i].is_bucket = false;
-	metrics[i].value = ((double) counters->getpage_wait_us_sum) / 1000000.0;
+	metrics[i].value = (double) histogram->wait_us_sum / 1000000.0;
 	i++;
-
-	bucket_accum = 0;
-	for (int bucketno = 0; bucketno < NUM_GETPAGE_WAIT_BUCKETS; bucketno++)
+	for (int bucketno = 0; bucketno < NUM_IO_WAIT_BUCKETS; bucketno++)
 	{
-		uint64		threshold = getpage_wait_bucket_thresholds[bucketno];
+		uint64		threshold = io_wait_bucket_thresholds[bucketno];
 
-		bucket_accum += counters->getpage_wait_us_bucket[bucketno];
+		bucket_accum += histogram->wait_us_bucket[bucketno];
 
-		metrics[i].name = "getpage_wait_seconds_bucket";
+		metrics[i].name = bucket;
 		metrics[i].is_bucket = true;
 		metrics[i].bucket_le = (threshold == UINT64_MAX) ? INFINITY : ((double) threshold) / 1000000.0;
 		metrics[i].value = (double) bucket_accum;
 		i++;
 	}
-	metrics[i].name = "getpage_prefetch_requests_total";
-	metrics[i].is_bucket = false;
-	metrics[i].value = (double) counters->getpage_prefetch_requests_total;
-	i++;
-	metrics[i].name = "getpage_sync_requests_total";
-	metrics[i].is_bucket = false;
-	metrics[i].value = (double) counters->getpage_sync_requests_total;
-	i++;
-	metrics[i].name = "getpage_prefetch_misses_total";
-	metrics[i].is_bucket = false;
-	metrics[i].value = (double) counters->getpage_prefetch_misses_total;
-	i++;
-	metrics[i].name = "getpage_prefetch_discards_total";
-	metrics[i].is_bucket = false;
-	metrics[i].value = (double) counters->getpage_prefetch_discards_total;
-	i++;
-	metrics[i].name = "pageserver_requests_sent_total";
-	metrics[i].is_bucket = false;
-	metrics[i].value = (double) counters->pageserver_requests_sent_total;
-	i++;
-	metrics[i].name = "pageserver_disconnects_total";
-	metrics[i].is_bucket = false;
-	metrics[i].value = (double) counters->pageserver_disconnects_total;
-	i++;
-	metrics[i].name = "pageserver_send_flushes_total";
-	metrics[i].is_bucket = false;
-	metrics[i].value = (double) counters->pageserver_send_flushes_total;
-	i++;
-	metrics[i].name = "file_cache_hits_total";
-	metrics[i].is_bucket = false;
-	metrics[i].value = (double) counters->file_cache_hits_total;
-	i++;
+
+	return i;
+}
+
+static metric_t *
+neon_perf_counters_to_metrics(neon_per_backend_counters *counters)
+{
+#define NUM_METRICS ((2 + NUM_IO_WAIT_BUCKETS) * 3 + 10)
+	metric_t   *metrics = palloc((NUM_METRICS + 1) * sizeof(metric_t));
+	int			i = 0;
+
+#define APPEND_METRIC(_name) do { \
+		metrics[i].name = #_name; \
+		metrics[i].is_bucket = false; \
+		metrics[i].value = (double) counters->_name; \
+		i++; \
+	} while (false)
+
+	i += histogram_to_metrics(&counters->getpage_hist, &metrics[i],
+							  "getpage_wait_seconds_count",
+							  "getpage_wait_seconds_sum",
+							  "getpage_wait_seconds_bucket");
+
+	APPEND_METRIC(getpage_prefetch_requests_total);
+	APPEND_METRIC(getpage_sync_requests_total);
+	APPEND_METRIC(getpage_prefetch_misses_total);
+	APPEND_METRIC(getpage_prefetch_discards_total);
+	APPEND_METRIC(pageserver_requests_sent_total);
+	APPEND_METRIC(pageserver_disconnects_total);
+	APPEND_METRIC(pageserver_send_flushes_total);
+	APPEND_METRIC(pageserver_open_requests);
+	APPEND_METRIC(getpage_prefetches_buffered);
+
+	APPEND_METRIC(file_cache_hits_total);
+
+	i += histogram_to_metrics(&counters->file_cache_read_hist, &metrics[i],
+							  "file_cache_read_wait_seconds_count",
+							  "file_cache_read_wait_seconds_sum",
+							  "file_cache_read_wait_seconds_bucket");
+	i += histogram_to_metrics(&counters->file_cache_write_hist, &metrics[i],
+							  "file_cache_write_wait_seconds_count",
+							  "file_cache_write_wait_seconds_sum",
+							  "file_cache_write_wait_seconds_bucket");
 
 	Assert(i == NUM_METRICS);
 
+#undef APPEND_METRIC
+#undef NUM_METRICS
+
 	/* NULL entry marks end of array */
 	metrics[i].name = NULL;
 	metrics[i].value = 0;
@@ -216,6 +254,15 @@ neon_get_backend_perf_counters(PG_FUNCTION_ARGS)
 	return (Datum) 0;
 }
 
+static inline void
+histogram_merge_into(IOHistogram into, IOHistogram from)
+{
+	into->wait_us_count += from->wait_us_count;
+	into->wait_us_sum += from->wait_us_sum;
+	for (int bucketno = 0; bucketno < NUM_IO_WAIT_BUCKETS; bucketno++)
+		into->wait_us_bucket[bucketno] += from->wait_us_bucket[bucketno];
+}
+
 PG_FUNCTION_INFO_V1(neon_get_perf_counters);
 Datum
 neon_get_perf_counters(PG_FUNCTION_ARGS)
@@ -234,18 +281,19 @@ neon_get_perf_counters(PG_FUNCTION_ARGS)
 	{
 		neon_per_backend_counters *counters = &neon_per_backend_counters_shared[procno];
 
-		totals.getpage_wait_us_count += counters->getpage_wait_us_count;
-		totals.getpage_wait_us_sum += counters->getpage_wait_us_sum;
-		for (int bucketno = 0; bucketno < NUM_GETPAGE_WAIT_BUCKETS; bucketno++)
-			totals.getpage_wait_us_bucket[bucketno] += counters->getpage_wait_us_bucket[bucketno];
+		histogram_merge_into(&totals.getpage_hist, &counters->getpage_hist);
 		totals.getpage_prefetch_requests_total += counters->getpage_prefetch_requests_total;
 		totals.getpage_sync_requests_total += counters->getpage_sync_requests_total;
 		totals.getpage_prefetch_misses_total += counters->getpage_prefetch_misses_total;
 		totals.getpage_prefetch_discards_total += counters->getpage_prefetch_discards_total;
 		totals.pageserver_requests_sent_total += counters->pageserver_requests_sent_total;
 		totals.pageserver_disconnects_total += counters->pageserver_disconnects_total;
 		totals.pageserver_send_flushes_total += counters->pageserver_send_flushes_total;
+		totals.pageserver_open_requests += counters->pageserver_open_requests;
+		totals.getpage_prefetches_buffered += counters->getpage_prefetches_buffered;
 		totals.file_cache_hits_total += counters->file_cache_hits_total;
+		histogram_merge_into(&totals.file_cache_read_hist, &counters->file_cache_read_hist);
+		histogram_merge_into(&totals.file_cache_write_hist, &counters->file_cache_write_hist);
 	}
 
 	metrics = neon_perf_counters_to_metrics(&totals);