diff --git a/src/client/dfs/SConscript b/src/client/dfs/SConscript
index 5b98d6d7e8a..906b1bfd7d4 100644
--- a/src/client/dfs/SConscript
+++ b/src/client/dfs/SConscript
@@ -44,7 +44,7 @@ def scons():
     libraries = ['daos_common', 'daos', 'uuid', 'gurt']
 
     dfs_src = ['common.c', 'cont.c', 'dir.c', 'file.c', 'io.c', 'lookup.c', 'mnt.c', 'obj.c',
-               'pipeline.c', 'readdir.c', 'rename.c', 'xattr.c', 'dfs_sys.c']
+               'pipeline.c', 'readdir.c', 'rename.c', 'xattr.c', 'dfs_sys.c', 'metrics.c']
     dfs = denv.d_library('dfs', dfs_src, LIBS=libraries)
     denv.Install('$PREFIX/lib64/', dfs)
 
diff --git a/src/client/dfs/dfs_internal.h b/src/client/dfs/dfs_internal.h
index 7425fc2f00d..41be576c349 100644
--- a/src/client/dfs/dfs_internal.h
+++ b/src/client/dfs/dfs_internal.h
@@ -15,6 +15,8 @@
 #include
 #include
 
+#include "metrics.h"
+
 /** D-key name of SB metadata */
 #define SB_DKEY "DFS_SB_METADATA"
 
@@ -190,6 +192,8 @@ struct dfs {
 	struct dfs_mnt_hdls *cont_hdl;
 	/** the root dir stat buf */
 	struct stat root_stbuf;
+	/** DFS top-level metrics */
+	struct dfs_metrics *metrics;
 };
 
 struct dfs_entry {
diff --git a/src/client/dfs/dir.c b/src/client/dfs/dir.c
index 000c0625f58..6194d37e12b 100644
--- a/src/client/dfs/dir.c
+++ b/src/client/dfs/dir.c
@@ -65,6 +65,7 @@ dfs_mkdir(dfs_t *dfs, dfs_obj_t *parent, const char *name, mode_t mode, daos_ocl
 	if (rc != 0)
 		return daos_der2errno(rc);
 
+	DFS_OP_STAT_INCR(dfs, DOS_MKDIR);
 	return rc;
 }
 
@@ -220,6 +221,7 @@ dfs_remove(dfs_t *dfs, dfs_obj_t *parent, const char *name, bool force, daos_obj
 	if (oid)
 		oid_cp(oid, entry.oid);
 
+	DFS_OP_STAT_INCR(dfs, DOS_REMOVE);
 out:
 	rc = check_tx(th, rc);
 	if (rc == ERESTART)
diff --git a/src/client/dfs/io.c b/src/client/dfs/io.c
index 3919d8cfe19..f964b52ca36 100644
--- a/src/client/dfs/io.c
+++ b/src/client/dfs/io.c
@@ -41,6 +41,22 @@ read_cb(tse_task_t *task, void *data)
 	return rc;
 }
 
+/**
+ * Add the given byte counts to the per-container read/write gauges. A zero
+ * count is skipped, and the call does nothing when dfs or its metrics are NULL.
+ */
+static void
+dfs_update_file_metrics(dfs_t *dfs, daos_size_t read_bytes, daos_size_t write_bytes)
+{
+	if (dfs == NULL || dfs->metrics == NULL)
+		return;
+
+	if (read_bytes > 0)
+		d_tm_inc_gauge(dfs->metrics->dm_read_bytes, read_bytes);
+	if (write_bytes > 0)
+		d_tm_inc_gauge(dfs->metrics->dm_write_bytes, write_bytes);
+}
+
 static int
 dfs_read_int(dfs_t *dfs, dfs_obj_t *obj, daos_off_t off, dfs_iod_t *iod, d_sg_list_t *sgl,
 	     daos_size_t buf_size, daos_size_t *read_size, daos_event_t *ev)
@@ -85,11 +101,12 @@ dfs_read_int(dfs_t *dfs, dfs_obj_t *obj, daos_off_t off, dfs_iod_t *iod, d_sg_li
 	if (rc)
 		D_GOTO(err_params, rc);
 
+	DFS_OP_STAT_INCR(dfs, DOS_READ);
 	/*
 	 * dc_task_schedule() calls tse_task_complete() even on error (which also calls the
 	 * completion cb that frees params in this case, so we can just ignore the rc here.
 	 */
 	dc_task_schedule(task, true);
 	return 0;
 
 err_params:
@@ -125,6 +142,7 @@ dfs_read(dfs_t *dfs, dfs_obj_t *obj, d_sg_list_t *sgl, daos_off_t off, daos_size
 			daos_event_launch(ev);
 			daos_event_complete(ev, 0);
 		}
+		DFS_OP_STAT_INCR(dfs, DOS_READ);
 		return 0;
 	}
 
@@ -146,7 +164,9 @@ dfs_read(dfs_t *dfs, dfs_obj_t *obj, d_sg_list_t *sgl, daos_off_t off, daos_size
 		return daos_der2errno(rc);
 	}
 
+	DFS_OP_STAT_INCR(dfs, DOS_READ);
 	*read_size = iod.arr_nr_read;
+	dfs_update_file_metrics(dfs, iod.arr_nr_read, 0);
 	return 0;
 }
 
@@ -173,6 +193,7 @@ dfs_readx(dfs_t *dfs, dfs_obj_t *obj, dfs_iod_t *iod, d_sg_list_t *sgl, daos_siz
 			daos_event_launch(ev);
 			daos_event_complete(ev, 0);
 		}
+		DFS_OP_STAT_INCR(dfs, DOS_READX);
 		return 0;
 	}
 
@@ -189,7 +210,9 @@ dfs_readx(dfs_t *dfs, dfs_obj_t *obj, dfs_iod_t *iod, d_sg_list_t *sgl, daos_siz
 		return daos_der2errno(rc);
 	}
 
+	DFS_OP_STAT_INCR(dfs, DOS_READX);
 	*read_size = arr_iod.arr_nr_read;
+	dfs_update_file_metrics(dfs, arr_iod.arr_nr_read, 0);
 	return 0;
 }
 
@@ -223,6 +246,7 @@ dfs_write(dfs_t *dfs, dfs_obj_t *obj, d_sg_list_t *sgl, daos_off_t off, daos_eve
 			daos_event_launch(ev);
 			daos_event_complete(ev, 0);
 		}
+		DFS_OP_STAT_INCR(dfs, DOS_WRITE);
 		return 0;
 	}
 
@@ -238,8 +262,12 @@ dfs_write(dfs_t *dfs, dfs_obj_t *obj, d_sg_list_t *sgl, daos_off_t off, daos_eve
 		daos_event_errno_rc(ev);
 
 	rc = daos_array_write(obj->oh, DAOS_TX_NONE, &iod, sgl, ev);
-	if (rc)
+	if (rc == 0) {
+		DFS_OP_STAT_INCR(dfs, DOS_WRITE);
+		dfs_update_file_metrics(dfs, 0, buf_size);
+	} else {
 		D_ERROR("daos_array_write() failed, " DF_RC "\n", DP_RC(rc));
+	}
 
 	return daos_der2errno(rc);
 }
@@ -266,6 +294,7 @@ dfs_writex(dfs_t *dfs, dfs_obj_t *obj, dfs_iod_t *iod, d_sg_list_t *sgl, daos_ev
 			daos_event_launch(ev);
 			daos_event_complete(ev, 0);
 		}
+		DFS_OP_STAT_INCR(dfs, DOS_WRITEX);
 		return 0;
 	}
 
@@ -277,8 +306,11 @@ dfs_writex(dfs_t *dfs, dfs_obj_t *obj, dfs_iod_t *iod, d_sg_list_t *sgl, daos_ev
 		daos_event_errno_rc(ev);
 
 	rc = daos_array_write(obj->oh, DAOS_TX_NONE, &arr_iod, sgl, ev);
-	if (rc)
+	if (rc == 0) {
+		DFS_OP_STAT_INCR(dfs, DOS_WRITEX);
+	} else {
 		D_ERROR("daos_array_write() failed (%d)\n", rc);
+	}
 
 	return daos_der2errno(rc);
 }
diff --git a/src/client/dfs/metrics.c b/src/client/dfs/metrics.c
new file mode 100644
index 00000000000..874b2ceb5ad
--- /dev/null
+++ b/src/client/dfs/metrics.c
@@ -0,0 +1,172 @@
+/**
+ * (C) Copyright 2024 Intel Corporation.
+ *
+ * SPDX-License-Identifier: BSD-2-Clause-Patent
+ */
+#define D_LOGFAC DD_FAC(dfs)
+
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "metrics.h"
+#include "dfs_internal.h"
+
+#define DFS_METRICS_ROOT "dfs"
+
+#define STAT_METRICS_SIZE (D_TM_METRIC_SIZE * DOS_LIMIT)
+#define FILE_METRICS_SIZE (((D_TM_METRIC_SIZE * NR_SIZE_BUCKETS) * 2) + D_TM_METRIC_SIZE * 2)
+#define DFS_METRICS_SIZE (STAT_METRICS_SIZE + FILE_METRICS_SIZE)
+
+#define SPRINTF_CONT_PATH(buf, cont_uuid, path) \
+	snprintf(buf, sizeof(buf), "container/" DF_UUIDF "/%s", DP_UUID(cont_uuid), path);
+
+#define ADD_STAT_METRIC(name, ...) \
+	SPRINTF_CONT_PATH(tmp_path, cont_uuid, DFS_METRICS_ROOT "/ops/" #name); \
+	rc = d_tm_add_metric(&metrics->dm_op_stats[i], D_TM_COUNTER, "Count of " #name " calls", \
+			     "calls", tmp_path); \
+	if (rc != 0) { \
+		DL_ERROR(rc, "failed to create " #name " counter"); \
+		return; \
+	} \
+	i++;
+
+/** Register one call counter per entry of D_FOREACH_DFS_OP_STAT; stops at the first failure. */
+static void
+op_stats_init(struct dfs_metrics *metrics, uuid_t cont_uuid)
+{
+	char tmp_path[D_TM_MAX_NAME_LEN] = {0};
+	int i = 0;
+	int rc;
+
+	if (metrics == NULL)
+		return;
+
+	D_FOREACH_DFS_OP_STAT(ADD_STAT_METRIC);
+}
+
+/** Register the container mount-time timestamp metric. */
+static void
+cont_stats_init(struct dfs_metrics *metrics, uuid_t cont_uuid)
+{
+	char tmp_path[D_TM_MAX_NAME_LEN] = {0};
+	int rc = 0;
+
+	if (metrics == NULL)
+		return;
+
+	SPRINTF_CONT_PATH(tmp_path, cont_uuid, "mount_time");
+	rc = d_tm_add_metric(&metrics->dm_mount_time, D_TM_TIMESTAMP, "container mount time", NULL,
+			     tmp_path);
+	if (rc != 0)
+		DL_ERROR(rc, "failed to create mount_time timestamp");
+}
+
+/** Register the read/write byte gauges and attach an I/O size histogram to each. */
+static void
+file_stats_init(struct dfs_metrics *metrics, uuid_t cont_uuid)
+{
+	char tmp_path[D_TM_MAX_NAME_LEN] = {0};
+	int rc = 0;
+
+	if (metrics == NULL)
+		return;
+
+	SPRINTF_CONT_PATH(tmp_path, cont_uuid, DFS_METRICS_ROOT "/read_bytes");
+	rc = d_tm_add_metric(&metrics->dm_read_bytes, D_TM_STATS_GAUGE, "dfs read bytes", "bytes",
+			     tmp_path);
+	if (rc != 0) {
+		DL_ERROR(rc, "failed to create dfs read_bytes counter");
+	} else {
+		/* only attach a histogram to a gauge that was actually created */
+		rc = d_tm_init_histogram(metrics->dm_read_bytes, tmp_path, NR_SIZE_BUCKETS, 256, 2,
+					 "bytes");
+		if (rc != 0)
+			DL_ERROR(rc, "Failed to init dfs read size histogram");
+	}
+
+	SPRINTF_CONT_PATH(tmp_path, cont_uuid, DFS_METRICS_ROOT "/write_bytes");
+	rc = d_tm_add_metric(&metrics->dm_write_bytes, D_TM_STATS_GAUGE, "dfs write bytes", "bytes",
+			     tmp_path);
+	if (rc != 0) {
+		DL_ERROR(rc, "failed to create dfs write_bytes counter");
+	} else {
+		rc = d_tm_init_histogram(metrics->dm_write_bytes, tmp_path, NR_SIZE_BUCKETS, 256, 2,
+					 "bytes");
+		if (rc != 0)
+			DL_ERROR(rc, "Failed to init dfs write size histogram");
+	}
+}
+
+/**
+ * Set up the per-container DFS metrics for a mounted file system. Nothing is
+ * returned: failures are logged and the metrics state is released.
+ */
+void
+dfs_metrics_init(dfs_t *dfs)
+{
+	uuid_t cont_uuid;
+	char root_name[D_TM_MAX_NAME_LEN];
+	pid_t pid = getpid();
+	size_t root_size = DFS_METRICS_SIZE + (D_TM_METRIC_SIZE * 3);
+	int rc;
+
+	if (dfs == NULL)
+		return;
+
+	rc = dc_cont_hdl2uuid(dfs->coh, NULL, &cont_uuid);
+	if (rc != 0) {
+		DL_ERROR(rc, "failed to get container UUID");
+		goto error;
+	}
+
+	snprintf(root_name, sizeof(root_name), "%d", pid);
+	/* if only container-level metrics are enabled; this will init a root for them */
+	rc = d_tm_init_with_name(d_tm_cli_pid_key(pid), root_size, D_TM_OPEN_OR_CREATE, root_name);
+	if (rc != 0 && rc != -DER_ALREADY) {
+		DL_ERROR(rc, "failed to init DFS metrics");
+		goto error;
+	}
+
+	D_ALLOC_PTR(dfs->metrics);
+	if (dfs->metrics == NULL) {
+		D_ERROR("failed to alloc DFS metrics\n");
+		goto error;
+	}
+
+	SPRINTF_CONT_PATH(root_name, cont_uuid, DFS_METRICS_ROOT);
+	rc = d_tm_add_ephemeral_dir(NULL, DFS_METRICS_SIZE, root_name);
+	if (rc != 0) {
+		DL_ERROR(rc, "failed to add DFS metrics dir");
+		goto error;
+	}
+
+	cont_stats_init(dfs->metrics, cont_uuid);
+	op_stats_init(dfs->metrics, cont_uuid);
+	file_stats_init(dfs->metrics, cont_uuid);
+
+	d_tm_record_timestamp(dfs->metrics->dm_mount_time);
+	return;
+
+error:
+	/* reached both before and after the allocation; dfs_metrics_fini() frees unguarded too */
+	D_FREE(dfs->metrics);
+}
+
+/** Free the state allocated by dfs_metrics_init(). */
+void
+dfs_metrics_fini(dfs_t *dfs)
+{
+	if (dfs == NULL)
+		return;
+
+	D_FREE(dfs->metrics);
+}
diff --git a/src/client/dfs/metrics.h b/src/client/dfs/metrics.h
new file mode 100644
index 00000000000..a9d676d4891
--- /dev/null
+++ b/src/client/dfs/metrics.h
@@ -0,0 +1,95 @@
+/**
+ * (C) Copyright 2024 Intel Corporation.
+ *
+ * SPDX-License-Identifier: BSD-2-Clause-Patent
+ */
+#ifndef __DFS_METRICS_H__
+#define __DFS_METRICS_H__
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#include
+#include
+#include
+#include
+
+/*
+ * Report read/write counts on a per-I/O size.
+ * Buckets start at [0; 256B[ and are increased by power of 2
+ * (i.e. [256B; 512B[, [512B; 1KB[) up to [4MB; infinity[
+ * Since 4MB = 2^22 and 256B = 2^8, this means
+ * (22 - 8 + 1) = 15 buckets plus the 4MB+ bucket, so
+ * 16 buckets in total.
+ */
+#define NR_SIZE_BUCKETS 16
+
+/* define a set of ops that we'll count if metrics are enabled */
+#define D_FOREACH_DFS_OP_STAT(ACTION) \
+	ACTION(OPEN) \
+	ACTION(CREATE) \
+	ACTION(RELEASE) \
+	ACTION(READ) \
+	ACTION(READX) \
+	ACTION(WRITE) \
+	ACTION(WRITEX) \
+	ACTION(GETSIZE) \
+	ACTION(PUNCH) \
+	ACTION(READDIR) \
+	ACTION(MKDIR) \
+	ACTION(REMOVE) \
+	ACTION(MOVE) \
+	ACTION(STAT) \
+	ACTION(OSTAT) \
+	ACTION(OSTATX) \
+	ACTION(SETATTR) \
+	ACTION(CHMOD) \
+	ACTION(CHOWN) \
+	ACTION(SYNC) \
+	ACTION(SETXATTR) \
+	ACTION(GETXATTR) \
+	ACTION(RMXATTR) \
+	ACTION(LSXATTR)
+
+#define DFS_OP_STAT_DEFINE(name, ...) DOS_##name,
+
+enum dfs_op_stat {
+	D_FOREACH_DFS_OP_STAT(DFS_OP_STAT_DEFINE) DOS_LIMIT,
+};
+
+/*
+ * Bump the call counter for one op; does nothing when the mount has no
+ * metrics. Wrapped in do/while (0) so it expands to a single statement.
+ */
+#define DFS_OP_STAT_INCR(_dfs, _name) \
+	do { \
+		if ((_dfs)->metrics != NULL) \
+			d_tm_inc_counter((_dfs)->metrics->dm_op_stats[(_name)], 1); \
+	} while (0)
+
+/** Telemetry nodes backing the per-container DFS metrics */
+struct dfs_metrics {
+	/** one call counter per op, indexed by enum dfs_op_stat */
+	struct d_tm_node_t *dm_op_stats[DOS_LIMIT];
+	/** gauge of bytes read, with an I/O size histogram */
+	struct d_tm_node_t *dm_read_bytes;
+	/** gauge of bytes written, with an I/O size histogram */
+	struct d_tm_node_t *dm_write_bytes;
+	/** timestamp recorded when the metrics are initialized */
+	struct d_tm_node_t *dm_mount_time;
+};
+
+/** Allocate and register the metrics for \a dfs; errors are logged, not returned. */
+void
+dfs_metrics_init(dfs_t *dfs);
+
+/** Free the metrics state attached to \a dfs. */
+void
+dfs_metrics_fini(dfs_t *dfs);
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif /* __DFS_METRICS_H__ */
diff --git a/src/client/dfs/mnt.c b/src/client/dfs/mnt.c
index 3b6fc561032..a40146740d7 100644
--- a/src/client/dfs/mnt.c
+++ b/src/client/dfs/mnt.c
@@ -730,6 +730,12 @@ dfs_mount_int(daos_handle_t poh, daos_handle_t coh, int flags, daos_epoch_t epoc
 		daos_obj_oid_cycle(&dfs->oid);
 	}
 
+	/** If container metrics are enabled, set up the DFS metrics for it */
+	entry = daos_prop_entry_get(prop, DAOS_PROP_CO_METRICS_ENABLED);
+	if (entry && entry->dpe_val == 1) {
+		dfs_metrics_init(dfs);
+	}
+
 	dfs->mounted = DFS_MOUNT;
 	*_dfs = dfs;
 	daos_prop_free(prop);
@@ -845,6 +851,8 @@ dfs_umount(dfs_t *dfs)
 	daos_obj_close(dfs->root.oh, NULL);
 	daos_obj_close(dfs->super_oh, NULL);
 
+	dfs_metrics_fini(dfs);
+
 	D_FREE(dfs->prefix);
 	D_MUTEX_DESTROY(&dfs->lock);
 	D_FREE(dfs);
diff --git a/src/client/dfs/obj.c b/src/client/dfs/obj.c
index 309439bc807..722432e2937 100644
--- a/src/client/dfs/obj.c
+++ b/src/client/dfs/obj.c
@@ -412,6 +412,12 @@ open_stat(dfs_t *dfs, dfs_obj_t *parent, const char *name, mode_t mode, int flag
 
 out:
 	if (rc == 0) {
+		if (flags & O_CREAT) {
+			DFS_OP_STAT_INCR(dfs, DOS_CREATE);
+		} else {
+			DFS_OP_STAT_INCR(dfs, DOS_OPEN);
+		}
+
 		if (stbuf) {
 			stbuf->st_size = file_size;
 			stbuf->st_nlink = 1;
@@ -786,6 +792,8 @@ dfs_stat(dfs_t *dfs, dfs_obj_t *parent, const char *name, struct stat *stbuf)
 		oh = parent->oh;
 	}
 
+	DFS_OP_STAT_INCR(dfs, DOS_STAT);
+
 	return entry_stat(dfs, dfs->th, oh, name, len, NULL, true, stbuf, NULL);
 }
 
@@ -810,6 +818,9 @@ dfs_ostat(dfs_t *dfs, dfs_obj_t *obj, struct stat *stbuf)
 		D_GOTO(out, rc);
 
 out:
+	if (rc == 0)
+		DFS_OP_STAT_INCR(dfs, DOS_OSTAT);
+
 	daos_obj_close(oh, NULL);
 	return rc;
 }
@@ -1013,6 +1024,9 @@ statx_task(tse_task_t *task)
 err1_out:
 	D_FREE(op_args);
 	daos_obj_close(args->parent_oh, NULL);
+
+	if (rc == 0)
+		DFS_OP_STAT_INCR(args->dfs, DOS_OSTATX);
 	return rc;
 }
 
@@ -1243,6 +1257,7 @@ dfs_chmod(dfs_t *dfs, dfs_obj_t *parent, const char *name, mode_t mode)
 		D_GOTO(out, rc = daos_der2errno(rc));
 	}
 
+	DFS_OP_STAT_INCR(dfs, DOS_CHMOD);
 out:
 	if (S_ISLNK(entry.mode)) {
 		dfs_release(sym);
@@ -1378,6 +1393,7 @@ dfs_chown(dfs_t *dfs, dfs_obj_t *parent, const char *name, uid_t uid, gid_t gid,
 		D_GOTO(out, rc = daos_der2errno(rc));
 	}
 
+	DFS_OP_STAT_INCR(dfs, DOS_CHOWN);
 out:
 	if (!(flags & O_NOFOLLOW) && S_ISLNK(entry.mode)) {
 		dfs_release(sym);
@@ -1598,6 +1614,7 @@ dfs_osetattr(dfs_t *dfs, dfs_obj_t *obj, struct stat *stbuf, int flags)
 		D_GOTO(out_obj, rc = daos_der2errno(rc));
 	}
 
+	DFS_OP_STAT_INCR(dfs, DOS_SETATTR);
 out_stat:
 	*stbuf = rstat;
 out_obj:
@@ -1662,6 +1679,7 @@ dfs_punch(dfs_t *dfs, dfs_obj_t *obj, daos_off_t offset, daos_size_t len)
 		return daos_der2errno(rc);
 	}
 
+	DFS_OP_STAT_INCR(dfs, DOS_PUNCH);
 	return rc;
 }
 
@@ -1708,6 +1726,7 @@ dfs_sync(dfs_t *dfs)
 	if (dfs->amode != O_RDWR)
 		return EPERM;
 
+	DFS_OP_STAT_INCR(dfs, DOS_SYNC);
 	/** Take a snapshot here and allow rollover to that when supported. */
 
 	return 0;
diff --git a/src/client/dfs/readdir.c b/src/client/dfs/readdir.c
index a284b92e4c2..dbbc731f12b 100644
--- a/src/client/dfs/readdir.c
+++ b/src/client/dfs/readdir.c
@@ -81,6 +81,7 @@ readdir_int(dfs_t *dfs, dfs_obj_t *obj, daos_anchor_t *anchor, uint32_t *nr, str
 			break;
 	}
 	*nr = key_nr;
+	DFS_OP_STAT_INCR(dfs, DOS_READDIR);
 
 out:
 	D_FREE(enum_buf);
diff --git a/src/client/dfs/rename.c b/src/client/dfs/rename.c
index a7431f03536..03572943d39 100644
--- a/src/client/dfs/rename.c
+++ b/src/client/dfs/rename.c
@@ -299,6 +299,8 @@ dfs_move_internal(dfs_t *dfs, unsigned int flags, dfs_obj_t *parent, const char
 	rc = check_tx(th, rc);
 	if (rc == ERESTART)
 		goto restart;
+	if (rc == 0)
+		DFS_OP_STAT_INCR(dfs, DOS_MOVE);
 
 	if (entry.value) {
 		D_ASSERT(S_ISLNK(entry.mode));
diff --git a/src/client/dfs/xattr.c b/src/client/dfs/xattr.c
index 49b700e3def..b3a13a31a8d 100644
--- a/src/client/dfs/xattr.c
+++ b/src/client/dfs/xattr.c
@@ -122,6 +122,7 @@ dfs_setxattr(dfs_t *dfs, dfs_obj_t *obj, const char *name, const void *value, da
 		}
 	}
 
+	DFS_OP_STAT_INCR(dfs, DOS_SETXATTR);
 out:
 	daos_obj_close(oh, NULL);
 free:
@@ -194,6 +195,7 @@ dfs_getxattr(dfs_t *dfs, dfs_obj_t *obj, const char *name, void *value, daos_siz
 	}
 
 	*size = iod.iod_size;
+	DFS_OP_STAT_INCR(dfs, DOS_GETXATTR);
 
 close:
 	daos_obj_close(oh, NULL);
@@ -277,6 +279,7 @@ dfs_removexattr(dfs_t *dfs, dfs_obj_t *obj, const char *name)
 		D_GOTO(out, rc = daos_der2errno(rc));
 	}
 
+	DFS_OP_STAT_INCR(dfs, DOS_RMXATTR);
 out:
 	daos_obj_close(oh, NULL);
 free:
@@ -354,6 +357,7 @@ dfs_listxattr(dfs_t *dfs, dfs_obj_t *obj, char *list, daos_size_t *size)
 	}
 
 	*size = ret_size;
+	DFS_OP_STAT_INCR(dfs, DOS_LSXATTR);
 out:
 	daos_obj_close(oh, NULL);
 	return rc;
diff --git a/src/gurt/telemetry.c b/src/gurt/telemetry.c
index 27a0dc42058..06f55ff27b9 100644
--- a/src/gurt/telemetry.c
+++ b/src/gurt/telemetry.c
@@ -804,6 +804,12 @@ destroy_shmem_with_key(key_t key)
 	return 0;
 }
 
+static bool
+is_initialized(void)
+{
+	return tm_shmem.ctx != NULL && tm_shmem.ctx->shmem_root != NULL;
+}
+
 /**
  * Initialize an instance of the telemetry and metrics API for the producer
  * process with the root set to the provided name.
@@ -833,6 +839,9 @@ d_tm_init_with_name(int id, uint64_t mem_size, int flags, const char *root_name)
 	int shmid;
 	int rc = DER_SUCCESS;
 
+	if (is_initialized())
+		return -DER_ALREADY;
+
 	if (root_name == NULL || strnlen(root_name, D_TM_MAX_NAME_LEN) == 0) {
 		D_ERROR("root name cannot be empty\n");
 		return -DER_INVAL;
@@ -2253,13 +2262,6 @@ d_tm_find_metric(struct d_tm_context *ctx, char *path)
 	return node;
 }
 
-static bool
-is_initialized(void)
-{
-	return tm_shmem.ctx != NULL &&
-	       tm_shmem.ctx->shmem_root != NULL;
-}
-
 /*
  * Get a pointer to the last token in the path without modifying the original
  * string.