Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DAOS-xxx client: Dump metrics to container #15525

Draft
wants to merge 3 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/user/container.md
Original file line number Diff line number Diff line change
Expand Up @@ -507,6 +507,12 @@ corruption proactively. (see data_integrity.md for more details). This can be
disabled per container using the `DAOS_PROP_CO_SCRUBBER_DISABLED` container
property.

### Per-Container Metrics Collection

If enabled, per-container metrics (currently only POSIX containers are supported)
will be collected on the client. Set the `DAOS_PROP_CO_METRICS_ENABLED` container
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this is a good idea. We can discuss offline.

property to enable this behavior.

### Deduplication (Preview)

Data deduplication (dedup) is a process that allows to eliminate duplicated
Expand Down
5 changes: 5 additions & 0 deletions src/client/api/metrics.c
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,11 @@ dc_tm_fini()
if (!daos_client_metric)
return;

/* If we're dumping to a container, don't try to dump to a file. */
if (d_isenv_def(DAOS_CLIENT_METRICS_DUMP_POOL) &&
d_isenv_def(DAOS_CLIENT_METRICS_DUMP_CONT))
goto out;

rc = d_agetenv_str(&dump_dir, DAOS_CLIENT_METRICS_DUMP_DIR);
if (rc != 0)
D_GOTO(out, rc);
Expand Down
2 changes: 1 addition & 1 deletion src/client/dfs/SConscript
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def scons():
libraries = ['daos_common', 'daos', 'uuid', 'gurt']

dfs_src = ['common.c', 'cont.c', 'dir.c', 'file.c', 'io.c', 'lookup.c', 'mnt.c', 'obj.c',
'pipeline.c', 'readdir.c', 'rename.c', 'xattr.c', 'dfs_sys.c']
'pipeline.c', 'readdir.c', 'rename.c', 'xattr.c', 'dfs_sys.c', 'metrics.c']
dfs = denv.d_library('dfs', dfs_src, LIBS=libraries)
denv.Install('$PREFIX/lib64/', dfs)

Expand Down
2 changes: 2 additions & 0 deletions src/client/dfs/common.c
Original file line number Diff line number Diff line change
Expand Up @@ -625,6 +625,8 @@ entry_stat(dfs_t *dfs, daos_handle_t th, daos_handle_t oh, const char *name, siz
stbuf->st_atim.tv_sec = stbuf->st_mtim.tv_sec;
stbuf->st_atim.tv_nsec = stbuf->st_mtim.tv_nsec;
}

DFS_OP_STAT_INCR(dfs, DOS_STAT);
return 0;
}

Expand Down
4 changes: 4 additions & 0 deletions src/client/dfs/dfs_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
#include <daos.h>
#include <daos_fs.h>

#include "metrics.h"

/** D-key name of SB metadata */
#define SB_DKEY "DFS_SB_METADATA"

Expand Down Expand Up @@ -190,6 +192,8 @@ struct dfs {
struct dfs_mnt_hdls *cont_hdl;
/** the root dir stat buf */
struct stat root_stbuf;
/** DFS top-level metrics */
struct dfs_metrics *metrics;
};

struct dfs_entry {
Expand Down
2 changes: 2 additions & 0 deletions src/client/dfs/dir.c
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ dfs_mkdir(dfs_t *dfs, dfs_obj_t *parent, const char *name, mode_t mode, daos_ocl
if (rc != 0)
return daos_der2errno(rc);

DFS_OP_STAT_INCR(dfs, DOS_MKDIR);
return rc;
}

Expand Down Expand Up @@ -220,6 +221,7 @@ dfs_remove(dfs_t *dfs, dfs_obj_t *parent, const char *name, bool force, daos_obj
if (oid)
oid_cp(oid, entry.oid);

DFS_OP_STAT_INCR(dfs, DOS_REMOVE);
out:
rc = check_tx(th, rc);
if (rc == ERESTART)
Expand Down
34 changes: 32 additions & 2 deletions src/client/dfs/io.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,18 @@ read_cb(tse_task_t *task, void *data)
return rc;
}

/**
 * Add the byte counts of a file I/O operation to the DFS mount's metrics.
 *
 * Does nothing when \a dfs is NULL or when the mount has no metrics attached
 * (dfs->metrics is NULL). A zero byte count leaves its gauge untouched.
 *
 * \param[in]	dfs		DFS mount whose metrics are updated.
 * \param[in]	read_bytes	Bytes to add to the dm_read_bytes gauge.
 * \param[in]	write_bytes	Bytes to add to the dm_write_bytes gauge.
 */
static void
dfs_update_file_metrics(dfs_t *dfs, daos_size_t read_bytes, daos_size_t write_bytes)
{
	struct dfs_metrics *metrics;

	if (dfs == NULL)
		return;

	metrics = dfs->metrics;
	if (metrics == NULL)
		return;

	/* Only touch a gauge when there is something to account for. */
	if (read_bytes > 0) {
		d_tm_inc_gauge(metrics->dm_read_bytes, read_bytes);
	}

	if (write_bytes > 0) {
		d_tm_inc_gauge(metrics->dm_write_bytes, write_bytes);
	}
}

static int
dfs_read_int(dfs_t *dfs, dfs_obj_t *obj, daos_off_t off, dfs_iod_t *iod, d_sg_list_t *sgl,
daos_size_t buf_size, daos_size_t *read_size, daos_event_t *ev)
Expand Down Expand Up @@ -85,11 +97,14 @@ dfs_read_int(dfs_t *dfs, dfs_obj_t *obj, daos_off_t off, dfs_iod_t *iod, d_sg_li
if (rc)
D_GOTO(err_params, rc);

DFS_OP_STAT_INCR(dfs, DOS_READ);
/*
* dc_task_schedule() calls tse_task_complete() even on error (which also calls the
* completion cb that frees params in this case), so we can just ignore the rc here.
*/
dc_task_schedule(task, true);

dfs_update_file_metrics(dfs, *params->read_size, 0);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think those metrics update calls should be in the completion callback, when the read completes.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed.

return 0;

err_params:
Expand Down Expand Up @@ -125,6 +140,7 @@ dfs_read(dfs_t *dfs, dfs_obj_t *obj, d_sg_list_t *sgl, daos_off_t off, daos_size
daos_event_launch(ev);
daos_event_complete(ev, 0);
}
DFS_OP_STAT_INCR(dfs, DOS_READ);
return 0;
}

Expand All @@ -146,7 +162,9 @@ dfs_read(dfs_t *dfs, dfs_obj_t *obj, d_sg_list_t *sgl, daos_off_t off, daos_size
return daos_der2errno(rc);
}

DFS_OP_STAT_INCR(dfs, DOS_READ);
*read_size = iod.arr_nr_read;
dfs_update_file_metrics(dfs, iod.arr_nr_read, 0);
return 0;
}

Expand All @@ -173,6 +191,7 @@ dfs_readx(dfs_t *dfs, dfs_obj_t *obj, dfs_iod_t *iod, d_sg_list_t *sgl, daos_siz
daos_event_launch(ev);
daos_event_complete(ev, 0);
}
DFS_OP_STAT_INCR(dfs, DOS_READX);
return 0;
}

Expand All @@ -189,7 +208,9 @@ dfs_readx(dfs_t *dfs, dfs_obj_t *obj, dfs_iod_t *iod, d_sg_list_t *sgl, daos_siz
return daos_der2errno(rc);
}

DFS_OP_STAT_INCR(dfs, DOS_READX);
*read_size = arr_iod.arr_nr_read;
dfs_update_file_metrics(dfs, arr_iod.arr_nr_read, 0);
return 0;
}

Expand Down Expand Up @@ -223,6 +244,7 @@ dfs_write(dfs_t *dfs, dfs_obj_t *obj, d_sg_list_t *sgl, daos_off_t off, daos_eve
daos_event_launch(ev);
daos_event_complete(ev, 0);
}
DFS_OP_STAT_INCR(dfs, DOS_WRITE);
return 0;
}

Expand All @@ -238,8 +260,12 @@ dfs_write(dfs_t *dfs, dfs_obj_t *obj, d_sg_list_t *sgl, daos_off_t off, daos_eve
daos_event_errno_rc(ev);

rc = daos_array_write(obj->oh, DAOS_TX_NONE, &iod, sgl, ev);
if (rc)
if (rc == 0) {
DFS_OP_STAT_INCR(dfs, DOS_WRITE);
dfs_update_file_metrics(dfs, 0, buf_size);
} else {
D_ERROR("daos_array_write() failed, " DF_RC "\n", DP_RC(rc));
}

return daos_der2errno(rc);
}
Expand All @@ -266,6 +292,7 @@ dfs_writex(dfs_t *dfs, dfs_obj_t *obj, dfs_iod_t *iod, d_sg_list_t *sgl, daos_ev
daos_event_launch(ev);
daos_event_complete(ev, 0);
}
DFS_OP_STAT_INCR(dfs, DOS_WRITEX);
return 0;
}

Expand All @@ -277,8 +304,11 @@ dfs_writex(dfs_t *dfs, dfs_obj_t *obj, dfs_iod_t *iod, d_sg_list_t *sgl, daos_ev
daos_event_errno_rc(ev);

rc = daos_array_write(obj->oh, DAOS_TX_NONE, &arr_iod, sgl, ev);
if (rc)
if (rc == 0) {
DFS_OP_STAT_INCR(dfs, DOS_WRITEX);
} else {
D_ERROR("daos_array_write() failed (%d)\n", rc);
}

return daos_der2errno(rc);
}
Loading
Loading