Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

in_podman_metrics: Added remove_stale_counters opt #7503

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 94 additions & 10 deletions plugins/in_podman_metrics/podman_metrics.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
* that are children to root array, and in them, search for ID and name (which is also
* an array).
*/
static int collect_container_data(struct flb_in_metrics *ctx)
static int collect_container_data(struct flb_in_metrics *ctx, int gather_only)
{
/* Buffers for reading data from JSON */
char *buffer;
Expand All @@ -57,6 +57,8 @@ static int collect_container_data(struct flb_in_metrics *ctx)
jsmn_parser p;
jsmntok_t t[JSON_TOKENS];

struct container_id *cid;

flb_utils_read_file(ctx->config, &buffer, &read_bytes);
if (!read_bytes) {
flb_plg_warn(ctx->ins, "Failed to open %s", ctx->config);
Expand Down Expand Up @@ -119,11 +121,26 @@ static int collect_container_data(struct flb_in_metrics *ctx)
image_name[metadata_token_size] = '\0';

flb_plg_trace(ctx->ins, "Found image name %s", image_name);
add_container_to_list(ctx, id, name, image_name);
if (!gather_only) {
add_container_to_list(ctx, id, name, image_name);
}
}
else {
flb_plg_warn(ctx->ins, "Image name was not found for %s", id);
add_container_to_list(ctx, id, name, "unknown");
if (!gather_only) {
add_container_to_list(ctx, id, name, "unknown");
}
}

if (gather_only) {
cid = flb_malloc(sizeof(struct container_id));
if (!cid) {
flb_errno();
return -1;
}
cid->id = flb_sds_create(id);
mk_list_add(&cid->_head, &ctx->ids);
flb_plg_trace(ctx->ins, "Found id for gather only %s", cid->id);
}
collected_containers++;
}
Expand Down Expand Up @@ -173,18 +190,55 @@ static int destroy_container_list(struct flb_in_metrics *ctx)
struct container *cnt;
struct net_iface *iface;
struct sysfs_path *pth;
struct container_id *id;
struct mk_list *head;
struct mk_list *tmp;
struct mk_list *inner_head;
struct mk_list *inner_tmp;
int can_remove_stale_counters = FLB_FALSE;
int id_found;
int collected;

if (ctx->remove_stale_counters) {
collected = collect_container_data(ctx, FLB_TRUE);
if (collected == -1) {
flb_plg_error(ctx->ins, "Could not collect container ids");
}
else {
can_remove_stale_counters = FLB_TRUE;
flb_plg_debug(ctx->ins, "Collected %d for deletion", collected);
}
}

mk_list_foreach_safe(head, tmp, &ctx->items) {
id_found = FLB_FALSE;
cnt = mk_list_entry(head, struct container, _head);
flb_plg_debug(ctx->ins, "Destroying container data (id: %s, name: %s", cnt->id, cnt->name);

/* If recreation was already triggered, there is no point in determining it again */
if (can_remove_stale_counters && !ctx->recreate_cmt) {
mk_list_foreach_safe(inner_head, inner_tmp, &ctx->ids) {
id = mk_list_entry(inner_head, struct container_id, _head);
if (strcmp(cnt->id, id->id) == 0) {
id_found = FLB_TRUE;
break;
}
}

if (!id_found) {
flb_plg_info(ctx->ins, "Counter will be removed because %s is gone", cnt->name);
ctx->recreate_cmt = FLB_TRUE;
}
else {
flb_plg_debug(ctx->ins, "No need to remove stale counters");
}
}


flb_sds_destroy(cnt->id);
flb_sds_destroy(cnt->name);
flb_sds_destroy(cnt->image_name);

mk_list_foreach_safe(inner_head, inner_tmp, &cnt->net_data) {
iface = mk_list_entry(inner_head, struct net_iface, _head);
flb_sds_destroy(iface->name);
Expand All @@ -194,6 +248,7 @@ static int destroy_container_list(struct flb_in_metrics *ctx)
mk_list_del(&cnt->_head);
flb_free(cnt);
}


mk_list_foreach_safe(head, tmp, &ctx->sysfs_items) {
pth = mk_list_entry(head, struct sysfs_path, _head);
Expand All @@ -202,10 +257,19 @@ static int destroy_container_list(struct flb_in_metrics *ctx)
mk_list_del(&pth->_head);
flb_free(pth);
}

if (ctx->remove_stale_counters) {
mk_list_foreach_safe(head, tmp, &ctx->ids) {
id = mk_list_entry(head, struct container_id, _head);
flb_plg_trace(ctx->ins, "Destroying container id: %s", id->id);
flb_sds_destroy(id->id);
mk_list_del(&id->_head);
flb_free(id);
}
}
return 0;
}


/*
* Create counter for given metric name, using name, image name and value as counter labels. Counters
* are created per counter name, so they are "shared" between multiple containers - counter
Expand All @@ -218,8 +282,8 @@ static int create_counter(struct flb_in_metrics *ctx, struct cmt_counter **count
{
flb_sds_t *labels;
uint64_t fvalue = value;

int label_count;

if (value == UINT64_MAX) {
flb_plg_debug(ctx->ins, "Ignoring invalid counter for %s, %s_%s_%s", name, COUNTER_PREFIX, metric_prefix, metric_name);
return -1;
Expand All @@ -246,6 +310,12 @@ static int create_counter(struct flb_in_metrics *ctx, struct cmt_counter **count
*counter = cmt_counter_create(ctx->ins->cmt, COUNTER_PREFIX, metric_prefix, metric_name, description, label_count, fields);
}

if (ctx->recreate_cmt) {
flb_plg_debug(ctx->ins, "Recreating counter for %s, %s_%s_%s", name, COUNTER_PREFIX, metric_prefix, metric_name);
cmt_counter_destroy(*counter);
*counter = cmt_counter_create(ctx->ins->cmt, COUNTER_PREFIX, metric_prefix, metric_name, description, label_count, fields);
}

/* Allow setting a value that is not greater than the current one (if, for example, memory usage stays exactly the same) */
cmt_counter_allow_reset(*counter);
flb_plg_debug(ctx->ins, "Set counter for %s, %s_%s_%s: %lu", name, COUNTER_PREFIX, metric_prefix, metric_name, fvalue);
Expand All @@ -268,20 +338,26 @@ static int create_gauge(struct flb_in_metrics *ctx, struct cmt_gauge **gauge, fl
{
flb_sds_t *labels;
int label_count;
labels = (char *[]){id, name, image_name};
label_count = 3;

if (value == UINT64_MAX) {
flb_plg_debug(ctx->ins, "Ignoring invalid gauge for %s, %s_%s_%s", name, COUNTER_PREFIX, metric_prefix, metric_name);
return -1;
}

labels = (char *[]){id, name, image_name};
label_count = 3;

/* if gauge was not yet created, it means that this function is called for the first time per counter type */
if (*gauge == NULL) {
flb_plg_debug(ctx->ins, "Creating gauge for %s, %s_%s_%s", name, COUNTER_PREFIX, metric_prefix, metric_name);
*gauge = cmt_gauge_create(ctx->ins->cmt, COUNTER_PREFIX, metric_prefix, metric_name, description, label_count, fields);
}

if (ctx->recreate_cmt) {
flb_plg_debug(ctx->ins, "Recreating gauge for %s, %s_%s_%s", name, COUNTER_PREFIX, metric_prefix, metric_name);
cmt_gauge_destroy(*gauge);
*gauge = cmt_gauge_create(ctx->ins->cmt, COUNTER_PREFIX, metric_prefix, metric_name, description, label_count, fields);
}

flb_plg_debug(ctx->ins, "Set gauge for %s, %s_%s_%s: %lu", name, COUNTER_PREFIX, metric_prefix, metric_name, value);
if (cmt_gauge_set(*gauge, cfl_time_now(), value, label_count, labels) == -1) {
flb_plg_warn(ctx->ins, "Failed to set gauge for %s, %s_%s_%s", name, COUNTER_PREFIX, metric_prefix, metric_name);
Expand Down Expand Up @@ -340,7 +416,12 @@ static int create_counters(struct flb_in_metrics *ctx)
DESCRIPTION_TX_BYTES, iface->name, iface->tx_bytes);
create_counter(ctx, &ctx->tx_errors, cnt->id, cnt->name, cnt->image_name, COUNTER_NETWORK_PREFIX, FIELDS_METRIC_WITH_IFACE, COUNTER_TX_ERRORS,
DESCRIPTION_TX_ERRORS, iface->name, iface->tx_errors);
/* Stop recreating after first iteration, at this point we cleared all counters/gauges */
ctx->recreate_cmt = FLB_FALSE;
}

// Reset the flag again in case the previous loop did not iterate at all
ctx->recreate_cmt = FLB_FALSE;
}
return 0;
}
Expand All @@ -357,7 +438,7 @@ static int scrape_metrics(struct flb_config *config, struct flb_in_metrics *ctx)
return -1;
}

if (collect_container_data(ctx) == -1) {
if (collect_container_data(ctx, FLB_FALSE) == -1) {
flb_plg_error(ctx->ins, "Could not collect container ids");
return -1;
}
Expand Down Expand Up @@ -429,6 +510,8 @@ static int in_metrics_init(struct flb_input_instance *in, struct flb_config *con
ctx->tx_bytes = NULL;
ctx->tx_errors = NULL;

ctx->recreate_cmt = FLB_FALSE;

if (flb_input_config_map_set(in, (void *) ctx) == -1) {
flb_free(ctx);
return -1;
Expand Down Expand Up @@ -462,6 +545,7 @@ static int in_metrics_init(struct flb_input_instance *in, struct flb_config *con

mk_list_init(&ctx->items);
mk_list_init(&ctx->sysfs_items);
mk_list_init(&ctx->ids);

if (ctx->scrape_interval >= 2 && ctx->scrape_on_start) {
flb_plg_info(ctx->ins, "Generating podman metrics (initial scrape)");
Expand Down Expand Up @@ -490,8 +574,8 @@ static int in_metrics_exit(void *data, struct flb_config *config)
return 0;
}

flb_sds_destroy(ctx->config);
destroy_container_list(ctx);
flb_sds_destroy(ctx->config);
flb_free(ctx);
return 0;
}
Expand Down
7 changes: 6 additions & 1 deletion plugins/in_podman_metrics/podman_metrics.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@

#include "podman_metrics_config.h"

static int collect_container_data(struct flb_in_metrics *ctx);
static int collect_container_data(struct flb_in_metrics *ctx, int gather_only);
static int add_container_to_list(struct flb_in_metrics *ctx, flb_sds_t id, flb_sds_t name, flb_sds_t image_name);
static int destroy_container_list(struct flb_in_metrics *ctx);

Expand Down Expand Up @@ -78,6 +78,11 @@ static struct flb_config_map config_map[] = {
0, FLB_TRUE, offsetof(struct flb_in_metrics, procfs_path),
"Path to proc subsystem directory"
},
{
FLB_CONFIG_MAP_BOOL, "remove_stale_counters", "false",
0, FLB_TRUE, offsetof(struct flb_in_metrics, remove_stale_counters),
"Remove counters for removed containers"
},

/* EOF */
{0}
Expand Down
10 changes: 10 additions & 0 deletions plugins/in_podman_metrics/podman_metrics_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -169,18 +169,28 @@ struct sysfs_path {
struct mk_list _head;
};

/*
 * Node of the container id list kept in flb_in_metrics.ids. Nodes are
 * allocated by collect_container_data() when it is called in gather-only
 * mode and are released again by destroy_container_list().
 */
struct container_id {
/* container id string; created with flb_sds_create() and destroyed together with the node */
flb_sds_t id;
/* list linkage used to chain this node into flb_in_metrics.ids */
struct mk_list _head;
};

struct flb_in_metrics {
/* config map options */
int scrape_on_start;
int scrape_interval;
flb_sds_t podman_config_path;
int remove_stale_counters;
int recreate_cmt;

/* container list */
struct mk_list items;

/* sysfs path list */
struct mk_list sysfs_items;

/* container id list */
struct mk_list ids;

/* counters */
struct cmt_counter *c_memory_usage;
struct cmt_counter *c_memory_max_usage;
Expand Down
1 change: 1 addition & 0 deletions tests/runtime/in_podman_metrics.c
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ void flb_test_ipm_regular() {
"scrape_on_start", "true",
"path.sysfs", DPATH_PODMAN_REGULAR,
"path.procfs", DPATH_PODMAN_REGULAR,
"remove_stale_counters", "true",
NULL);
TEST_CHECK(flb_start(ctx) == 0);
sleep(1);
Expand Down
Loading