Skip to content

Commit

Permalink
grep: add common grep APIs
Browse files Browse the repository at this point in the history
Signed-off-by: Takahiro Yamashita <[email protected]>
  • Loading branch information
nokute78 committed May 7, 2023
1 parent 5864c41 commit eb30ea9
Show file tree
Hide file tree
Showing 5 changed files with 836 additions and 0 deletions.
68 changes: 68 additions & 0 deletions include/fluent-bit/flb_grep.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */

/* Fluent Bit
* ==========
* Copyright (C) 2015-2023 The Fluent Bit Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef FLB_GREP_H
#define FLB_GREP_H

#include <fluent-bit/flb_sds.h>
#include <fluent-bit/flb_regex.h>
#include <fluent-bit/flb_record_accessor.h>

/*
 * Kind of a grep rule. FLB_GREP_NO_RULE is the "nothing registered yet"
 * marker used for flb_grep.first_rule and is not a valid type for a rule.
 */
enum flb_grep_rule_type {
    FLB_GREP_NO_RULE = 0,
    FLB_GREP_REGEX   = 1,
    FLB_GREP_EXCLUDE = 2
};

/* Verdict returned by flb_grep_filter() for a record */
enum flb_grep_action {
    FLB_GREP_RET_KEEP    = 0,
    FLB_GREP_RET_EXCLUDE = 1
};

/*
 * How the rules of a context are combined. With OR / AND every rule of the
 * context must share the same type; LEGACY allows mixing 'regex' and
 * 'exclude' rules.
 */
enum flb_grep_logical_op {
    FLB_GREP_LOGICAL_OP_LEGACY = 0,
    FLB_GREP_LOGICAL_OP_OR     = 1,
    FLB_GREP_LOGICAL_OP_AND    = 2
};

/* One grep rule: a record accessor pattern and the regex matched against it. */
struct flb_grep_rule {
/* rule kind, holds an enum flb_grep_rule_type value */
int type;
/* record accessor pattern; flb_grep_set_rule_str() stores it with a leading '$' */
flb_sds_t field;
/* heap copy of the regular expression text */
char *regex_pattern;
/* regex compiled from regex_pattern */
struct flb_regex *regex;
/* record accessor context created from field */
struct flb_record_accessor *ra;
/* link into flb_grep.rules */
struct mk_list _head;
};


/* Grep context: an ordered set of rules plus the way they are combined. */
struct flb_grep {
/* type of the first rule that was set, FLB_GREP_NO_RULE while there is none */
enum flb_grep_rule_type first_rule;
/* logical operation selected at flb_grep_create() time */
enum flb_grep_logical_op op;
struct mk_list rules; /* flb_grep_rule list */
};


/*
 * Evaluate the rules of 'grep_ctx' against 'map'.
 * Returns FLB_GREP_RET_KEEP or FLB_GREP_RET_EXCLUDE.
 */
int flb_grep_filter(msgpack_object map, struct flb_grep *grep_ctx);
/*
 * Parse 'rule_str' (format: "KEY REGEX") and append a rule of the given
 * 'type' to 'ctx'. Returns 0 on success, -1 on error.
 */
int flb_grep_set_rule_str(struct flb_grep *ctx, enum flb_grep_rule_type type, char *rule_str);
/*
 * Allocate an empty grep context that combines its rules with 'op'.
 * Returns NULL when the allocation fails.
 */
struct flb_grep *flb_grep_create(enum flb_grep_logical_op op);
/*
 * Release 'grep_ctx' and every rule linked to it. NULL is accepted.
 * Returns 0.
 */
int flb_grep_destroy(struct flb_grep *grep_ctx);

#endif /* FLB_GREP_H */
1 change: 1 addition & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ set(src
flb_log_event_encoder_dynamic_field.c
flb_processor.c
flb_reload.c
flb_grep.c
)

# Config format
Expand Down
270 changes: 270 additions & 0 deletions src/flb_grep.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,270 @@
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */

/* Fluent Bit
* ==========
* Copyright (C) 2015-2023 The Fluent Bit Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <fluent-bit/flb_sds.h>
#include <fluent-bit/flb_str.h>
#include <fluent-bit/flb_log.h>
#include <fluent-bit/flb_utils.h>
#include <fluent-bit/flb_grep.h>
#include <fluent-bit/flb_mem.h>
#include <fluent-bit/flb_record_accessor.h>

/*
 * Unlink and release every rule found in 'rules', including the resources
 * owned by each rule (field, regex text, record accessor, compiled regex).
 * A NULL list is accepted. Always returns 0.
 */
static int flb_grep_delete_rules(struct mk_list *rules)
{
    struct mk_list *node;
    struct mk_list *next;
    struct flb_grep_rule *entry;

    if (!rules) {
        return 0;
    }

    mk_list_foreach_safe(node, next, rules) {
        entry = mk_list_entry(node, struct flb_grep_rule, _head);

        /* detach from the list, then drop everything the rule owns */
        mk_list_del(&entry->_head);

        flb_sds_destroy(entry->field);
        flb_free(entry->regex_pattern);
        flb_ra_destroy(entry->ra);
        flb_regex_destroy(entry->regex);

        flb_free(entry);
    }

    return 0;
}

/*
 * Destroy a grep context: release all its rules and then the context
 * itself. Passing NULL is a no-op. Returns the result of the rule cleanup
 * (0).
 */
int flb_grep_destroy(struct flb_grep *grep_ctx)
{
    int status = 0;

    if (grep_ctx != NULL) {
        status = flb_grep_delete_rules(&grep_ctx->rules);
        flb_free(grep_ctx);
    }

    return status;
}


/*
 * Check whether a rule of 'type' may be added to 'grep_ctx'.
 *
 * - FLB_GREP_NO_RULE is never a valid type for a rule.
 * - With the 'AND'/'OR' operations every rule must have the same type as
 *   the first registered one; mixing 'regex' and 'exclude' is rejected.
 *
 * Returns FLB_TRUE when the type is acceptable, FLB_FALSE otherwise.
 */
static int is_valid_rule_type(struct flb_grep *grep_ctx, enum flb_grep_rule_type type)
{
    if (type == FLB_GREP_NO_RULE) {
        flb_error("%s: invalid type", __FUNCTION__);
        /* boolean predicate: report failure as FLB_FALSE, not -1 */
        return FLB_FALSE;
    }

    if (grep_ctx->op != FLB_GREP_LOGICAL_OP_LEGACY) {
        /* 'AND'/'OR' case */
        if (grep_ctx->first_rule != FLB_GREP_NO_RULE /* 2+ rules */
            && grep_ctx->first_rule != type) {
            flb_error("Both 'regex' and 'exclude' are set.");
            return FLB_FALSE;
        }
    }

    return FLB_TRUE;
}

/*
 * Parse 'rule_str' and append the resulting rule to 'grep_ctx'.
 *
 * rule_str format is "KEY REGEX": the string is split once on ' ' into the
 * key and the regular expression. The key is used as a record accessor
 * pattern; a leading '$' is prepended when it is missing.
 * e.g. "hostname .*\.com$"
 *
 * Returns 0 on success and -1 on error. On error every partially built
 * resource is released and 'grep_ctx' is left unmodified.
 */
int flb_grep_set_rule_str(struct flb_grep *grep_ctx, enum flb_grep_rule_type type, char *rule_str)
{
    int ret;
    struct mk_list *split = NULL;
    struct flb_split_entry *sentry;
    struct flb_grep_rule *rule = NULL;

    if (grep_ctx == NULL || rule_str == NULL) {
        flb_error("%s: input error", __FUNCTION__);
        return -1;
    }

    if (is_valid_rule_type(grep_ctx, type) != FLB_TRUE) {
        return -1;
    }

    /* Zero-initialized so the error path knows which members are owned */
    rule = flb_calloc(1, sizeof(struct flb_grep_rule));
    if (rule == NULL) {
        flb_errno();
        return -1;
    }
    rule->type = type;

    /* As a value we expect a pair of field name and a regular expression */
    split = flb_utils_split(rule_str, ' ', 1);
    if (split == NULL) {
        flb_error("could not split rule string");
        goto error;
    }
    if (mk_list_size(split) != 2) {
        flb_error("invalid regex, expected field and regular expression");
        goto error;
    }

    /* Get first value (field) */
    sentry = mk_list_entry_first(split, struct flb_split_entry, _head);
    if (*sentry->value == '$') {
        rule->field = flb_sds_create_len(sentry->value, sentry->len);
        if (rule->field == NULL) {
            flb_errno();
            goto error;
        }
    }
    else {
        /* room for the '$' prefix plus the terminator */
        rule->field = flb_sds_create_size(sentry->len + 2);
        if (rule->field == NULL) {
            flb_errno();
            goto error;
        }

        ret = flb_sds_cat_safe(&rule->field, "$", 1);
        if (ret != 0) {
            flb_error("flb_sds_cat_safe failed");
            goto error;
        }

        ret = flb_sds_cat_safe(&rule->field, sentry->value, sentry->len);
        if (ret != 0) {
            flb_error("flb_sds_cat_safe failed");
            goto error;
        }
    }

    /* Get remaining content (regular expression) */
    sentry = mk_list_entry_last(split, struct flb_split_entry, _head);
    rule->regex_pattern = flb_strndup(sentry->value, sentry->len);
    if (rule->regex_pattern == NULL) {
        flb_errno();
        goto error;
    }

    /* Release split */
    flb_utils_split_free(split);
    split = NULL;

    /* Create a record accessor context for this rule */
    rule->ra = flb_ra_create(rule->field, FLB_FALSE);
    if (!rule->ra) {
        flb_error("invalid record accessor? '%s'", rule->field);
        goto error;
    }

    /* Convert string to regex pattern */
    rule->regex = flb_regex_create(rule->regex_pattern);
    if (!rule->regex) {
        flb_error("could not compile regex pattern '%s'",
                  rule->regex_pattern);
        goto error;
    }

    /* Link to parent list */
    mk_list_add(&rule->_head, &grep_ctx->rules);

    /* Only a rule that was really added may become the reference type */
    if (grep_ctx->first_rule == FLB_GREP_NO_RULE) {
        grep_ctx->first_rule = type;
    }

    return 0;

error:
    /* rule->regex is created last, so it is never set when we get here */
    if (split != NULL) {
        flb_utils_split_free(split);
    }
    if (rule->ra != NULL) {
        flb_ra_destroy(rule->ra);
    }
    if (rule->field != NULL) {
        flb_sds_destroy(rule->field);
    }
    flb_free(rule->regex_pattern);
    flb_free(rule);

    return -1;
}

/*
 * Allocate and initialize an empty grep context.
 *
 * 'op' selects how the rules added later are combined. The returned
 * context has no rules and must be released with flb_grep_destroy().
 *
 * Returns NULL (after reporting errno) when the allocation fails.
 */
struct flb_grep *flb_grep_create(enum flb_grep_logical_op op)
{
    struct flb_grep *ctx = NULL;

    ctx = flb_calloc(1, sizeof(struct flb_grep));
    if (ctx == NULL) {
        /* report the failure like the other allocation paths in this file */
        flb_errno();
        return NULL;
    }

    ctx->first_rule = FLB_GREP_NO_RULE;
    ctx->op = op;
    mk_list_init(&ctx->rules);

    return ctx;
}

/*
 * Legacy evaluation: rules are checked in list order and the first rule
 * that decides wins.
 *
 *  - rule matches:       'exclude' rule -> exclude, 'regex' rule -> keep
 *  - rule doesn't match: 'regex' rule -> exclude, 'exclude' rule -> go on
 *
 * When no rule decides (or there are no rules) the record is kept.
 */
static int flb_grep_filter_legacy(msgpack_object map,
                                  struct flb_grep *grep_ctx)
{
    ssize_t matched;
    struct mk_list *node;
    struct flb_grep_rule *entry;

    mk_list_foreach(node, &grep_ctx->rules) {
        entry = mk_list_entry(node, struct flb_grep_rule, _head);
        matched = flb_ra_regex_match(entry->ra, map, entry->regex, NULL);

        if (matched > 0) {
            /* a match is always final, the rule type picks the verdict */
            return (entry->type == FLB_GREP_EXCLUDE)
                   ? FLB_GREP_RET_EXCLUDE
                   : FLB_GREP_RET_KEEP;
        }

        /* no match: only a 'regex' rule rejects the record */
        if (entry->type == FLB_GREP_REGEX) {
            return FLB_GREP_RET_EXCLUDE;
        }
    }

    return FLB_GREP_RET_KEEP;
}

/*
 * 'AND'/'OR' evaluation. All rules of the context share the same type.
 *
 *  - OR:  stop at the first rule that matches.
 *  - AND: stop at the first rule that does not match.
 *
 * 'found' holds the match result of the last evaluated rule. For 'regex'
 * rules a positive result keeps the record, for 'exclude' rules it drops
 * it. A context without rules keeps every record.
 */
static int flb_grep_filter_data_and_or(msgpack_object map, struct flb_grep *ctx)
{
    ssize_t ra_ret;
    int found = FLB_FALSE;
    struct mk_list *head;
    struct flb_grep_rule *rule = NULL;

    /* For each rule, validate against map fields */
    mk_list_foreach(head, &ctx->rules) {
        rule = mk_list_entry(head, struct flb_grep_rule, _head);

        ra_ret = flb_ra_regex_match(rule->ra, map, rule->regex, NULL);
        found = (ra_ret > 0) ? FLB_TRUE : FLB_FALSE;

        if (ctx->op == FLB_GREP_LOGICAL_OP_OR && found == FLB_TRUE) {
            /* OR case: One rule is matched. */
            break;
        }
        if (ctx->op == FLB_GREP_LOGICAL_OP_AND && found == FLB_FALSE) {
            /* AND case: One rule is not matched */
            break;
        }
    }

    /* Empty rule list: nothing to evaluate, so nothing is filtered out */
    if (rule == NULL) {
        return FLB_GREP_RET_KEEP;
    }

    if (rule->type == FLB_GREP_REGEX) {
        return found ? FLB_GREP_RET_KEEP : FLB_GREP_RET_EXCLUDE;
    }

    /* rule is exclude */
    return found ? FLB_GREP_RET_EXCLUDE : FLB_GREP_RET_KEEP;
}

/*
 * Decide whether the record 'map' is kept or excluded by the rules of
 * 'grep_ctx'. Dispatches on the logical operation of the context and
 * returns FLB_GREP_RET_KEEP or FLB_GREP_RET_EXCLUDE.
 */
int flb_grep_filter(msgpack_object map, struct flb_grep *grep_ctx)
{
    /* 'AND' / 'OR' share one evaluator, everything else is legacy mode */
    if (grep_ctx->op != FLB_GREP_LOGICAL_OP_LEGACY) {
        return flb_grep_filter_data_and_or(map, grep_ctx);
    }

    return flb_grep_filter_legacy(map, grep_ctx);
}
1 change: 1 addition & 0 deletions tests/internal/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ set(UNIT_TESTS_FILES
env.c
log.c
processor.c
grep.c
)

# Config format
Expand Down
Loading

0 comments on commit eb30ea9

Please sign in to comment.