From 3116fd05a3cb198aa20957ce2d6cbbbd9329b499 Mon Sep 17 00:00:00 2001 From: erwann lesech Date: Sat, 27 Jan 2024 13:53:42 +0100 Subject: [PATCH] style: refactor all lexer function so it's clang-tidy compliant --- src/execute/utils/builtin.c | 2 +- src/lexer/Makefile.am | 2 +- src/lexer/lexer.c | 101 +++----------------------- src/lexer/lexer.h | 61 ++++++++++++++++ src/lexer/lexer_utils.c | 36 ++-------- src/lexer/lexer_utils2.c | 60 ++++++++++++++++ src/lexer/lexer_utils3.c | 138 ++++++++++++++++++++++++++++++++++++ 7 files changed, 276 insertions(+), 124 deletions(-) create mode 100644 src/lexer/lexer_utils3.c diff --git a/src/execute/utils/builtin.c b/src/execute/utils/builtin.c index 97563c2c..c602e499 100644 --- a/src/execute/utils/builtin.c +++ b/src/execute/utils/builtin.c @@ -346,7 +346,7 @@ int dot_fun(struct ast_node *node) { return return_val; } - + if (doted) { int r = check_file(node, first_arg, path); diff --git a/src/lexer/Makefile.am b/src/lexer/Makefile.am index 1cdec2f4..e65d6d90 100644 --- a/src/lexer/Makefile.am +++ b/src/lexer/Makefile.am @@ -1,5 +1,5 @@ lib_LIBRARIES = liblexer.a -liblexer_a_SOURCES = lexer.c lexer_utils.c lexer_utils2.c lexer.h token.h +liblexer_a_SOURCES = lexer.c lexer_utils.c lexer_utils2.c lexer_utils3.c lexer.h token.h liblexer_a_CFLAGS = -Wall -Wextra -Werror -Wvla -pedantic -std=c99 liblexer_a_CPPFLAGS = -I$(top_srcdir)/src diff --git a/src/lexer/lexer.c b/src/lexer/lexer.c index 9555f931..172e7a63 100644 --- a/src/lexer/lexer.c +++ b/src/lexer/lexer.c @@ -150,38 +150,18 @@ char *get_word(struct lexer *lexer, bool *is_diactivated) if (!first_char_check(lexer, &word, &word_index)) { // Handle the word - while (lexer->data[lexer->index] != ' ' - && lexer->data[lexer->index] != '\0' - && lexer->data[lexer->index] != ';' - && lexer->data[lexer->index] != '\n' - && lexer->data[lexer->index] != '\t' - && lexer->data[lexer->index] != '>' - && lexer->data[lexer->index] != '<' - && lexer->data[lexer->index] != '|' - && 
lexer->data[lexer->index] != '&' - && lexer->data[lexer->index] != '(' - && lexer->data[lexer->index] != ')') + while (word_separator_check(lexer)) { word = append_end_of_word(word, word_index); // Handle the word assignement if it's contain '=' and it's not the // first character - if (lexer->data[lexer->index] == '=' && word_index > 0 - && lexer->curr_tok.type != TOKEN_DOUBLE_QUOTE - && lexer->curr_tok.type != TOKEN_VARIABLE_VALUE - && check_variable_assignement(word)) + if (handle_egal(lexer, word, word_index)) { - lexer->curr_tok.type = TOKEN_WORD_ASSIGNMENT; break; } - else if (lexer->data[lexer->index] == '=' && word_index == 0 - && lexer->curr_tok.type == TOKEN_VARIABLE_VALUE) - { - lexer->index += 1; - } - // Handle the variable if (lexer->data[lexer->index] == '$') { @@ -206,15 +186,10 @@ char *get_word(struct lexer *lexer, bool *is_diactivated) } } // Take next char and put it in the word - word = realloc(word, sizeof(char) * (word_index + 1)); - word[word_index] = lexer->data[lexer->index]; - ++word_index; - ++lexer->index; + append_char_to_word(lexer, &word, &word_index); // Handle the double quote - if (lexer->data[lexer->index - 1] == '\"' - || lexer->curr_tok.type == TOKEN_DOUBLE_QUOTE - || lexer->curr_tok.type == TOKEN_VARIABLE_AND_DOUBLE_QUOTE) + if (check_double_quote(lexer)) { // Handle the end of the double quote if (word_index > 0 && lexer->data[lexer->index - 1] == '\"') @@ -241,31 +216,16 @@ char *get_word(struct lexer *lexer, bool *is_diactivated) { return NULL; } - if (lexer->curr_tok.type == TOKEN_VARIABLE_AND_DOUBLE_QUOTE - || lexer->curr_tok.type == TOKEN_SUB_AND_DOUBLE_QUOTE) + + if (is_sub_or_var_and_double_quote(lexer)) { return word; } } - // Handle backslash - else if (lexer->data[lexer->index - 1] == '\\') - { - handle_backslash(lexer, is_diactivated, word, word_index); - } - - // Handle simple quote - else if (lexer->data[lexer->index - 1] == '\'') + else if (NULL == (word = hbsq(lexer, word, &word_index, is_diactivated))) { - word =
handle_simple_quote(lexer, is_diactivated, word, - &word_index); - - // Missing closing simple quote - if (!word) - { - return NULL; - } - lexer->index += 1; + return NULL; } } } @@ -299,49 +259,10 @@ struct token parse_input_for_tok(struct lexer *lexer) return token; } - // Check if the word is a word_assignement (contains a '=') and if it's a - // variable name is valid - if (lexer->curr_tok.type == TOKEN_WORD_ASSIGNMENT - && check_variable_assignement(word)) - { - token.type = TOKEN_WORD_ASSIGNMENT; - token.data = word; - // Usefull to have the next word token - lexer->curr_tok.type = TOKEN_VARIABLE_VALUE; - return token; - } - - if (lexer->curr_tok.type == TOKEN_IONUMBER) - { - token.type = TOKEN_IONUMBER; - token.data = word; - lexer->curr_tok.type = TOKEN_EOL; - return token; - } - - // Check if the word is a variable name - if (lexer->curr_tok.type == TOKEN_VARIABLE - || lexer->curr_tok.type == TOKEN_VARIABLE_AND_DOUBLE_QUOTE) - { - token.type = TOKEN_VARIABLE; - token.data = word; - if (lexer->curr_tok.type == TOKEN_VARIABLE_AND_DOUBLE_QUOTE) - { - lexer->curr_tok.type = TOKEN_DOUBLE_QUOTE; - } - else - { - lexer->curr_tok.type = TOKEN_EOL; - } - return token; - } - - if (lexer->curr_tok.type == TOKEN_SUBSTITUTION - || lexer->curr_tok.type == TOKEN_SUB_AND_DOUBLE_QUOTE) + // Check if the word is a special case + token = check_special_cases(lexer, word, token); + if (token.type != TOKEN_ERROR) { - token.type = TOKEN_SUBSTITUTION; - token.data = word; - lexer->curr_tok.type = TOKEN_EOL; return token; } diff --git a/src/lexer/lexer.h b/src/lexer/lexer.h index ea164f75..fc8b2b0a 100644 --- a/src/lexer/lexer.h +++ b/src/lexer/lexer.h @@ -202,4 +202,65 @@ char *handle_redir(struct lexer *lexer, unsigned *word_index); */ char *get_word(struct lexer *lexer, bool *is_diactivated); +// clang_tidy functions + +/** + * \brief replace if of line 101 in check_variable_name + */ +enum token_type affect_curr_tok_type_var_name(struct lexer *lexer); + +/** + * \brief replace 
if of line 120 in check_variable_name + */ +bool not_valid_check_var(struct lexer *lexer, char **word, char *curr_word); + +/** + * \brief replace elif of line 140 in check_variable_name + */ +bool elif_check_var(struct lexer *lexer); + +/** + * \brief replace while of line 160 in check_variable_name + */ +bool while_check_var(struct lexer *lexer); + +/** + * \brief replace check cases in parse_input_for_tok + */ +struct token check_special_cases(struct lexer *lexer, char *word, + struct token token); + +/** + * \brief replace while separator condition of get_word + */ +bool word_separator_check(struct lexer *lexer); + +/** + * \brief replace egal handling of get_word + */ +bool handle_egal(struct lexer *lexer, char *word, unsigned word_index); + +/** + * \brief replace append char to word of get_word + */ +void append_char_to_word(struct lexer *lexer, char **word, + unsigned *word_index); + +/** + * \brief replace backslash and simple quote handling of get_word + * handle backslash & single quote + */ +char *hbsq(struct lexer *lexer, char *word, unsigned *word_index, + bool *is_diactivated); + +/** + * \brief replace if double quote handling of get_word + */ +bool check_double_quote(struct lexer *lexer); + +/** + * \brief replace spacial curr_tok type handling of get_word + */ +bool is_sub_or_var_and_double_quote(struct lexer *lexer); + #endif /* !LEXER_H */ diff --git a/src/lexer/lexer_utils.c b/src/lexer/lexer_utils.c index 2cf86283..8f6c85b0 100644 --- a/src/lexer/lexer_utils.c +++ b/src/lexer/lexer_utils.c @@ -98,15 +98,7 @@ bool check_variable_name(struct lexer *lexer, char **word, unsigned *word_index, char *curr_word = *word; *is_in_braces = false; // Handle variable in double quote - if (lexer->curr_tok.type == TOKEN_DOUBLE_QUOTE - || lexer->curr_tok.type == TOKEN_VARIABLE_AND_DOUBLE_QUOTE) - { - lexer->curr_tok.type = TOKEN_VARIABLE_AND_DOUBLE_QUOTE; - } - else - { - lexer->curr_tok.type = TOKEN_VARIABLE; - } + lexer->curr_tok.type = 
affect_curr_tok_type_var_name(lexer); // Check if it's a special variable (like $?, $*, $@, $# or $$) if (lexer->data[lexer->index] == '?' || lexer->data[lexer->index] == '*' @@ -138,38 +130,18 @@ bool check_variable_name(struct lexer *lexer, char **word, unsigned *word_index, } // Classic variable name - else if (lexer->data[lexer->index] == '_' - || lexer->data[lexer->index] == '-' - || (lexer->data[lexer->index] >= 'a' - && lexer->data[lexer->index] <= 'z') - || (lexer->data[lexer->index] >= 'A' - && lexer->data[lexer->index] <= 'Z')) + else if (elif_check_var(lexer)) { append_char_to_word(lexer, &curr_word, word_index); } // Not a valid variable name else { - if (lexer->curr_tok.type != TOKEN_VARIABLE_AND_DOUBLE_QUOTE) - { - lexer->curr_tok.type = TOKEN_WORD; - } - else - { - lexer->curr_tok.type = TOKEN_DOUBLE_QUOTE; - } - *word = curr_word; - return false; + return not_valid_check_var(lexer, word, curr_word); } // Check the rest of the variable name break - while (lexer->data[lexer->index] == '_' || lexer->data[lexer->index] == '-' - || (lexer->data[lexer->index] >= 'a' - && lexer->data[lexer->index] <= 'z') - || (lexer->data[lexer->index] >= 'A' - && lexer->data[lexer->index] <= 'Z') - || (lexer->data[lexer->index] >= '0' - && lexer->data[lexer->index] <= '9')) + while (while_check_var(lexer)) { append_char_to_word(lexer, &curr_word, word_index); } diff --git a/src/lexer/lexer_utils2.c b/src/lexer/lexer_utils2.c index 41b845a8..a86a488d 100644 --- a/src/lexer/lexer_utils2.c +++ b/src/lexer/lexer_utils2.c @@ -173,3 +173,63 @@ void handle_back_slash_in_double_quote(struct lexer *lexer, char *word, } *word_index += 1; } + +enum token_type affect_curr_tok_type_var_name(struct lexer *lexer) +{ + // Both double-quote states map to the quoted variable type + if (lexer->curr_tok.type == TOKEN_DOUBLE_QUOTE + || lexer->curr_tok.type == TOKEN_VARIABLE_AND_DOUBLE_QUOTE) + { + return TOKEN_VARIABLE_AND_DOUBLE_QUOTE; + } + + return TOKEN_VARIABLE; +} + +bool elif_check_var(struct lexer *lexer) +{ + if (lexer->data[lexer->index] == '_' || lexer->data[lexer->index] == '-' + ||
(lexer->data[lexer->index] >= 'a' + && lexer->data[lexer->index] <= 'z') + || (lexer->data[lexer->index] >= 'A' + && lexer->data[lexer->index] <= 'Z')) + { + return true; + } + else + { + return false; + } +} + +bool while_check_var(struct lexer *lexer) +{ + if (lexer->data[lexer->index] == '_' || lexer->data[lexer->index] == '-' + || (lexer->data[lexer->index] >= 'a' + && lexer->data[lexer->index] <= 'z') + || (lexer->data[lexer->index] >= 'A' + && lexer->data[lexer->index] <= 'Z') + || (lexer->data[lexer->index] >= '0' + && lexer->data[lexer->index] <= '9')) + { + return true; + } + else + { + return false; + } +} + +bool not_valid_check_var(struct lexer *lexer, char **word, char *curr_word) +{ + if (lexer->curr_tok.type != TOKEN_VARIABLE_AND_DOUBLE_QUOTE) + { + lexer->curr_tok.type = TOKEN_WORD; + } + else + { + lexer->curr_tok.type = TOKEN_DOUBLE_QUOTE; + } + *word = curr_word; + return false; +} diff --git a/src/lexer/lexer_utils3.c b/src/lexer/lexer_utils3.c new file mode 100644 index 00000000..705b49f8 --- /dev/null +++ b/src/lexer/lexer_utils3.c @@ -0,0 +1,138 @@ +#include "lexer.h" + +struct token check_special_cases(struct lexer *lexer, char *word, + struct token token) +{ + // Check if the word is a word_assignement (contains a '=') and if it's a + // variable name is valid + if (lexer->curr_tok.type == TOKEN_WORD_ASSIGNMENT + && check_variable_assignement(word)) + { + token.type = TOKEN_WORD_ASSIGNMENT; + token.data = word; + // Usefull to have the next word token + lexer->curr_tok.type = TOKEN_VARIABLE_VALUE; + return token; + } + + if (lexer->curr_tok.type == TOKEN_IONUMBER) + { + token.type = TOKEN_IONUMBER; + token.data = word; + lexer->curr_tok.type = TOKEN_EOL; + return token; + } + + // Check if the word is a variable name + if (lexer->curr_tok.type == TOKEN_VARIABLE + || lexer->curr_tok.type == TOKEN_VARIABLE_AND_DOUBLE_QUOTE) + { + token.type = TOKEN_VARIABLE; + token.data = word; + if (lexer->curr_tok.type == TOKEN_VARIABLE_AND_DOUBLE_QUOTE) + 
{ + lexer->curr_tok.type = TOKEN_DOUBLE_QUOTE; + } + else + { + lexer->curr_tok.type = TOKEN_EOL; + } + return token; + } + + if (lexer->curr_tok.type == TOKEN_SUBSTITUTION + || lexer->curr_tok.type == TOKEN_SUB_AND_DOUBLE_QUOTE) + { + token.type = TOKEN_SUBSTITUTION; + token.data = word; + lexer->curr_tok.type = TOKEN_EOL; + return token; + } + token.type = TOKEN_ERROR; + return token; +} + +bool word_separator_check(struct lexer *lexer) +{ + if (lexer->data[lexer->index] != ' ' && lexer->data[lexer->index] != '\0' + && lexer->data[lexer->index] != ';' && lexer->data[lexer->index] != '\n' + && lexer->data[lexer->index] != '\t' && lexer->data[lexer->index] != '>' + && lexer->data[lexer->index] != '<' && lexer->data[lexer->index] != '|' + && lexer->data[lexer->index] != '&' && lexer->data[lexer->index] != '(' + && lexer->data[lexer->index] != ')') + { + return true; + } + else + { + return false; + } +} + +bool handle_egal(struct lexer *lexer, char *word, unsigned word_index) +{ + if (lexer->data[lexer->index] == '=' && word_index > 0 + && lexer->curr_tok.type != TOKEN_DOUBLE_QUOTE + && lexer->curr_tok.type != TOKEN_VARIABLE_VALUE + && check_variable_assignement(word)) + { + lexer->curr_tok.type = TOKEN_WORD_ASSIGNMENT; + return true; + } + + else if (lexer->data[lexer->index] == '=' && word_index == 0 + && lexer->curr_tok.type == TOKEN_VARIABLE_VALUE) + { + lexer->index += 1; + } + + return false; +} + +char *hbsq(struct lexer *lexer, char *word, unsigned *word_index, + bool *is_diactivated) +{ + if (lexer->data[lexer->index - 1] == '\\') + { + handle_backslash(lexer, is_diactivated, word, *word_index); + } + + // Handle simple quote + else if (lexer->data[lexer->index - 1] == '\'') + { + word = handle_simple_quote(lexer, is_diactivated, word, word_index); + + // Missing closing simple quote + if (!word) + { + return NULL; + } + lexer->index += 1; + } + + return word; +} + +bool check_double_quote(struct lexer *lexer) +{ + // Handle the double quote + if 
(lexer->data[lexer->index - 1] == '\"' + || lexer->curr_tok.type == TOKEN_DOUBLE_QUOTE + || lexer->curr_tok.type == TOKEN_VARIABLE_AND_DOUBLE_QUOTE) + { + return true; + } + + return false; +} + +bool is_sub_or_var_and_double_quote(struct lexer *lexer) +{ + if (lexer->curr_tok.type == TOKEN_VARIABLE_AND_DOUBLE_QUOTE + || lexer->curr_tok.type == TOKEN_SUB_AND_DOUBLE_QUOTE) + { + return true; + } + + return false; +}