Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Lexer part2 #46

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/workflows/clang_tidy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,12 @@ for file in $(find "$root_dir/src" -type f -name '*.c'); do

if [[ "$parameters" -gt 4 ]]; then
echo "Too many parameters in function: $function_name"
echo "Parameters: $parameters"
fi

if [[ "$lines_in_function" -gt 40 ]]; then
echo "Too many lines in function: $function_name"
echo "Lines in function: $lines_in_function"
fi

# echo "Function: $function_name"
Expand All @@ -40,5 +42,6 @@ for file in $(find "$root_dir/src" -type f -name '*.c'); do
# echo "Total functions: $function_count"
if [[ "$function_count" -gt 10 ]]; then
echo "Too many functions in file: $file"
echo "Total functions: $function_count"
fi
done
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ config.status

*.diff

*.gv
*.png

# Doxygen
Doxyfile.bak
html
Expand Down
7 changes: 4 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,10 @@
## Installation

```sh
git clone <path>
cd <path>
make
git clone git@github.com:ErwannLesech/42-Sh.git
cd 42-Sh
./42-install.sh
cd src/
```

## Usage
Expand Down
10 changes: 8 additions & 2 deletions src/ast/ast.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,10 @@
#ifndef AST_H
#define AST_H

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

#include "../options/options.h"

/**
* \enum ast_type
* \brief Enumerate the different types of AST nodes.
Expand Down Expand Up @@ -67,6 +66,13 @@ void ast_free(struct ast_node *node);
*/
struct ast_node *ast_node_word(char *value);

/**
* \brief Convert an AST type to a string.
* \param type The type to convert.
* \return The AST type string.
*/
char *ast_type_to_string(enum ast_type type);

/**
* \brief Create a new AST node of type AST_SIMPLE_COMMAND.
* \param value The value of the node.
Expand Down
2 changes: 1 addition & 1 deletion src/lexer/Makefile.am
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
lib_LIBRARIES = liblexer.a

liblexer_a_SOURCES = lexer.c lexer.h token.h
liblexer_a_SOURCES = lexer.c lexer_utils.c lexer.h token.h
liblexer_a_CFLAGS = -Wall -Wextra -Werror -std=c99 -pedantic
liblexer_a_CPPFLAGS = -I$(top_srcdir)
209 changes: 97 additions & 112 deletions src/lexer/lexer.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,21 @@
#include "lexer.h"

#include <err.h>
#include <fnmatch.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct lex_match lex_match[8] = {
{ "if", TOKEN_IF }, { "then", TOKEN_THEN }, { "elif", TOKEN_ELIF },
{ "else", TOKEN_ELSE }, { "fi", TOKEN_FI }, { ";", TOKEN_SEMICOLON },
{ "\n", TOKEN_EOL }, { "\0", TOKEN_EOF }
struct lex_match lex_match[] = {
{ "if", TOKEN_IF }, { "then", TOKEN_THEN }, { "elif", TOKEN_ELIF },
{ "else", TOKEN_ELSE }, { "fi", TOKEN_FI }, { ";", TOKEN_SEMICOLON },
{ "\n", TOKEN_EOL }, { "\0", TOKEN_EOF },

{ "&&", TOKEN_AND }, { "||", TOKEN_OR }, { "|", TOKEN_PIPE },
{ "!", TOKEN_NEGATE }, { "<", TOKEN_REDIR }, { ">", TOKEN_REDIR },
{ ">>", TOKEN_REDIR }, { "<&", TOKEN_REDIR }, { ">&", TOKEN_REDIR },
{ ">|", TOKEN_REDIR }, { "<>", TOKEN_REDIR }
};

struct lexer *lexer_new(const char *input)
Expand Down Expand Up @@ -45,140 +51,107 @@ void token_free(struct token token)
free(token.data);
}

/**
* \brief Handle the backslash character.
*
* \return false if it's the end of the string, true otherwise.
*/
bool handle_backslash(struct lexer *lexer, bool *is_diactivated, char *word,
unsigned word_index)
char *get_word(struct lexer *lexer, bool *is_diactivated)
{
*is_diactivated = true;
if (lexer->data[lexer->index] != '\0')
{
word[word_index - 1] = lexer->data[lexer->index];
++lexer->index;
}
else
{
word[word_index - 1] = '\0';
return false;
}

return true;
}
char *word = malloc(sizeof(char) * 2);
unsigned word_index = 0;

/**
* \brief Handle the simple quote character.
*
* \return NULL (after freeing word) if a closing quote was not found, the
* (possibly reallocated) word buffer otherwise.
*/
char *handle_simple_quote(struct lexer *lexer, bool *is_diactivated, char *word,
unsigned *word_index)
{
*is_diactivated = true;
*word_index -= 1;
while (lexer->data[lexer->index] != '\'')
if (lexer->data[lexer->index] == ';' || lexer->data[lexer->index] == '\n')
{
if (lexer->data[lexer->index] == '\0')
word[0] = lexer->data[lexer->index];
word_index = 1;
++lexer->index;
if (lexer->data[lexer->index] == ' ')
{
free(word);
word = NULL;
return NULL;
++lexer->index;
}
word = realloc(word, sizeof(char) * (*word_index + 1));
word[*word_index] = lexer->data[lexer->index];
*word_index += 1;
lexer->index += 1;
}

return word;
}

char *handle_comment(struct lexer *lexer, char *word, unsigned word_index)
{
// Skip the comment
++lexer->index;

// Find the end of the comment
while (lexer->data[lexer->index] != '\n'
&& lexer->data[lexer->index] != '\0')
{
++lexer->index;
}
word[word_index] = lexer->data[lexer->index];
++lexer->index;
// If the comment isn't the last thing in the string, we need to add a '\0'
// at the end of the word.
if (word[word_index] != '\0')
{
word[word_index + 1] = '\0';
}

// Skip the spaces after the comment
while (lexer->data[lexer->index] == ' ')
else if (lexer->data[lexer->index] == '#')
{
++lexer->index;
word = handle_comment(lexer, word, &word_index);
}
return word;
}

char *get_word(struct lexer *lexer, bool *is_diactivated)
{
char *word = malloc(sizeof(char) * 2);
unsigned word_index = 0;
if (lexer->data[lexer->index] == '\0')
else if (lexer->data[lexer->index] == '>'
|| lexer->data[lexer->index] == '<')
{
++lexer->index;
word[0] = '\0';
return word;
word = handle_redir(lexer, &word_index);
}
if (lexer->data[lexer->index] == ';' || lexer->data[lexer->index] == '\n')
else if (lexer->data[lexer->index] == '|'
|| lexer->data[lexer->index] == '&')
{
word[0] = lexer->data[lexer->index];
word[1] = '\0';
word_index = 1;
++lexer->index;
if (lexer->data[lexer->index] == ' ')

if (lexer->data[lexer->index] == '|'
|| lexer->data[lexer->index] == '&')
{
word = realloc(word, sizeof(char) * (word_index + 1));
word[word_index] = lexer->data[lexer->index];
word_index = 2;
++lexer->index;
}
return word;
}
if (lexer->data[lexer->index] == '#')
{
return handle_comment(lexer, word, 0);
}
while (lexer->data[lexer->index] != ' ' && lexer->data[lexer->index] != '\0'
&& lexer->data[lexer->index] != ';'
&& lexer->data[lexer->index] != '\n'
&& lexer->data[lexer->index] != '\t')
else
{
word = realloc(word, sizeof(char) * (word_index + 1));
word[word_index] = lexer->data[lexer->index];
++word_index;
++lexer->index;
if (lexer->data[lexer->index - 1] == '\\')
while (lexer->data[lexer->index] != ' '
&& lexer->data[lexer->index] != '\0'
&& lexer->data[lexer->index] != ';'
&& lexer->data[lexer->index] != '\n'
&& lexer->data[lexer->index] != '\t'
&& lexer->data[lexer->index] != '>'
&& lexer->data[lexer->index] != '<'
&& lexer->data[lexer->index] != '|'
&& lexer->data[lexer->index] != '&')
{
if (!handle_backslash(lexer, is_diactivated, word, word_index))
if (lexer->data[lexer->index] == '=' && word_index > 0 && lexer->curr_tok.type != TOKEN_DOUBLE_QUOTE)
{
printf("word1: %s\n", word);
lexer->curr_tok.type = TOKEN_WORD_ASSIGNMENT;
lexer->index += 1;
break;
}
word = realloc(word, sizeof(char) * (word_index + 1));
word[word_index] = lexer->data[lexer->index];
++word_index;
++lexer->index;
if (lexer->data[lexer->index - 1] == '\"'
|| lexer->curr_tok.type == TOKEN_DOUBLE_QUOTE)
{
if (lexer->data[lexer->index - 1] == '\"')
{
word_index -= 1;
lexer->curr_tok.type = TOKEN_DOUBLE_QUOTE;
}
word = handle_double_quote(lexer, is_diactivated, word,
&word_index);
if (!word)
{
return NULL;
}
word[word_index] = '\0';
return word;
}
}
else if (lexer->data[lexer->index - 1] == '\'')
{
word =
handle_simple_quote(lexer, is_diactivated, word, &word_index);
if (!word)
else if (lexer->data[lexer->index - 1] == '\\')
{
handle_backslash(lexer, is_diactivated, word, word_index);
}
else if (lexer->data[lexer->index - 1] == '\'')
{
return NULL;
word = handle_simple_quote(lexer, is_diactivated, word,
&word_index);
if (!word)
{
return NULL;
}
lexer->index += 1;
}
lexer->index += 1;
}
}
word = realloc(word, sizeof(char) * (word_index + 1));
word[word_index] = '\0';

while (lexer->data[lexer->index] == ' ')
while (lexer->data[lexer->index] == ' '
|| lexer->data[lexer->index] == '\t')
{
++lexer->index;
}
Expand Down Expand Up @@ -207,16 +180,25 @@ struct token parse_input_for_tok(struct lexer *lexer)
return token;
}

if (lexer->curr_tok.type == TOKEN_WORD_ASSIGNMENT)
{
token.type = TOKEN_WORD_ASSIGNMENT;
token.data = word;
lexer->curr_tok.type = TOKEN_EOL;
printf("word: %s\n", word);
return token;
}

for (unsigned i = 0; i < sizeof(lex_match) / sizeof(*lex_match); ++i)
{
if (!strcmp(word, lex_match[i].str) && !is_diactivated)
if (fnmatch(lex_match[i].str, word, 0) == 0 && !is_diactivated)
{
token.type = lex_match[i].type;
token.data = word;
return token;
}
}

token.type = TOKEN_WORD;
token.data = word;
return token;
Expand Down Expand Up @@ -247,6 +229,9 @@ struct token lexer_pop(struct lexer *lexer)
return token;
}
struct token token = parse_input_for_tok(lexer);
lexer->curr_tok = token;
if (token.type == TOKEN_EOF)
{
lexer->curr_tok = token;
}
return token;
}
Loading
Loading