Skip to content

Commit

Permalink
Merge pull request #47 from ErwannLesech/lexer_part2
Browse files Browse the repository at this point in the history
Lexer part2
  • Loading branch information
Nimu93 authored Jan 15, 2024
2 parents e9a0f98 + 475c4e7 commit 097bd32
Show file tree
Hide file tree
Showing 15 changed files with 1,051 additions and 143 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/clang_tidy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,12 @@ for file in $(find "$root_dir/src" -type f -name '*.c'); do

if [[ "$parameters" -gt 4 ]]; then
echo "Too many parameters in function: $function_name"
echo "Parameters: $parameters"
fi

if [[ "$lines_in_function" -gt 40 ]]; then
echo "Too many lines in function: $function_name"
echo "Lines in function: $lines_in_function"
fi

# echo "Function: $function_name"
Expand All @@ -40,5 +42,6 @@ for file in $(find "$root_dir/src" -type f -name '*.c'); do
# echo "Total functions: $function_count"
if [[ "$function_count" -gt 10 ]]; then
echo "Too many functions in file: $file"
echo "Total functions: $function_count"
fi
done
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ config.status

*.diff

*.gv
*.png

# Doxygen
Doxyfile.bak
html
Expand Down
7 changes: 4 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,10 @@
## Installation

```sh
git clone <path>
cd <path>
make
git clone git@github.com:ErwannLesech/42-Sh.git
cd 42-Sh
./42-install.sh
cd src/
```

## Usage
Expand Down
10 changes: 8 additions & 2 deletions src/ast/ast.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,10 @@
#ifndef AST_H
#define AST_H

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

#include "../options/options.h"

/**
* \enum ast_type
* \brief Enumerate the different types of AST nodes.
Expand Down Expand Up @@ -67,6 +66,13 @@ void ast_free(struct ast_node *node);
*/
struct ast_node *ast_node_word(char *value);

/**
* \brief Convert an AST type to a string.
* \param type The type to convert.
* \return The AST type string.
*         NOTE(review): ownership of the returned pointer is not visible in
*         this header — confirm whether callers may free or modify it.
*/
char *ast_type_to_string(enum ast_type type);

/**
* \brief Create a new AST node of type AST_SIMPLE_COMMAND.
* \param value The value of the node.
Expand Down
2 changes: 1 addition & 1 deletion src/lexer/Makefile.am
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
lib_LIBRARIES = liblexer.a

liblexer_a_SOURCES = lexer.c lexer.h token.h
liblexer_a_SOURCES = lexer.c lexer_utils.c lexer.h token.h
liblexer_a_CFLAGS = -Wall -Wextra -Werror -std=c99 -pedantic
liblexer_a_CPPFLAGS = -I$(top_srcdir)
209 changes: 97 additions & 112 deletions src/lexer/lexer.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,21 @@
#include "lexer.h"

#include <err.h>
#include <fnmatch.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct lex_match lex_match[8] = {
{ "if", TOKEN_IF }, { "then", TOKEN_THEN }, { "elif", TOKEN_ELIF },
{ "else", TOKEN_ELSE }, { "fi", TOKEN_FI }, { ";", TOKEN_SEMICOLON },
{ "\n", TOKEN_EOL }, { "\0", TOKEN_EOF }
struct lex_match lex_match[] = {
{ "if", TOKEN_IF }, { "then", TOKEN_THEN }, { "elif", TOKEN_ELIF },
{ "else", TOKEN_ELSE }, { "fi", TOKEN_FI }, { ";", TOKEN_SEMICOLON },
{ "\n", TOKEN_EOL }, { "\0", TOKEN_EOF },

{ "&&", TOKEN_AND }, { "||", TOKEN_OR }, { "|", TOKEN_PIPE },
{ "!", TOKEN_NEGATE }, { "<", TOKEN_REDIR }, { ">", TOKEN_REDIR },
{ ">>", TOKEN_REDIR }, { "<&", TOKEN_REDIR }, { ">&", TOKEN_REDIR },
{ ">|", TOKEN_REDIR }, { "<>", TOKEN_REDIR }
};

struct lexer *lexer_new(const char *input)
Expand Down Expand Up @@ -45,140 +51,107 @@ void token_free(struct token token)
free(token.data);
}

/**
* \brief Handle the backslash character.
*
* \return false if it's the end of the string, true otherwise.
*/
bool handle_backslash(struct lexer *lexer, bool *is_diactivated, char *word,
unsigned word_index)
char *get_word(struct lexer *lexer, bool *is_diactivated)
{
*is_diactivated = true;
if (lexer->data[lexer->index] != '\0')
{
word[word_index - 1] = lexer->data[lexer->index];
++lexer->index;
}
else
{
word[word_index - 1] = '\0';
return false;
}

return true;
}
char *word = malloc(sizeof(char) * 2);
unsigned word_index = 0;

/**
* \brief Handle the simple quote character.
*
* \return NULL if a closing quote was not found, the updated word otherwise.
*/
char *handle_simple_quote(struct lexer *lexer, bool *is_diactivated, char *word,
unsigned *word_index)
{
*is_diactivated = true;
*word_index -= 1;
while (lexer->data[lexer->index] != '\'')
if (lexer->data[lexer->index] == ';' || lexer->data[lexer->index] == '\n')
{
if (lexer->data[lexer->index] == '\0')
word[0] = lexer->data[lexer->index];
word_index = 1;
++lexer->index;
if (lexer->data[lexer->index] == ' ')
{
free(word);
word = NULL;
return NULL;
++lexer->index;
}
word = realloc(word, sizeof(char) * (*word_index + 1));
word[*word_index] = lexer->data[lexer->index];
*word_index += 1;
lexer->index += 1;
}

return word;
}

char *handle_comment(struct lexer *lexer, char *word, unsigned word_index)
{
// Skip the comment
++lexer->index;

// Find the end of the comment
while (lexer->data[lexer->index] != '\n'
&& lexer->data[lexer->index] != '\0')
{
++lexer->index;
}
word[word_index] = lexer->data[lexer->index];
++lexer->index;
// If the comment isn't the last thing in the string, we need to add a '\0'
// at the end of the word.
if (word[word_index] != '\0')
{
word[word_index + 1] = '\0';
}

// Skip the spaces after the comment
while (lexer->data[lexer->index] == ' ')
else if (lexer->data[lexer->index] == '#')
{
++lexer->index;
word = handle_comment(lexer, word, &word_index);
}
return word;
}

char *get_word(struct lexer *lexer, bool *is_diactivated)
{
char *word = malloc(sizeof(char) * 2);
unsigned word_index = 0;
if (lexer->data[lexer->index] == '\0')
else if (lexer->data[lexer->index] == '>'
|| lexer->data[lexer->index] == '<')
{
++lexer->index;
word[0] = '\0';
return word;
word = handle_redir(lexer, &word_index);
}
if (lexer->data[lexer->index] == ';' || lexer->data[lexer->index] == '\n')
else if (lexer->data[lexer->index] == '|'
|| lexer->data[lexer->index] == '&')
{
word[0] = lexer->data[lexer->index];
word[1] = '\0';
word_index = 1;
++lexer->index;
if (lexer->data[lexer->index] == ' ')

if (lexer->data[lexer->index] == '|'
|| lexer->data[lexer->index] == '&')
{
word = realloc(word, sizeof(char) * (word_index + 1));
word[word_index] = lexer->data[lexer->index];
word_index = 2;
++lexer->index;
}
return word;
}
if (lexer->data[lexer->index] == '#')
{
return handle_comment(lexer, word, 0);
}
while (lexer->data[lexer->index] != ' ' && lexer->data[lexer->index] != '\0'
&& lexer->data[lexer->index] != ';'
&& lexer->data[lexer->index] != '\n'
&& lexer->data[lexer->index] != '\t')
else
{
word = realloc(word, sizeof(char) * (word_index + 1));
word[word_index] = lexer->data[lexer->index];
++word_index;
++lexer->index;
if (lexer->data[lexer->index - 1] == '\\')
while (lexer->data[lexer->index] != ' '
&& lexer->data[lexer->index] != '\0'
&& lexer->data[lexer->index] != ';'
&& lexer->data[lexer->index] != '\n'
&& lexer->data[lexer->index] != '\t'
&& lexer->data[lexer->index] != '>'
&& lexer->data[lexer->index] != '<'
&& lexer->data[lexer->index] != '|'
&& lexer->data[lexer->index] != '&')
{
if (!handle_backslash(lexer, is_diactivated, word, word_index))
if (lexer->data[lexer->index] == '=' && word_index > 0 && lexer->curr_tok.type != TOKEN_DOUBLE_QUOTE)
{
printf("word1: %s\n", word);
lexer->curr_tok.type = TOKEN_WORD_ASSIGNMENT;
lexer->index += 1;
break;
}
word = realloc(word, sizeof(char) * (word_index + 1));
word[word_index] = lexer->data[lexer->index];
++word_index;
++lexer->index;
if (lexer->data[lexer->index - 1] == '\"'
|| lexer->curr_tok.type == TOKEN_DOUBLE_QUOTE)
{
if (lexer->data[lexer->index - 1] == '\"')
{
word_index -= 1;
lexer->curr_tok.type = TOKEN_DOUBLE_QUOTE;
}
word = handle_double_quote(lexer, is_diactivated, word,
&word_index);
if (!word)
{
return NULL;
}
word[word_index] = '\0';
return word;
}
}
else if (lexer->data[lexer->index - 1] == '\'')
{
word =
handle_simple_quote(lexer, is_diactivated, word, &word_index);
if (!word)
else if (lexer->data[lexer->index - 1] == '\\')
{
handle_backslash(lexer, is_diactivated, word, word_index);
}
else if (lexer->data[lexer->index - 1] == '\'')
{
return NULL;
word = handle_simple_quote(lexer, is_diactivated, word,
&word_index);
if (!word)
{
return NULL;
}
lexer->index += 1;
}
lexer->index += 1;
}
}
word = realloc(word, sizeof(char) * (word_index + 1));
word[word_index] = '\0';

while (lexer->data[lexer->index] == ' ')
while (lexer->data[lexer->index] == ' '
|| lexer->data[lexer->index] == '\t')
{
++lexer->index;
}
Expand Down Expand Up @@ -207,16 +180,25 @@ struct token parse_input_for_tok(struct lexer *lexer)
return token;
}

if (lexer->curr_tok.type == TOKEN_WORD_ASSIGNMENT)
{
token.type = TOKEN_WORD_ASSIGNMENT;
token.data = word;
lexer->curr_tok.type = TOKEN_EOL;
printf("word: %s\n", word);
return token;
}

for (unsigned i = 0; i < sizeof(lex_match) / sizeof(*lex_match); ++i)
{
if (!strcmp(word, lex_match[i].str) && !is_diactivated)
if (fnmatch(lex_match[i].str, word, 0) == 0 && !is_diactivated)
{
token.type = lex_match[i].type;
token.data = word;
return token;
}
}

token.type = TOKEN_WORD;
token.data = word;
return token;
Expand Down Expand Up @@ -247,6 +229,9 @@ struct token lexer_pop(struct lexer *lexer)
return token;
}
struct token token = parse_input_for_tok(lexer);
lexer->curr_tok = token;
if (token.type == TOKEN_EOF)
{
lexer->curr_tok = token;
}
return token;
}
Loading

0 comments on commit 097bd32

Please sign in to comment.