Skip to content

Commit

Permalink
feat<lexer>: add new tokens except double quote and variable
Browse files Browse the repository at this point in the history
  • Loading branch information
ErwannLesech committed Jan 15, 2024
1 parent f290017 commit daae1d4
Show file tree
Hide file tree
Showing 7 changed files with 297 additions and 104 deletions.
2 changes: 1 addition & 1 deletion src/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ bin_PROGRAMS = 42sh

42sh_CPPFLAGS = -I%D%

42sh_CFLAGS = -std=c99 -Werror -Wall -Wextra -Wvla -pedantic
42sh_CFLAGS = -std=c99 -Werror -Wall -Wextra -Wvla -pedantic -fsanitize=address -g

42sh_LDADD = lexer/liblexer.a \
ast/libast.a \
Expand Down
122 changes: 67 additions & 55 deletions src/lexer/lexer.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,32 +15,18 @@
#include <stdlib.h>
#include <string.h>

/*
 * Table associating lexeme strings with token types (25 entries).
 * Each entry is a { string, token type } pair.
 * NOTE(review): the "[0-9]*" prefixes look like patterns for an optional
 * leading file-descriptor number rather than literal text — confirm against
 * the code that consumes this table.
 * NOTE(review): "[0-9]*>&" and "[0-9]*<>" both map to TOKEN_DUP_INPUT_OUTPUT —
 * confirm this is intentional.
 */
struct lex_match lex_match[25] = { { "if", TOKEN_IF },
{ "then", TOKEN_THEN },
{ "elif", TOKEN_ELIF },
{ "else", TOKEN_ELSE },
{ "fi", TOKEN_FI },
{ ";", TOKEN_SEMICOLON },
{ "\n", TOKEN_EOL },
{ "\0", TOKEN_EOF },

{ "&&", TOKEN_AND },
{ "||", TOKEN_OR },
{ "|", TOKEN_PIPE },
{ "!", TOKEN_NEGATE },
{ "[0-9]*<", TOKEN_INPUT_REDIR },
{ "[0-9]*>", TOKEN_OUTPUT_REDIR },
{ "[0-9]*>>", TOKEN_APPEND },
{ "[0-9]*<&", TOKEN_DUP_INPUT },
{ "[0-9]*>&", TOKEN_DUP_INPUT_OUTPUT },
{ "[0-9]*>|", TOKEN_NOCLOBBER },
{ "[0-9]*<>", TOKEN_DUP_INPUT_OUTPUT },
{ "while", TOKEN_WHILE },
{ "until", TOKEN_UNTIL },
{ "for", TOKEN_FOR },
{ "do", TOKEN_DO },
{ "done", TOKEN_DONE },
{ "$*", TOKEN_VARIABLE } };
/*
 * Lexeme table: associates each string with a token type.
 * The array is unsized, so its length follows the initializer.
 */
struct lex_match lex_match[] = {
    /* Keywords of the `if` construct */
    { "if", TOKEN_IF },
    { "then", TOKEN_THEN },
    { "elif", TOKEN_ELIF },
    { "else", TOKEN_ELSE },
    { "fi", TOKEN_FI },

    /* Separators and end of input */
    { ";", TOKEN_SEMICOLON },
    { "\n", TOKEN_EOL },
    { "\0", TOKEN_EOF },

    /* Logical operators, pipe and negation */
    { "&&", TOKEN_AND },
    { "||", TOKEN_OR },
    { "|", TOKEN_PIPE },
    { "!", TOKEN_NEGATE },

    /* Redirection operators: every form maps to TOKEN_REDIR */
    { "<", TOKEN_REDIR },
    { ">", TOKEN_REDIR },
    { ">>", TOKEN_REDIR },
    { "<&", TOKEN_REDIR },
    { ">&", TOKEN_REDIR },
    { ">|", TOKEN_REDIR },
    { "<>", TOKEN_REDIR },

    /* Variable */
    { "$*", TOKEN_VARIABLE },
};

struct lexer *lexer_new(const char *input)
{
Expand Down Expand Up @@ -71,52 +57,75 @@ char *get_word(struct lexer *lexer, bool *is_diactivated)
{
char *word = malloc(sizeof(char) * 2);
unsigned word_index = 0;
if (lexer->data[lexer->index] == '\0')
{
++lexer->index;
word[0] = '\0';
return word;
}

if (lexer->data[lexer->index] == ';' || lexer->data[lexer->index] == '\n')
{
word[0] = lexer->data[lexer->index];
word[1] = '\0';
word_index = 1;
++lexer->index;
if (lexer->data[lexer->index] == ' ')
{
++lexer->index;
}
return word;
}
if (lexer->data[lexer->index] == '#')
else if (lexer->data[lexer->index] == '#')
{
word = handle_comment(lexer, word, &word_index);
}
else if (lexer->data[lexer->index] == '>'
|| lexer->data[lexer->index] == '<')
{
return handle_comment(lexer, word, 0);
word = handle_redir(lexer, &word_index);
}
while (lexer->data[lexer->index] != ' ' && lexer->data[lexer->index] != '\0'
&& lexer->data[lexer->index] != ';'
&& lexer->data[lexer->index] != '\n'
&& lexer->data[lexer->index] != '\t')
else if (lexer->data[lexer->index] == '|'
|| lexer->data[lexer->index] == '&')
{
word = realloc(word, sizeof(char) * (word_index + 1));
word[word_index] = lexer->data[lexer->index];
++word_index;
word[0] = lexer->data[lexer->index];
word_index = 1;
++lexer->index;
if (lexer->data[lexer->index - 1] == '\\')

if (lexer->data[lexer->index] == '|'
|| lexer->data[lexer->index] == '&')
{
if (!handle_backslash(lexer, is_diactivated, word, word_index))
{
return word;
}
word = realloc(word, sizeof(char) * (word_index + 1));
word[word_index] = lexer->data[lexer->index];
word_index = 2;
++lexer->index;
}
else if (lexer->data[lexer->index - 1] == '\'')
}
else
{
while (lexer->data[lexer->index] != ' '
&& lexer->data[lexer->index] != '\0'
&& lexer->data[lexer->index] != ';'
&& lexer->data[lexer->index] != '\n'
&& lexer->data[lexer->index] != '\t'
&& lexer->data[lexer->index] != '>'
&& lexer->data[lexer->index] != '<'
&& lexer->data[lexer->index] != '|'
&& lexer->data[lexer->index] != '&')
{
word =
handle_simple_quote(lexer, is_diactivated, word, &word_index);
if (!word)
word = realloc(word, sizeof(char) * (word_index + 1));
word[word_index] = lexer->data[lexer->index];
++word_index;
++lexer->index;
if (lexer->data[lexer->index - 1] == '\\')
{
if (!handle_backslash(lexer, is_diactivated, word, word_index))
{
return word;
}
}
else if (lexer->data[lexer->index - 1] == '\'')
{
return NULL;
word = handle_simple_quote(lexer, is_diactivated, word,
&word_index);
if (!word)
{
return NULL;
}
lexer->index += 1;
}
lexer->index += 1;
}
}
word = realloc(word, sizeof(char) * (word_index + 1));
Expand Down Expand Up @@ -192,6 +201,9 @@ struct token lexer_pop(struct lexer *lexer)
return token;
}
struct token token = parse_input_for_tok(lexer);
lexer->curr_tok = token;
if (token.type != TOKEN_EOF)
{
lexer->curr_tok = token;
}
return token;
}
9 changes: 8 additions & 1 deletion src/lexer/lexer.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,14 @@ char *handle_simple_quote(struct lexer *lexer, bool *is_diactivated, char *word,
*
* \return The next word.
*/
char *handle_comment(struct lexer *lexer, char *word, unsigned word_index);
char *handle_comment(struct lexer *lexer, char *word, unsigned *word_index);

/**
 * \brief Lex a redirection operator at the lexer's current position.
 *
 * Copies the character at the current index, advances past it, and also
 * consumes the following character when it is '>', '&' or '|'.
 *
 * \param lexer The lexer; its index is advanced past the consumed characters.
 * \param word_index Incremented by one for the first character and by one
 * more when a second operator character follows.
 * \return A heap-allocated buffer holding the operator characters (the caller
 * takes ownership and must free it), or NULL when the input is the invalid
 * sequence "<|".
 */
char *handle_redir(struct lexer *lexer, unsigned *word_index);

/**
* \brief Returns the next word in the input string.
Expand Down
36 changes: 29 additions & 7 deletions src/lexer/lexer_utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ char *handle_simple_quote(struct lexer *lexer, bool *is_diactivated, char *word,
return word;
}

char *handle_comment(struct lexer *lexer, char *word, unsigned word_index)
char *handle_comment(struct lexer *lexer, char *word, unsigned *word_index)
{
// Skip the comment
++lexer->index;
Expand All @@ -53,13 +53,11 @@ char *handle_comment(struct lexer *lexer, char *word, unsigned word_index)
{
++lexer->index;
}
word[word_index] = lexer->data[lexer->index];
++lexer->index;
// If the comment isn't the last thing in the string, we need to add a '\0'
// at the end of the word.
if (word[word_index] != '\0')
word[*word_index] = lexer->data[lexer->index];
*word_index += 1;
if (lexer->data[lexer->index] != '\0')
{
word[word_index + 1] = '\0';
++lexer->index;
}

// Skip the spaces after the comment
Expand All @@ -68,4 +66,28 @@ char *handle_comment(struct lexer *lexer, char *word, unsigned word_index)
++lexer->index;
}
return word;
}

/**
 * \brief Lex a redirection operator at the lexer's current position.
 *
 * The character at the current index is taken as the first operator
 * character. If the next character is '>', '&' or '|', it is consumed as the
 * second operator character, except for the sequence "<|", which is rejected.
 *
 * \param lexer The lexer; its index is advanced past every consumed character.
 * \param word_index Incremented by one for the first character and by one
 * more when a second operator character follows (including the rejected "<|"
 * case, where the lexer itself has only advanced past '<').
 * \return A heap-allocated, NUL-terminated buffer holding the operator; the
 * caller takes ownership and must free it. NULL on the invalid sequence "<|"
 * or on allocation failure (in which case neither the lexer nor *word_index
 * is modified).
 */
char *handle_redir(struct lexer *lexer, unsigned *word_index)
{
    // Longest operator is two characters, plus the terminating '\0'.
    enum
    {
        REDIR_BUF_SIZE = 3
    };

    // One zeroed allocation up front: no realloc to fail later and the
    // result is always NUL-terminated, whatever its length.
    char *redir = calloc(REDIR_BUF_SIZE, sizeof(char));
    if (!redir)
    {
        return NULL;
    }

    const char first = lexer->data[lexer->index];
    redir[0] = first;
    ++lexer->index;
    *word_index += 1;

    const char second = lexer->data[lexer->index];
    if (second == '>' || second == '&' || second == '|')
    {
        *word_index += 1;

        // "<|" is not a redirection operator: reject before storing it.
        if (first == '<' && second == '|')
        {
            free(redir);
            return NULL;
        }

        redir[1] = second;
        ++lexer->index;
    }
    return redir;
}
Loading

0 comments on commit daae1d4

Please sign in to comment.