Skip to content

Commit

Permalink
Merge pull request #52 from ErwannLesech/fix_lexer_part2
Browse files Browse the repository at this point in the history
Fix lexer part2
  • Loading branch information
majerugo authored Jan 16, 2024
2 parents f3c8bb8 + 7185179 commit 1eff055
Show file tree
Hide file tree
Showing 5 changed files with 282 additions and 71 deletions.
87 changes: 77 additions & 10 deletions src/lexer/lexer.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,34 +55,39 @@ void token_free(struct token token)
char *get_word(struct lexer *lexer, bool *is_diactivated)
{
char *word = malloc(sizeof(char) * 2);

unsigned word_index = 0;

// The word starts with ';' or '\n': return that character as its own token
if (lexer->data[lexer->index] == ';' || lexer->data[lexer->index] == '\n')
{
word[0] = lexer->data[lexer->index];
word_index = 1;
++lexer->index;
if (lexer->data[lexer->index] == ' ')
{
++lexer->index;
}
}

// Handle comments and return the next word
else if (lexer->data[lexer->index] == '#')
{
word = handle_comment(lexer, word, &word_index);
}

// Handle redirections and return the token
else if (lexer->data[lexer->index] == '>'
|| lexer->data[lexer->index] == '<')
{
word = handle_redir(lexer, &word_index);
}

// Handle the operators ||, &&, | and &, and return the token
else if (lexer->data[lexer->index] == '|'
|| lexer->data[lexer->index] == '&')
{
word[0] = lexer->data[lexer->index];
word_index = 1;
++lexer->index;

// Handle ||, &&
if (lexer->data[lexer->index] == '|'
|| lexer->data[lexer->index] == '&')
{
Expand All @@ -94,6 +99,7 @@ char *get_word(struct lexer *lexer, bool *is_diactivated)
}
else
{
// Handle the word
while (lexer->data[lexer->index] != ' '
&& lexer->data[lexer->index] != '\0'
&& lexer->data[lexer->index] != ';'
Expand All @@ -104,41 +110,71 @@ char *get_word(struct lexer *lexer, bool *is_diactivated)
&& lexer->data[lexer->index] != '|'
&& lexer->data[lexer->index] != '&')
{
if (lexer->data[lexer->index] == '=' && word_index > 0 && lexer->curr_tok.type != TOKEN_DOUBLE_QUOTE)
// Handle the variable
if (lexer->data[lexer->index] == '$')
{
if (word_index != 0)
{
break;
}
if (handle_dollar(lexer, &word, &word_index))
{
word = realloc(word, sizeof(char) * (word_index + 1));
word[word_index] = '\0';
return word;
}
}
// Handle a word assignment: the word contains '=' and it is not the first character
else if (lexer->data[lexer->index] == '=' && word_index > 0 && lexer->curr_tok.type != TOKEN_DOUBLE_QUOTE && lexer->curr_tok.type != TOKEN_VARIABLE_VALUE)
{
lexer->curr_tok.type = TOKEN_WORD_ASSIGNMENT;
lexer->index += 1;
break;
}

// Take next char and put it in the word
word = realloc(word, sizeof(char) * (word_index + 1));
word[word_index] = lexer->data[lexer->index];
++word_index;
++lexer->index;

// Handle the double quote
if (lexer->data[lexer->index - 1] == '\"'
|| lexer->curr_tok.type == TOKEN_DOUBLE_QUOTE)
|| lexer->curr_tok.type == TOKEN_DOUBLE_QUOTE || lexer->curr_tok.type == TOKEN_VARIABLE_AND_DOUBLE_QUOTE)
{
// Handle the end of the double quote
if (lexer->data[lexer->index - 1] == '\"')
{
word_index -= 1;
lexer->curr_tok.type = TOKEN_DOUBLE_QUOTE;
}

// Handle the double quote
word = handle_double_quote(lexer, is_diactivated, word,
&word_index);
// Missing closing double quote
if (!word)
{
return NULL;
}
word[word_index] = '\0';
return word;
}

// Handle backslash
else if (lexer->data[lexer->index - 1] == '\\')
{
// TODO: check whether backslashes are handled inside double quotes
handle_backslash(lexer, is_diactivated, word, word_index);
}

// Handle simple quote
else if (lexer->data[lexer->index - 1] == '\'')
{
word = handle_simple_quote(lexer, is_diactivated, word,
&word_index);

// Missing closing simple quote
if (!word)
{
return NULL;
Expand All @@ -147,9 +183,12 @@ char *get_word(struct lexer *lexer, bool *is_diactivated)
}
}
}

// End of the word
word = realloc(word, sizeof(char) * (word_index + 1));
word[word_index] = '\0';

// Skip spaces and tabs
while (lexer->data[lexer->index] == ' '
|| lexer->data[lexer->index] == '\t')
{
Expand All @@ -163,31 +202,53 @@ struct token parse_input_for_tok(struct lexer *lexer)
{
struct token token;

// Useful to deactivate the special meaning of the next character when it follows a backslash
bool is_diactivated = false;

// Skip spaces and tabs at first
while (lexer->data[lexer->index] == ' '
|| lexer->data[lexer->index] == '\t')
{
++lexer->index;
}

// Return the next word
char *word = get_word(lexer, &is_diactivated);

// If word is NULL (missing closing quote), return an error token
if (!word)
{
token.type = TOKEN_ERROR;
token.data = "get_word - Missing closing quote.";
return token;
}

if (lexer->curr_tok.type == TOKEN_WORD_ASSIGNMENT)
// Check if the word is a word assignment (contains a '=') and if its variable name is valid
if (lexer->curr_tok.type == TOKEN_WORD_ASSIGNMENT && check_variable_assignement(word))
{
token.type = TOKEN_WORD_ASSIGNMENT;
token.data = word;
lexer->curr_tok.type = TOKEN_EOL;
// Useful so that the next word is lexed as the variable value
lexer->curr_tok.type = TOKEN_VARIABLE_VALUE;
return token;
}

// Check if the word is a variable name
if (lexer->curr_tok.type == TOKEN_VARIABLE || lexer->curr_tok.type == TOKEN_VARIABLE_AND_DOUBLE_QUOTE)
{
token.type = TOKEN_VARIABLE;
token.data = word;
if (lexer->curr_tok.type == TOKEN_VARIABLE_AND_DOUBLE_QUOTE)
{
lexer->curr_tok.type = TOKEN_DOUBLE_QUOTE;
}
else
{
lexer->curr_tok.type = TOKEN_EOL;
}
return token;
}

// Check if the word is in the lex_match table
for (unsigned i = 0; i < sizeof(lex_match) / sizeof(*lex_match); ++i)
{
if (fnmatch(lex_match[i].str, word, 0) == 0 && !is_diactivated)
Expand All @@ -197,7 +258,13 @@ struct token parse_input_for_tok(struct lexer *lexer)
return token;
}
}


// Check if it's a variable value
if (lexer->curr_tok.type == TOKEN_VARIABLE_VALUE)
{
lexer->curr_tok.type = TOKEN_EOL;
}
// Else it's a word
token.type = TOKEN_WORD;
token.data = word;
return token;
Expand Down
30 changes: 30 additions & 0 deletions src/lexer/lexer.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,36 @@ void handle_backslash(struct lexer *lexer, bool *is_diactivated, char *word,
char *handle_simple_quote(struct lexer *lexer, bool *is_diactivated, char *word,
unsigned *word_index);


/**
 * \brief Check whether a word is a valid variable assignment.
 *
 * parse_input_for_tok() only emits a TOKEN_WORD_ASSIGNMENT token when this
 * function returns true for the word produced by get_word().
 *
 * \param word The word to check. NOTE(review): get_word() stops before the
 *             '=' character, so this presumably holds only the name part of
 *             the assignment; confirm against the implementation.
 *
 * \return true if the word is accepted as a variable assignment, false
 *         otherwise.
 */
bool check_variable_assignement(char *word);

/**
 * \brief Check if the given word is a variable name.
 * \param lexer The lexer.
 * \param word Address of the word buffer to check. NOTE(review): passed as
 *             char **, presumably so the callee can reallocate the buffer;
 *             confirm against the implementation.
 * \param word_index Address of the current index in the word.
 *
 * \return true if the word is a variable name, false otherwise.
 */
bool check_variable_name(struct lexer *lexer, char **word, unsigned *word_index);

/**
 * \brief Handle the dollar character.
 * \param lexer The lexer.
 * \param word Address of the word buffer being built.
 * \param word_index Address of the current index in the word.
 * \return A boolean. When it is true, get_word() null-terminates the word
 *         and returns it immediately; when it is false, get_word() keeps
 *         scanning the current word.
 */
bool handle_dollar(struct lexer *lexer, char **word,
unsigned *word_index);

/**
* \brief Handle the double quote character.
* \param lexer The lexer.
Expand Down
Loading

0 comments on commit 1eff055

Please sign in to comment.