Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix lexer part2 #52

Merged
merged 4 commits into from
Jan 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 77 additions & 10 deletions src/lexer/lexer.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,34 +55,39 @@ void token_free(struct token token)
char *get_word(struct lexer *lexer, bool *is_diactivated)
{
char *word = malloc(sizeof(char) * 2);

unsigned word_index = 0;

// A word starting with ';' or '\n' is returned as its own single-character token
if (lexer->data[lexer->index] == ';' || lexer->data[lexer->index] == '\n')
{
word[0] = lexer->data[lexer->index];
word_index = 1;
++lexer->index;
if (lexer->data[lexer->index] == ' ')
{
++lexer->index;
}
}

// Handle comments return the next word
else if (lexer->data[lexer->index] == '#')
{
word = handle_comment(lexer, word, &word_index);
}

// Handle redirections return the token
else if (lexer->data[lexer->index] == '>'
|| lexer->data[lexer->index] == '<')
{
word = handle_redir(lexer, &word_index);
}

// Handle (||, &&, | and &) return the token
else if (lexer->data[lexer->index] == '|'
|| lexer->data[lexer->index] == '&')
{
word[0] = lexer->data[lexer->index];
word_index = 1;
++lexer->index;

// Handle ||, &&
if (lexer->data[lexer->index] == '|'
|| lexer->data[lexer->index] == '&')
{
Expand All @@ -94,6 +99,7 @@ char *get_word(struct lexer *lexer, bool *is_diactivated)
}
else
{
// Handle the word
while (lexer->data[lexer->index] != ' '
&& lexer->data[lexer->index] != '\0'
&& lexer->data[lexer->index] != ';'
Expand All @@ -104,41 +110,71 @@ char *get_word(struct lexer *lexer, bool *is_diactivated)
&& lexer->data[lexer->index] != '|'
&& lexer->data[lexer->index] != '&')
{
if (lexer->data[lexer->index] == '=' && word_index > 0 && lexer->curr_tok.type != TOKEN_DOUBLE_QUOTE)
// Handle the variable
if (lexer->data[lexer->index] == '$')
{
if (word_index != 0)
{
break;
}
if (handle_dollar(lexer, &word, &word_index))
{
word = realloc(word, sizeof(char) * (word_index + 1));
word[word_index] = '\0';
return word;
}
}
// Handle a word assignment: the word contains '=' and it is not the first character
else if (lexer->data[lexer->index] == '=' && word_index > 0 && lexer->curr_tok.type != TOKEN_DOUBLE_QUOTE && lexer->curr_tok.type != TOKEN_VARIABLE_VALUE)
{
lexer->curr_tok.type = TOKEN_WORD_ASSIGNMENT;
lexer->index += 1;
break;
}

// Take next char and put it in the word
word = realloc(word, sizeof(char) * (word_index + 1));
word[word_index] = lexer->data[lexer->index];
++word_index;
++lexer->index;

// Handle the double quote
if (lexer->data[lexer->index - 1] == '\"'
|| lexer->curr_tok.type == TOKEN_DOUBLE_QUOTE)
|| lexer->curr_tok.type == TOKEN_DOUBLE_QUOTE || lexer->curr_tok.type == TOKEN_VARIABLE_AND_DOUBLE_QUOTE)
{
// Handle the end of the double quote
if (lexer->data[lexer->index - 1] == '\"')
{
word_index -= 1;
lexer->curr_tok.type = TOKEN_DOUBLE_QUOTE;
}

// Handle the double quote
word = handle_double_quote(lexer, is_diactivated, word,
&word_index);
// Missing closing double quote
if (!word)
{
return NULL;
}
word[word_index] = '\0';
return word;
}

// Handle backslash
else if (lexer->data[lexer->index - 1] == '\\')
{
// TODO: check whether backslashes are handled inside double quotes
handle_backslash(lexer, is_diactivated, word, word_index);
}

// Handle simple quote
else if (lexer->data[lexer->index - 1] == '\'')
{
word = handle_simple_quote(lexer, is_diactivated, word,
&word_index);

// Missing closing simple quote
if (!word)
{
return NULL;
Expand All @@ -147,9 +183,12 @@ char *get_word(struct lexer *lexer, bool *is_diactivated)
}
}
}

// End of the word
word = realloc(word, sizeof(char) * (word_index + 1));
word[word_index] = '\0';

// Skip spaces and tabs
while (lexer->data[lexer->index] == ' '
|| lexer->data[lexer->index] == '\t')
{
Expand All @@ -163,31 +202,53 @@ struct token parse_input_for_tok(struct lexer *lexer)
{
struct token token;

// Used to deactivate the special meaning of the next character when it is preceded by a backslash
bool is_diactivated = false;

// Skip spaces and tabs at first
while (lexer->data[lexer->index] == ' '
|| lexer->data[lexer->index] == '\t')
{
++lexer->index;
}

// Return the next word
char *word = get_word(lexer, &is_diactivated);

// If word is NULL (missing closing quote), return an error token
if (!word)
{
token.type = TOKEN_ERROR;
token.data = "get_word - Missing closing quote.";
return token;
}

if (lexer->curr_tok.type == TOKEN_WORD_ASSIGNMENT)
// Check if the word is a word assignment (it contained a '=') and its variable name is valid
if (lexer->curr_tok.type == TOKEN_WORD_ASSIGNMENT && check_variable_assignement(word))
{
token.type = TOKEN_WORD_ASSIGNMENT;
token.data = word;
lexer->curr_tok.type = TOKEN_EOL;
// Mark that the next word token is the variable's value
lexer->curr_tok.type = TOKEN_VARIABLE_VALUE;
return token;
}

// Check if the word is a variable name
if (lexer->curr_tok.type == TOKEN_VARIABLE || lexer->curr_tok.type == TOKEN_VARIABLE_AND_DOUBLE_QUOTE)
{
token.type = TOKEN_VARIABLE;
token.data = word;
if (lexer->curr_tok.type == TOKEN_VARIABLE_AND_DOUBLE_QUOTE)
{
lexer->curr_tok.type = TOKEN_DOUBLE_QUOTE;
}
else
{
lexer->curr_tok.type = TOKEN_EOL;
}
return token;
}

// Check if the word matches an entry in the lex_match table
for (unsigned i = 0; i < sizeof(lex_match) / sizeof(*lex_match); ++i)
{
if (fnmatch(lex_match[i].str, word, 0) == 0 && !is_diactivated)
Expand All @@ -197,7 +258,13 @@ struct token parse_input_for_tok(struct lexer *lexer)
return token;
}
}


// Check if it's a variable value
if (lexer->curr_tok.type == TOKEN_VARIABLE_VALUE)
{
lexer->curr_tok.type = TOKEN_EOL;
}
// Else it's a word
token.type = TOKEN_WORD;
token.data = word;
return token;
Expand Down
30 changes: 30 additions & 0 deletions src/lexer/lexer.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,36 @@ void handle_backslash(struct lexer *lexer, bool *is_diactivated, char *word,
char *handle_simple_quote(struct lexer *lexer, bool *is_diactivated, char *word,
unsigned *word_index);


/**
 * \brief Check if the word is a variable assignment.
 *
 * NOTE(review): the lexer calls this on a word that was cut just before its
 * '=' character, so this presumably validates the variable-name part; confirm
 * against the implementation.
 *
 * \param word The word to check.
 *
 * \return true if the word is a variable assignment, false otherwise.
 */
bool check_variable_assignement(char *word);

/**
 * \brief Check if the given word is a variable name.
 *
 * \param lexer The lexer.
 * \param word Address of the word buffer to check.
 * \param word_index Address of the current index in the word.
 *
 * \return true if the word is a variable name, false otherwise.
 */
bool check_variable_name(struct lexer *lexer, char **word, unsigned *word_index);

/**
 * \brief Handle the dollar character.
 *
 * \param lexer The lexer.
 * \param word Address of the word buffer being built.
 * \param word_index Address of the current index in the word.
 *
 * \return true when the caller should terminate the word and return it
 * immediately (this is how get_word() uses the result); false otherwise.
 */
bool handle_dollar(struct lexer *lexer, char **word,
unsigned *word_index);

/**
* \brief Handle the double quote character.
* \param lexer The lexer.
Expand Down
Loading
Loading