Skip to content

Commit

Permalink
Merge pull request #24 from Ghost8345/docs
Browse files Browse the repository at this point in the history
Docs
  • Loading branch information
Ghost8345 authored Jan 23, 2024
2 parents 2d8cd79 + 8aaedda commit 986f99c
Show file tree
Hide file tree
Showing 18 changed files with 235 additions and 6 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ project(Compiler)

set(CMAKE_CXX_STANDARD 20)

add_executable(Compiler main.cpp)
add_executable(Compiler driver.cpp)

target_link_libraries(Compiler Util_lib)
target_link_libraries(Compiler RulesParser_lib)
Expand Down
File renamed without changes.
1 change: 0 additions & 1 deletion GrammarTest.txt → Inputs/GrammarTest.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,3 @@
# F` ::= '*' F | '\\L'
# P ::= '(' E ')' | 'a' | 'b' | 'Em'

pukk
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
18 changes: 18 additions & 0 deletions Outputs/parsingTable.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
,$,(,),+,-,;,addop,float,id,if,int,mulop,num,relop,while,},
STATEMENT_LIST,sync,N/A,N/A,N/A,N/A,N/A,N/A,--> STATEMENT STATEMENT_LIST` ,--> STATEMENT STATEMENT_LIST` ,--> STATEMENT STATEMENT_LIST` ,--> STATEMENT STATEMENT_LIST` ,N/A,N/A,N/A,--> STATEMENT STATEMENT_LIST` ,N/A,
WHILE,sync,N/A,N/A,N/A,N/A,N/A,N/A,sync,sync,sync,sync,N/A,N/A,N/A,--> while ( EXPRESSION ) { STATEMENT } ,sync,
EXPRESSION1,N/A,N/A,epsilon,N/A,N/A,epsilon,N/A,N/A,N/A,N/A,N/A,N/A,N/A,--> relop SIMPLE_EXPRESSION ,N/A,N/A,
TERM,N/A,--> FACTOR TERM` ,sync,N/A,N/A,sync,sync,N/A,--> FACTOR TERM` ,N/A,N/A,N/A,--> FACTOR TERM` ,sync,N/A,N/A,
DECLARATION,sync,N/A,N/A,N/A,N/A,N/A,N/A,--> PRIMITIVE_TYPE id ; ,sync,sync,--> PRIMITIVE_TYPE id ; ,N/A,N/A,N/A,sync,sync,
IF,sync,N/A,N/A,N/A,N/A,N/A,N/A,sync,sync,--> if ( EXPRESSION ) { STATEMENT } else { STATEMENT } ,sync,N/A,N/A,N/A,sync,sync,
STATEMENT_LIST`,epsilon,N/A,N/A,N/A,N/A,N/A,N/A,--> STATEMENT STATEMENT_LIST` ,--> STATEMENT STATEMENT_LIST` ,--> STATEMENT STATEMENT_LIST` ,--> STATEMENT STATEMENT_LIST` ,N/A,N/A,N/A,--> STATEMENT STATEMENT_LIST` ,N/A,
SIMPLE_EXPRESSION,N/A,--> TERM SIMPLE_EXPRESSION` ,sync,--> SIGN TERM SIMPLE_EXPRESSION` ,--> SIGN TERM SIMPLE_EXPRESSION` ,sync,N/A,N/A,--> TERM SIMPLE_EXPRESSION` ,N/A,N/A,N/A,--> TERM SIMPLE_EXPRESSION` ,sync,N/A,N/A,
TERM`,N/A,N/A,epsilon,N/A,N/A,epsilon,epsilon,N/A,N/A,N/A,N/A,--> mulop FACTOR TERM` ,N/A,epsilon,N/A,N/A,
EXPRESSION,N/A,--> SIMPLE_EXPRESSION EXPRESSION1 ,sync,--> SIMPLE_EXPRESSION EXPRESSION1 ,--> SIMPLE_EXPRESSION EXPRESSION1 ,sync,N/A,N/A,--> SIMPLE_EXPRESSION EXPRESSION1 ,N/A,N/A,N/A,--> SIMPLE_EXPRESSION EXPRESSION1 ,N/A,N/A,N/A,
FACTOR,N/A,--> ( EXPRESSION ) ,sync,N/A,N/A,sync,sync,N/A,--> id ,N/A,N/A,sync,--> num ,sync,N/A,N/A,
METHOD_BODY,sync,N/A,N/A,N/A,N/A,N/A,N/A,--> STATEMENT_LIST ,--> STATEMENT_LIST ,--> STATEMENT_LIST ,--> STATEMENT_LIST ,N/A,N/A,N/A,--> STATEMENT_LIST ,N/A,
STATEMENT,sync,N/A,N/A,N/A,N/A,N/A,N/A,--> DECLARATION ,--> ASSIGNMENT ,--> IF ,--> DECLARATION ,N/A,N/A,N/A,--> WHILE ,sync,
SIMPLE_EXPRESSION`,N/A,N/A,epsilon,N/A,N/A,epsilon,--> addop TERM SIMPLE_EXPRESSION` ,N/A,N/A,N/A,N/A,N/A,N/A,epsilon,N/A,N/A,
ASSIGNMENT,sync,N/A,N/A,N/A,N/A,N/A,N/A,sync,--> id = EXPRESSION ; ,sync,sync,N/A,N/A,N/A,sync,sync,
PRIMITIVE_TYPE,N/A,N/A,N/A,N/A,N/A,N/A,N/A,--> float ,sync,N/A,--> int ,N/A,N/A,N/A,N/A,N/A,
SIGN,N/A,sync,N/A,--> + ,--> - ,N/A,N/A,N/A,sync,N/A,N/A,N/A,sync,N/A,N/A,N/A,
55 changes: 55 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# Compiler Generator
<p align="center">
<img src="https://raw.githubusercontent.com/Ghost8345/Compiler-Generator/docs/docs/Images/CompilerGenerator.png" alt="Logo" width="35%" height="35%">
</p>


## Overview
> This is a plug-and-play compiler that can accommodate any language once you provide it with that language's lexical rules and grammar.
## Authors
+ [Ahmed Adel Abudef](https://github.com/Deffo0)
+ [Abdelmeniem Hany](https://github.com/Ghost8345)
+ [Youssef Saeed](https://github.com/usefSaeed)
+ [Zyad Samy](https://github.com/ZyadSamy)

## Setup
1. Clone the repo.
```
git clone https://github.com/Ghost8345/Compiler-Generator.git
```
2. Change the directory to the cloned repo.
```
cd Compiler-Generator
```
3. Load CMakeLists.txt to your project.
4. Add the `Lexical Rules` and `Grammar` paths as program arguments for `driver.cpp`.
5. Compile `driver.cpp`.
6. Run the compiled version.

## System Flow Chart
<p align="center">
<img src="https://raw.githubusercontent.com/Ghost8345/Compiler-Generator/docs/docs/Images/GeneralizedCompilerFlowchart.jpg" alt="System flow chart" width="70%" height="100%">
</p>

## Documentation
+ [Lexical Phase Documentation](https://github.com/Ghost8345/Compiler-Generator/blob/docs/docs/Lexical%20Phase%20Documentation.md)
+ [Syntax Phase Documentation](https://github.com/Ghost8345/Compiler-Generator/blob/docs/docs/Syntax%20Phase%20Documentation.md)
## Inputs
+ Lexical Rules
+ Grammar
+ Language Code (Program)

## Outputs
+ Stored Data in Files
+ Parsing Table
+ Console Output
+ Symbol Table
+ Parsing Tree
+ Parsing Stack Trace
+ Production Output
+ Error Reporting

## What Next 🤔
+ Implement the Syntax Directed Translation Scheme and Type checkers.
+ Implement the Intermediate Code Generation Phase.
5 changes: 2 additions & 3 deletions SyntaxPhase/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
project(SyntaxPhase)

add_subdirectory(GrammarParser)
add_subdirectory(Common)
add_subdirectory(FirstAndFollowGenerator)
add_subdirectory(PredictiveParser)

add_subdirectory(GrammarParser)
add_subdirectory(PredictiveParser)
2 changes: 1 addition & 1 deletion SyntaxPhase/PredictiveParser/Parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ void Parser::printParsingTable() {


void Parser::writeParsingTableToCSV() {
std::string filename = "./parsingTable.csv";
std::string filename = "../Outputs/parsingTable.csv";
std::unordered_set<NonTerminal*> nonTerminalsSet;
std::unordered_set<std::string> terminalsSet;

Expand Down
Binary file added docs/Images/CompilerGenerator.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/Images/GeneralizedCompilerFlowchart.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/Images/Logo.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
75 changes: 75 additions & 0 deletions docs/Lexical Phase Documentation.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@

# Lexical Phase Documentation
## Table of Contents
+ [Rules File to RE](#Rules-File-to-RE)
+ [RE to NFA](#RE-to-NFA)
+ [NFA to DFA](#NFA-to-DFA)
+ [Symbol Table Generator](#Symbol-Table-Generator)

## Rules File to RE
#### Description
The first part handles parsing the rules file and transforming it into a normalized standardized regular expression that can be easily turned into an NFA.

#### Steps
1. Read the rules file line by line.
2. Detect if it's a keyword, punctuation, regular definition, or regular expression, and handle the cases appropriately. Exit gracefully if there is an error or an unstructured line.
3. Handle keywords and punctuation by assigning each one its own regular expression with the same name and token type.
4. Standardize regular definitions and expressions.
5. Return a list of standardized regular expressions.

#### Data Structures
- `Vector<String> allKeywords`: Holds all keywords while parsing the rules file.
- `Vector<String> allPunctuation`: Holds all punctuation while parsing the rules file.
- `Vector<String> regularDefinitions`: Holds all regular definitions.
- `Vector<String> regularExpressions`: Holds the final regular expressions.

#### Assumptions
- Keywords and punctuation must be separated by a space.
- '=' and ':' are added to the reserved symbols that must be escaped with a backslash (`\`).
- Space doesn't matter in disjunction, closure, or range.
- Keywords and punctuation can come anywhere in the rules file, and they will have the highest priority.
- Other regular expressions' priorities will be based on their precedence in the rules file.
- Regular Definitions must be defined before being used.

## RE to NFA
#### Description
The second part converts the regular expression with its operations to a nondeterministic finite automaton (NFA).

#### Steps
1. Construct fine-grain classes for NFA graph.
2. Define NFA arms and brain, considering parentheses.
3. Define Thompson algorithm transformations.
4. Walk through the regExp and convert it to NFA represented by the start state.
5. Combine NFAs into one.
6. Convert the graph to a map for easier reading and conversions (using DFS).

#### Data Structures
- `Vector<Transition> transitions`: Holds all transitions in each state.
- `stack<stack<State*>> nfaStack`: Holds the start and end state of each NFA during conversion.
- `stack<stack<String>> disjunctionStack`: Holds disjunction operations in each parentheses level.
- `unordered_map<pair<State*, char>, vector<State*>> transitionTable`: Represents the adjacency matrix between states.

#### Assumptions
- All disjunction operands must be surrounded by brackets ().

## NFA to DFA
#### Description
The third part turns the NFA into a DFA with minimized states.

#### Steps
1. Use the epsilon closure of the NFA start state as a seed to the unmarked stack.
2. While unmarked states are not empty, repeat for each state T and each symbol a:
- Find the epsilon closure of states resulting from moving state T using symbol a.
- If the set of states doesn't have a DFA state mapping, create a new state with the token of the highest priority.

#### Data Structures
- `unordered_map<std::unordered_set<State*>, State*>`: Mapping from a set of NFA states to a DFA state.
- `stack<unordered_set<State*>>`: Holds unmarked states.

#### Assumptions

## Symbol Table Generator
#### Description
Takes the DFA and the input code file satisfying the grammar rules as input, and generates the symbol table.
The DFA is passed to the constructor, so the generator has all the main states and all their transitions. It then takes the input code file that supposedly satisfies the grammar rules that generated the DFA. The output of this part consists of the symbol table, which holds all the strings that represent a token and their corresponding token names; the syntax errors, which record the indices and the characters that didn’t satisfy the DFA; and the trace, which tells us exactly how the symbol table was generated and what happened in each state transition.
83 changes: 83 additions & 0 deletions docs/Syntax Phase Documentation.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@

# Syntax Phase Documentation

## Table of Contents
+ [Grammar Reader and Converter](#Grammar-Reader-and-Converter)
+ [First and Follow Generator](#First-and-Follow-Generator)
+ [Predictive Parser: Parsing Table](#Parsing-Table)
+ [Predictive Parser: Parsing](#Parsing)

## Grammar Reader and Converter
#### Description
The first part handles parsing the Grammar rules file and turning it into a standardized list of Non-Terminals and Start Symbol.

#### Steps
1. (1st Pass) Read the Grammar rules file line by line, validate correctness, and detect undefined or doubly defined Non-Terminals.
2. (2nd Pass) Read the Grammar rules file line by line, find Terminals in Productions, handle escaped reserved characters, populate Non-Terminal productions.
3. Apply Left Factoring (bonus) and Eliminate Left Recursion (bonus).
4. Return a list of standardized Non-Terminals and the Non-Terminal Start Symbol.

#### Data Structures
- `std::unordered_set<std::string> terminals`: Holds all Terminal names in the grammar.
- `std::unordered_set<std::string> nonTerminalNames`: Holds all Non-Terminal names in the grammar.
- `std::vector<NonTerminal> nonTerminals`: Holds all Non-Terminals with their productions.

#### Assumptions
- Reserved characters need to be escaped to be used as a Terminal.
- The Start Symbol is the first Non-Terminal in the grammar file.
- No escaped characters are present in the Non-Terminal Name.

## First and Follow Generator
#### Description
This part generates the First and Follow sets for each nonterminal.

#### Steps
1. Compute First sets for nonterminals.
2. Compute Follow sets for nonterminals.
3. Handle epsilon and locked nonterminals to avoid infinite recursion.

#### Data Structures
- `std::vector<std::shared_ptr<NonTerminal>> NTs`: Holds all non-terminals of the grammar.
- `std::unordered_set<Terminal*> firstSet`: First set attribute in each non-terminal.
- `std::unordered_set<Terminal*> followSet`: Follow set attribute in each non-terminal.
- `std::unordered_set<NonTerminal*> lockedNTs`: Contains non-terminals being computed to eliminate infinite recursion.

#### Assumptions
- Input grammar should be properly formatted with all productions.
- Input grammar should be left-factored and free of any left recursion.
- Nonterminal vector size equals the number of nonterminals in the grammar.

## Parsing Table
#### Description
This part constructs the parsing table for predictive parsing.

#### Steps
1. Compute NTs with First Set and Follow Set.
2. Search for matched production in the First set.
3. Construct the parsing table based on First and Follow sets.
4. Print and export the parsing table.

#### Data Structures
- `std::vector<std::shared_ptr<NonTerminal>> NTs`: Holds all non-terminals of the grammar.
- `std::unordered_map<std::pair<NonTerminal*, std::string>, ParsingTableEntry> parsingTable`: Holds the parsing table.

#### Assumptions
- Sync and Epsilon entries are represented by boolean variables inside the `ParsingTableEntry`.

## Parsing
#### Description
This part handles the actual parsing, generating the parsing tree, and error recovery.

#### Steps
1. Seed the stack with the Start Symbol and repeat until the stack is empty.
2. Match or handle errors based on the parsing table entry and input token.
3. Implement panic-mode error recovery.

#### Data Structures
- `std::stack<Symbol*> stack`: Stack for parsing.
- `std::stack<ParsingTreeNode*> nodes`: Stack for parsing tree nodes.
- `std::vector<ParsingTrace> traces`: Keeps track of traces while parsing.
- `ParsingTreeNode`: Used for creating the parsing tree.
- `ParsingTree`: Wrapper for the root node.
- `ParsingTrace`: Used for keeping track of parsing traces.
- `ParsingResult`: Wrapper that contains traces and the final parsing tree.
File renamed without changes.

0 comments on commit 986f99c

Please sign in to comment.