diff --git a/README.md b/README.md index 79ffccc..1b4c0d9 100644 --- a/README.md +++ b/README.md @@ -33,6 +33,43 @@ Expected output: 0610: fb 60 00 ``` +## Data types +The compiler works with primitive data types. + +### Byte +It takes up one byte of space. It can be written in three different ways depending on the number format. +Examples: +```assembly +$01 ; in hexadecimal format +$CC ; in hexadecimal format + +%00000000 ; in binary format +%01010011 ; in binary format + +128 ; in decimal format +2 ; in decimal format +``` + +### Word +It takes up two bytes of space. It can be written in three different ways depending on the number format. +Examples: +```assembly +$0122 ; in hexadecimal format +$CC33 ; in hexadecimal format + +%0000000000000000 ; in binary format +%0101001100000000 ; in binary format + +123456 ; in decimal format +888888 ; in decimal format +``` + +### Ascii +The space it takes up depends on the definition. The text must be written between double quotes. +```assembly +"Hello world" ; as ASCII text +``` + ## Available directives ### .org @@ -105,6 +142,17 @@ Print warning message on compilation time. 22:05:16 [WARN] timu6502asm compiler works partial ``` +### .include +Import another file. +```assembly +.include "header.asm" +.include "body.asm" +.include "footer.asm" +``` +``` +22:05:16 [WARN] timu6502asm compiler works partial +``` + There are many things to do. 
Here are the some todos: - [ ] Case insensitivity - [ ] Rom file generation diff --git a/src/ast.rs b/src/ast.rs index a1ae6ef..3268fa8 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -1,6 +1,8 @@ -use std::{cell::Cell, marker::PhantomData}; +use std::{cell::Cell, fs::File, io::Read, marker::PhantomData}; -use crate::{context::Context, opcode::{ModeType, BRANCH_INSTS, INSTS_SIZE, JUMP_INSTS}, options::{DirectiveEnum, DirectiveType, DirectiveValue, DIRECTIVE_ENUMS, OPTIONS, OPTION_MODES}, parser::{Token, TokenInfo, TokenType}, tool::{print_error, upper_case}}; +use thiserror::Error; + +use crate::{context::Context, directive::{DirectiveEnum, DirectiveType, DirectiveValue, SYSTEM_DIRECTIVES}, opcode::{ModeType, BRANCH_INSTS, INSTS_SIZE, JUMP_INSTS}, parser::{Parser, Token, TokenInfo, TokenType}, tool::{print_error, upper_case}}; #[derive(Debug, Copy, Clone)] pub enum BranchType { @@ -9,27 +11,25 @@ pub enum BranchType { } #[derive(Debug)] -pub enum Ast<'a> { +pub enum Ast { InstrImplied(usize), - InstrBranch(usize, &'a [u8]), - InstrJump(usize, &'a [u8]), + InstrBranch(usize, String), + InstrJump(usize, String), Instr(usize, u16, ModeType), - InstrRef(usize, &'a [u8]), - Branch(&'a [u8], BranchType), - Directive(DirectiveEnum, Vec>), - Assign(&'a [u8], u16, ModeType) + Branch(String, BranchType), + Directive(DirectiveEnum, Vec) } #[derive(Debug)] -pub struct AstInfo<'a> { +pub struct AstInfo { pub line: usize, pub column: usize, - pub ast: Ast<'a>, + pub ast: Ast, pub end: usize, } -impl<'a> AstInfo<'a> { - pub fn new(token: &'a TokenInfo<'a>, ast: Ast<'a>) -> Self { +impl AstInfo { + pub fn new(token: &TokenInfo, ast: Ast) -> Self { Self { line: token.line, column: token.column, @@ -39,38 +39,44 @@ impl<'a> AstInfo<'a> { } } -#[derive(Debug)] +#[derive(Debug, Error)] pub enum AstGeneratorError { + #[error("Syntax issue")] SyntaxIssue { #[allow(dead_code)] line: usize, #[allow(dead_code)] column: usize, #[allow(dead_code)] end: usize, - #[allow(dead_code)] message: 
&'static str + #[allow(dead_code)] message: String }, + + #[error("Out of scope")] OutOfScope, - InternalError + + #[error("Internal error")] + InternalError, + + #[error("IO Error ({0})")] + IOError(#[from] std::io::Error), } impl AstGeneratorError { - pub fn syntax_issue<'a>(context: &Context<'a>, token_index: usize, message: &'static str) -> Self { + pub fn syntax_issue(context: &Context, token_index: usize, message: String) -> Self { let token_info = &context.tokens.borrow()[token_index]; AstGeneratorError::SyntaxIssue { column: token_info.column, end: token_info.end, line: token_info.line, message } } } #[derive(Debug)] -pub struct AstGenerator<'a> { +pub struct AstGenerator { pub index: Cell, pub size: Cell, - marker: PhantomData<&'a u8> } -impl<'a> AstGenerator<'a> { +impl AstGenerator { pub fn new() -> Self { Self { index: Cell::new(0), size: Cell::new(0), - marker: Default::default() } } @@ -91,17 +97,27 @@ impl<'a> AstGenerator<'a> { self.empty_check()?; Ok(self.index.get()) } + + fn eat_expected(&self, context: &Context, token_type: TokenType, error: AstGeneratorError) -> Result<(), AstGeneratorError> { + let token_index = self.eat()?; + let token = &context.tokens.borrow()[token_index]; - fn eat_space(&self, context: &Context<'a>) -> Result<(), AstGeneratorError> { + if TokenType::from(&token.token) != token_type { + return Err(error); + } + Ok(()) + } + + fn eat_space(&self, context: &Context) -> Result<(), AstGeneratorError> { let token_index= self.eat()?; let token = &context.tokens.borrow()[token_index]; match token.token { Token::Space(_) => Ok(()), - _ => Err(AstGeneratorError::syntax_issue(context, token_index, "Expected space")) + _ => Err(AstGeneratorError::syntax_issue(context, token_index, "Expected space".to_string())) } } - fn cleanup_space(&self, context: &Context<'a>) -> Result<(), AstGeneratorError> { + fn cleanup_space(&self, context: &Context) -> Result<(), AstGeneratorError> { let token_index = self.peek()?; let token = 
&context.tokens.borrow()[token_index]; if let Token::Space(_) = token.token { @@ -110,7 +126,7 @@ impl<'a> AstGenerator<'a> { Ok(()) } - fn eat_if(&self, context: &Context<'a>, expected: TokenType) -> Option { + fn eat_if(&self, context: &Context, expected: TokenType) -> Option { let token_index = match self.peek() { Ok(token_index) => token_index, Err(_) => return None @@ -128,146 +144,356 @@ impl<'a> AstGenerator<'a> { } } - fn eat_if_string(&self, context: &Context<'a>) -> Option<&'a [u8]> { + fn eat_if_string(&self, context: &Context) -> Option { let index = self.eat_if(context, TokenType::String)?; let token = &context.tokens.borrow()[index]; - match token.token { - Token::String(string) => Some(string), + match &token.token { + Token::String(string) => Some(string.clone()), _ => None } } + + fn eat_if_number(&self, context: &Context) -> Option<(u16, ModeType)> { - fn eat_if_number(&self, context: &Context<'a>) -> Option<(u16, ModeType)> { - let index = self.eat_if(context, TokenType::Number)?; - let token = &context.tokens.borrow()[index]; - match token.token { - Token::Number(number, mode) => Some((number, mode)), - _ => None + if let Ok(mut position) = self.peek() { + let tokens = context.tokens.borrow(); + let mut immediate = false; + let mut mode = ModeType::ZeroPage; + let mut number = 0_u16; + let index = self.index.get(); + + if let Token::Sharp = &tokens[position].token { + let _ = self.eat(); + immediate = true; + if let Ok(new_position) = self.peek() { + position = new_position; + } else { + self.index.set(index); + return None; + } + } + + if let Token::Byte(byte) = &tokens[position].token { + let _ = self.eat(); + mode = ModeType::ZeroPage; + number = *byte as u16; + } + + else if let Token::Word(word) = &tokens[position].token { + let _ = self.eat(); + mode = ModeType::Absolute; + number = *word; + } + + else if let Token::Keyword(keyword) = &tokens[position].token { + let references = context.references.borrow(); + let values = 
references.get(keyword)?; + if values.len() != 1 { + self.index.set(index); + return None + } + + let first_value = &values[0]; + (number, mode) = match first_value { + DirectiveValue::Byte(number) => (*number as u16, ModeType::ZeroPage), + DirectiveValue::Word(number) => (*number as u16, ModeType::Absolute), + _ => { + self.index.set(index); + return None + } + }; + let _ = self.eat(); + } + + return match immediate { + true => Some((number, ModeType::Immediate)), + false => match mode == ModeType::Absolute { + true => Some((number, ModeType::Absolute)), + false => Some(((number as u8) as u16, ModeType::ZeroPage)), + }, + }; } + + None } - fn eat_number(&self, context: &Context<'a>) -> Result<(u16, ModeType), AstGeneratorError> { + fn eat_number(&self, context: &Context) -> Result<(u16, ModeType), AstGeneratorError> { let token_index= self.eat()?; let token = &context.tokens.borrow()[token_index]; match token.token { - Token::Number(number, mode) => Ok((number, mode)), - _ => Err(AstGeneratorError::syntax_issue(context, token_index, "Expected number")) + Token::Byte(number) => Ok((number as u16, ModeType::ZeroPage)), + Token::Word(number) => Ok((number, ModeType::Absolute)), + _ => Err(AstGeneratorError::syntax_issue(context, token_index, "Expected number".to_string())) } } - fn eat_string(&self, context: &Context<'a>) -> Result<&'a [u8], AstGeneratorError> { + fn eat_string(&self, context: &Context) -> Result { let token_index= self.eat()?; let token = &context.tokens.borrow()[token_index]; - match token.token { - Token::String(string) => Ok(string), - _ => Err(AstGeneratorError::syntax_issue(context, token_index, "Expected string")) + match &token.token { + Token::String(string) => Ok(string.clone()), + _ => Err(AstGeneratorError::syntax_issue(context, token_index, "Expected string".to_string())) } } - - fn eat_assign(&self, context: &Context<'a>) -> Result<(), AstGeneratorError> { + + fn eat_assign(&self, context: &Context) -> Result<(), AstGeneratorError> { 
let token_index= self.eat()?; let token = &context.tokens.borrow()[token_index]; match token.token { Token::Assign => Ok(()), - _ => Err(AstGeneratorError::syntax_issue(context, token_index, "Expected assign")) + _ => Err(AstGeneratorError::syntax_issue(context, token_index, "Expected assign".to_string())) } } - fn eat_text(&self, context: &Context<'a>) -> Result<&'a [u8], AstGeneratorError> { + fn eat_text(&self, context: &Context) -> Result { let token_index= self.eat()?; let token = &context.tokens.borrow()[token_index]; - match token.token { - Token::Keyword(text) => Ok(text), - _ => Err(AstGeneratorError::syntax_issue(context, token_index, "Expected text")) + match &token.token { + Token::Keyword(text) => Ok(text.clone()), + _ => Err(AstGeneratorError::syntax_issue(context, token_index, "Expected text".to_string())) } } - fn generate_directive(&self, context: &Context<'a>, token_index: usize, option: &'a [u8]) -> Result<(), AstGeneratorError> { - let option = upper_case(option); - if let Some(position) = OPTIONS.iter().position(|item| *item == &option[..]) { - let modes = OPTION_MODES[position]; - let directive_type = DIRECTIVE_ENUMS[position]; - let tokens = context.tokens.borrow(); + fn parse_list(&self, context: &Context, validator: impl Fn(DirectiveType) -> bool) -> Result, AstGeneratorError> { + let tokens = context.tokens.borrow(); - let mut token_found = false; - let mut finish = false; + let mut token_found = false; + let mut finish = false; - self.cleanup_space(context)?; - let mut values = Vec::new(); - - while self.size.get() > self.index.get() { - let value_index = self.eat()?; - let value_token = &tokens.get(value_index).map(|item| &item.token); - - if token_found { - /* comma, space, new line, end or comment expected */ - match value_token { - Some(Token::NewLine(_)) => finish = true, - Some(Token::Comment(_)) => finish = true, - Some(Token::End) => finish = true, - Some(Token::Space(_)) => (), - Some(Token::Comma) => token_found = false, - _ => 
return Err(AstGeneratorError::syntax_issue(context, value_index, "Unexpected syntax")) - } - } - else { - /* Expected parseable token */ - match value_token { - Some(Token::Keyword(keyword)) => { values.push(DirectiveValue::Reference(*keyword)); token_found = true; }, - Some(Token::Number(number, ModeType::Absolute)) => { values.push(DirectiveValue::Word(*number)); token_found = true; }, - Some(Token::Number(number, ModeType::ZeroPage)) => { values.push(DirectiveValue::Byte((*number) as u8)); token_found = true; }, - Some(Token::Number(number, ModeType::Relative)) => { values.push(DirectiveValue::Byte((*number) as u8)); token_found = true; }, - Some(Token::String(string)) => { values.push(DirectiveValue::String(*string)); token_found = true; }, - Some(Token::BranchNext(name)) => { values.push(DirectiveValue::Reference(*name)); token_found = true; }, - Some(Token::NewLine(_)) => finish = true, - Some(Token::Comment(_)) => finish = true, - Some(Token::End) => finish = true, - Some(Token::Space(_)) => (), - Some(Token::Comma) => return Err(AstGeneratorError::syntax_issue(&context, value_index, "',' not expected")), - Some(_) => return Err(AstGeneratorError::syntax_issue(context, value_index, "Unexpected syntax")), - None => return Err(AstGeneratorError::InternalError) - }; - } + self.cleanup_space(context)?; + let mut values = Vec::new(); - if token_found { - /* Is it expected token? 
*/ - let last_token_type = DirectiveType::from(&values[values.len()-1]); - if !modes.iter().any(|mode| *mode == last_token_type) { - return Err(AstGeneratorError::syntax_issue(context, value_index, "Unexpected syntax")) - } + while self.size.get() > self.index.get() { + let value_index = self.eat()?; + let value_token = &tokens.get(value_index).map(|item| &item.token); + + if token_found { + /* comma, space, new line, end or comment expected */ + match value_token { + Some(Token::NewLine(_)) => finish = true, + Some(Token::Comment(_)) => finish = true, + Some(Token::End) => finish = true, + Some(Token::Space(_)) => (), + Some(Token::Comma) => token_found = false, + _ => return Err(AstGeneratorError::syntax_issue(context, value_index, "Unexpected syntax".to_string())) } + } + else { + /* Expected parseable token */ + match value_token { + Some(Token::Keyword(keyword)) => { values.push(DirectiveValue::Reference(keyword.clone())); token_found = true; }, + Some(Token::Word(number)) => { values.push(DirectiveValue::Word(*number)); token_found = true; }, + Some(Token::Byte(number)) => { values.push(DirectiveValue::Byte((*number) as u8)); token_found = true; }, + Some(Token::String(string)) => { values.push(DirectiveValue::String(string.clone())); token_found = true; }, + Some(Token::BranchNext(name)) => { values.push(DirectiveValue::Reference(name.clone())); token_found = true; }, + Some(Token::NewLine(_)) => finish = true, + Some(Token::Comment(_)) => finish = true, + Some(Token::End) => finish = true, + Some(Token::Space(_)) => (), + Some(Token::Comma) => return Err(AstGeneratorError::syntax_issue(&context, value_index, "',' not expected".to_string())), + Some(_) => return Err(AstGeneratorError::syntax_issue(context, value_index, "Unexpected syntax".to_string())), + None => return Err(AstGeneratorError::InternalError) + }; + } - if finish { - break; + if token_found { + if !validator(DirectiveType::from(&values[values.len()-1])) { + return 
Err(AstGeneratorError::syntax_issue(context, value_index, "Unexpected syntax".to_string())) } } - if modes.len() > 0 && values.len() == 0 { - return Err(AstGeneratorError::syntax_issue(context, token_index, "Missing information")) + if finish { + break; + } + } + + Ok(values) + } + + fn generate_directive(&self, context: &Context, token_index: usize, directive_name: &String) -> Result<(), AstGeneratorError> { + let directive_name = directive_name.to_uppercase(); + if let Some(directive) = SYSTEM_DIRECTIVES.iter().find(|item| item.name == &directive_name[..]) { + + let values = self.parse_list(context, |directive_type| -> bool { + return directive_type == DirectiveType::Reference || directive.values.iter().any(|mode| *mode == directive_type) + })?; + + match directive.size { + crate::directive::DirectiveVariableSize::None => { + if values.len() != 0 { + return Err(AstGeneratorError::syntax_issue(context, token_index, "No value expected".to_string())); + } + }, + crate::directive::DirectiveVariableSize::Min(min) => { + if values.len() < min { + return Err(AstGeneratorError::syntax_issue(context, token_index, format!("Minimum {} value(s) expected", min))); + } + }, + crate::directive::DirectiveVariableSize::Length(len) => { + if values.len() != len { + return Err(AstGeneratorError::syntax_issue(context, token_index, format!("Expected {} value(s)", len))); + } + }, + } + + if directive.values.len() > 0 && values.len() == 0 { + return Err(AstGeneratorError::syntax_issue(context, token_index, "Missing information".to_string())) } - context.add_ast(token_index,Ast::Directive(directive_type, values)); + context.add_ast(token_index, Ast::Directive(directive.directive, values)); + } else { - return Err(AstGeneratorError::syntax_issue(context, token_index, "Unsupported compiler configuration")) + return Err(AstGeneratorError::syntax_issue(context, token_index, "Unsupported compiler configuration".to_string())) + } + Ok(()) + } + + fn process_include(&self, context: &Context, 
token_index: usize, value: &DirectiveValue) -> Result<(), AstGeneratorError> { + let file_path = match value { + DirectiveValue::String(name) => name, + _ => return Err(AstGeneratorError::syntax_issue(&context, token_index, "Path expected as a string".to_string())) + }; + + println!("File: {}", &file_path); + let mut file = File::open(&file_path)?; + + + let mut code = Vec::new(); + file.read_to_end(&mut code)?; + + let new_context = Context::default(); + context.add_file(file_path.to_string()); + + let mut parser = Parser::new(context.last_file_id(), &code[..], new_context); + parser.parse().unwrap(); + + let new_context = parser.context; + + let mut tokens = context.tokens.borrow_mut(); + let new_tokens = new_context.tokens.borrow(); + let current_position = self.index.get(); + + if new_tokens.len() > 0 { + for token in new_tokens.iter().take(new_tokens.len()-1).rev() { + tokens.insert(current_position, token.clone()); + } + + self.size.set(tokens.len()); } + Ok(()) } - fn generate_branch(&self, context: &Context<'a>, token_index: usize, name: &'a [u8], branch_type: BranchType) -> Result<(), AstGeneratorError> { - context.add_ast(token_index,Ast::Branch(name, branch_type)); + fn generate_branch(&self, context: &Context, token_index: usize, name: &String, branch_type: BranchType) -> Result<(), AstGeneratorError> { + context.add_ast(token_index, Ast::Branch(name.clone(), branch_type)); Ok(()) } - fn generate_assign(&self, context: &Context<'a>, token_index: usize, name: &'a [u8]) -> Result<(), AstGeneratorError> { + fn generate_assign(&self, context: &Context, token_index: usize, name: &String) -> Result<(), AstGeneratorError> { self.cleanup_space(context)?; self.eat_assign(context)?; self.cleanup_space(context)?; - let (number, mode) = self.eat_number(context)?; - context.add_ast(token_index,Ast::Assign(name, number, mode)); + + let values = self.parse_list(context, |_| true)?; + context.references.borrow_mut().insert(name.clone(), values); Ok(()) } + + pub(crate) 
fn try_parse_number(&self, context: &Context) -> Result<(u16, ModeType), AstGeneratorError> { + self.cleanup_space(context)?; + let tokens = context.tokens.borrow(); + let token_index = self.peek()?; + let token = &tokens[token_index]; + + if let Token::OpenParenthesis = token.token { + let mut mode = ModeType::Indirect; + let mut parenthesis_closed = false; + self.eat()?; + self.cleanup_space(context)?; + + let Some((number, _)) = self.eat_if_number(context) else { + return Err(AstGeneratorError::syntax_issue(context, token_index, "Invalid numbering number format".to_string())); + }; + + self.cleanup_space(context)?; + + let token_index = self.peek()?; + let token = &tokens[token_index]; + if let Token::OpenParenthesis = token.token { + self.eat()?; + parenthesis_closed = true; + } + + self.cleanup_space(context)?; + let token_index = self.peek()?; + let token = &tokens[token_index]; + if let Token::Comma = token.token { + self.eat()?; + self.cleanup_space(context)?; + + let token_index = self.peek()?; + let token = &tokens[token_index]; + + mode = match &token.token { + Token::Keyword(value) if value == "x" || value == "X" => ModeType::IndirectX, + Token::Keyword(value) if value == "y" || value == "Y" => ModeType::IndirectY, + _ => return Err(AstGeneratorError::syntax_issue(context, token_index, "Expected X or Y".to_string())) + }; + + + self.eat()?; + } + + self.cleanup_space(context)?; + + if !parenthesis_closed { + self.eat_expected(context, TokenType::CloseParenthesis, AstGeneratorError::syntax_issue(context, token_index, "Expected ')'".to_string()))?; + } + + return Ok((number, mode)); + + } else { + self.cleanup_space(context)?; + + let Some((number, mut mode)) = self.eat_if_number(context) else { + return Err(AstGeneratorError::syntax_issue(context, token_index, "Invalid numbering number format".to_string())); + }; + + if mode == ModeType::Immediate { + return Ok((number, mode)); + } + + self.cleanup_space(context)?; + let token_index = self.peek()?; + let 
token = &tokens[token_index]; + if let Token::Comma = token.token { + self.eat()?; + self.cleanup_space(context)?; + + let token_index = self.peek()?; + let token = &tokens[token_index]; + + mode = match &token.token { + Token::Keyword(value) if value == "x" || value == "X" => match mode { + ModeType::Absolute => ModeType::AbsoluteX, + ModeType::ZeroPage => ModeType::ZeroPageX, + _ => return Err(AstGeneratorError::syntax_issue(context, token_index, "Invalid usage".to_string())) + }, + Token::Keyword(value) if value == "y" || value == "Y" => match mode { + ModeType::Absolute => ModeType::AbsoluteY, + ModeType::ZeroPage => ModeType::ZeroPageY, + _ => return Err(AstGeneratorError::syntax_issue(context, token_index, "Invalid usage".to_string())) + }, + _ => return Err(AstGeneratorError::syntax_issue(context, token_index, "Expected X or Y".to_string())) + }; + self.eat()?; + } + + return Ok((number, mode)); + } + } - fn generate_code_block(&self, context: &Context<'a>, token_index: usize, positon: usize) -> Result<(), AstGeneratorError> { + fn generate_code_block(&self, context: &Context, token_index: usize, positon: usize) -> Result<(), AstGeneratorError> { if INSTS_SIZE[positon] == 1 { context.add_ast(token_index,Ast::InstrImplied(positon)); @@ -283,70 +509,79 @@ impl<'a> AstGenerator<'a> { else if JUMP_INSTS.contains(&positon) { // Jump inst self.eat_space(context)?; - let token_index= self.eat()?; - let token = &context.tokens.borrow()[token_index]; - let ast = match token.token { - Token::Keyword(name) => Ast::InstrJump(positon, name), - Token::Number(number, ModeType::Absolute) => Ast::Instr(positon, number, ModeType::Absolute), - Token::Number(number, ModeType::Indirect) => Ast::Instr(positon, number, ModeType::Indirect), - _ => return Err(AstGeneratorError::syntax_issue(context, token_index, "Branch name, absolute address or indirect address expected")), - }; - context.add_ast(token_index, ast); - } + let index = self.index.get(); + if let Ok((number, mode)) = 
self.try_parse_number(context) { + context.add_ast(token_index, Ast::Instr(positon, number, mode)); + return Ok(()) + } - else { - self.eat_space(context)?; + self.index.set(index); // Restore index let token_index= self.eat()?; let token = &context.tokens.borrow()[token_index]; + if let Token::Keyword(name) = &token.token { + context.add_ast(token_index, Ast::InstrJump(positon, name.clone())); + return Ok(()) + } - let ast = match &token.token { - Token::Keyword(keyword) => Ast::InstrRef(positon, keyword), - Token::Number(number, mode) => Ast::Instr(positon, *number, *mode), - _ => return Err(AstGeneratorError::syntax_issue(context, token_index, "Keyword or reference expected")) - }; + return Err(AstGeneratorError::syntax_issue(context, token_index, "Branch name, absolute address or indirect address expected".to_string())) + } - context.add_ast(token_index, ast); + else { + self.eat_space(context)?; + let (number, mode) = self.try_parse_number(context)?; + context.add_ast(token_index, Ast::Instr(positon, number, mode)); } Ok(()) } - fn inline_generate(&self, context: &Context<'a>) -> Result<(), AstGeneratorError> { + fn inline_generate(&self, context: &Context) -> Result<(), AstGeneratorError> { self.size.set(context.tokens.borrow().len()); + let mut token_index = 0; while self.size.get() > self.index.get() { - let token_index = self.eat()?; - - match &context.tokens.borrow().get(token_index).map(|item| &item.token) { - Some(Token::Instr(positon)) => self.generate_code_block(&context, token_index, *positon)?, - Some(Token::Keyword(keyword)) => self.generate_assign(&context, token_index, keyword)?, - Some(Token::Directive(option)) => self.generate_directive(&context, token_index, option)?, - Some(Token::Comment(_)) => (), - Some(Token::Branch(name)) => self.generate_branch(&context, token_index, name, BranchType::Generic)?, - Some(Token::Number(_, _)) => return Err(AstGeneratorError::syntax_issue(&context, token_index, "Number not expected")), - 
Some(Token::NewLine(_)) => (), - Some(Token::Space(_)) => (), - Some(Token::Assign) => return Err(AstGeneratorError::syntax_issue(&context, token_index, "'=' not expected")), - Some(Token::Comma) => return Err(AstGeneratorError::syntax_issue(&context, token_index, "',' not expected")), - Some(Token::String(_)) => return Err(AstGeneratorError::syntax_issue(&context, token_index, "String not expected")), - Some(Token::BranchNext(name)) => self.generate_branch(&context, token_index, name, BranchType::Next)?, - Some(Token::End) => break, - None => return Err(AstGeneratorError::InternalError) + { + token_index = self.eat()?; + let tokens = context.tokens.borrow(); + + match &tokens.get(token_index).map(|item| &item.token) { + Some(Token::Instr(positon)) => self.generate_code_block(&context, token_index, *positon)?, + Some(Token::Keyword(keyword)) => self.generate_assign(&context, token_index, keyword)?, + Some(Token::Directive(option)) => self.generate_directive(&context, token_index, option)?, + Some(Token::Comment(_)) => (), + Some(Token::Branch(name)) => self.generate_branch(&context, token_index, name, BranchType::Generic)?, + Some(Token::Byte(_)) => return Err(AstGeneratorError::syntax_issue(&context, token_index, "Number not expected".to_string())), + Some(Token::Word(_)) => return Err(AstGeneratorError::syntax_issue(&context, token_index, "Number not expected".to_string())), + Some(Token::NewLine(_)) => (), + Some(Token::Space(_)) => (), + Some(Token::OpenParenthesis) => return Err(AstGeneratorError::syntax_issue(&context, token_index, "'(' not expected".to_string())), + Some(Token::CloseParenthesis) => return Err(AstGeneratorError::syntax_issue(&context, token_index, "')' not expected".to_string())), + Some(Token::Sharp) => return Err(AstGeneratorError::syntax_issue(&context, token_index, "'#' not expected".to_string())), + Some(Token::Assign) => return Err(AstGeneratorError::syntax_issue(&context, token_index, "'=' not expected".to_string())), + 
Some(Token::Comma) => return Err(AstGeneratorError::syntax_issue(&context, token_index, "',' not expected".to_string())), + Some(Token::String(_)) => return Err(AstGeneratorError::syntax_issue(&context, token_index, "String not expected".to_string())), + Some(Token::BranchNext(name)) => self.generate_branch(&context, token_index, name, BranchType::Next)?, + Some(Token::End) => break, + None => return Err(AstGeneratorError::InternalError) + } + } + + if let Some(Ast::Directive(DirectiveEnum::Include, values)) = context.asts.borrow().last().map(|ast| &ast.ast) { + self.process_include(context, token_index, &values[0])?; } } Ok(()) } - pub fn generate(&self, context: Context<'a>) -> Result, AstGeneratorError> { + pub fn generate(&self, context: Context) -> Result { match self.inline_generate(&context) { Ok(_) => Ok(context), Err(error) => { let tokens = context.tokens.borrow(); let token = &tokens[self.index.get() - 1]; - println!("1{:?}", context.source); - print_error(&context.source, &error, token.line, token.column, token.end); + print_error(&context.target, &error, token.line, token.column, token.end); Err(error) } } diff --git a/src/code_gen.rs b/src/code_gen.rs index 7a2d032..06bfa5c 100644 --- a/src/code_gen.rs +++ b/src/code_gen.rs @@ -4,67 +4,45 @@ use std::io::BufReader; use std::io::Read; use log::{info, warn}; use thiserror::Error; -use strum_macros::EnumDiscriminants; use crate::context::Context; use crate::tool::print_error; -use crate::{ast::{Ast, BranchType}, opcode::{ModeType, MODES}, options::{DirectiveEnum, DirectiveValue}}; +use crate::{ast::{Ast, BranchType}, opcode::{ModeType, MODES}, directive::{DirectiveEnum, DirectiveValue}}; #[derive(Error, Debug)] pub enum CodeGeneratorError { #[error("Internal error")] InternalError, + #[error("Illegal opcode")] + IllegalOpcode, #[error("Number not applicable")] NumberNotApplicable, #[error("Branch information not found")] UnresolvedBranches, #[error("Reference information not found")] UnresolvedReference, 
- #[error("Invalid reference value")] - InvalidReferenceValue, - #[error("Expected string")] + #[error("Expected &String")] StringExpected, #[error("IO Error ({0})")] IOError(#[from] std::io::Error), #[error("Text convertion issue ({0})")] - Utf8Error(#[from] Utf8Error), - - #[allow(unused_variables)] - #[error("Unsupported number format")] - UnsupportedNumberFormat, - - #[allow(unused_variables)] - #[error("Word expected")] - WordExpected, + Utf8Error(#[from] Utf8Error), #[error("Expected {0}")] - ExpectedThis(&'static str), - #[error("More than expected")] - MoreThanExpected -} - -#[derive(Debug, PartialEq, Copy, Clone)] -#[derive(EnumDiscriminants)] -#[strum_discriminants(name(ReferenceType))] -pub enum ReferenceValue { - AbsoluteAddress(u16), - #[allow(unused_variables)] - RelativeAddress(u16), - Value(u16, ModeType), + ExpectedThis(&'static str) } #[derive(Debug)] -pub struct CodeGenerator<'a> { +pub struct CodeGenerator { pub index: usize, pub size: usize, pub start_point: u16, - pub branches: HashMap<&'a [u8], usize>, - pub references: HashMap<&'a [u8], ReferenceValue>, - pub unresolved_branches: Vec<(&'a [u8], usize, usize)>, - pub unresolved_jumps: Vec<(&'a [u8], usize, usize)> + pub branches: HashMap, + pub unresolved_branches: Vec<(String, usize, usize)>, + pub unresolved_jumps: Vec<(String, usize, usize)> } -impl<'a> CodeGenerator<'a> { +impl CodeGenerator { pub fn new() -> Self { Self { index: 0, @@ -73,7 +51,6 @@ impl<'a> CodeGenerator<'a> { branches: Default::default(), unresolved_branches: Default::default(), unresolved_jumps: Default::default(), - references: Default::default() } } @@ -108,46 +85,30 @@ impl<'a> CodeGenerator<'a> { fn generate_instr(&mut self, target: &mut Vec, instr: usize, number: u16, mode: ModeType) -> Result<(), CodeGeneratorError> { let modes = MODES[instr]; + let mut found = false; for search_mode in modes.iter() { if search_mode.mode == mode { target.push(search_mode.opcode); self.push_number(target, number, mode)?; + found 
= true; + break; } } - Ok(()) - } - - fn generate_instr_reference(&mut self, target: &mut Vec, instr: usize, reference: &'a [u8]) -> Result<(), CodeGeneratorError> { - let modes = MODES[instr]; - let value = match self.references.get(reference) { - Some(value) => value, - None=> return Err(CodeGeneratorError::UnresolvedReference) - }; - - let (number, mode) = match value { - ReferenceValue::AbsoluteAddress(_) => return Err(CodeGeneratorError::InvalidReferenceValue), - ReferenceValue::RelativeAddress(_) => return Err(CodeGeneratorError::InvalidReferenceValue), - ReferenceValue::Value(number, mode) => (*number, *mode), - }; - for search_mode in modes.iter() { - if search_mode.mode == mode { - target.push(search_mode.opcode); - self.push_number(target, number, mode)?; - } + if !found { + return Err(CodeGeneratorError::IllegalOpcode) } - Ok(()) } - fn generate_instr_branch(&mut self, target: &mut Vec, ast_index: usize, position: usize, branch_name: &'a [u8]) -> Result<(), CodeGeneratorError> { + fn generate_instr_branch(&mut self, target: &mut Vec, ast_index: usize, position: usize, branch_name: &String) -> Result<(), CodeGeneratorError> { let branch_position = match self.branches.get(branch_name) { Some(branch_position) => { let distance_position = *branch_position as i8 - (target.len() + 2) as i8; distance_position as u16 }, None => { - self.unresolved_branches.push((branch_name, target.len() + 1, ast_index)); + self.unresolved_branches.push((branch_name.clone(), target.len() + 1, ast_index)); 0 } }; @@ -159,11 +120,11 @@ impl<'a> CodeGenerator<'a> { Ok(()) } - fn generate_instr_jump(&mut self, target: &mut Vec, ast_index: usize, position: usize, branch_name: &'a [u8]) -> Result<(), CodeGeneratorError> { + fn generate_instr_jump(&mut self, target: &mut Vec, ast_index: usize, position: usize, branch_name: &String) -> Result<(), CodeGeneratorError> { let jump_position = match self.branches.get(branch_name) { Some(jump_position) => self.start_point + *jump_position as 
u16, None => { - self.unresolved_jumps.push((branch_name, target.len() + 1, ast_index)); + self.unresolved_jumps.push((branch_name.clone(), target.len() + 1, ast_index)); 0 } }; @@ -174,11 +135,6 @@ impl<'a> CodeGenerator<'a> { Ok(()) } - fn configure_assign(&mut self, name: &'a [u8], number: u16, mode: ModeType) -> Result<(), CodeGeneratorError> { - self.references.insert(name, ReferenceValue::Value(number, mode)); - Ok(()) - } - fn generate_implied(&mut self, target: &mut Vec, position: usize) -> Result<(), CodeGeneratorError> { let modes = MODES[position]; for search_mode in modes.iter() { @@ -190,9 +146,9 @@ impl<'a> CodeGenerator<'a> { Ok(()) } - fn generate_branch(&mut self, target: &mut Vec, name: &'a [u8], _: BranchType) -> Result<(), CodeGeneratorError> { - self.branches.insert(name, target.len()); - self.references.insert(name, ReferenceValue::AbsoluteAddress(0)); + fn generate_branch(&mut self, target: &mut Vec, name: &String, _: BranchType) -> Result<(), CodeGeneratorError> { + self.branches.insert(name.clone(), target.len()); + //self.references.insert(name, ReferenceValue::AbsoluteAddress(0)); Ok(()) } @@ -223,41 +179,18 @@ impl<'a> CodeGenerator<'a> { Ok(()) } - fn directive_org(&mut self, values: &Vec>) -> Result<(), CodeGeneratorError> { - if values.len() == 0 { - return Err(CodeGeneratorError::ExpectedThis("word")); - } - else if values.len() > 1 { - return Err(CodeGeneratorError::MoreThanExpected); - } - + fn directive_org(&mut self, values: &Vec) -> Result<(), CodeGeneratorError> { self.start_point = values[0].get_word()?; Ok(()) } - fn directive_incbin(&mut self, target: &mut Vec, values: &Vec>) -> Result<(), CodeGeneratorError> { - - if values.len() == 0 { - return Err(CodeGeneratorError::ExpectedThis("word")); - } - else if values.len() > 1 { - return Err(CodeGeneratorError::MoreThanExpected); - } - - let file_path = match values[0] { + fn directive_incbin(&mut self, target: &mut Vec, values: &Vec) -> Result<(), CodeGeneratorError> { + let 
file_path = match &values[0] { DirectiveValue::String(name) => name, _ => return Err(CodeGeneratorError::StringExpected) }; - - let file_path = match std::str::from_utf8(file_path) { - Ok(file_path) => file_path, - Err(error) => return Err(CodeGeneratorError::Utf8Error(error)) - }; - let file = match File::open(file_path) { - Ok(file) => file, - Err(error) => return Err(CodeGeneratorError::IOError(error)) - }; + let file = File::open(file_path)?; let buffer_reader: BufReader = BufReader::new(file); for buffer in buffer_reader.bytes() { @@ -269,28 +202,24 @@ impl<'a> CodeGenerator<'a> { Ok(()) } - fn directive_byte(&mut self, target: &mut Vec, values: &Vec>) -> Result<(), CodeGeneratorError> { - if values.len() == 0 { - return Err(CodeGeneratorError::ExpectedThis("byte(s)")); - } - + fn directive_byte(&mut self, target: &mut Vec, values: &Vec) -> Result<(), CodeGeneratorError> { for value in values.iter() { match value { DirectiveValue::Byte(byte) => target.push(*byte), - DirectiveValue::String(string) => string.into_iter().for_each(|byte| target.push(*byte)), - _ => return Err(CodeGeneratorError::ExpectedThis("byte or string")) + DirectiveValue::String(string) => string.as_bytes().into_iter().for_each(|byte| target.push(*byte)), + _ => return Err(CodeGeneratorError::ExpectedThis("byte or &String")) }; } Ok(()) } - fn directive_word(&mut self, target: &mut Vec, values: &Vec>) -> Result<(), CodeGeneratorError> { - if values.len() == 0 { - return Err(CodeGeneratorError::ExpectedThis("byte(s)")); - } - + fn directive_word(&mut self, target: &mut Vec, values: &Vec) -> Result<(), CodeGeneratorError> { for value in values.iter() { match value { + DirectiveValue::Byte(word) => { + target.push(*word as u8); + target.push(0x00); + }, DirectiveValue::Word(word) => { target.push(*word as u8); target.push((*word >> 8) as u8); @@ -301,44 +230,40 @@ impl<'a> CodeGenerator<'a> { Ok(()) } - fn directive_ascii(&mut self, target: &mut Vec, values: &Vec>, add_null: bool) -> Result<(), 
CodeGeneratorError> { - if values.len() == 0 { - return Err(CodeGeneratorError::ExpectedThis("string")); - } - else if values.len() > 1 { - return Err(CodeGeneratorError::MoreThanExpected); - } - + fn directive_ascii(&mut self, target: &mut Vec, values: &Vec, add_null: bool) -> Result<(), CodeGeneratorError> { for value in values.into_iter() { let string = match value { DirectiveValue::String(string) => string, _ => return Err(CodeGeneratorError::ExpectedThis("string")) }; - string.into_iter().for_each(|byte| target.push(*byte)); + string.as_bytes().into_iter().for_each(|byte| target.push(*byte)); - if add_null && string[string.len()-1] != 0x0 { + let bytes = string.as_bytes(); + if add_null && bytes[bytes.len()-1] != 0x0 { target.push(0x0); } } Ok(()) } - fn directive_warning(&mut self, _: &mut Vec, values: &Vec>) -> Result<(), CodeGeneratorError> { - if values.len() == 0 { - return Err(CodeGeneratorError::ExpectedThis("string")); - } + fn directive_warning(&mut self, _: &mut Vec, values: &Vec) -> Result<(), CodeGeneratorError> { + let mut message = String::new(); for value in values.into_iter() { match value { - DirectiveValue::String(string) => warn!("{}", std::str::from_utf8(&string).map_err(|error| CodeGeneratorError::Utf8Error(error))?), + DirectiveValue::String(string) => message += &string[..], + DirectiveValue::Word(word) => message += &format!("0x{:02X}", word), + DirectiveValue::Byte(byte) => message += &format!("0x{:02X}", byte), _ => return Err(CodeGeneratorError::ExpectedThis("string")) }; } + + warn!("{}", message); Ok(()) } - fn generate_directive(&mut self, target: &mut Vec, option: DirectiveEnum, values: &Vec>) -> Result<(), CodeGeneratorError> { + fn generate_directive(&mut self, target: &mut Vec, option: DirectiveEnum, values: &Vec) -> Result<(), CodeGeneratorError> { match option { DirectiveEnum::Org => self.directive_org(values)?, DirectiveEnum::Incbin => self.directive_incbin(target, values)?, @@ -347,11 +272,12 @@ impl<'a> CodeGenerator<'a> 
{ DirectiveEnum::Ascii => self.directive_ascii(target, values, false)?, DirectiveEnum::Asciiz => self.directive_ascii(target, values, true)?, DirectiveEnum::Warning => self.directive_warning(target, values)?, + DirectiveEnum::Include => (), }; Ok(()) } - fn inner_generate(&mut self, context: &mut Context<'a>) -> Result<(), CodeGeneratorError> { + fn inner_generate(&mut self, context: &mut Context) -> Result<(), CodeGeneratorError> { self.size = context.asts.borrow().len(); let asts = context.asts.borrow(); @@ -361,13 +287,11 @@ impl<'a> CodeGenerator<'a> { match ast { Some(Ast::InstrImplied(position)) => self.generate_implied(&mut context.target, *position)?, - Some(Ast::InstrBranch(position, branch)) => self.generate_instr_branch(&mut context.target, ast_index, *position, *branch)?, - Some(Ast::InstrJump(position, branch)) => self.generate_instr_jump(&mut context.target, ast_index, *position, *branch)?, + Some(Ast::InstrBranch(position, branch)) => self.generate_instr_branch(&mut context.target, ast_index, *position, branch)?, + Some(Ast::InstrJump(position, branch)) => self.generate_instr_jump(&mut context.target, ast_index, *position, branch)?, Some(Ast::Instr(position, number, mode)) => self.generate_instr(&mut context.target, *position, *number, *mode)?, - Some(Ast::InstrRef(position, reference)) => self.generate_instr_reference(&mut context.target, *position, *reference)?, Some(Ast::Branch(name, branch_type)) => self.generate_branch(&mut context.target, name, *branch_type)?, Some(Ast::Directive(option, values)) => self.generate_directive(&mut context.target, *option, &values)?, - Some(Ast::Assign(name, number, mode)) => self.configure_assign(*name, *number, *mode)?, None => return Err(CodeGeneratorError::InternalError) }; } @@ -377,7 +301,7 @@ impl<'a> CodeGenerator<'a> { Ok(()) } - pub fn generate(&mut self, context: Context<'a>) -> Result, CodeGeneratorError> { + pub fn generate(&mut self, context: Context) -> Result { let mut context = context; match 
self.inner_generate(&mut context) { @@ -385,15 +309,14 @@ impl<'a> CodeGenerator<'a> { Err(error) => { let asts = context.asts.borrow(); let ast = &asts[self.index - 1]; - print_error(&context.source, &error, ast.line, ast.column, ast.end); + print_error(&context.target, &error, ast.line, ast.column, ast.end); Err(error) } } } - pub fn dump(&self, context: &Context<'a>) { + pub fn dump(&self, context: &Context) { - println!(); info!("Binary Output"); let total_byte_per_row = 8; let position = self.start_point; @@ -403,9 +326,9 @@ impl<'a> CodeGenerator<'a> { for (index, data) in context.target.iter().enumerate() { print!("{:02X} ", data); - if index != 0 && index % total_byte_per_row == 0 && index != total_bytes-1 { + if index > 1 && (index+1) % total_byte_per_row == 0 && index != total_bytes-1 { println!(); - print!("{:04X}: ", position + (index as u16)); + print!("{:04X}: ", position + 1 + (index as u16)); } } diff --git a/src/context.rs b/src/context.rs index b3a6caa..4500d92 100644 --- a/src/context.rs +++ b/src/context.rs @@ -1,26 +1,18 @@ -use std::cell::RefCell; +use std::{cell::RefCell, collections::HashMap}; -use crate::{ast::{Ast, AstInfo}, parser::TokenInfo}; +use crate::{ast::{Ast, AstInfo}, directive::DirectiveValue, parser::TokenInfo}; -#[derive(Debug)] -pub struct Context<'a> { - pub source: &'a [u8], +#[derive(Debug, Default)] +pub struct Context { pub target: Vec, - pub tokens: RefCell>>, - pub asts: RefCell>>, + pub tokens: RefCell>, + pub asts: RefCell>, + pub references: RefCell>>, + pub files: RefCell> } -impl<'a> Context<'a> { - pub fn new(data: &'a [u8]) -> Self { - Self { - target: Vec::new(), - asts: Default::default(), - source: data, - tokens: Default::default() - } - } - - pub fn add_ast(&self, token_index: usize, ast: Ast<'a>) { +impl Context { + pub fn add_ast(&self, token_index: usize, ast: Ast) { let token_info = &self.tokens.borrow()[token_index]; let info = AstInfo { @@ -32,4 +24,12 @@ impl<'a> Context<'a> { 
self.asts.borrow_mut().push(info); } + + pub fn add_file(&self, file: String) { + self.files.borrow_mut().push(file); + } + + pub fn last_file_id(&self) -> usize { + self.files.borrow().len() - 1 + } } diff --git a/src/directive.rs b/src/directive.rs new file mode 100644 index 0000000..6e12bf5 --- /dev/null +++ b/src/directive.rs @@ -0,0 +1,71 @@ +use strum_macros::EnumDiscriminants; + +use crate::code_gen::CodeGeneratorError; + +#[derive(Debug, PartialEq, Copy, Clone)] +pub enum DirectiveEnum { + Org, + Incbin, + Byte, + Word, + Ascii, + Asciiz, + Warning, + Include +} + +#[derive(Debug, PartialEq, Clone)] +#[derive(EnumDiscriminants)] +#[strum_discriminants(name(DirectiveType))] +pub enum DirectiveValue { + Byte(u8), + Word(u16), + String(String), + Reference(String), +} + +impl DirectiveValue { + pub fn get_word(&self) -> Result { + + match self { + DirectiveValue::Word(number) => Ok(*number), + _ => Err(CodeGeneratorError::ExpectedThis("Word information")) + } + } + + pub fn get_byte(&self) -> Result { + + match self { + DirectiveValue::Byte(number) => Ok(*number), + _ => Err(CodeGeneratorError::ExpectedThis("Byte information")) + } + } +} + +#[derive(Debug, PartialEq, Copy, Clone)] +pub enum DirectiveVariableSize { + None, + Min(usize), + Length(usize) +} + +#[derive(Debug, PartialEq, Clone)] +pub struct DirectiveInfo { + pub name: &'static str, + pub directive: DirectiveEnum, + pub size: DirectiveVariableSize, + pub values: &'static [DirectiveType] +} + +pub const SYSTEM_DIRECTIVES: &[DirectiveInfo] = &[ + DirectiveInfo { name: "BYTE", directive: DirectiveEnum::Byte, size: DirectiveVariableSize::Min(1), values: &[DirectiveType::Byte, DirectiveType::String] }, + DirectiveInfo { name: "DB", directive: DirectiveEnum::Byte, size: DirectiveVariableSize::Min(1), values: &[DirectiveType::Byte, DirectiveType::String] }, + DirectiveInfo { name: "WORD", directive: DirectiveEnum::Word, size: DirectiveVariableSize::Min(1), values: &[DirectiveType::Byte, 
DirectiveType::Word] }, + DirectiveInfo { name: "DW", directive: DirectiveEnum::Word, size: DirectiveVariableSize::Min(1), values: &[DirectiveType::Byte, DirectiveType::Word] }, + DirectiveInfo { name: "ORG", directive: DirectiveEnum::Org, size: DirectiveVariableSize::Length(1), values: &[DirectiveType::Word] }, + DirectiveInfo { name: "INCBIN", directive: DirectiveEnum::Incbin, size: DirectiveVariableSize::Length(1), values: &[DirectiveType::String] }, + DirectiveInfo { name: "ASCII", directive: DirectiveEnum::Ascii, size: DirectiveVariableSize::Min(1), values: &[DirectiveType::String] }, + DirectiveInfo { name: "ASCIIZ", directive: DirectiveEnum::Asciiz, size: DirectiveVariableSize::Min(1), values: &[DirectiveType::String] }, + DirectiveInfo { name: "WARNING", directive: DirectiveEnum::Warning, size: DirectiveVariableSize::Min(1), values: &[DirectiveType::String, DirectiveType::Word, DirectiveType::Byte] }, + DirectiveInfo { name: "INCLUDE", directive: DirectiveEnum::Include, size: DirectiveVariableSize::Length(1), values: &[DirectiveType::String] }, +]; \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index ed3c64b..e10c595 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,7 +2,7 @@ mod opcode; mod parser; mod code_gen; mod ast; -mod options; +mod directive; mod tool; mod context; #[cfg(test)] @@ -16,16 +16,22 @@ use code_gen::CodeGenerator; use context::Context; use parser::Parser; +use std::fs::File; +use std::io::prelude::*; fn main() { let _ = CombinedLogger::init(vec![TermLogger::new(LevelFilter::Debug, Config::default(), TerminalMode::Mixed, ColorChoice::Auto)]); info!("timu6502asm Compiler"); - let data = br#".byte $ff"#; + let data = br#" + .include "test2.asm" + ADC TEST + "#; - let context = Context::new(data); + let context = Context::default(); + context.add_file("main.asm".to_string()); - let mut parser = Parser::new(context); + let mut parser = Parser::new(0, data, context); parser.parse().unwrap(); parser.friendly_dump(); @@ -36,5 
+42,5 @@ fn main() { let mut generator = CodeGenerator::new(); let context = generator.generate(context).unwrap(); - generator.dump(&context); + generator.dump(&context); } diff --git a/src/opcode.rs b/src/opcode.rs index ee487e6..c10e279 100644 --- a/src/opcode.rs +++ b/src/opcode.rs @@ -61,7 +61,7 @@ pub const EOR_MODES: [ModeInfo; 8] = [ModeInfo { mode: ModeType::Immediate, opco pub const INC_MODES: [ModeInfo; 4] = [ModeInfo { mode: ModeType::ZeroPage, opcode: 0xE6}, ModeInfo { mode: ModeType::ZeroPageX, opcode: 0xF6}, ModeInfo { mode: ModeType::Absolute, opcode: 0xEE}, ModeInfo { mode: ModeType::AbsoluteX, opcode: 0xFE}]; pub const INX_MODES: [ModeInfo; 1] = [ModeInfo { mode: ModeType::Implied, opcode: 0xE8}]; pub const INY_MODES: [ModeInfo; 1] = [ModeInfo { mode: ModeType::Implied, opcode: 0xC8}]; -pub const JMP_MODES: [ModeInfo; 2] = [ModeInfo { mode: ModeType::Absolute, opcode: 0x4C}, ModeInfo { mode: ModeType::Indirect , opcode: 0x6C}]; +pub const JMP_MODES: [ModeInfo; 2] = [ModeInfo { mode: ModeType::Absolute, opcode: 0x4C}, ModeInfo { mode: ModeType::Indirect, opcode: 0x6C}]; pub const JSR_MODES: [ModeInfo; 1] = [ModeInfo { mode: ModeType::Absolute, opcode: 0x20}]; pub const LDA_MODES: [ModeInfo; 8] = [ModeInfo { mode: ModeType::Immediate, opcode: 0xA9}, ModeInfo { mode: ModeType::ZeroPage, opcode: 0xA5}, ModeInfo { mode: ModeType::ZeroPageX, opcode: 0xB5}, ModeInfo { mode: ModeType::Absolute, opcode: 0xAD}, ModeInfo { mode: ModeType::AbsoluteX, opcode: 0xBD}, ModeInfo { mode: ModeType::AbsoluteY, opcode: 0xB9}, ModeInfo { mode: ModeType::IndirectX, opcode: 0xA1}, ModeInfo { mode: ModeType::IndirectY, opcode: 0xB1}]; pub const LDX_MODES: [ModeInfo; 5] = [ModeInfo { mode: ModeType::Immediate, opcode: 0xA2}, ModeInfo { mode: ModeType::ZeroPage, opcode: 0xA6}, ModeInfo { mode: ModeType::ZeroPageY, opcode: 0xB6}, ModeInfo { mode: ModeType::Absolute, opcode: 0xAE}, ModeInfo { mode: ModeType::AbsoluteY, opcode: 0xBE}]; diff --git a/src/options.rs 
b/src/options.rs index 137932d..96b39a3 100644 --- a/src/options.rs +++ b/src/options.rs @@ -16,14 +16,14 @@ pub enum DirectiveEnum { #[derive(Debug, PartialEq, Copy, Clone)] #[derive(EnumDiscriminants)] #[strum_discriminants(name(DirectiveType))] -pub enum DirectiveValue<'a> { +pub enum DirectiveValue { Byte(u8), Word(u16), String(&'a [u8]), Reference(&'a [u8]), } -impl<'a> DirectiveValue<'a> { +impl DirectiveValue { pub fn get_word(&self) -> Result { match self { @@ -41,14 +41,28 @@ impl<'a> DirectiveValue<'a> { } } -pub const OPTIONS: [&[u8]; 7] = [b"ORG", b"INCBIN", b"BYTE", b"WORD", b"ASCII", b"ASCIIZ", b"WARNING"]; -pub const ORG_TYPES: [DirectiveType; 1] = [DirectiveType::Word]; -pub const INCBIN_TYPES: [DirectiveType; 1] = [DirectiveType::String]; -pub const BYTE_TYPES: [DirectiveType; 2] = [DirectiveType::Byte, DirectiveType::String]; -pub const WORD_TYPES: [DirectiveType; 1] = [DirectiveType::Word]; -pub const ASCII_TYPES: [DirectiveType; 1] = [DirectiveType::String]; -pub const ASCIIZ_TYPES: [DirectiveType; 1] = [DirectiveType::String]; -pub const WARNING_TYPES: [DirectiveType; 1] = [DirectiveType::String]; - -pub const OPTION_MODES: [&[DirectiveType]; 7] = [&ORG_TYPES, &INCBIN_TYPES, &BYTE_TYPES, &WORD_TYPES, &ASCII_TYPES, &ASCIIZ_TYPES, &WARNING_TYPES]; -pub const DIRECTIVE_ENUMS: [DirectiveEnum; 7] = [DirectiveEnum::Org, DirectiveEnum::Incbin, DirectiveEnum::Byte, DirectiveEnum::Word, DirectiveEnum::Ascii, DirectiveEnum::Asciiz, DirectiveEnum::Warning]; +#[derive(Debug, PartialEq, Copy, Clone)] +pub enum DirectiveVariableSize { + None, + Min(usize), + Length(usize) +} + +#[derive(Debug, PartialEq, Clone)] +pub struct DirectiveInfo { + pub name: &'static [u8], + pub directive: DirectiveEnum, + pub size: DirectiveVariableSize, + pub value_types: &'static [DirectiveType] +} + +pub const SYSTEM_DIRECTIVES: &[DirectiveInfo] = &[ + DirectiveInfo { name: b"BYTE", directive: DirectiveEnum::Byte, size: DirectiveVariableSize::Min(1), value_types: 
&[DirectiveType::Byte, DirectiveType::String] }, + DirectiveInfo { name: b"DB", directive: DirectiveEnum::Byte, size: DirectiveVariableSize::Min(1), value_types: &[DirectiveType::Byte, DirectiveType::String] }, + DirectiveInfo { name: b"DB", directive: DirectiveEnum::Word, size: DirectiveVariableSize::Min(1), value_types: &[DirectiveType::Word] }, + DirectiveInfo { name: b"ORG", directive: DirectiveEnum::Org, size: DirectiveVariableSize::Length(1), value_types: &[DirectiveType::Word] }, + DirectiveInfo { name: b"INCBIN", directive: DirectiveEnum::Incbin, size: DirectiveVariableSize::Length(1), value_types: &[DirectiveType::String] }, + DirectiveInfo { name: b"ASCII", directive: DirectiveEnum::Ascii, size: DirectiveVariableSize::Min(1), value_types: &[DirectiveType::String] }, + DirectiveInfo { name: b"ASCIIZ", directive: DirectiveEnum::Asciiz, size: DirectiveVariableSize::Min(1), value_types: &[DirectiveType::String] }, + DirectiveInfo { name: b"WARNING", directive: DirectiveEnum::Warning, size: DirectiveVariableSize::Length(1), value_types: &[DirectiveType::String] }, +]; \ No newline at end of file diff --git a/src/parser.rs b/src/parser.rs index ab8da4d..bb6ef70 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,6 +1,10 @@ -use crate::{context::Context, opcode::{ModeType, INSTS}, tool::{print_error, upper_case_byte}}; +use core::str; +use std::str::Utf8Error; + +use crate::{context::Context, opcode::INSTS, tool::{print_error, upper_case_byte}}; use log::info; use strum_macros::EnumDiscriminants; +use thiserror::Error; /* Address Modes @@ -27,51 +31,77 @@ pub struct Parser<'a> { pub column: usize, pub end: usize, size: usize, - pub context: Context<'a> + pub context: Context, + pub data: &'a [u8], + pub file_id: usize } #[derive(Debug, PartialEq, Clone)] #[derive(EnumDiscriminants)] #[strum_discriminants(name(TokenType))] -pub enum Token<'a> { +pub enum Token { Instr(usize), - Keyword(&'a [u8]), - String(&'a [u8]), - Directive(&'a [u8]), - Comment(&'a [u8]), + 
Keyword(String), + String(String), + Directive(String), + Comment(String), Assign, Comma, - Branch(&'a [u8]), - BranchNext(&'a [u8]), - Number(u16, ModeType), + OpenParenthesis, + CloseParenthesis, + Sharp, + Branch(String), + BranchNext(String), + Byte(u8), + Word(u16), NewLine(usize), Space(usize), End, } #[derive(Debug)] -pub struct TokenInfo<'a> { +#[derive(Clone)] +pub struct TokenInfo { pub line: usize, pub column: usize, - pub token: Token<'a>, + pub token: Token, pub end: usize, + pub file_id: usize } -#[derive(Debug, PartialEq)] +#[derive(Debug, PartialEq, Error)] pub enum ParseError { + #[error("Out of scope")] OutOfScope, + + #[error("Unexpeted symbol")] UnexpectedSymbol, + + #[error("Unknown token")] UnknownToken, + + #[error("Invalid number format")] InvalidNumberFormat, + + #[error("Invalid comment format")] InvalidCommentFormat, + + #[error("Invalid keyword")] InvalidKeyword, + + #[error("Invalid directive")] InvalidDirective, - InvalidString + + #[error("Invalid string")] + InvalidString, + + #[error("Invalid text format ({0})")] + Utf8Error(#[from] Utf8Error), } impl<'a> Parser<'a> { - pub fn new(context: Context<'a>) -> Self { - let size = context.source.len(); + pub fn new(file_id: usize, data: &'a [u8], context: Context) -> Self { + let size = data.len(); Self { index: 0, @@ -79,16 +109,19 @@ impl<'a> Parser<'a> { column: 0, end: 0, size, - context + context, + data, + file_id } } - fn add_token(&mut self, token: Token<'a>) { + fn add_token(&mut self, token: Token) { self.context.tokens.borrow_mut().push(TokenInfo { line: self.line, column: self.column, end: self.end, token, + file_id: self.file_id }); } @@ -119,8 +152,8 @@ impl<'a> Parser<'a> { match self.inner_parse() { Ok(_) => Ok(()), Err(error) => { - println!("2{:?}", self.context.source); - print_error(&self.context.source, &error, self.line, self.column, self.end); + println!("2{:?}", self.data); + print_error(&self.data, &error, self.line, self.column, self.end); Err(error) } } @@ 
-128,12 +161,12 @@ impl<'a> Parser<'a> { fn peek(&mut self) -> Result { self.empty_check()?; - Ok(self.context.source[self.index]) + Ok(self.data[self.index]) } fn peek2(&mut self) -> Result { self.empty_check2()?; - Ok(self.context.source[self.index+1]) + Ok(self.data[self.index+1]) } fn peek_expected(&mut self, byte: u8, error: ParseError) -> Result<(), ParseError> { @@ -147,7 +180,7 @@ impl<'a> Parser<'a> { self.empty_check()?; self.index += 1; self.end += 1; - Ok(self.context.source[self.index - 1]) + Ok(self.data[self.index - 1]) } fn eat_expected(&mut self, byte: u8, error: ParseError) -> Result<(), ParseError> { @@ -193,20 +226,21 @@ impl<'a> Parser<'a> { } } - fn next(&mut self) -> Result, ParseError> { + fn next(&mut self) -> Result { let first = self.peek()?; match first { - b'$' => self.parse_absolute_hex(), - b'%' => self.parse_absolute_binary(), + b'$' => self.parse_hex(), + b'%' => self.parse_binary(), b'0'..=b'9' => self.parse_absolute_decimal(), - b'(' => self.parse_indirect(), - b'#' => self.parse_immediate(), + b'#' => self.parse_sharp(), b'a'..=b'z' | b'A'..=b'Z' => self.parse_keyword(), b'.' => self.parse_directive(), b'"' => self.parse_string(), b';' => self.parse_comment(), b'=' => self.parse_assign(), + b'(' => self.parse_open_parenthesis(), + b')' => self.parse_close_parenthesis(), b',' => self.parse_comma(), b'\r' | b'\n' => self.parse_newline(), b' ' | b'\t' => self.parse_whitespace(), @@ -217,125 +251,36 @@ impl<'a> Parser<'a> { } } - fn parse_absolute_mode(&mut self, number: u16, is_absolute: bool) -> Result, ParseError> { - self.eat_spaces()?; - - let current_index = self.index; + fn parse_absolute_decimal(&mut self) -> Result { - if self.peek() == Ok(b',') { - self.eat()?; // Eat , - self.eat_spaces()?; - - match self.eat()? 
{ - b'x' | b'X' => Ok(Token::Number(number, match is_absolute { - true => ModeType::AbsoluteX, - false => ModeType::ZeroPageX - })), - b'y' | b'Y' => Ok(Token::Number(number, match is_absolute { - true => ModeType::AbsoluteY, - false => ModeType::ZeroPageY - })), - _ => { - self.index = current_index; // Restore index - Ok(Token::Number(number, match is_absolute { - true => ModeType::Absolute, - false => ModeType::ZeroPage - })) - }, - } - } else { - Ok(Token::Number(number, match is_absolute { - true => ModeType::Absolute, - false => ModeType::ZeroPage - })) - } - } - - fn parse_absolute_decimal(&mut self) -> Result, ParseError> { - let (size, number) = self.parse_decimal()?; - - self.parse_absolute_mode(number, size == 2) - } - - fn parse_absolute_hex(&mut self) -> Result, ParseError> { - self.eat_expected(b'$', ParseError::InvalidNumberFormat)?; - - let (size, number) = self.parse_hex()?; - self.parse_absolute_mode(number, size == 2) - } - - fn parse_absolute_binary(&mut self) -> Result, ParseError> { - self.eat_expected(b'%', ParseError::InvalidNumberFormat)?; - - let (size, number) = self.parse_binary()?; - self.parse_absolute_mode(number, size == 2) - } - - fn parse_indirect(&mut self) -> Result, ParseError> { - self.eat_expected(b'(', ParseError::InvalidNumberFormat)?; - self.eat_spaces()?; - - let first = self.eat(); - - let (size, number) = match first { - Ok(b'$') => self.parse_hex()?, - Ok(b'%') => self.parse_binary()?, - Ok(b'0'..=b'9') => { - let _ = self.dec(); // Give back what you eat - self.parse_decimal()? 
- }, - _ => return Err(ParseError::InvalidNumberFormat), - }; - - if size == 2 { // For ($0x0000) to ($0xffff) numbers - self.eat_spaces()?; - self.eat_expected(b')', ParseError::InvalidNumberFormat)?; - return Ok(Token::Number(number, ModeType::Indirect)); - } - - self.eat_spaces()?; - let next_byte = self.eat()?; - match next_byte { - b',' => { - self.eat_spaces()?; - self.peek_expected(b'X', ParseError::InvalidNumberFormat).or(self.peek_expected(b'x', ParseError::InvalidNumberFormat))?; - let _ = self.eat(); // Eat x or X + let mut decimal_number: u16 = 0; + + while let Ok(n) = self.peek() { + let number = match n { + n @ b'0'..=b'9' => n - b'0', + b' ' | b'\r' | b'\t' | b'\n' | b',' | b')' => break, + _ => return Err(ParseError::InvalidNumberFormat), + }; - self.eat_expected(b')', ParseError::InvalidNumberFormat)?; - Ok(Token::Number(number, ModeType::IndirectX)) - } - b')' => { - self.eat_spaces()?; - self.eat_expected(b',', ParseError::InvalidNumberFormat)?; - self.eat_spaces()?; - - self.peek_expected(b'Y', ParseError::InvalidNumberFormat).or(self.peek_expected(b'y', ParseError::InvalidNumberFormat))?; - let _ = self.eat(); // Eat y or Y - Ok(Token::Number(number, ModeType::IndirectY)) - } - _ => Err(ParseError::InvalidNumberFormat), + decimal_number = (decimal_number * 10) + number as u16; + let _ = self.eat(); } - } - fn parse_immediate(&mut self) -> Result, ParseError> { - self.eat()?; //Eat # char - - let number = self.parse_number()?; - Ok(Token::Number(number, ModeType::Immediate)) - } + let size = match decimal_number > 0xff_u16 { + true => 2, + false => 1 + }; - fn parse_number(&mut self) -> Result { - match self.eat()? 
{ - b'$' => self.parse_hex().map(|(_, number)| number), - b'%' => self.parse_binary().map(|(_, number)| number), - _ => { - self.dec()?; - self.parse_decimal().map(|(_, number)| number) - } + match size { + 1 => Ok(Token::Byte(decimal_number as u8)), + 2 => Ok(Token::Word(decimal_number as u16)), + _ => Err(ParseError::InvalidNumberFormat) } } - fn parse_hex(&mut self) -> Result<(u8, u16), ParseError> { + fn parse_hex(&mut self) -> Result { + self.eat_expected(b'$', ParseError::InvalidNumberFormat)?; + let mut hex_number: u16 = 0; let mut count: u8 = 0; @@ -357,10 +302,16 @@ impl<'a> Parser<'a> { return Err(ParseError::InvalidNumberFormat); } - Ok((count / 2, hex_number)) + match count / 2 { + 1 => Ok(Token::Byte(hex_number as u8)), + 2 => Ok(Token::Word(hex_number as u16)), + _ => Err(ParseError::InvalidNumberFormat) + } } - fn parse_binary(&mut self) -> Result<(u8, u16), ParseError> { + fn parse_binary(&mut self) -> Result { + self.eat_expected(b'%', ParseError::InvalidNumberFormat)?; + let mut binary_number: u16 = 0b0000_0000_0000_0000; let mut count: u8 = 0; @@ -380,31 +331,31 @@ impl<'a> Parser<'a> { if count != 8 && count != 16 { return Err(ParseError::InvalidNumberFormat); } + + match count / 8 { + 1 => Ok(Token::Byte(binary_number as u8)), + 2 => Ok(Token::Word(binary_number as u16)), + _ => Err(ParseError::InvalidNumberFormat) + } - Ok((count / 8, binary_number)) } - fn parse_decimal(&mut self) -> Result<(u8, u16), ParseError> { - let mut decimal_number: u16 = 0; - - while let Ok(n) = self.peek() { - let number = match n { - n @ b'0'..=b'9' => n - b'0', - b' ' | b'\r' | b'\t' | b'\n' | b',' | b')' => break, - _ => return Err(ParseError::InvalidNumberFormat), - }; + fn parse_open_parenthesis(&mut self) -> Result { + self.eat_expected(b'(', ParseError::InvalidNumberFormat)?; + Ok(Token::OpenParenthesis) + } - decimal_number = (decimal_number * 10) + number as u16; - let _ = self.eat(); - } + fn parse_close_parenthesis(&mut self) -> Result { + 
self.eat_expected(b')', ParseError::InvalidNumberFormat)?; + Ok(Token::CloseParenthesis) + } - Ok((match decimal_number > 0xff_u16 { - true => 2, - false => 1 - }, decimal_number)) + fn parse_sharp(&mut self) -> Result { + self.eat_expected(b'#', ParseError::InvalidNumberFormat)?; + Ok(Token::Sharp) } - fn parse_keyword(&mut self) -> Result, ParseError> { + fn parse_keyword(&mut self) -> Result { let start = self.index; let mut valid = false; @@ -418,7 +369,7 @@ impl<'a> Parser<'a> { b'a'..=b'z' => valid = true, b'A'..=b'Z' => valid = true, b'_' => (), - b' ' | b'\t' => break, + b' ' | b',' | b')' | b'=' | b'\t' => break, b'\n' | b'\r' => break, b':' => { branch = true; @@ -439,20 +390,20 @@ impl<'a> Parser<'a> { } if branch { - return Ok(Token::Branch(&self.context.source[start..self.index - 1])); + return Ok(Token::Branch(str::from_utf8(&self.data[start..self.index - 1])?.to_string())); } if self.index - start == 3 { - let search_insts: [u8; 3] = [upper_case_byte(self.context.source[start]), upper_case_byte(self.context.source[start + 1]), upper_case_byte(self.context.source[start + 2])]; + let search_insts: [u8; 3] = [upper_case_byte(self.data[start]), upper_case_byte(self.data[start + 1]), upper_case_byte(self.data[start + 2])]; if let Some(position) = INSTS.iter().position(|item| *item == &search_insts) { return Ok(Token::Instr(position)); } } - Ok(Token::Keyword(&self.context.source[start..self.index])) + Ok(Token::Keyword(str::from_utf8(&self.data[start..self.index])?.to_string())) } - fn parse_string(&mut self) -> Result, ParseError> { + fn parse_string(&mut self) -> Result { self.eat_expected(b'"', ParseError::InvalidString)?; let start = self.index; @@ -475,10 +426,10 @@ impl<'a> Parser<'a> { } self.eat_expected(b'"', ParseError::InvalidString)?; - Ok(Token::String(&self.context.source[start..self.index-1])) + Ok(Token::String(str::from_utf8(&self.data[start..self.index - 1])?.to_string())) } - fn parse_directive(&mut self) -> Result, ParseError> { + fn 
parse_directive(&mut self) -> Result { self.eat_expected(b'.', ParseError::InvalidDirective)?; let start = self.index; @@ -512,13 +463,13 @@ impl<'a> Parser<'a> { } if branch { - return Ok(Token::BranchNext(&self.context.source[start..self.index - 1])); + return Ok(Token::BranchNext(str::from_utf8(&self.data[start..self.index - 1])?.to_string())); } - Ok(Token::Directive(&self.context.source[start..self.index])) + Ok(Token::Directive(str::from_utf8(&self.data[start..self.index])?.to_string())) } - fn parse_comment(&mut self) -> Result, ParseError> { + fn parse_comment(&mut self) -> Result { let start = self.index; loop { @@ -527,27 +478,27 @@ impl<'a> Parser<'a> { b'\n' | b'\r' => { self.dec()?; break; - } + }, _ => continue, }, Err(ParseError::OutOfScope) => break, _ => return Err(ParseError::InvalidCommentFormat), }; } - Ok(Token::Comment(&self.context.source[start..self.index - 1])) + Ok(Token::Comment(str::from_utf8(&self.data[start..self.index - 1])?.to_string())) } - fn parse_assign(&mut self) -> Result, ParseError> { + fn parse_assign(&mut self) -> Result { self.eat_expected(b'=', ParseError::UnexpectedSymbol)?; Ok(Token::Assign) } - fn parse_comma(&mut self) -> Result, ParseError> { + fn parse_comma(&mut self) -> Result { self.eat_expected(b',', ParseError::UnexpectedSymbol)?; Ok(Token::Comma) } - fn parse_newline(&mut self) -> Result, ParseError> { + fn parse_newline(&mut self) -> Result { let mut total_lines = 0; loop { @@ -561,7 +512,7 @@ impl<'a> Parser<'a> { Ok(Token::NewLine(total_lines)) } - fn parse_whitespace(&mut self) -> Result, ParseError> { + fn parse_whitespace(&mut self) -> Result { let mut total_whitespaces = 0; while let Ok(b' ') | Ok(b'\t') = self.peek() { @@ -584,7 +535,11 @@ impl<'a> Parser<'a> { Token::Directive(_) => "DIRECTIVE", Token::Comment(_) => "COMMENT", Token::Branch(_) => "BRANCH", - Token::Number(_, _) => "NUMBER", + Token::Byte(_) => "BYTE", + Token::Word(_) => "WORD", + Token::OpenParenthesis => "(", + 
Token::CloseParenthesis => ")", + Token::Sharp => "#", Token::NewLine(_) => "NEWLINE", Token::Space(_) => "SPACE", Token::End => "END", diff --git a/src/tests/asms/tables.asm b/src/tests/asms/tables.asm new file mode 100644 index 0000000..8464323 --- /dev/null +++ b/src/tests/asms/tables.asm @@ -0,0 +1,61 @@ + +;; TABLES + +attributes: ; + .db %00000000, %00000000, %00000000, %00000000, %00000000, %00000000, %00000000, %00000000 + .db %00000000, %00000000, %00000000, %00000000, %00000000, %00000000, %00000000, %00000000 + .db %00000000, %00000000, %00000000, %00000000, %00000000, %00000000, %00000000, %00000000 + .db %00000000, %00000000, %00000000, %00000000, %00000000, %00000000, %00000000, %00000000 + .db %00000000, %00000000, %00000000, %00000000, %00000000, %00000000, %00000000, %00000000 + .db %00000000, %00000000, %00000000, %00000000, %00000000, %00000000, %00000000, %00000000 + .db %00000000, %00000000, %00000000, %00000000, %00000000, %00000000, %00000000, %00000000 + .db %00000000, %00000000, %00000000, %00000000, %00000000, %00000000, %00000000, %00000000 + +;; + +palettes: ; + .db $0F,$30,$16,$30, $0F,$0F,$0F,$0F, $0F,$0F,$0F,$0F, $0F,$0F,$0F,$0F + .db $0F,$30,$17,$07, $0F,$0F,$0F,$0F, $0F,$0F,$0F,$0F, $0F,$0F,$0F,$0F + +;; + +sprites: ; + ; vert tile attr horiz + .db $80, $32, $00, $80 ; sprite 0 + .db $80, $33, $00, $88 ; sprite 1 + .db $88, $34, $00, $80 ; sprite 2 + .db $88, $35, $00, $88 ; sprite 3 + +;; + +background: ; + .db $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00 + .db $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00 + .db $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00 + .db $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00 
+ .db $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00 + .db $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00 + .db $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00 + .db $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00 + .db $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00 + .db $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00 + .db $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00 + .db $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00 + .db $00,$00,$00,$00,$00,$00,$0f,$04,$04,$04,$0e,$01,$04,$04,$04,$0e,$10,$11,$00,$00,$02,$0f,$04,$04,$04,$0e,$00,$00,$00,$00,$00,$00 + .db $00,$00,$00,$00,$00,$00,$08,$00,$00,$00,$08,$06,$04,$04,$04,$0d,$08,$14,$15,$12,$08,$0f,$04,$04,$04,$0d,$00,$00,$00,$00,$00,$00 + .db $00,$00,$00,$00,$00,$00,$0c,$04,$04,$04,$0d,$0a,$00,$00,$00,$00,$0a,$00,$00,$16,$17,$09,$04,$04,$04,$07,$00,$00,$00,$00,$00,$00 + .db $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00 + .db $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00 + .db $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00 + .db 
$00,$00,$00,$00,$00,$00,$18,$19,$1a,$1b,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$1c,$00,$00,$00,$00,$00,$00 + .db $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00 + .db $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00 + .db $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00 + .db $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00 + .db $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00 + .db $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00 + .db $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00 + .db $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00 + .db $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00 + .db $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00 + .db $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00 \ No newline at end of file diff --git a/src/tests/bins/tables.bin b/src/tests/bins/tables.bin new file mode 100644 index 0000000..55f7a50 Binary files /dev/null and b/src/tests/bins/tables.bin differ diff --git a/src/tests/generic.rs b/src/tests/generic.rs index 07f7909..a82d740 100644 --- a/src/tests/generic.rs +++ b/src/tests/generic.rs @@ -1,9 +1,17 @@ +use std::{fs::File, io::Read}; + use 
rstest::*; -use crate::{ast::AstGenerator, code_gen::CodeGenerator, context::Context, parser::Parser}; +use crate::{ + ast::AstGenerator, + code_gen::{CodeGenerator, CodeGeneratorError}, + context::Context, + parser::Parser, +}; #[rstest] -#[case(br#"LDX #$08 +#[case( + br#"LDX #$08 decrement2: STX $0201 decrement: @@ -13,24 +21,32 @@ CPX #$03 BNE decrement BNE decrement2 STX $0201 -BRK"#)] -#[case(br#"LDA #$01 +BRK"# +)] +#[case( + br#"LDA #$01 STA $0200 LDA #$05 STA $0201 LDA #$08 -STA $0202"#)] -#[case(br#"LDA #$c0 ;Load the hex value $c0 into the A register +STA $0202"# +)] +#[case( + br#"LDA #$c0 ;Load the hex value $c0 into the A register TAX ;Transfer the value in the A register to X INX ;Increment the value in the X register ADC #$c4 ;Add the hex value $c4 to the A register -BRK ;Break - we're done"#)] -#[case(br#" +BRK ;Break - we're done"# +)] +#[case( + br#" LDA #$80 STA $01 ADC $01 -"#)] -#[case(br#"LDX #$08 +"# +)] +#[case( + br#"LDX #$08 decrement: DEX STX $0200 @@ -40,11 +56,13 @@ BNE decrement2 STX $0201 decrement2: STX $0201 -BRK"#)] +BRK"# +)] fn compile_test(#[case] data: &'_ [u8]) { - let context = Context::new(data); + let context = Context::default(); + context.add_file("main.asm".to_string()); - let mut parser = Parser::new(context); + let mut parser = Parser::new(0, data, context); parser.parse().unwrap(); parser.friendly_dump(); @@ -55,11 +73,11 @@ fn compile_test(#[case] data: &'_ [u8]) { let mut generator = CodeGenerator::new(); let context = generator.generate(context).unwrap(); - generator.dump(&context); + generator.dump(&context); } /* - */ + */ #[rstest] #[case(br#"LDX #$08 decrement: @@ -169,10 +187,25 @@ LDx IOREST"#, &[0xad, 0x4a, 0xff, 0xae, 0x3f, 0xff])] #[case(br#".word $2211, $4433,$6655, $8877"#, &[0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88])] #[case(br#".byte $ff .asciiz "abcd""#, &[0xFF, 0x61, 0x62, 0x63, 0x64, 0x00])] +#[case(br#".word $ccff"#, &[0xff, 0xcc])] +#[case(br#".word $ff"#, &[0xff, 0x00])] +#[case(br#"AND 
#$dd"#, &[0x29, 0xdd])] +#[case(br#"AND #$ffdd"#, &[0x29, 0xdd])] +#[case(br#"AND $dd"#, &[0x25, 0xdd])] +#[case(br#"AND $ffdd"#, &[0x2d, 0xdd, 0xff])] +#[case(br#"AND ($ff, x)"#, &[0x21, 0xff])] +#[case(br#"AND ($00ff, x)"#, &[0x21, 0xff])] +#[case(br#"AND ($ff,Y )"#, &[0x31, 0xff])] +#[case(br#"LDX $ff,Y"#, &[0xb6, 0xff])] +#[case(br#"AND $ff,x"#, &[0x35, 0xff])] +#[case(br#"AND $ffdd , x"#, &[0x3d, 0xdd, 0xff])] +#[case(br#"LDX $ffdd , y"#, &[0xBE, 0xdd, 0xff])] +#[case(br#"JMP ($ffdd)"#, &[0x6c, 0xdd, 0xff])] // Only jump has indirect mode fn check_codes(#[case] data: &'_ [u8], #[case] codes: &'_ [u8]) { - let context = Context::new(data); + let context = Context::default(); + context.add_file("main.asm".to_string()); - let mut parser = Parser::new(context); + let mut parser = Parser::new(0, data, context); parser.parse().unwrap(); parser.friendly_dump(); @@ -190,9 +223,10 @@ fn check_codes(#[case] data: &'_ [u8], #[case] codes: &'_ [u8]) { #[rstest] #[case(br#".INCBIN "src/tests/bins/test1.bin""#, &[0x00, 0x01, 0x02, 0x03])] fn binary_read(#[case] data: &'_ [u8], #[case] binary: &'_ [u8]) { - let context = Context::new(data); + let context = Context::default(); + context.add_file("main.asm".to_string()); - let mut parser = Parser::new(context); + let mut parser = Parser::new(0, data, context); parser.parse().unwrap(); parser.friendly_dump(); @@ -214,12 +248,13 @@ fn binary_read(#[case] data: &'_ [u8], #[case] binary: &'_ [u8]) { #[case(br#"= :"#)] #[case(br#"? 
:"#)] fn parser_fail(#[case] data: &'_ [u8]) { - let context = Context::new(data); - let mut parser = Parser::new(context); + let context = Context::default(); + context.add_file("main.asm".to_string()); + + let mut parser = Parser::new(0, data, context); assert!(parser.parse().is_err()); } - #[rstest] #[case(br#".INCBIN"#)] #[case(br#"BNE"#)] @@ -229,13 +264,65 @@ fn parser_fail(#[case] data: &'_ [u8]) { #[case(br#"BNE = "Hello""#)] #[case(br#".fBNE = "Hello""#)] fn ast_generator_fail(#[case] data: &'_ [u8]) { - let context = Context::new(data); - let mut parser = Parser::new(context); - parser.parse().unwrap(); - parser.friendly_dump(); + let context = Context::default(); + context.add_file("main.asm".to_string()); + + let mut parser = Parser::new(0, data, context); + parser.parse().unwrap(); + parser.friendly_dump(); + + let context = parser.context; + + let ast_generator = AstGenerator::new(); + assert!(ast_generator.generate(context).is_err()); +} +#[rstest] +#[case(br#"AND ($ffdd)"#)] +fn compile_failure(#[case] data: &'_ [u8]) { + let context = Context::default(); + context.add_file("main.asm".to_string()); + + let mut parser = Parser::new(0, data, context); + parser.parse().unwrap(); + parser.friendly_dump(); + + let context = parser.context; + + let ast_generator = AstGenerator::new(); + let context = ast_generator.generate(context).unwrap(); - let context = parser.context; + let mut generator = CodeGenerator::new(); + match generator.generate(context).unwrap_err() { + CodeGeneratorError::IllegalOpcode => (), + _ => { + panic!("Invalid error code"); + } + }; +} - let ast_generator = AstGenerator::new(); - assert!(ast_generator.generate(context).is_err()); -} \ No newline at end of file +#[rstest] +#[case("src/tests/asms/tables.asm", "src/tests/bins/tables.bin")] +fn test_file(#[case] code_filename: &str, #[case] expected_filename: &str) { + let mut code = Vec::new(); + let mut file = File::open(code_filename).unwrap(); + file.read_to_end(&mut 
code).unwrap(); + + let mut binary = Vec::new(); + let mut file = File::open(expected_filename).unwrap(); + file.read_to_end(&mut binary).unwrap(); + + let context = Context::default(); + context.add_file("main.asm".to_string()); + + let mut parser = Parser::new(0, &code, context); + parser.parse().unwrap(); + + let context = parser.context; + + let ast_generator = AstGenerator::new(); + let context = ast_generator.generate(context).unwrap(); + + let mut generator = CodeGenerator::new(); + let context = generator.generate(context).unwrap(); + assert_eq!(context.target, binary); +} diff --git a/src/tests/parser.rs b/src/tests/parser.rs index 2dfd07f..d696b5b 100644 --- a/src/tests/parser.rs +++ b/src/tests/parser.rs @@ -1,93 +1,66 @@ use rstest::*; -use crate::{context::Context, opcode::ModeType, parser::{Parser, Token}}; +use crate::{ast::AstGenerator, context::Context, parser::{Parser, Token}}; #[rstest] // Hex numbers -#[case(b"#$a0", Token::Number(0xA0, ModeType::Immediate))] -#[case(b"$a0", Token::Number(0xA0, ModeType::ZeroPage))] -#[case(b"$a0,X", Token::Number(0xA0, ModeType::ZeroPageX))] -#[case(b"$a0,Y", Token::Number(0xA0, ModeType::ZeroPageY))] -#[case(b"$a0, x", Token::Number(0xA0, ModeType::ZeroPageX))] -#[case(b"$a0, y", Token::Number(0xA0, ModeType::ZeroPageY))] -#[case(b"$a000", Token::Number(0xA000, ModeType::Absolute))] -#[case(b"$a000,X", Token::Number(0xA000, ModeType::AbsoluteX))] -#[case(b"$a000,Y", Token::Number(0xA000, ModeType::AbsoluteY))] -#[case(b"($a0,X)", Token::Number(0xA0, ModeType::IndirectX))] -#[case(b"($a0),Y", Token::Number(0xA0, ModeType::IndirectY))] -#[case(b"($a0, x)", Token::Number(0xA0, ModeType::IndirectX))] -#[case(b"($a0), y", Token::Number(0xA0, ModeType::IndirectY))] -#[case(b"($a000)", Token::Number(0xa000, ModeType::Indirect))] -#[case(b"( $a000 )", Token::Number(0xA000, ModeType::Indirect))] +#[case(b"$a0", 0xa0)] +#[case(b"$a000", 0xa000)] // Binary numbers -#[case(b"#%10100000", Token::Number(0xA0, 
ModeType::Immediate))] -#[case(b"%10100000", Token::Number(0xA0, ModeType::ZeroPage))] -#[case(b"%10100000,X", Token::Number(0xA0, ModeType::ZeroPageX))] -#[case(b"%10100000,Y", Token::Number(0xA0, ModeType::ZeroPageY))] -#[case(b"%10100000, x", Token::Number(0xA0, ModeType::ZeroPageX))] -#[case(b"%10100000, y", Token::Number(0xA0, ModeType::ZeroPageY))] -#[case(b"%1010000000000000", Token::Number(0xA000, ModeType::Absolute))] -#[case(b"%1010000000000000,X", Token::Number(0xA000, ModeType::AbsoluteX))] -#[case(b"%1010000000000000,Y", Token::Number(0xA000, ModeType::AbsoluteY))] -#[case(b"(%10100000,X)", Token::Number(0xA0, ModeType::IndirectX))] -#[case(b"(%10100000),Y", Token::Number(0xA0, ModeType::IndirectY))] -#[case(b"(%10100000, x)", Token::Number(0xA0, ModeType::IndirectX))] -#[case(b"(%10100000), y", Token::Number(0xA0, ModeType::IndirectY))] -#[case(b"(%1010000000000000)", Token::Number(0xa000, ModeType::Indirect))] -#[case(b"( %1010000000000000 )", Token::Number(0xA000, ModeType::Indirect))] +#[case(b"%10100000", 0xa0)] +#[case(b"%1010000000000000", 40960)] // Decimal numbers -#[case(b"#160", Token::Number(0xA0, ModeType::Immediate))] -#[case(b"160", Token::Number(0xA0, ModeType::ZeroPage))] -#[case(b"160,X", Token::Number(0xA0, ModeType::ZeroPageX))] -#[case(b"160,Y", Token::Number(0xA0, ModeType::ZeroPageY))] -#[case(b"160, x", Token::Number(0xA0, ModeType::ZeroPageX))] -#[case(b"160, y", Token::Number(0xA0, ModeType::ZeroPageY))] -#[case(b"40960", Token::Number(0xA000, ModeType::Absolute))] -#[case(b"40960,X", Token::Number(0xA000, ModeType::AbsoluteX))] -#[case(b"40960,Y", Token::Number(0xA000, ModeType::AbsoluteY))] -#[case(b"(160,X)", Token::Number(0xA0, ModeType::IndirectX))] -#[case(b"(160),Y", Token::Number(0xA0, ModeType::IndirectY))] -#[case(b"(160, x)", Token::Number(0xA0, ModeType::IndirectX))] -#[case(b"(160), y", Token::Number(0xA0, ModeType::IndirectY))] -#[case(b"(40960)", Token::Number(0xa000, ModeType::Indirect))] -#[case(b"( 40960 )", 
Token::Number(0xA000, ModeType::Indirect))] -fn number_check(#[case] data: &'_ [u8], #[case] token: Token<'_>) { - let context = Context::new(data); - let mut parser = Parser::new(context); +#[case(b"160", 0xa0)] +fn number_check(#[case] data: &'_ [u8], #[case] expected: u16) { + let context = Context::default(); + context.add_file("main.asm".to_string()); + + let mut parser = Parser::new(0, data, context); parser.parse().unwrap(); assert_eq!(parser.context.tokens.borrow().len(), 2); - assert_eq!(parser.context.tokens.borrow()[0].token, token); + match parser.context.tokens.borrow()[0].token { + Token::Byte(current) => assert_eq!(current, expected as u8), + Token::Word(current) => assert_eq!(current, expected), + _ => panic!("Unexpected token") + } assert_eq!(parser.context.tokens.borrow()[1].token, Token::End); } #[rstest] -#[case(b"#$a00", 3)] -#[case(b"#%123", 3)] -#[case(b"#%001", 3)] -#[case(b"#%00111", 3)] -#[case(b"#% 00111", 3)] -#[case(b"#%a00111", 3)] -#[case(b"#$", 0)] -#[case(b"#$1", 0)] -#[case(b"$a01", 3)] -#[case(b"$a0111", 3)] -#[case(b"$a", 0)] -#[case(b"$ta000", 0)] -#[case(b"$a000-,X", 0)] -#[case(b"($a0,X", 0)] -#[case(b"$a0),Y", 0)] -#[case(b"$a0 , Y)", 0)] -#[case(b"$a0 Y)", 0)] -#[case(b"($a0)", 0)] -#[case(b"($a000", 0)] -#[case(b"$a000)", 0)] -fn invalid_number_check(#[case] data: &'_ [u8], #[case] count: usize) { - let context = Context::new(data); - let mut parser = Parser::new(context); - if let Ok(_) = parser.parse() { - assert_eq!(parser.context.tokens.borrow().len(), count); +#[case(b"#$a00")] +#[case(b"#%123")] +#[case(b"#%001")] +#[case(b"#%00111")] +#[case(b"#% 00111")] +#[case(b"#%a00111")] +#[case(b"#$")] +#[case(b"#$1")] +#[case(b"$a01")] +#[case(b"$a0111")] +#[case(b"$a")] +#[case(b"$ta000")] +#[case(b"$a000-,X")] +#[case(b"($a0,X")] +#[case(b"$a0),Y")] +#[case(b"$a0 , Y)")] +#[case(b"$a0 Y)")] +#[case(b"($a0)")] +#[case(b"($a000")] +#[case(b"$a000)")] +fn invalid_number_check(#[case] data: &'_ [u8]) { + let context = 
Context::default(); + context.add_file("main.asm".to_string()); + + let mut parser = Parser::new(0, data, context); + + match parser.parse() { + Ok(_) => { + let ast_generator = AstGenerator::new(); + ast_generator.generate(parser.context).unwrap_err(); + }, + Err(_) => () } } @@ -98,8 +71,10 @@ fn invalid_number_check(#[case] data: &'_ [u8], #[case] count: usize) { #[case(b";''''''")] #[case(b";;;;;;;;;;;;;")] fn check_comment(#[case] data: &'_ [u8]) { - let context = Context::new(data); - let mut parser = Parser::new(context); + let context = Context::default(); + context.add_file("main.asm".to_string()); + + let mut parser = Parser::new(0, data, context); parser.parse().unwrap(); assert_eq!(parser.context.tokens.borrow().len(), 2); if let Token::Comment(_) = parser.context.tokens.borrow()[0].token { diff --git a/src/tool.rs b/src/tool.rs index 85b4e74..12f5907 100644 --- a/src/tool.rs +++ b/src/tool.rs @@ -15,6 +15,7 @@ pub fn upper_case_byte(byte: u8) -> u8 { } pub fn print_error(data: &'_ [u8], error: &T, line: usize, column: usize, end: usize) { + return; let mut line_index = 0; let mut start_index = 0; let mut end_index = data.len()-1; diff --git a/test2.asm b/test2.asm new file mode 100644 index 0000000..12bb637 --- /dev/null +++ b/test2.asm @@ -0,0 +1,5 @@ +.byte $ff + + +.warning "Hello world", " ", $10, " ", " - " +TEST = $cc