From 79566ff01372ebf81e1930cf16c1a3c2412dfeed Mon Sep 17 00:00:00 2001 From: Erhan BARIS Date: Mon, 19 Aug 2024 22:01:25 +0200 Subject: [PATCH] Local branch, .dsb, .dsw implementations --- README.md | 84 +++++++++++++++---- src/ast.rs | 42 ++++------ src/code_gen.rs | 149 +++++++++++++++++++-------------- src/context.rs | 19 +---- src/directive.rs | 6 +- src/parser.rs | 2 +- src/tests/asms/import-test.asm | 2 + src/tests/generic.rs | 50 ++++++++--- 8 files changed, 216 insertions(+), 138 deletions(-) diff --git a/README.md b/README.md index 25294a8..34c9105 100644 --- a/README.md +++ b/README.md @@ -5,10 +5,27 @@ Yet another 6502 Asm compiler project. The goal is make a multi platform (include web) compiler generator. Project is still in very early stage and there is no easy way to use it. You can check the code or wait to get more usable version. -Example code what compiler can compile now. -```assembly -.ORG $0600 ; change location +## Building +timu6502 is built with the latest Rust language. You have to install Rust first. After installation, execute the ```cargo build --release``` command. The executable will be located under the _target/release/_ folder. +The compiler is tested on the Windows and macOS operating systems. It should work on Linux, but that is not yet tested. + +## Usage +timu6502 is a terminal-based compiler. Basic usage is: +```bash +timu6502asm test.asm --target test.bin +timu6502asm test.asm --binary-dump +timu6502asm test.asm --token-dump +timu6502asm test.asm --token-dump --slient +timu6502asm --help +``` +If compilation fails, the process exit code will be **1** and error descriptions are printed unless silent mode is on. + +## Branches +A branch is a named reference to a location in the executable code. Calculating and remembering jump addresses by hand is hard; with branches you only need to remember the branch name, and the compiler assigns the address automatically. 
+ +Example: +```assembly JSR init JSR loop JSR end @@ -26,28 +43,59 @@ loop: end: BRK ``` +As you can see in the example there are **init**, **loop** and **end** branches defined and used with the instruction code. + +Also, compiler has a support for local branches. +```assembly +branch1: + @local1: + INX + + @local2: + INY + jump @local1 -Expected output: +branch2: + @local1: + DEX + + @local2: + DEY + jump @local1 ``` -0600: 20 09 06 20 0c 06 20 12 06 a2 00 60 e8 e0 05 d0 -0610: fb 60 00 + +As you can see in the example there are **init**, **loop** and **end** branches defined and used with the instruction code. + +Also, compiler has a support for local branches. +```assembly +branch1: + @local1: + INX + + @local2: + INY + jump @local1 + +branch2: + @local1: + DEX + + @local2: + DEY + jump @local1 ``` -## Building -timu6502 builded with latest Rust Language. You have to install Rust Language. After installetion execute ```cargo build --release``` command. The executable will be located under _target/release/_ folder. -Compiler tested under Windows and MacOS operating system. It should work under Linux OS but not yet tested. +## Variable +You can define static variable and use it with instruction. +Example: +```assembly +var1 = $10 +var2 = 22 +var3 = %11001100 -## Usage -timu6502 is terminal based compiler. So, basic usage is: -```bash -timu6502asm test.asm --target test.bin -timu6502asm test.asm --binary-dump -timu6502asm test.asm --token-dump -timu6502asm test.asm --token-dump --slient -timu6502asm --help +CPX #var1 ``` -If the compilation operation failed, process exit code will be **1** and print error descriptions if silent mode is off. ## Data types Compiler works with primative data types. diff --git a/src/ast.rs b/src/ast.rs index 33da23e..a5d4911 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -7,7 +7,7 @@ use log::{info, warn}; // Use log crate when building application use std::{println as info, println as warn}; // Workaround to use prinltn! for logs. 
use thiserror::Error; -use crate::{context::Context, directive::{DirectiveEnum, DirectiveType, DirectiveValue, SYSTEM_DIRECTIVES}, opcode::{ModeType, BRANCH_INSTS, INSTS_SIZE, JUMP_INSTS}, parser::{Parser, Token, TokenType}, tool::print_error}; +use crate::{context::Context, directive::{DirectiveEnum, DirectiveType, DirectiveValue, SYSTEM_DIRECTIVES}, opcode::{BRANCH_INSTS, INSTS_SIZE}, parser::{Parser, Token, TokenType}, tool::print_error}; #[derive(Debug, PartialEq)] pub enum InstrValue { @@ -41,8 +41,6 @@ pub enum BranchType { #[derive(Debug)] pub enum Ast { InstrImplied(usize), - InstrBranch(usize, String), - InstrJump(usize, String), Instr(usize, InstrInfo), Branch(String, BranchType), Directive(DirectiveEnum, Vec) @@ -74,6 +72,9 @@ pub enum AstGeneratorError { #[error("IO Error ({0})")] IOError(#[from] std::io::Error), + + #[error("'{0}' reference already defined")] + ReferenceAlreadyDefined(String) } impl AstGeneratorError { @@ -319,7 +320,11 @@ impl AstGenerator { self.cleanup_space(context)?; let values = self.parse_list(context, |_| true)?; - context.references.borrow_mut().insert(name.to_owned(), values); + let has_reference = context.references.borrow_mut().insert(name.to_owned(), values).is_some(); + + if has_reference { + return Err(AstGeneratorError::ReferenceAlreadyDefined(name.to_owned())); + } Ok(()) } @@ -373,6 +378,7 @@ impl AstGenerator { inst_info.value = InstrValue::Reference(keyword.to_owned()); } }, + Token::LocalKeyword(keyword) => inst_info.value = InstrValue::LocalReference(keyword.to_owned()), Token::Byte(byte) => inst_info.value = InstrValue::Byte(*byte), Token::Word(word) => inst_info.value = InstrValue::Word(*word), _ => return Err(AstGeneratorError::syntax_issue(context, token_index, "Invalid numbering number format".to_string())) @@ -445,30 +451,14 @@ impl AstGenerator { else if BRANCH_INSTS.contains(&positon) { // Branch inst self.eat_space(context)?; - let text = self.eat_text(context)?; - context.add_ast(token_index, 
Ast::InstrBranch(positon, text)); - } - - else if JUMP_INSTS.contains(&positon) { - // Jump inst - self.eat_space(context)?; - let index = self.index.get(); - - if let Ok(value) = self.parse_instr_value(context) { - context.add_ast(token_index, Ast::Instr(positon, value)); - return Ok(()) - } - - self.index.set(index); // Restore index + let value = self.parse_instr_value(context)?; - let token_index= self.eat()?; - let token = &context.tokens.borrow()[token_index]; - if let Token::Keyword(name) = &token.token { - context.add_ast(token_index, Ast::InstrJump(positon, name.clone())); - return Ok(()) + match value.value { + InstrValue::Byte(_) => context.add_ast(token_index, Ast::Instr(positon, value)), + InstrValue::Reference(_) => context.add_ast(token_index, Ast::Instr(positon, value)), + InstrValue::LocalReference(_) => context.add_ast(token_index, Ast::Instr(positon, value)), + _ => return Err(AstGeneratorError::syntax_issue(context, token_index, "Relative number or branch name expected".to_string())) } - - return Err(AstGeneratorError::syntax_issue(context, token_index, "Branch name, absolute address or indirect address expected".to_string())) } else { diff --git a/src/code_gen.rs b/src/code_gen.rs index 2452702..f8e447c 100644 --- a/src/code_gen.rs +++ b/src/code_gen.rs @@ -11,11 +11,14 @@ use thiserror::Error; use crate::ast::{InstrInfo, InstrValue, InstrInfoRegister}; use crate::context::Context; +use crate::opcode::BRANCH_INSTS; use crate::tool::print_error; use crate::{ast::{Ast, BranchType}, opcode::{ModeType, MODES}, directive::{DirectiveEnum, DirectiveValue}}; #[derive(Error, Debug)] pub enum CodeGeneratorError { + #[error("Unsupported directive value")] + UnsupportedDirectiveValue, #[error("Internal error")] InternalError, #[error("Illegal opcode")] @@ -48,8 +51,8 @@ pub struct CodeGenerator { pub fillvalue : u8, pub branches: HashMap, pub local_branches: HashMap, - pub unresolved_branches: Vec<(String, usize, usize)>, - pub unresolved_jumps: 
Vec<(String, usize, usize)>, + pub unresolved_relative_jump: Vec<(String, usize, usize)>, + pub unresolved_absolute_jumps: Vec<(String, usize, usize)>, pub unresolved_local_branches: Vec<(String, usize, usize)> } @@ -64,8 +67,8 @@ impl CodeGenerator { branches: Default::default(), local_branches: Default::default(), unresolved_local_branches: Default::default(), - unresolved_branches: Default::default(), - unresolved_jumps: Default::default(), + unresolved_relative_jump: Default::default(), + unresolved_absolute_jumps: Default::default(), } } @@ -98,32 +101,41 @@ impl CodeGenerator { Ok(()) } + fn build_relative_jump(target: &mut [u8], ast_index: usize, reference: &String, branches: &mut HashMap, unresolved_jump: &mut Vec<(String, usize, usize)>) -> (u16, ModeType) { + match branches.get(reference) { + Some(branch_position) => ((*branch_position as i16 - (target.len() + 2) as i16) as u16, ModeType::Relative), + None => { + unresolved_jump.push((reference.clone(), target.len() + 1, ast_index)); + (0, ModeType::Relative) + } + } + } + + fn build_absolute_jump(target: &mut [u8], ast_index: usize, reference: &String, branches: &mut HashMap, unresolved_jump: &mut Vec<(String, usize, usize)>) -> (u16, ModeType) { + match branches.get(reference) { + Some(branch_position) => (*branch_position as u16, ModeType::Absolute), + None => { + unresolved_jump.push((reference.clone(), target.len() + 1, ast_index)); + (0, ModeType::Absolute) + } + } + } + fn generate_instr(&mut self, target: &mut Vec, ast_index: usize, instr: usize, value: &InstrInfo) -> Result<(), CodeGeneratorError> { let modes = MODES[instr]; let mut found = false; + let relative_jump = BRANCH_INSTS.contains(&instr); let (number, mut possible_mode) = match &value.value { InstrValue::Byte(byte) => (*byte as u16, ModeType::ZeroPage), InstrValue::Word(word) => (*word, ModeType::Absolute), - InstrValue::Reference(reference) => match self.branches.get(reference) { - Some(branch_position) => { - let distance_position = 
*branch_position as i8; - (distance_position as u16, ModeType::Absolute) - }, - None => { - self.unresolved_jumps.push((reference.clone(), target.len() + 1, ast_index)); - (0, ModeType::Absolute) - } + InstrValue::Reference(reference) => match relative_jump { + true => Self::build_relative_jump(target, ast_index, reference, &mut self.branches, &mut self.unresolved_relative_jump), + false => Self::build_absolute_jump(target, ast_index, reference, &mut self.branches, &mut self.unresolved_absolute_jumps), }, - InstrValue::LocalReference(reference) => match self.local_branches.get(reference) { - Some(branch_position) => { - let distance_position = *branch_position as i8 - (target.len() + 2) as i8; - (distance_position as u16, ModeType::Absolute) - }, - None => { - self.unresolved_local_branches.push((reference.clone(), target.len() + 1, ast_index)); - (0, ModeType::Absolute) - } + InstrValue::LocalReference(reference) => match relative_jump { + true => Self::build_relative_jump(target, ast_index, reference, &mut self.local_branches, &mut self.unresolved_local_branches), + false => Self::build_absolute_jump(target, ast_index, reference, &mut self.local_branches, &mut self.unresolved_local_branches), } }; @@ -151,6 +163,10 @@ impl CodeGenerator { possible_mode = ModeType::Immediate; } + if BRANCH_INSTS.contains(&instr) { + possible_mode = ModeType::Relative; + } + for search_mode in modes.iter() { if search_mode.mode == possible_mode { target.push(search_mode.opcode); @@ -166,40 +182,6 @@ impl CodeGenerator { Ok(()) } - fn generate_instr_branch(&mut self, target: &mut Vec, ast_index: usize, position: usize, branch_name: &String) -> Result<(), CodeGeneratorError> { - let branch_position = match self.branches.get(branch_name) { - Some(branch_position) => { - let distance_position = *branch_position as i8 - (target.len() + 2) as i8; - distance_position as u16 - }, - None => { - self.unresolved_branches.push((branch_name.clone(), target.len() + 1, ast_index)); - 0 - } - }; - 
- let modes = MODES[position]; - target.push(modes[0].opcode); - self.push_number(target, branch_position, ModeType::Relative)?; - - Ok(()) - } - - fn generate_instr_jump(&mut self, target: &mut Vec, ast_index: usize, position: usize, branch_name: &String) -> Result<(), CodeGeneratorError> { - let jump_position = match self.branches.get(branch_name) { - Some(jump_position) => self.start_point + *jump_position as u16, - None => { - self.unresolved_jumps.push((branch_name.clone(), target.len() + 1, ast_index)); - 0 - } - }; - - let modes = MODES[position]; - target.push(modes[0].opcode); - self.push_number(target, jump_position, ModeType::Absolute)?; - Ok(()) - } - fn generate_implied(&mut self, target: &mut Vec, position: usize) -> Result<(), CodeGeneratorError> { let modes = MODES[position]; for search_mode in modes.iter() { @@ -225,8 +207,8 @@ impl CodeGenerator { Ok(()) } - fn build_unresolved_branches(&mut self, target: &mut [u8]) -> Result<(), CodeGeneratorError> { - for (branch_name, position, _) in self.unresolved_branches.iter() { + fn build_unresolved_relative_jump(&mut self, target: &mut [u8]) -> Result<(), CodeGeneratorError> { + for (branch_name, position, _) in self.unresolved_relative_jump.iter() { match self.branches.get(branch_name) { Some(branch_position) => target[*position] = (*branch_position as i8 - *position as i8 - 1) as u8, None => return Err(CodeGeneratorError::UnresolvedBranches) @@ -247,7 +229,7 @@ impl CodeGenerator { } fn build_unresolved_jumps(&mut self, target: &mut [u8]) -> Result<(), CodeGeneratorError> { - for (branch_name, position, _) in self.unresolved_jumps.iter() { + for (branch_name, position, _) in self.unresolved_absolute_jumps.iter() { match self.branches.get(branch_name) { Some(branch_position) => { let jump_position = self.start_point + *branch_position as u16; @@ -380,6 +362,49 @@ impl CodeGenerator { Ok(()) } + fn directive_define_storage_byte(&self, target: &mut Vec, values: &[DirectiveValue]) -> Result<(), 
CodeGeneratorError> { + let mut filler = 0x00; + let times = match &values[0] { + DirectiveValue::Byte(byte) => u16::from(*byte), + DirectiveValue::Word(word) => *word, /* keep the full 16-bit count; `as u8` would wrap counts >= 256 */ + _ => return Err(CodeGeneratorError::UnsupportedDirectiveValue) + }; + + if values.len() > 1 { + if let Ok(new_filler) = values[1].get_byte() { + filler = new_filler; + } + } + + for _ in 0..times { + target.push(filler); + } + + Ok(()) + } + + fn directive_define_storage_word(&self, target: &mut Vec, values: &[DirectiveValue]) -> Result<(), CodeGeneratorError> { + let mut filler: u16 = 0x00; + let times = match &values[0] { + DirectiveValue::Byte(byte) => *byte as u16, + DirectiveValue::Word(word) => *word, + _ => return Err(CodeGeneratorError::UnsupportedDirectiveValue) + }; + + if values.len() > 1 { + if let Ok(new_filler) = values[1].get_word() { + filler = new_filler; + } + } + + for _ in 0..times { + target.push(filler as u8); + target.push((filler >> 8) as u8); + } + + Ok(()) + } + fn generate_directive(&mut self, target: &mut Vec, option: DirectiveEnum, values: &[DirectiveValue]) -> Result<(), CodeGeneratorError> { match option { DirectiveEnum::Org => self.directive_org(values)?, @@ -393,6 +418,8 @@ impl CodeGenerator { DirectiveEnum::Include => (), DirectiveEnum::Pad => self.directive_pad(target, values)?, DirectiveEnum::Fillvalue => self.directive_fillvalue(values)?, + DirectiveEnum::Dsb => self.directive_define_storage_byte(target, values)?, + DirectiveEnum::Dsw => self.directive_define_storage_word(target, values)?, }; Ok(()) } @@ -407,8 +434,6 @@ impl CodeGenerator { match ast { Some(Ast::InstrImplied(position)) => self.generate_implied(&mut context.target, *position)?, - Some(Ast::InstrBranch(position, branch)) => self.generate_instr_branch(&mut context.target, ast_index, *position, branch)?, - Some(Ast::InstrJump(position, branch)) => self.generate_instr_jump(&mut context.target, ast_index, *position, branch)?, Some(Ast::Instr(position, value)) => self.generate_instr(&mut context.target, 
ast_index, *position, value)?, Some(Ast::Branch(name, branch_type)) => self.generate_branch(&mut context.target, name, *branch_type)?, Some(Ast::Directive(option, values)) => self.generate_directive(&mut context.target, *option, values)?, @@ -416,7 +441,7 @@ impl CodeGenerator { }; } - self.build_unresolved_branches(&mut context.target)?; + self.build_unresolved_relative_jump(&mut context.target)?; self.build_unresolved_jumps(&mut context.target)?; Ok(()) } diff --git a/src/context.rs b/src/context.rs index 2482281..36793dc 100644 --- a/src/context.rs +++ b/src/context.rs @@ -1,4 +1,4 @@ -use std::{cell::RefCell, collections::HashMap, default, path::PathBuf}; +use std::{cell::RefCell, collections::HashMap, path::PathBuf}; use crate::{ast::{Ast, AstInfo}, directive::DirectiveValue, parser::TokenInfo}; @@ -57,23 +57,6 @@ impl Context { pub fn last_file_id(&self) -> usize { self.files.borrow().len() - 1 } - - pub fn last_path(&self) -> Option { - match self.files.borrow().last() { - Some(path) => match path.parent() { - Some(parent) => parent.as_os_str().to_str().map(|path| path.to_string()), - None => None - }, - None => self.work_directory.as_os_str().to_str().map(|path| path.to_string()) - } - } - - pub fn get_path(&self, file_id: usize) -> Option { - match self.files.borrow().get(file_id) { - Some(path) => path.parent().map(|parent| parent.to_owned()), - None => None - } - } } diff --git a/src/directive.rs b/src/directive.rs index 87cb225..dedcb01 100644 --- a/src/directive.rs +++ b/src/directive.rs @@ -14,7 +14,9 @@ pub enum DirectiveEnum { Fail, Include, Pad, - Fillvalue + Fillvalue, + Dsb, + Dsw } #[derive(Debug, PartialEq, Clone)] @@ -74,4 +76,6 @@ pub const SYSTEM_DIRECTIVES: &[DirectiveInfo] = &[ DirectiveInfo { name: "INCLUDE", directive: DirectiveEnum::Include, size: DirectiveVariableSize::Length(1), values: &[DirectiveType::String] }, DirectiveInfo { name: "PAD", directive: DirectiveEnum::Pad, size: DirectiveVariableSize::Length(1), values: 
&[DirectiveType::Word] }, DirectiveInfo { name: "FILLVALUE", directive: DirectiveEnum::Fillvalue, size: DirectiveVariableSize::Length(1), values: &[DirectiveType::Byte] }, + DirectiveInfo { name: "DSB", directive: DirectiveEnum::Dsb, size: DirectiveVariableSize::Min(1), values: &[DirectiveType::Byte, DirectiveType::Word] }, + DirectiveInfo { name: "DSW", directive: DirectiveEnum::Dsw, size: DirectiveVariableSize::Min(1), values: &[DirectiveType::Byte, DirectiveType::Word] }, ]; \ No newline at end of file diff --git a/src/parser.rs b/src/parser.rs index 048c3b2..f4dbf0f 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -425,7 +425,7 @@ impl<'a> Parser<'a> { return Ok(Token::LocalBranch(str::from_utf8(&self.data[start..self.index-1])?.to_string())) } - return Ok(Token::LocalKeyword(str::from_utf8(&self.data[start..self.index])?.to_string())); + Ok(Token::LocalKeyword(str::from_utf8(&self.data[start..self.index])?.to_string())) } fn parse_string(&mut self) -> Result { diff --git a/src/tests/asms/import-test.asm b/src/tests/asms/import-test.asm index 9dddfb3..3f02a35 100644 --- a/src/tests/asms/import-test.asm +++ b/src/tests/asms/import-test.asm @@ -10,3 +10,5 @@ .fillvalue $22 .pad $0060 + +.warning "test warning" \ No newline at end of file diff --git a/src/tests/generic.rs b/src/tests/generic.rs index f3b70d0..3db3c76 100644 --- a/src/tests/generic.rs +++ b/src/tests/generic.rs @@ -11,7 +11,9 @@ use crate::{ #[rstest] #[case( - br#"LDX #$08 + br#" +VAR = "\"" +LDX #$08 decrement2: STX $0201 decrement: @@ -236,6 +238,26 @@ LDx IOREST"#, &[0xad, 0x4a, 0xff, 0xae, 0x3f, 0xff])] #[case(br#"AND $ffdd , x"#, &[0x3d, 0xdd, 0xff])] #[case(br#"LDX $ffdd , y"#, &[0xBE, 0xdd, 0xff])] #[case(br#"JMP ($ffdd)"#, &[0x6c, 0xdd, 0xff])] // Only jump has indirect mode +#[case(br#"LDX #$08 +decrement2: + STX $0201 +@decrement: + DEX + STX $0200 + CPX #$03 + BNE @decrement + BNE decrement2 + STX $0201 + BRK"#, &[0xA2, 0x08, 0x8E, 0x01, 0x02, 0xCA, 0x8E, 0x00, 0x02, 0xE0, 0x03, 0xD0, 
0xF8, 0xD0, 0xF3, 0x8E, 0x01, 0x02, 0x00])] +#[case(br#"var1 = $10 +var2 = 22 +var3 = %11001100 + +CPX #var1"#, &[0xe0, 0x10])] +#[case(br#".dsb 5"#, &[0x00, 0x00, 0x00, 0x00, 0x00])] +#[case(br#".dsb 5 , $10"#, &[0x10, 0x10, 0x10, 0x10, 0x10])] +#[case(br#".dsw 5"#, &[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00])] +#[case(br#".dsw 5 , $1122"#, &[0x22, 0x11, 0x22, 0x11, 0x22, 0x11, 0x22, 0x11, 0x22, 0x11])] fn check_codes(#[case] data: &'_ [u8], #[case] codes: &'_ [u8]) { let context = Context::default(); let path = PathBuf::from("main.asm"); @@ -306,6 +328,10 @@ fn parser_fail(#[case] data: &'_ [u8]) { #[case(br#".fBNE = "Hello""#)] #[case(br#"AND ($0008) , x"#)] #[case(br#"AND ($0008 , Y)"#)] +#[case(br#" +VAR = 1 +VAR = 1 +"#)] fn ast_generator_fail(#[case] data: &'_ [u8]) { let context = Context::default(); let path = PathBuf::from("main.asm"); @@ -403,17 +429,17 @@ fn fail_test(#[case] code_filename: &str) { #[rstest] #[case(br#"@decrement:"#)] -//#[case(br#"LDX #$08 -//decrement2: -// STX $0201 -//@decrement: -// DEX -// STX $0200 -// CPX #$03 -// BNE @decrement -// BNE decrement2 -// STX $0201 -// BRK"#)] +#[case(br#"LDX #$08 +decrement2: + STX $0201 +@decrement: + DEX + STX $0200 + CPX #$03 + BNE @decrement + BNE decrement2 + STX $0201 + BRK"#)] fn local_branch_test(#[case] data: &'_ [u8]) { let context = Context::default(); let path = PathBuf::from("main.asm");