From f7e8f9decb8ce1cd2ab164139ac92eb642b13985 Mon Sep 17 00:00:00 2001 From: Erhan BARIS Date: Thu, 15 Aug 2024 18:21:38 +0200 Subject: [PATCH] New directives added. --- Cargo.toml | 2 + README.md | 74 ++++++++++++++++- src/ast.rs | 88 +++++++++++++------- src/code_gen.rs | 192 ++++++++++++++++++++++++++++++++----------- src/main.rs | 9 +- src/opcode.rs | 2 + src/options.rs | 43 +++++++--- src/parser.rs | 25 +++++- src/tests/generic.rs | 8 +- src/tests/parser.rs | 2 - src/tool.rs | 13 +-- 11 files changed, 354 insertions(+), 104 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 4c7bac4..aa12561 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,6 +4,8 @@ version = "0.1.0" edition = "2021" [dependencies] +log = "0.4.22" +simplelog = "^0.12.2" strum = "0.26.3" strum_macros = "0.26.4" thiserror = "1.0.63" diff --git a/README.md b/README.md index 7e36d90..79ffccc 100644 --- a/README.md +++ b/README.md @@ -33,10 +33,82 @@ Expected output: 0610: fb 60 00 ``` +## Available directives + +### .org +Change the reference location. This does not change where the code is stored; it changes the addresses used for jump and branch references. +```assembly +.ORG $0600 +.byte $11 +``` +``` +0600: 11 +``` + +### .byte +Define byte-sized data. Must be followed by a comma-separated sequence of byte-range expressions or strings. + +```assembly +.byte $11 +.byte $22, $33 +.byte "Hello" +``` +``` +0000: 11 22 33 48 65 6C 6C 6F +``` + +### .word +Write one or more 16-bit words into memory, low byte first (little-endian). +```assembly +.word $1122 +.word $3344, $5566 +``` +``` +0000: 22 11 44 33 66 55 +``` + +### .ascii +Write an ASCII string into memory. The .byte directive can also be used for strings. +```assembly +.ascii "hello world" +``` +``` +0000: 68 65 6C 6C 6F 20 77 6F +0008: 72 6C 64 +``` + +### .asciiz +Write an ASCII string into memory. If the string does not already end with 0x00, the compiler appends 0x00. +```assembly +.asciiz "hello world" +``` +``` +0000: 68 65 6C 6C 6F 20 77 6F +0008: 72 6C 64 00 +``` + +### .incbin +Include a file as binary data. 
+```assembly +.incbin "src/tests/bins/test1.bin" +``` +``` +0000: 00 01 02 03 +``` + +### .warning +Print a warning message at compile time. +```assembly +.warning "timu6502asm compiler works partial" +``` +``` +22:05:16 [WARN] timu6502asm compiler works partial +``` + There are many things to do. Here are the some todos: - [ ] Case insensitivity - [ ] Rom file generation - - [ ] Decompile binaries + - [ ] Decompiler - [ ] Human friendly prints - [ ] Import different asm files - [ ] Performance measurement diff --git a/src/ast.rs b/src/ast.rs index c60b9d8..a1ae6ef 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -16,7 +16,7 @@ pub enum Ast<'a> { Instr(usize, u16, ModeType), InstrRef(usize, &'a [u8]), Branch(&'a [u8], BranchType), - Directive(DirectiveEnum, DirectiveValue<'a>), + Directive(DirectiveEnum, Vec>), Assign(&'a [u8], u16, ModeType) } @@ -87,12 +87,6 @@ impl<'a> AstGenerator<'a> { Ok(self.index.get() - 1) } - fn dec(&self) -> Result<(), AstGeneratorError> { - self.empty_check()?; - self.index.set(self.index.get() - 1); - Ok(()) - } - fn peek(&self)-> Result { self.empty_check()?; Ok(self.index.get()) @@ -123,7 +117,7 @@ impl<'a> AstGenerator<'a> { }; let token = &context.tokens.borrow()[token_index]; - let token_type: TokenType = TokenType::from(token.token); + let token_type: TokenType = TokenType::from(&token.token); match token_type == expected { true => { @@ -193,32 +187,66 @@ impl<'a> AstGenerator<'a> { if let Some(position) = OPTIONS.iter().position(|item| *item == &option[..]) { let modes = OPTION_MODES[position]; let directive_type = DIRECTIVE_ENUMS[position]; - let mut found = false; + let tokens = context.tokens.borrow(); - self.cleanup_space(context)?; + let mut token_found = false; + let mut finish = false; - for mode in modes.iter() { - match mode { - DirectiveType::Number => { - if let Some((number, mode)) = self.eat_if_number(context) { - context.add_ast(token_index, Ast::Directive(directive_type, DirectiveValue::Number(number, mode))); - 
found = true; - break; - } - }, - DirectiveType::String => { - if let Some(string) = self.eat_if_string(context) { - context.add_ast(token_index,Ast::Directive(directive_type, DirectiveValue::String(string))); - found = true; - break; - } - }, - } + self.cleanup_space(context)?; + let mut values = Vec::new(); + + while self.size.get() > self.index.get() { + let value_index = self.eat()?; + let value_token = &tokens.get(value_index).map(|item| &item.token); + + if token_found { + /* comma, space, new line, end or comment expected */ + match value_token { + Some(Token::NewLine(_)) => finish = true, + Some(Token::Comment(_)) => finish = true, + Some(Token::End) => finish = true, + Some(Token::Space(_)) => (), + Some(Token::Comma) => token_found = false, + _ => return Err(AstGeneratorError::syntax_issue(context, value_index, "Unexpected syntax")) + } + } + else { + /* Expected parseable token */ + match value_token { + Some(Token::Keyword(keyword)) => { values.push(DirectiveValue::Reference(*keyword)); token_found = true; }, + Some(Token::Number(number, ModeType::Absolute)) => { values.push(DirectiveValue::Word(*number)); token_found = true; }, + Some(Token::Number(number, ModeType::ZeroPage)) => { values.push(DirectiveValue::Byte((*number) as u8)); token_found = true; }, + Some(Token::Number(number, ModeType::Relative)) => { values.push(DirectiveValue::Byte((*number) as u8)); token_found = true; }, + Some(Token::String(string)) => { values.push(DirectiveValue::String(*string)); token_found = true; }, + Some(Token::BranchNext(name)) => { values.push(DirectiveValue::Reference(*name)); token_found = true; }, + Some(Token::NewLine(_)) => finish = true, + Some(Token::Comment(_)) => finish = true, + Some(Token::End) => finish = true, + Some(Token::Space(_)) => (), + Some(Token::Comma) => return Err(AstGeneratorError::syntax_issue(&context, value_index, "',' not expected")), + Some(_) => return Err(AstGeneratorError::syntax_issue(context, value_index, "Unexpected syntax")), 
+ None => return Err(AstGeneratorError::InternalError) + }; + } + + if token_found { + /* Is it expected token? */ + let last_token_type = DirectiveType::from(&values[values.len()-1]); + if !modes.iter().any(|mode| *mode == last_token_type) { + return Err(AstGeneratorError::syntax_issue(context, value_index, "Unexpected syntax")) + } + } + + if finish { + break; + } } - if !found { + if modes.len() > 0 && values.len() == 0 { return Err(AstGeneratorError::syntax_issue(context, token_index, "Missing information")) } + + context.add_ast(token_index,Ast::Directive(directive_type, values)); } else { return Err(AstGeneratorError::syntax_issue(context, token_index, "Unsupported compiler configuration")) } @@ -290,7 +318,7 @@ impl<'a> AstGenerator<'a> { while self.size.get() > self.index.get() { let token_index = self.eat()?; - match &context.tokens.borrow().get(token_index).map(|item| item.token) { + match &context.tokens.borrow().get(token_index).map(|item| &item.token) { Some(Token::Instr(positon)) => self.generate_code_block(&context, token_index, *positon)?, Some(Token::Keyword(keyword)) => self.generate_assign(&context, token_index, keyword)?, Some(Token::Directive(option)) => self.generate_directive(&context, token_index, option)?, @@ -300,6 +328,7 @@ impl<'a> AstGenerator<'a> { Some(Token::NewLine(_)) => (), Some(Token::Space(_)) => (), Some(Token::Assign) => return Err(AstGeneratorError::syntax_issue(&context, token_index, "'=' not expected")), + Some(Token::Comma) => return Err(AstGeneratorError::syntax_issue(&context, token_index, "',' not expected")), Some(Token::String(_)) => return Err(AstGeneratorError::syntax_issue(&context, token_index, "String not expected")), Some(Token::BranchNext(name)) => self.generate_branch(&context, token_index, name, BranchType::Next)?, Some(Token::End) => break, @@ -316,6 +345,7 @@ impl<'a> AstGenerator<'a> { Err(error) => { let tokens = context.tokens.borrow(); let token = &tokens[self.index.get() - 1]; + println!("1{:?}", 
context.source); print_error(&context.source, &error, token.line, token.column, token.end); Err(error) } diff --git a/src/code_gen.rs b/src/code_gen.rs index 64b891a..7a2d032 100644 --- a/src/code_gen.rs +++ b/src/code_gen.rs @@ -2,6 +2,7 @@ use std::{collections::HashMap, str::Utf8Error}; use std::fs::File; use std::io::BufReader; use std::io::Read; +use log::{info, warn}; use thiserror::Error; use strum_macros::EnumDiscriminants; @@ -27,8 +28,18 @@ pub enum CodeGeneratorError { IOError(#[from] std::io::Error), #[error("Text convertion issue ({0})")] Utf8Error(#[from] Utf8Error), + + #[allow(unused_variables)] #[error("Unsupported number format")] - UnsupportedNumberFormat + UnsupportedNumberFormat, + + #[allow(unused_variables)] + #[error("Word expected")] + WordExpected, + #[error("Expected {0}")] + ExpectedThis(&'static str), + #[error("More than expected")] + MoreThanExpected } #[derive(Debug, PartialEq, Copy, Clone)] @@ -36,6 +47,7 @@ pub enum CodeGeneratorError { #[strum_discriminants(name(ReferenceType))] pub enum ReferenceValue { AbsoluteAddress(u16), + #[allow(unused_variables)] RelativeAddress(u16), Value(u16, ModeType), } @@ -178,7 +190,7 @@ impl<'a> CodeGenerator<'a> { Ok(()) } - fn generate_branch(&mut self, target: &mut Vec, name: &'a [u8], branch_type: BranchType) -> Result<(), CodeGeneratorError> { + fn generate_branch(&mut self, target: &mut Vec, name: &'a [u8], _: BranchType) -> Result<(), CodeGeneratorError> { self.branches.insert(name, target.len()); self.references.insert(name, ReferenceValue::AbsoluteAddress(0)); Ok(()) @@ -211,46 +223,130 @@ impl<'a> CodeGenerator<'a> { Ok(()) } - fn configure_directive(&mut self, target: &mut Vec, option: DirectiveEnum, value: DirectiveValue<'a>) -> Result<(), CodeGeneratorError> { + fn directive_org(&mut self, values: &Vec>) -> Result<(), CodeGeneratorError> { + if values.len() == 0 { + return Err(CodeGeneratorError::ExpectedThis("word")); + } + else if values.len() > 1 { + return 
Err(CodeGeneratorError::MoreThanExpected); + } + + self.start_point = values[0].get_word()?; + Ok(()) + } + + fn directive_incbin(&mut self, target: &mut Vec, values: &Vec>) -> Result<(), CodeGeneratorError> { + + if values.len() == 0 { + return Err(CodeGeneratorError::ExpectedThis("word")); + } + else if values.len() > 1 { + return Err(CodeGeneratorError::MoreThanExpected); + } + + let file_path = match values[0] { + DirectiveValue::String(name) => name, + _ => return Err(CodeGeneratorError::StringExpected) + }; + + let file_path = match std::str::from_utf8(file_path) { + Ok(file_path) => file_path, + Err(error) => return Err(CodeGeneratorError::Utf8Error(error)) + }; + + let file = match File::open(file_path) { + Ok(file) => file, + Err(error) => return Err(CodeGeneratorError::IOError(error)) + }; + + let buffer_reader: BufReader = BufReader::new(file); + for buffer in buffer_reader.bytes() { + match buffer { + Ok(byte) => target.push(byte), + Err(error) => return Err(CodeGeneratorError::IOError(error)) + } + } + Ok(()) + } + + fn directive_byte(&mut self, target: &mut Vec, values: &Vec>) -> Result<(), CodeGeneratorError> { + if values.len() == 0 { + return Err(CodeGeneratorError::ExpectedThis("byte(s)")); + } + + for value in values.iter() { + match value { + DirectiveValue::Byte(byte) => target.push(*byte), + DirectiveValue::String(string) => string.into_iter().for_each(|byte| target.push(*byte)), + _ => return Err(CodeGeneratorError::ExpectedThis("byte or string")) + }; + } + Ok(()) + } + + fn directive_word(&mut self, target: &mut Vec, values: &Vec>) -> Result<(), CodeGeneratorError> { + if values.len() == 0 { + return Err(CodeGeneratorError::ExpectedThis("byte(s)")); + } + + for value in values.iter() { + match value { + DirectiveValue::Word(word) => { + target.push(*word as u8); + target.push((*word >> 8) as u8); + }, + _ => return Err(CodeGeneratorError::ExpectedThis("word")) + } + } + Ok(()) + } + + fn directive_ascii(&mut self, target: &mut Vec, values: 
&Vec>, add_null: bool) -> Result<(), CodeGeneratorError> { + if values.len() == 0 { + return Err(CodeGeneratorError::ExpectedThis("string")); + } + else if values.len() > 1 { + return Err(CodeGeneratorError::MoreThanExpected); + } + + for value in values.into_iter() { + let string = match value { + DirectiveValue::String(string) => string, + _ => return Err(CodeGeneratorError::ExpectedThis("string")) + }; + + string.into_iter().for_each(|byte| target.push(*byte)); + + if add_null && string[string.len()-1] != 0x0 { + target.push(0x0); + } + } + Ok(()) + } + + fn directive_warning(&mut self, _: &mut Vec, values: &Vec>) -> Result<(), CodeGeneratorError> { + if values.len() == 0 { + return Err(CodeGeneratorError::ExpectedThis("string")); + } + + for value in values.into_iter() { + match value { + DirectiveValue::String(string) => warn!("{}", std::str::from_utf8(&string).map_err(|error| CodeGeneratorError::Utf8Error(error))?), + _ => return Err(CodeGeneratorError::ExpectedThis("string")) + }; + } + Ok(()) + } + + fn generate_directive(&mut self, target: &mut Vec, option: DirectiveEnum, values: &Vec>) -> Result<(), CodeGeneratorError> { match option { - DirectiveEnum::Org => self.start_point = value.as_u16(), - DirectiveEnum::Incbin => { - - let file_path = match value { - DirectiveValue::String(name) => name, - _ => return Err(CodeGeneratorError::StringExpected) - }; - - let file_path = match std::str::from_utf8(file_path) { - Ok(file_path) => file_path, - Err(error) => return Err(CodeGeneratorError::Utf8Error(error)) - }; - - let file = match File::open(file_path) { - Ok(file) => file, - Err(error) => return Err(CodeGeneratorError::IOError(error)) - }; - - let buffer_reader: BufReader = BufReader::new(file); - for buffer in buffer_reader.bytes() { - match buffer { - Ok(byte) => target.push(byte), - Err(error) => return Err(CodeGeneratorError::IOError(error)) - } - } - }, - DirectiveEnum::Byte => { - match value { - DirectiveValue::String(value) => 
value.into_iter().for_each(|byte| target.push(*byte)), - DirectiveValue::Number(number, mode) => { - match mode { - ModeType::Relative | ModeType::Absolute => self.push_number(target, number, mode)?, - ModeType::ZeroPage => self.push_number(target, number, mode)?, - _ => return Err(CodeGeneratorError::UnsupportedNumberFormat) - } - } - }; - }, + DirectiveEnum::Org => self.directive_org(values)?, + DirectiveEnum::Incbin => self.directive_incbin(target, values)?, + DirectiveEnum::Byte => self.directive_byte(target, values)?, + DirectiveEnum::Word => self.directive_word(target, values)?, + DirectiveEnum::Ascii => self.directive_ascii(target, values, false)?, + DirectiveEnum::Asciiz => self.directive_ascii(target, values, true)?, + DirectiveEnum::Warning => self.directive_warning(target, values)?, }; Ok(()) } @@ -270,7 +366,7 @@ impl<'a> CodeGenerator<'a> { Some(Ast::Instr(position, number, mode)) => self.generate_instr(&mut context.target, *position, *number, *mode)?, Some(Ast::InstrRef(position, reference)) => self.generate_instr_reference(&mut context.target, *position, *reference)?, Some(Ast::Branch(name, branch_type)) => self.generate_branch(&mut context.target, name, *branch_type)?, - Some(Ast::Directive(option, value)) => self.configure_directive(&mut context.target, *option, *value)?, + Some(Ast::Directive(option, values)) => self.generate_directive(&mut context.target, *option, &values)?, Some(Ast::Assign(name, number, mode)) => self.configure_assign(*name, *number, *mode)?, None => return Err(CodeGeneratorError::InternalError) }; @@ -296,18 +392,20 @@ impl<'a> CodeGenerator<'a> { } pub fn dump(&self, context: &Context<'a>) { + + println!(); + info!("Binary Output"); let total_byte_per_row = 8; let position = self.start_point; - let mut index = 0; + let total_bytes = context.target.len(); print!("{:04X}: ", position); - for data in context.target.iter() { + for (index, data) in context.target.iter().enumerate() { print!("{:02X} ", data); - index += 1; - if 
index % total_byte_per_row == 0 { + if index != 0 && index % total_byte_per_row == 0 && index != total_bytes-1 { println!(); - print!("{:04X}: ", position + index); + print!("{:04X}: ", position + (index as u16)); } } diff --git a/src/main.rs b/src/main.rs index 3016c58..ed3c64b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -8,6 +8,9 @@ mod context; #[cfg(test)] mod tests; +use log::{info, LevelFilter}; +use simplelog::*; + use ast::AstGenerator; use code_gen::CodeGenerator; use context::Context; @@ -15,13 +18,15 @@ use parser::Parser; fn main() { + let _ = CombinedLogger::init(vec![TermLogger::new(LevelFilter::Debug, Config::default(), TerminalMode::Mixed, ColorChoice::Auto)]); + info!("timu6502asm Compiler"); + let data = br#".byte $ff"#; let context = Context::new(data); let mut parser = Parser::new(context); parser.parse().unwrap(); - println!("{:?}", &parser.context.tokens); parser.friendly_dump(); let context = parser.context; @@ -33,5 +38,3 @@ fn main() { let context = generator.generate(context).unwrap(); generator.dump(&context); } - - diff --git a/src/opcode.rs b/src/opcode.rs index 632da3e..ee487e6 100644 --- a/src/opcode.rs +++ b/src/opcode.rs @@ -30,6 +30,8 @@ pub const INSTS: [&[u8; 3]; 56] = [ ]; pub const INSTS_SIZE: [u8; 56] = [2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 2, 2, 1, 1, 3, 3, 2, 2, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1]; + +#[allow(unused_variables)] pub const INSTR_NAMES: [&str; 56] = ["ADC", "AND", "ASL", "BCC", "BCS", "BEQ", "BIT", "BMI", "BNE", "BPL", "BRK", "BVC", "BVS", "CLC", "CLD", "CLI", "CLV", "CMP", "CPX", "CPY", "DEC", "DEX", "DEY", "EOR", "INC", "INX", "INY", "JMP", "JSR", "LDA", "LDX", "LDY", "LSR", "NOP", "ORA", "PHA", "PHP", "PLA", "PLP", "ROL", "ROR", "RTI", "RTS", "SBC", "SEC", "SED", "SEI", "STA", "STX", "STY", "TAX", "TAY", "TSX", "TXA", "TXS", "TYA"]; pub const ADC_MODES: [ModeInfo; 8] = [ModeInfo { mode: ModeType::Immediate, opcode: 0x69}, ModeInfo { 
mode: ModeType::ZeroPage, opcode: 0x65}, ModeInfo { mode: ModeType::ZeroPageX, opcode: 0x75}, ModeInfo { mode: ModeType::Absolute, opcode: 0x6D}, ModeInfo { mode: ModeType::AbsoluteX, opcode: 0x7D}, ModeInfo { mode: ModeType::AbsoluteY, opcode: 0x79}, ModeInfo { mode: ModeType::IndirectX, opcode: 0x61}, ModeInfo { mode: ModeType::IndirectY, opcode: 0x71}]; diff --git a/src/options.rs b/src/options.rs index 34fddd5..137932d 100644 --- a/src/options.rs +++ b/src/options.rs @@ -1,35 +1,54 @@ use strum_macros::EnumDiscriminants; -use crate::opcode::ModeType; +use crate::code_gen::CodeGeneratorError; #[derive(Debug, PartialEq, Copy, Clone)] pub enum DirectiveEnum { Org, Incbin, - Byte + Byte, + Word, + Ascii, + Asciiz, + Warning } #[derive(Debug, PartialEq, Copy, Clone)] #[derive(EnumDiscriminants)] #[strum_discriminants(name(DirectiveType))] pub enum DirectiveValue<'a> { - Number(u16, ModeType), - String(&'a [u8]) + Byte(u8), + Word(u16), + String(&'a [u8]), + Reference(&'a [u8]), } impl<'a> DirectiveValue<'a> { - pub fn as_u16(&self) -> u16 { + pub fn get_word(&self) -> Result { + match self { - DirectiveValue::Number(number, _) => *number, - DirectiveValue::String(_) => 0 + DirectiveValue::Word(number) => Ok(*number), + _ => Err(CodeGeneratorError::ExpectedThis("Word information")) + } + } + + pub fn get_byte(&self) -> Result { + + match self { + DirectiveValue::Byte(number) => Ok(*number), + _ => Err(CodeGeneratorError::ExpectedThis("Byte information")) } } } -pub const OPTIONS: [&[u8]; 3] = [b"ORG", b"INCBIN", b"BYTE"]; -pub const ORG_TYPES: [DirectiveType; 1] = [DirectiveType::Number]; +pub const OPTIONS: [&[u8]; 7] = [b"ORG", b"INCBIN", b"BYTE", b"WORD", b"ASCII", b"ASCIIZ", b"WARNING"]; +pub const ORG_TYPES: [DirectiveType; 1] = [DirectiveType::Word]; pub const INCBIN_TYPES: [DirectiveType; 1] = [DirectiveType::String]; -pub const BYTE_TYPES: [DirectiveType; 2] = [DirectiveType::String, DirectiveType::Number]; +pub const BYTE_TYPES: [DirectiveType; 2] = 
[DirectiveType::Byte, DirectiveType::String]; +pub const WORD_TYPES: [DirectiveType; 1] = [DirectiveType::Word]; +pub const ASCII_TYPES: [DirectiveType; 1] = [DirectiveType::String]; +pub const ASCIIZ_TYPES: [DirectiveType; 1] = [DirectiveType::String]; +pub const WARNING_TYPES: [DirectiveType; 1] = [DirectiveType::String]; -pub const OPTION_MODES: [&[DirectiveType]; 3] = [&ORG_TYPES, &INCBIN_TYPES, &BYTE_TYPES]; -pub const DIRECTIVE_ENUMS: [DirectiveEnum; 3] = [DirectiveEnum::Org, DirectiveEnum::Incbin, DirectiveEnum::Byte]; +pub const OPTION_MODES: [&[DirectiveType]; 7] = [&ORG_TYPES, &INCBIN_TYPES, &BYTE_TYPES, &WORD_TYPES, &ASCII_TYPES, &ASCIIZ_TYPES, &WARNING_TYPES]; +pub const DIRECTIVE_ENUMS: [DirectiveEnum; 7] = [DirectiveEnum::Org, DirectiveEnum::Incbin, DirectiveEnum::Byte, DirectiveEnum::Word, DirectiveEnum::Ascii, DirectiveEnum::Asciiz, DirectiveEnum::Warning]; diff --git a/src/parser.rs b/src/parser.rs index 8090ab7..ab8da4d 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,4 +1,5 @@ use crate::{context::Context, opcode::{ModeType, INSTS}, tool::{print_error, upper_case_byte}}; +use log::info; use strum_macros::EnumDiscriminants; /* @@ -29,7 +30,7 @@ pub struct Parser<'a> { pub context: Context<'a> } -#[derive(Debug, PartialEq, Copy, Clone)] +#[derive(Debug, PartialEq, Clone)] #[derive(EnumDiscriminants)] #[strum_discriminants(name(TokenType))] pub enum Token<'a> { @@ -39,6 +40,7 @@ pub enum Token<'a> { Directive(&'a [u8]), Comment(&'a [u8]), Assign, + Comma, Branch(&'a [u8]), BranchNext(&'a [u8]), Number(u16, ModeType), @@ -117,6 +119,7 @@ impl<'a> Parser<'a> { match self.inner_parse() { Ok(_) => Ok(()), Err(error) => { + println!("2{:?}", self.context.source); print_error(&self.context.source, &error, self.line, self.column, self.end); Err(error) } @@ -204,6 +207,7 @@ impl<'a> Parser<'a> { b'"' => self.parse_string(), b';' => self.parse_comment(), b'=' => self.parse_assign(), + b',' => self.parse_comma(), b'\r' | b'\n' => self.parse_newline(), b' 
' | b'\t' => self.parse_whitespace(), n => { @@ -216,6 +220,8 @@ impl<'a> Parser<'a> { fn parse_absolute_mode(&mut self, number: u16, is_absolute: bool) -> Result, ParseError> { self.eat_spaces()?; + let current_index = self.index; + if self.peek() == Ok(b',') { self.eat()?; // Eat , self.eat_spaces()?; @@ -229,7 +235,13 @@ impl<'a> Parser<'a> { true => ModeType::AbsoluteY, false => ModeType::ZeroPageY })), - _ => Err(ParseError::InvalidNumberFormat), + _ => { + self.index = current_index; // Restore index + Ok(Token::Number(number, match is_absolute { + true => ModeType::Absolute, + false => ModeType::ZeroPage + })) + }, } } else { Ok(Token::Number(number, match is_absolute { @@ -530,6 +542,11 @@ impl<'a> Parser<'a> { Ok(Token::Assign) } + fn parse_comma(&mut self) -> Result, ParseError> { + self.eat_expected(b',', ParseError::UnexpectedSymbol)?; + Ok(Token::Comma) + } + fn parse_newline(&mut self) -> Result, ParseError> { let mut total_lines = 0; @@ -558,12 +575,13 @@ impl<'a> Parser<'a> { pub fn friendly_dump(&self) { let mut line = 0; + info!("Tokens"); print!("{:>5}. 
", line); for ast in self.context.tokens.borrow().iter() { let type_name = match ast.token { Token::Instr(_) => "INSTR", Token::Keyword(_) => "KEYWORD", - Token::Directive(_) => "OPTION", + Token::Directive(_) => "DIRECTIVE", Token::Comment(_) => "COMMENT", Token::Branch(_) => "BRANCH", Token::Number(_, _) => "NUMBER", @@ -573,6 +591,7 @@ impl<'a> Parser<'a> { Token::String(_) => "STRING", Token::BranchNext(_) => "BRANCHNEXT", Token::Assign => "ASSIGN", + Token::Comma => "COMMA", }; if ast.line != line { diff --git a/src/tests/generic.rs b/src/tests/generic.rs index 9096e67..07f7909 100644 --- a/src/tests/generic.rs +++ b/src/tests/generic.rs @@ -162,9 +162,13 @@ IOREST = $FF3F ; restore the A, X, and Y registers lda IOSAVE LDx IOREST"#, &[0xad, 0x4a, 0xff, 0xae, 0x3f, 0xff])] -#[case(br#".byte "abcd""#, &[0x61, 0x62, 0x63, 0x64])] +#[case(br#".ascii "abcd""#, &[0x61, 0x62, 0x63, 0x64])] +#[case(br#".asciiz "abcd""#, &[0x61, 0x62, 0x63, 0x64, 0x00])] #[case(br#".byte $ff"#, &[0xFF])] -#[case(br#".byte $ff .byte "abcd""#, &[0xFF, 0x61, 0x62, 0x63, 0x64])] +#[case(br#".byte $11, $22,$33,$44"#, &[0x11, 0x22, 0x33, 0x44])] +#[case(br#".word $2211, $4433,$6655, $8877"#, &[0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88])] +#[case(br#".byte $ff +.asciiz "abcd""#, &[0xFF, 0x61, 0x62, 0x63, 0x64, 0x00])] fn check_codes(#[case] data: &'_ [u8], #[case] codes: &'_ [u8]) { let context = Context::new(data); diff --git a/src/tests/parser.rs b/src/tests/parser.rs index 8218c65..2dfd07f 100644 --- a/src/tests/parser.rs +++ b/src/tests/parser.rs @@ -74,8 +74,6 @@ fn number_check(#[case] data: &'_ [u8], #[case] token: Token<'_>) { #[case(b"$a01", 3)] #[case(b"$a0111", 3)] #[case(b"$a", 0)] -#[case(b"$a0,b", 0)] -#[case(b"$a0,", 0)] #[case(b"$ta000", 0)] #[case(b"$a000-,X", 0)] #[case(b"($a0,X", 0)] diff --git a/src/tool.rs b/src/tool.rs index 8bfa5c7..85b4e74 100644 --- a/src/tool.rs +++ b/src/tool.rs @@ -1,6 +1,8 @@ use core::str; use std::fmt::Debug; +use log::error; + pub fn 
upper_case(bytes: &[u8]) -> Vec { bytes .into_iter() @@ -23,8 +25,9 @@ pub fn print_error(data: &'_ [u8], error: &T, line: usize, column: usi line_index += 1; if line_index == line { - start_index = index; + start_index = index+1; line_found = true; + continue; } if line_found { @@ -35,9 +38,9 @@ pub fn print_error(data: &'_ [u8], error: &T, line: usize, column: usi } println!(""); - println!("Error: {:?}", &error); - println!("Line: {}, column: {}", line + 1, column); - println!("{}", str::from_utf8(&data[start_index..end_index]).unwrap()); - println!("{}{}", (0..column).map(|_| " ").collect::(), (0..end-column).map(|_| "^").collect::()); + error!("{:?}", &error); + error!("Line: {}, column: {}", line + 1, column); + error!("{}", str::from_utf8(&data[start_index..end_index]).unwrap()); + error!("{}{}", (0..column).map(|_| " ").collect::(), (0..end-column).map(|_| "^").collect::()); println!(""); }