[Data converter] Improve the hex parsing & general functionality (#2352)
I spent some more time hacking on this instead of doing something more
productive. This does the following:
- Switch the `dat` deserialization from ad hoc string matching to a proper
`nom` parser that accounts for comments and leading `0x` prefixes.
- The parser now accepts `dat` values with leading zeroes truncated (though
we continue to generate `dat` files with the leading zeroes included); see
the example input sketched below.
- Output `dat` files will be generated by default as `MEMNAME.dat`, though
this can be customized with the `-e` flag. For example, `-e out` will
generate `MEMNAME.out`.
- Similarly, when reading in a `dat` directory, the tool will look for
`MEMNAME.dat`, which can be retargeted via the `-e` flag.
- The tool will also infer the `--to json` target when given a directory
as input.
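
For illustration (not part of the commit itself), a `dat` file the new parser accepts might look like the following, assuming a hypothetical memory with 2 bytes per entry:

// contents of MEMNAME.dat
0x00ff
ff
0x01

Both `0x00ff` and `ff` are read back as the same entry: the parsed bytes are reversed into little-endian order and a truncated line is zero-padded up to the 2-byte entry width, giving `[0xff, 0x00]` either way.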
EclecticGriffin authored Nov 14, 2024
1 parent dd55f4a commit fa298e9
Showing 5 changed files with 213 additions and 31 deletions.
19 changes: 18 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions tools/cider-data-converter/Cargo.toml
@@ -17,6 +17,7 @@ thiserror = "1.0.59"
num-bigint = { version = "0.4.6" }
num-rational = { version = "0.4.2" }
num-traits = { version = "0.2.19" }
nom = "7.1.3"

[dev-dependencies]
proptest = "1.0.0"
133 changes: 133 additions & 0 deletions tools/cider-data-converter/src/dat_parser.rs
@@ -0,0 +1,133 @@
use nom::{
branch::alt,
bytes::complete::{tag, take_while_m_n},
character::complete::{anychar, line_ending, multispace0},
combinator::{eof, map_res, opt},
error::Error,
multi::{many1, many_till},
sequence::{preceded, tuple},
IResult,
};

fn is_hex_digit(c: char) -> bool {
c.is_ascii_hexdigit()
}

fn from_hex(input: &str) -> Result<u8, std::num::ParseIntError> {
u8::from_str_radix(input, 16)
}

fn parse_hex(input: &str) -> IResult<&str, u8> {
map_res(take_while_m_n(1, 2, is_hex_digit), from_hex)(input)
}

/// Parse a single line of hex characters into a vector of bytes in the order
/// the characters are given, i.e. reversed relative to the little-endian byte
/// order used in the data dump.
fn hex_line(input: &str) -> IResult<&str, LineOrComment> {
// strip any leading whitespace and the optional `0x` prefix
let (input, bytes) = preceded(
tuple((multispace0, opt(tag("0x")))),
many1(parse_hex),
)(input)?;

Ok((input, LineOrComment::Line(bytes)))
}

fn comment(input: &str) -> IResult<&str, LineOrComment> {
// skip any whitespace
let (input, _) = multispace0(input)?;
let (input, _) = tag("//")(input)?;
let (input, _) = many_till(anychar, alt((line_ending, eof)))(input)?;
Ok((input, LineOrComment::Comment))
}
/// Parse a line which only contains whitespace
fn empty_line(input: &str) -> IResult<&str, LineOrComment> {
// skip any whitespace
let (input, _) = multispace0(input)?;
Ok((input, LineOrComment::EmptyLine))
}

pub fn line_or_comment(
input: &str,
) -> Result<LineOrComment, nom::Err<Error<&str>>> {
let (_, res) = alt((hex_line, comment, empty_line))(input)?;
Ok(res)
}

#[derive(Debug, PartialEq)]
pub enum LineOrComment {
Line(Vec<u8>),
Comment,
EmptyLine,
}

/// Parse a single line of hex characters, or a comment. Returns None if it's a
/// comment or an empty line and Some(Vec<u8>) if it's a hex line. Panics on a
/// parse error.
///
/// For the fallible version, see `line_or_comment`.
pub fn unwrap_line_or_comment(input: &str) -> Option<Vec<u8>> {
match line_or_comment(input).expect("hex parse failed") {
LineOrComment::Line(vec) => Some(vec),
LineOrComment::Comment => None,
LineOrComment::EmptyLine => None,
}
}

#[cfg(test)]
mod tests {
use super::*;

#[test]
fn test_comment() {
assert_eq!(comment("// comment"), Ok(("", LineOrComment::Comment)));
assert_eq!(comment("// comment\n"), Ok(("", LineOrComment::Comment)));
}

#[test]
fn test_hex_line() {
assert_eq!(hex_line("0x01"), Ok(("", LineOrComment::Line(vec![1]))));
assert_eq!(hex_line("0x02"), Ok(("", LineOrComment::Line(vec![2]))));
assert_eq!(hex_line("0x03"), Ok(("", LineOrComment::Line(vec![3]))));
assert_eq!(hex_line("0x04"), Ok(("", LineOrComment::Line(vec![4]))));
assert_eq!(hex_line("0x05"), Ok(("", LineOrComment::Line(vec![5]))));
assert_eq!(hex_line("0x06"), Ok(("", LineOrComment::Line(vec![6]))));
assert_eq!(hex_line("0x07"), Ok(("", LineOrComment::Line(vec![7]))));
assert_eq!(hex_line("0x08"), Ok(("", LineOrComment::Line(vec![8]))));
assert_eq!(hex_line("0x09"), Ok(("", LineOrComment::Line(vec![9]))));
assert_eq!(hex_line("0x0a"), Ok(("", LineOrComment::Line(vec![10]))));
assert_eq!(hex_line("0x0b"), Ok(("", LineOrComment::Line(vec![11]))));
assert_eq!(hex_line("0x0c"), Ok(("", LineOrComment::Line(vec![12]))));
assert_eq!(hex_line("0x0d"), Ok(("", LineOrComment::Line(vec![13]))));
assert_eq!(hex_line("0x0e"), Ok(("", LineOrComment::Line(vec![14]))));
assert_eq!(hex_line("0x0f"), Ok(("", LineOrComment::Line(vec![15]))));
assert_eq!(hex_line("0xff"), Ok(("", LineOrComment::Line(vec![255]))));
assert_eq!(
hex_line("0x00ff"),
Ok(("", LineOrComment::Line(vec![0, 255])))
);
}

#[test]
fn test_from_hex() {
assert_eq!(from_hex("0"), Ok(0));
assert_eq!(from_hex("1"), Ok(1));
assert_eq!(from_hex("2"), Ok(2));
assert_eq!(from_hex("3"), Ok(3));
assert_eq!(from_hex("4"), Ok(4));
assert_eq!(from_hex("5"), Ok(5));
assert_eq!(from_hex("6"), Ok(6));
assert_eq!(from_hex("7"), Ok(7));
assert_eq!(from_hex("8"), Ok(8));
assert_eq!(from_hex("9"), Ok(9));
assert_eq!(from_hex("a"), Ok(10));
assert_eq!(from_hex("b"), Ok(11));
assert_eq!(from_hex("c"), Ok(12));
assert_eq!(from_hex("d"), Ok(13));
assert_eq!(from_hex("e"), Ok(14));
assert_eq!(from_hex("f"), Ok(15));

assert_eq!(from_hex("FF"), Ok(255));
assert_eq!(from_hex("ff"), Ok(255));
}
}
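
Not part of the diff, but as a quick illustration of the behavior described in the commit message, assertions along these lines should hold against the parser above (a sketch in the style of the existing tests; the values are arbitrary):

assert_eq!(hex_line("1234"), Ok(("", LineOrComment::Line(vec![0x12, 0x34]))));
assert_eq!(hex_line("ff"), Ok(("", LineOrComment::Line(vec![0xff]))));
assert_eq!(line_or_comment("// a comment"), Ok(LineOrComment::Comment));
assert_eq!(line_or_comment(""), Ok(LineOrComment::EmptyLine));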
1 change: 1 addition & 0 deletions tools/cider-data-converter/src/lib.rs
@@ -1,2 +1,3 @@
pub mod converter;
pub mod dat_parser;
pub mod json_data;
90 changes: 60 additions & 30 deletions tools/cider-data-converter/src/main.rs
@@ -1,18 +1,21 @@
use argh::FromArgs;
- use cider_data_converter::{converter, json_data::JsonData};
+ use cider_data_converter::{
+ converter, dat_parser::unwrap_line_or_comment, json_data::JsonData,
+ };
use core::str;
use interp::serialization::{self, DataDump, SerializationError};
use itertools::Itertools;
use std::{
fs::File,
io::{self, BufRead, BufReader, BufWriter, Read, Write},
+ iter::repeat,
path::PathBuf,
str::FromStr,
};
use thiserror::Error;

const JSON_EXTENSION: &str = "data";
const CIDER_EXTENSION: &str = "dump";
const DAT_EXTENSION: &str = "dat";

const HEADER_FILENAME: &str = "header";

@@ -32,6 +35,14 @@ enum CiderDataConverterError {

#[error(transparent)]
DataDumpError(#[from] SerializationError),

#[error(
"Missing output path. This is required for the \"to dat\" conversion"
)]
MissingDatOutputPath,

#[error("Output path for \"to dat\" exists but it is a file")]
DatOutputPathIsFile,
}

impl std::fmt::Debug for CiderDataConverterError {
@@ -90,26 +101,40 @@ struct Opts {
/// exists solely for backwards compatibility with the old display format.
#[argh(switch, long = "legacy-quotes")]
use_quotes: bool,

/// the file extension to use for the output/input file when parsing to and
/// from the dat target. If not provided, the extension is assumed to be .dat
#[argh(option, short = 'e', long = "dat-file-extension")]
#[argh(default = "String::from(DAT_EXTENSION)")]
file_extension: String,
}

fn main() -> Result<(), CiderDataConverterError> {
let mut opts: Opts = argh::from_env();

// if no action is specified, try to guess based on file extensions
if opts.action.is_none()
+ // input is .json
&& (opts.input_path.as_ref().is_some_and(|x| {
x.extension().map_or(false, |y| y == JSON_EXTENSION)
- }) || opts.output_path.as_ref().is_some_and(|x| {
+ })
+ // output is .dump
+ || opts.output_path.as_ref().is_some_and(|x| {
x.extension().map_or(false, |y| y == CIDER_EXTENSION)
}))
{
opts.action = Some(Target::DataDump);
} else if opts.action.is_none()
+ // output is .json
&& (opts.output_path.as_ref().is_some_and(|x| {
x.extension().map_or(false, |x| x == JSON_EXTENSION)
- }) || opts.input_path.as_ref().is_some_and(|x| {
+ })
+ // input is .dump
+ || opts.input_path.as_ref().is_some_and(|x| {
x.extension().map_or(false, |x| x == CIDER_EXTENSION)
- }))
+ })
+ // input is a directory (suggesting a deserialization from dat)
+ || opts.input_path.as_ref().is_some_and(|x| x.is_dir()))
{
opts.action = Some(Target::Json);
}
@@ -144,30 +169,31 @@ fn main() -> Result<(), CiderDataConverterError> {
for mem_dec in &header.memories {
let starting_len = data.len();
let mem_file = BufReader::new(File::open(
- path.join(&mem_dec.name),
+ path.join(format!(
+ "{}.{}",
+ mem_dec.name, opts.file_extension
+ )),
)?);

- let mut line_data = vec![];
for line in mem_file.lines() {
let line = line?;
- for pair in &line.chars().chunks(2) {
- // there has got to be a better way to do this...
- let string =
- pair.into_iter().collect::<String>();
- let val = u8::from_str_radix(&string, 16)
- .expect("invalid hex");
- line_data.push(val);
+ if let Some(line_data) =
+ unwrap_line_or_comment(&line)
+ {
+ assert!(
+ line_data.len()
+ <= mem_dec.bytes_per_entry()
+ as usize,
+ "line data too long"
+ );
+
+ let padding = (mem_dec.bytes_per_entry()
+ as usize)
+ - line_data.len();
+
+ data.extend(line_data.into_iter().rev());
+ data.extend(repeat(0u8).take(padding))
}
- // TODO griffin: handle inputs that are
- // truncated or otherwise shorter than expected
-
- assert!(
- line_data.len()
- == (mem_dec.bytes_per_entry() as usize)
- );
- // reverse the byte order to get the expected
- // little endian and reuse the vec
- data.extend(line_data.drain(..).rev())
}

assert_eq!(
@@ -213,17 +239,22 @@ fn main() -> Result<(), CiderDataConverterError> {

if let Some(path) = opts.output_path {
if path.exists() && !path.is_dir() {
- // TODO griffin: Make this an actual error
- panic!("Output path exists but is not a directory")
+ return Err(
+ CiderDataConverterError::DatOutputPathIsFile,
+ );
} else if !path.exists() {
std::fs::create_dir(&path)?;
}

let mut header_output = File::create(path.join("header"))?;
let mut header_output =
File::create(path.join(HEADER_FILENAME))?;
header_output.write_all(&data.header.serialize()?)?;

for memory in &data.header.memories {
- let file = File::create(path.join(&memory.name))?;
+ let file = File::create(path.join(format!(
+ "{}.{}",
+ memory.name, opts.file_extension
+ )))?;
let mut writer = BufWriter::new(file);
for bytes in data
.get_data(&memory.name)
@@ -243,8 +274,7 @@ fn main() -> Result<(), CiderDataConverterError> {
}
}
} else {
- // TODO griffin: Make this an actual error
- panic!("Output path not specified, this is required for the dat target")
+ return Err(CiderDataConverterError::MissingDatOutputPath);
}
}
}
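
As a rough sketch of the resulting layout (not part of the diff; the memory names are made up), a "to dat" run with the default extension leaves the output directory looking like:

output_dir/
header        <- serialized data dump header
mem_a.dat     <- one file per memory, one hex entry per line
mem_b.dat

With `-e out` the memory files become `mem_a.out` and `mem_b.out` instead, and reading such a directory back uses the same `-e` extension, with the `--to json` target inferred automatically because the input is a directory.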
