From 9b6f6945347e75ae7e59941341f15b5fd68d6bc3 Mon Sep 17 00:00:00 2001 From: Colin Roberts Date: Thu, 15 Aug 2024 16:36:19 -0600 Subject: [PATCH] feat: upgrade input JSON creation (#14) * add: `reddit_response.json` * refactor tests + add failing case * easier fix * test: parse to key * tests: key parsing * bug: `next_end_of_kv` on read `:` * fix: `end_of_kv` bug * test: find value * tests: `inside_value` and `inside_value_to_exit` * test: parse to NEXT key * parses JSON with two string keys * WIP: value inside value * comment * refactor (#10) * wip: start with bitmask * WIP: time to start testing * tests: `ArrayAdd` and `ArrayMul` * tests passing * update comments * feat: 2 key depth 1 json * 2 kv json and all tests passing * nested json works!!! * reduce constraints * cleanup * rename variables * more cleaning * more cleanup * make comments clean * WAYLON NITPICKING ME LOL * feat: improved CLI for witness * gitignore input.json * Update main.rs * Squashed commit of the following: --- .gitignore | 3 +- Cargo.lock | 220 +++++++- Cargo.toml | 11 +- create_witness/Cargo.toml | 7 - create_witness/src/main.rs | 96 ---- inputs/extract/input.json | 803 ---------------------------- inputs/test_extract/input.json | 31 -- inputs/test_extract_hard/input.json | 58 -- src/item.rs | 37 -- src/lib.rs | 122 ----- src/main.rs | 66 +++ 11 files changed, 282 insertions(+), 1172 deletions(-) delete mode 100644 create_witness/Cargo.toml delete mode 100644 create_witness/src/main.rs delete mode 100644 inputs/extract/input.json delete mode 100644 inputs/test_extract/input.json delete mode 100644 inputs/test_extract_hard/input.json delete mode 100644 src/item.rs delete mode 100644 src/lib.rs create mode 100644 src/main.rs diff --git a/.gitignore b/.gitignore index 5b586b4..40d4c9c 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,5 @@ node_modules/* build/* ptau/* circuits/test/*.circom -circuits/main/* \ No newline at end of file +circuits/main/* +inputs/**/*.json \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index 256c9d6..2479f30 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3,16 +3,111 @@ version = 3 [[package]] -name = "create-witness" -version = "0.0.0" +name = "anstream" +version = "0.6.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64e15c1ab1f89faffbf04a634d5e1962e9074f2741eef6d97f3c4e322426d526" dependencies = [ - "serde", - "serde_json", + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1" + +[[package]] +name = "anstyle-parse" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb47de1e80c2b463c735db5b217a0ddc39d612e7ac9e2e96a5aed1f57616c1cb" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d36fc52c7f6c869915e99412912f22093507da8d9e942ceaf66fe4b7c14422a" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5bf74e1b6e971609db8ca7a9ce79fd5768ab6ae46441c572e46cf596f59e57f8" +dependencies = [ + "anstyle", + "windows-sys", +] + +[[package]] +name = "clap" +version = "4.5.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c937d4061031a6d0c8da4b9a4f98a172fc2976dfb1c19213a9cf7d0d3c837e36" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85379ba512b21a328adf887e85f7742d12e96eb31f3ef077df4ffc26b506ffed" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", ] [[package]] -name = "extractor" -version = "0.1.0" +name = "clap_derive" +version = "4.5.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "501d359d5f3dcaf6ecdeee48833ae73ec6e42723a1e52419c79abf9507eec0a0" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" + +[[package]] +name = "colorchoice" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3fd119d74b830634cea2a0f58bbd0d54540518a14397557951e79340abc28c0" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" [[package]] name = "itoa" @@ -20,6 +115,12 @@ version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" +[[package]] +name = "memchr" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" + [[package]] name = "proc-macro2" version = "1.0.86" @@ -46,18 +147,18 @@ checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" [[package]] name = "serde" -version = "1.0.204" +version = "1.0.205" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc76f558e0cbb2a839d37354c575f1dc3fdc6546b5be373ba43d95f231bf7c12" +checksum = "e33aedb1a7135da52b7c21791455563facbbcc43d0f0f66165b42c21b3dfb150" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.204" +version = "1.0.205" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0cd7e117be63d3c3678776753929474f3b04a43a080c744d6b0ae2a8c28e222" +checksum = "692d6f5ac90220161d6774db30c662202721e64aed9058d2c394f451261420c1" dependencies = [ "proc-macro2", "quote", @@ -66,15 +167,22 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.120" +version = "1.0.122" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e0d21c9a8cae1235ad58a00c11cb40d4b1e5c784f1ef2c537876ed6ffd8b7c5" +checksum = "784b6203951c57ff748476b126ccb5e8e2959a5c19e5c617ab1956be3dbc68da" dependencies = [ "itoa", + "memchr", "ryu", "serde", ] +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + [[package]] name = "syn" version = "2.0.72" @@ -91,3 +199,91 @@ name = "unicode-ident" version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "witness" +version = "0.0.0" +dependencies = [ + "clap", + "serde", + "serde_json", +] diff --git a/Cargo.toml b/Cargo.toml index 5a51ecc..25d2171 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,8 @@ -[workspace] -members = ["create_witness"] - [package] -name = "extractor" -version = "0.1.0" +name = "witness" edition = "2021" + +[dependencies] +serde = { version = "1.0.204", features = ["derive"] } +serde_json = "1.0.120" +clap = { version = "4.5.14", features = ["derive"] } diff --git a/create_witness/Cargo.toml b/create_witness/Cargo.toml deleted file mode 100644 index ffa0959..0000000 --- a/create_witness/Cargo.toml +++ /dev/null @@ -1,7 +0,0 @@ -[package] -name = "create-witness" -edition = "2021" - -[dependencies] -serde = { version = "1.0.204", features = ["derive"] } -serde_json = "1.0.120" diff --git a/create_witness/src/main.rs b/create_witness/src/main.rs deleted file mode 100644 index dbe494b..0000000 --- a/create_witness/src/main.rs +++ /dev/null @@ -1,96 +0,0 @@ -use std::io::Write; - -// pub const KEY: &[u8] = b"\"glossary\"".as_slice(); -pub const KEY: &[u8] = b"key1".as_slice(); - -pub const KEYS: &[&[u8]] = &[ - b"\"glossary\"".as_slice(), - b"\"GlossDiv\"".as_slice(), - b"\"title\"".as_slice(), -]; -// pub const DATA: &[u8] = include_bytes!("../../json_examples/example.json"); -pub const DATA: &[u8] = include_bytes!("../../json_examples/sambhav_example.json"); - -#[derive(serde::Serialize)] -pub struct Witness { - // num_keys: usize, - // key_sizes: Vec, - // keys: Vec>, - // num_key_bytes: usize, - key: Vec, - // num_data_bytes: usize, - data: Vec, -} - -pub fn main() { - // Properly serialize information about the keys we want to extract - // let mut max_num_keys = 0; - // let mut max_num_key_bytes = 0; - // let mut key_sizes = vec![]; - // let mut keys = vec![]; - // for &key in KEYS { - // let key_len = key.len(); - // key_sizes.push(key_len); - // if key_len > max_num_key_bytes { - // max_num_key_bytes = key_len; - // } - // keys.push(key.to_vec()); - // max_num_keys += 1; - // } - // println!("MAX_NUM_KEYS: {max_num_keys}"); - // println!("MAX_NUM_KEY_BYTES: {max_num_key_bytes}"); - - // Enforce that each key comes in as af fixed length (TODO: we need to make sure we encode this somehow, perhaps we pass in a vector of key lengths) - // for key in &mut keys { - // key.extend(vec![0; max_num_key_bytes - key.len()]); - // } - - // Properly serialize information about the data we extract from - println!("NUM_KEY_BYTES: {}", KEY.len()); - println!("NUM_DATA_BYTES: {}", DATA.len()); - - // Create a witness file as `input.json` - let witness = Witness { - // num_keys: max_num_keys, // For now we can set this to be the same - // key_sizes, - // keys, - // num_key_bytes: KEY.len(), - key: KEY.to_vec(), - // num_data_bytes: DATA.len(), // For now we can set this to be the same - data: DATA.to_vec(), - }; - let mut file = std::fs::File::create("inputs/test_extract_sambhav/input.json").unwrap(); - file.write_all(serde_json::to_string_pretty(&witness).unwrap().as_bytes()) - .unwrap(); -} - -// fn get_bits(bytes: &[u8]) -> Vec { -// bytes -// .iter() -// .flat_map(|&byte| { -// (0..8) -// .rev() -// .map(move |i| ((byte.to_be_bytes()[0] >> i) & 1) == 1) // ensure this is all big-endian -// }) -// .collect() -// } - -// #[cfg(test)] -// mod tests { -// use super::*; -// // Use example.json which has first two ASCII chars: `{` and `\n` -// // ASCII code for `{` 01111011 -// // ASCII code for `\n` 00001010 -// #[test] -// fn test_get_bits() { -// let bits = get_bits(DATA); -// #[allow(clippy::inconsistent_digit_grouping)] -// let compare_bits: Vec = vec![0, 1, 1, 1, 1, 0, 1, 1_, 0, 0, 0, 0, 1, 0, 1, 0] -// .into_iter() -// .map(|x| x == 1) -// .collect(); -// bits.iter() -// .zip(compare_bits.iter()) -// .for_each(|(x, y)| assert_eq!(x, y)); -// } -// } diff --git a/inputs/extract/input.json b/inputs/extract/input.json deleted file mode 100644 index 556be54..0000000 --- a/inputs/extract/input.json +++ /dev/null @@ -1,803 +0,0 @@ -{ - "key": [ - 34, - 103, - 108, - 111, - 115, - 115, - 97, - 114, - 121, - 34 - ], - "data": [ - 123, - 10, - 32, - 32, - 32, - 32, - 34, - 103, - 108, - 111, - 115, - 115, - 97, - 114, - 121, - 34, - 58, - 32, - 123, - 10, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 34, - 116, - 105, - 116, - 108, - 101, - 34, - 58, - 32, - 34, - 101, - 120, - 97, - 109, - 112, - 108, - 101, - 32, - 103, - 108, - 111, - 115, - 115, - 97, - 114, - 121, - 34, - 44, - 10, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 34, - 71, - 108, - 111, - 115, - 115, - 68, - 105, - 118, - 34, - 58, - 32, - 123, - 10, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 34, - 116, - 105, - 116, - 108, - 101, - 34, - 58, - 32, - 34, - 83, - 34, - 44, - 10, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 34, - 71, - 108, - 111, - 115, - 115, - 76, - 105, - 115, - 116, - 34, - 58, - 32, - 123, - 10, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 34, - 71, - 108, - 111, - 115, - 115, - 69, - 110, - 116, - 114, - 121, - 34, - 58, - 32, - 123, - 10, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 34, - 73, - 68, - 34, - 58, - 32, - 34, - 83, - 71, - 77, - 76, - 34, - 44, - 10, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 34, - 83, - 111, - 114, - 116, - 65, - 115, - 34, - 58, - 32, - 34, - 83, - 71, - 77, - 76, - 34, - 44, - 10, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 34, - 71, - 108, - 111, - 115, - 115, - 84, - 101, - 114, - 109, - 34, - 58, - 32, - 34, - 83, - 116, - 97, - 110, - 100, - 97, - 114, - 100, - 32, - 71, - 101, - 110, - 101, - 114, - 97, - 108, - 105, - 122, - 101, - 100, - 32, - 77, - 97, - 114, - 107, - 117, - 112, - 32, - 76, - 97, - 110, - 103, - 117, - 97, - 103, - 101, - 34, - 44, - 10, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 34, - 65, - 99, - 114, - 111, - 110, - 121, - 109, - 34, - 58, - 32, - 34, - 83, - 71, - 77, - 76, - 34, - 44, - 10, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 34, - 65, - 98, - 98, - 114, - 101, - 118, - 34, - 58, - 32, - 34, - 73, - 83, - 79, - 32, - 56, - 56, - 55, - 57, - 58, - 49, - 57, - 56, - 54, - 34, - 44, - 10, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 34, - 71, - 108, - 111, - 115, - 115, - 68, - 101, - 102, - 34, - 58, - 32, - 123, - 10, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 34, - 112, - 97, - 114, - 97, - 34, - 58, - 32, - 34, - 65, - 32, - 109, - 101, - 116, - 97, - 45, - 109, - 97, - 114, - 107, - 117, - 112, - 32, - 108, - 97, - 110, - 103, - 117, - 97, - 103, - 101, - 44, - 32, - 117, - 115, - 101, - 100, - 32, - 116, - 111, - 32, - 99, - 114, - 101, - 97, - 116, - 101, - 32, - 109, - 97, - 114, - 107, - 117, - 112, - 32, - 108, - 97, - 110, - 103, - 117, - 97, - 103, - 101, - 115, - 32, - 115, - 117, - 99, - 104, - 32, - 97, - 115, - 32, - 68, - 111, - 99, - 66, - 111, - 111, - 107, - 46, - 34, - 44, - 10, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 34, - 71, - 108, - 111, - 115, - 115, - 83, - 101, - 101, - 65, - 108, - 115, - 111, - 34, - 58, - 32, - 91, - 10, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 34, - 71, - 77, - 76, - 34, - 44, - 10, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 34, - 88, - 77, - 76, - 34, - 10, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 93, - 10, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 125, - 44, - 10, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 34, - 71, - 108, - 111, - 115, - 115, - 83, - 101, - 101, - 34, - 58, - 32, - 34, - 109, - 97, - 114, - 107, - 117, - 112, - 34, - 10, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 125, - 10, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 125, - 10, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 125, - 10, - 32, - 32, - 32, - 32, - 125, - 10, - 125 - ] -} \ No newline at end of file diff --git a/inputs/test_extract/input.json b/inputs/test_extract/input.json deleted file mode 100644 index d99f3f3..0000000 --- a/inputs/test_extract/input.json +++ /dev/null @@ -1,31 +0,0 @@ -{ - "key": [ - 107, - 101, - 121, - 49 - ], - "data": [ - 123, - 10, - 32, - 32, - 32, - 32, - 34, - 107, - 101, - 121, - 49, - 34, - 58, - 32, - 34, - 97, - 98, - 99, - 34, - 10, - 125 - ] -} \ No newline at end of file diff --git a/inputs/test_extract_hard/input.json b/inputs/test_extract_hard/input.json deleted file mode 100644 index a80958f..0000000 --- a/inputs/test_extract_hard/input.json +++ /dev/null @@ -1,58 +0,0 @@ -{ - "key": [ - 107, - 101, - 121, - 49 - ], - "data": [ - 123, - 10, - 32, - 32, - 32, - 32, - 34, - 107, - 101, - 121, - 49, - 34, - 58, - 32, - 34, - 123, - 125, - 97, - 44, - 98, - 99, - 125, - 34, - 44, - 10, - 32, - 32, - 32, - 32, - 34, - 107, - 101, - 121, - 50, - 34, - 58, - 32, - 34, - 92, - 34, - 97, - 98, - 99, - 92, - 34, - 34, - 10, - 125 - ] -} \ No newline at end of file diff --git a/src/item.rs b/src/item.rs deleted file mode 100644 index 8823da8..0000000 --- a/src/item.rs +++ /dev/null @@ -1,37 +0,0 @@ -#[derive(Clone, Copy, Debug)] -pub struct Item<'a, T> { - pub label: &'a str, - inner: T, -} - -impl<'a> Item<'a, ()> { - pub fn create(label: &'a str) -> Self { - Item { label, inner: () } - } -} - -impl<'a, T: Copy> Item<'a, T> { - pub fn inner(&self) -> T { - self.inner - } - - pub fn append(self, label: &'a str) -> Item<'a, Item> { - Item { label, inner: self } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn create_nested_item() { - let item = Item::create("data") - .append("profile") - .append("identity") - .append("balance") - .append("userBalance") - .append("value"); - println!("{item:?}"); - } -} diff --git a/src/lib.rs b/src/lib.rs deleted file mode 100644 index 71db018..0000000 --- a/src/lib.rs +++ /dev/null @@ -1,122 +0,0 @@ -pub const EXAMPLE_JSON: &[u8] = include_bytes!("../json_examples/example.json"); -pub const VENMO_JSON: &[u8] = include_bytes!("../json_examples/venmo_response.json"); - -pub mod item; - -// TODO: Mark when we're in a key versus in a value, versus whitespace, etc.? - -pub struct Machine<'a> { - pub keys: Vec<&'a [u8]>, - depth: usize, - pointer: usize, -} - -#[derive(Debug)] -pub enum Instruction { - IncreaseDepth(usize), - DecreaseDepth(usize), - EOF, -} - -impl<'a> Machine<'a> { - pub fn new(keys: Vec<&'a [u8]>) -> Self { - Machine { - keys, - depth: 0, - pointer: 0, - } - } - - pub fn extract(&mut self, data_bytes: &'a [u8]) -> Option<&'a [u8]> { - // Make sure that there is more data in the JSON than what we have expressed in all of our keys else this makes no sense at all. - assert!(data_bytes.len() > self.keys.iter().map(|k| k.len()).sum()); - // Make sure the JSON begins with an opening bracket - assert_eq!(data_bytes[0] ^ b"{"[0], 0); - - while self.depth < self.keys.len() { - match get_key(self.keys[self.depth], &data_bytes[self.pointer..]) { - Instruction::EOF => return None, - _inst @ Instruction::DecreaseDepth(offset) => { - // dbg!(inst); - self.depth -= 1; - self.pointer += offset; - // dbg!(String::from_utf8_lossy(&[data_bytes[self.pointer]])); - } - _inst @ Instruction::IncreaseDepth(offset) => { - // dbg!(inst); - self.depth += 1; - self.pointer += offset; - // dbg!(String::from_utf8_lossy(&[data_bytes[self.pointer]])); - } - } - } - - // Get the value as a raw str at this location in the JSON and offset by one to bypass a `:` - let value_start = self.pointer + 1; - let mut value_length = 0; - // Grab the value up to the next delimiter doken (TODO: if a `,` or `}` is present in a string, we are doomed, so we need to track these objects better!) - while (data_bytes[value_start + value_length] != b"}"[0]) - & (data_bytes[value_start + value_length] != b","[0]) - { - value_length += 1; - } - Some(&data_bytes[value_start..value_start + value_length]) - } -} - -fn get_key(key: &[u8], data_bytes: &[u8]) -> Instruction { - let key_length = key.len(); - - // dbg!(String::from_utf8_lossy(key)); - - 'outer: for i in 0..(data_bytes.len() - key_length) { - #[allow(clippy::needless_range_loop)] - for j in 0..key_length { - // dbg!(String::from_utf8_lossy(&[data_bytes[i..i + key_length][j]])); - if data_bytes[i..i + key_length][j] == b"}"[0] { - // Hit an end brace "}" so we need to return the current pointer as an offset and decrease depth - return Instruction::DecreaseDepth(i + j); - } - if key[j] ^ data_bytes[i..i + key_length][j] != 0 { - continue 'outer; - } - } - // If we hit here then we must have fully matched a key so we return the current pointer as an offset - return Instruction::IncreaseDepth(i + key_length); - } - // If we hit here, we must have hit EOF (which is actually an error?) - Instruction::EOF -} - -#[cfg(test)] -mod tests { - - use super::*; - - #[test] - fn get_value_venmo() { - let keys = vec![ - b"\"data\"".as_slice(), - b"\"profile\"".as_slice(), - b"\"identity\"".as_slice(), - b"\"balance\"".as_slice(), - b"\"userBalance\"".as_slice(), - b"\"value\"".as_slice(), - ]; - let mut machine = Machine::new(keys); - let value = String::from_utf8_lossy(machine.extract(VENMO_JSON).unwrap()); - assert_eq!(value, " 523.69\n ") - } - - #[test] - fn get_value_example() { - let keys = vec![ - b"\"glossary\"".as_slice(), - b"\"GlossDiv\"".as_slice(), - b"\"title\"".as_slice(), - ]; - let mut machine = Machine::new(keys); - let value = String::from_utf8_lossy(machine.extract(EXAMPLE_JSON).unwrap()); - assert_eq!(value, " \"S\"a") - } -} diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..cf94f9d --- /dev/null +++ b/src/main.rs @@ -0,0 +1,66 @@ +use clap::Parser; +use serde_json::Value; +use std::io::Write; +use std::path::PathBuf; + +#[derive(Parser, Debug)] +#[command(name = "witness")] +struct Args { + /// Path to the JSON file + #[arg(short, long)] + json_file: PathBuf, + + /// Keys to extract (can be specified multiple times) + #[arg(short, long)] + keys: Vec, + + /// Output directory (will be created if it doesn't exist) + #[arg(short, long, default_value = ".")] + output_dir: PathBuf, + + /// Output filename (will be created if it doesn't exist) + #[arg(short, long, default_value = "output.json")] + filename: String, +} + +#[derive(serde::Serialize)] +pub struct Witness { + #[serde(flatten)] + keys: serde_json::Map, + data: Vec, +} + +pub fn main() -> Result<(), Box> { + let args = Args::parse(); + + // Read the JSON file + let data = std::fs::read(&args.json_file)?; + + // Create a map to store keys + let mut keys_map = serde_json::Map::new(); + for (index, key) in args.keys.iter().enumerate() { + keys_map.insert( + format!("key{}", index + 1), + Value::Array( + key.as_bytes() + .iter() + .map(|x| serde_json::json!(x)) + .collect(), + ), + ); + } + + // Create a witness file as `input.json` + let witness = Witness { + keys: keys_map, + data, + }; + + let output_file = args.output_dir.join(args.filename); + let mut file = std::fs::File::create(output_file)?; + file.write_all(serde_json::to_string_pretty(&witness)?.as_bytes())?; + + println!("Input file created successfully."); + + Ok(()) +}