Skip to content

Commit

Permalink
remove SmallVec and LazyIndexMap from json value
Browse files Browse the repository at this point in the history
  • Loading branch information
davidhewitt committed Jan 23, 2025
1 parent 4cefa8b commit 7cff228
Show file tree
Hide file tree
Showing 2 changed files with 155 additions and 159 deletions.
118 changes: 62 additions & 56 deletions crates/jiter/src/value.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
use std::borrow::Cow;
use std::sync::Arc;
use std::sync::{Arc, OnceLock};

#[cfg(feature = "num-bigint")]
use num_bigint::BigInt;
use smallvec::SmallVec;

use crate::errors::{json_error, JsonError, JsonResult, DEFAULT_RECURSION_LIMIT};
use crate::lazy_index_map::LazyIndexMap;
use crate::number_decoder::{NumberAny, NumberInt, NumberRange};
use crate::parse::{Parser, Peek};
use crate::string_decoder::{StringDecoder, StringDecoderRange, StringOutput, Tape};
Expand All @@ -26,8 +25,13 @@ pub enum JsonValue<'s> {
Object(JsonObject<'s>),
}

pub type JsonArray<'s> = Arc<SmallVec<[JsonValue<'s>; 8]>>;
pub type JsonObject<'s> = Arc<LazyIndexMap<Cow<'s, str>, JsonValue<'s>>>;
/// Parsed JSON array.
pub type JsonArray<'s> = Arc<Vec<JsonValue<'s>>>;
/// Parsed JSON object. Note that `jiter` does not attempt to deduplicate keys,
/// so it is possible that the key occurs multiple times in the object.
///
/// It is up to the user to handle this case and decide how to proceed.
pub type JsonObject<'s> = Arc<Vec<(Cow<'s, str>, JsonValue<'s>)>>;

#[cfg(feature = "python")]
#[allow(deprecated)] // keeping around for sake of allowing downstream to migrate
Expand Down Expand Up @@ -149,6 +153,16 @@ impl<'j> JsonValue<'j> {
pub fn to_static(&self) -> JsonValue<'static> {
value_static(self.clone())
}

fn empty_array() -> JsonValue<'static> {
static EMPTY_ARRAY: OnceLock<JsonArray<'static>> = OnceLock::new();
JsonValue::Array(EMPTY_ARRAY.get_or_init(|| Arc::new(Vec::new())).clone())
}

fn empty_object() -> JsonValue<'static> {
static EMPTY_OBJECT: OnceLock<JsonObject<'static>> = OnceLock::new();
JsonValue::Object(EMPTY_OBJECT.get_or_init(|| Arc::new(Vec::new())).clone())
}
}

fn value_static(v: JsonValue<'_>) -> JsonValue<'static> {
Expand All @@ -160,8 +174,12 @@ fn value_static(v: JsonValue<'_>) -> JsonValue<'static> {
JsonValue::BigInt(b) => JsonValue::BigInt(b),
JsonValue::Float(f) => JsonValue::Float(f),
JsonValue::Str(s) => JsonValue::Str(s.into_owned().into()),
JsonValue::Array(v) => JsonValue::Array(Arc::new(v.iter().map(JsonValue::to_static).collect::<SmallVec<_>>())),
JsonValue::Object(o) => JsonValue::Object(Arc::new(o.to_static())),
JsonValue::Array(v) => JsonValue::Array(Arc::new(v.iter().map(JsonValue::to_static).collect())),
JsonValue::Object(o) => JsonValue::Object(Arc::new(
o.iter()
.map(|(k, v)| (k.clone().into_owned().into(), v.to_static()))
.collect(),
)),
}
}

Expand Down Expand Up @@ -252,15 +270,14 @@ fn take_value<'j, 's>(
Ok(JsonValue::Str(create_cow(s)))
}
Peek::Array => {
let array = Arc::new(SmallVec::new());
let peek_first = match parser.array_first() {
Ok(Some(peek)) => peek,
Err(e) if !(partial_active && e.allowed_if_partial()) => return Err(e),
Ok(None) | Err(_) => return Ok(JsonValue::Array(array)),
Ok(None) | Err(_) => return Ok(JsonValue::empty_array()),
};
take_value_recursive(
peek_first,
RecursedValue::Array(array),
RecursedValue::new_array(),
parser,
tape,
recursion_limit,
Expand All @@ -271,20 +288,16 @@ fn take_value<'j, 's>(
}
Peek::Object => {
// same for objects
let object = Arc::new(LazyIndexMap::new());
let first_key = match parser.object_first::<StringDecoder>(tape) {
Ok(Some(first_key)) => first_key,
Err(e) if !(partial_active && e.allowed_if_partial()) => return Err(e),
_ => return Ok(JsonValue::Object(object)),
_ => return Ok(JsonValue::empty_object()),
};
let first_key = create_cow(first_key);
match parser.peek() {
Ok(peek) => take_value_recursive(
peek,
RecursedValue::Object {
partial: object,
next_key: first_key,
},
RecursedValue::new_object(first_key),
parser,
tape,
recursion_limit,
Expand All @@ -293,7 +306,7 @@ fn take_value<'j, 's>(
create_cow,
),
Err(e) if !(partial_active && e.allowed_if_partial()) => Err(e),
_ => Ok(JsonValue::Object(object)),
_ => Ok(JsonValue::empty_object()),
}
}
_ => {
Expand All @@ -316,13 +329,26 @@ fn take_value<'j, 's>(
}

enum RecursedValue<'s> {
Array(JsonArray<'s>),
Array(Vec<JsonValue<'s>>),
Object {
partial: JsonObject<'s>,
partial: Vec<(Cow<'s, str>, JsonValue<'s>)>,
next_key: Cow<'s, str>,
},
}

impl<'s> RecursedValue<'s> {
fn new_array() -> Self {
RecursedValue::Array(Vec::with_capacity(8))
}

fn new_object(next_key: Cow<'s, str>) -> Self {
RecursedValue::Object {
partial: Vec::with_capacity(8),
next_key,
}
}
}

#[inline(never)] // this is an iterative algo called only from take_value, no point in inlining
#[allow(clippy::too_many_lines)] // FIXME?
#[allow(clippy::too_many_arguments)]
Expand Down Expand Up @@ -354,7 +380,6 @@ fn take_value_recursive<'j, 's>(
'recursion: loop {
let mut value = match &mut current_recursion {
RecursedValue::Array(array) => {
let array = Arc::get_mut(array).expect("sole writer");
loop {
let result = match peek {
Peek::True => parser.consume_true().map(|()| JsonValue::Bool(true)),
Expand All @@ -364,30 +389,22 @@ fn take_value_recursive<'j, 's>(
.consume_string::<StringDecoder>(tape, allow_partial.allow_trailing_str())
.map(|s| JsonValue::Str(create_cow(s))),
Peek::Array => {
let array = Arc::new(SmallVec::new());
match parser.array_first() {
Ok(Some(first_peek)) => {
push_recursion!(first_peek, RecursedValue::Array(array));
push_recursion!(first_peek, RecursedValue::new_array());
// immediately jump to process the first value in the array
continue 'recursion;
}
Err(e) if !(partial_active && e.allowed_if_partial()) => return Err(e),
_ => (),
};
Ok(JsonValue::Array(array))
Ok(JsonValue::empty_array())
}
Peek::Object => {
let object = Arc::new(LazyIndexMap::new());
match parser.object_first::<StringDecoder>(tape) {
Ok(Some(first_key)) => match parser.peek() {
Ok(peek) => {
push_recursion!(
peek,
RecursedValue::Object {
partial: object,
next_key: create_cow(first_key)
}
);
push_recursion!(peek, RecursedValue::new_object(create_cow(first_key)));
continue 'recursion;
}
Err(e) if !(partial_active && e.allowed_if_partial()) => return Err(e),
Expand All @@ -396,7 +413,7 @@ fn take_value_recursive<'j, 's>(
Err(e) if !(partial_active && e.allowed_if_partial()) => return Err(e),
_ => (),
};
Ok(JsonValue::Object(object))
Ok(JsonValue::empty_object())
}
_ => parser
.consume_number::<NumberAny>(peek.into_inner(), allow_inf_nan)
Expand Down Expand Up @@ -432,8 +449,7 @@ fn take_value_recursive<'j, 's>(
let RecursedValue::Array(mut array) = current_recursion else {
unreachable!("known to be in array recursion");
};

Arc::get_mut(&mut array).expect("sole writer to value").push(value);
array.push(value);
array
}
Err(e) if !(partial_active && e.allowed_if_partial()) => return Err(e),
Expand All @@ -445,11 +461,10 @@ fn take_value_recursive<'j, 's>(
}
};

break JsonValue::Array(array);
break JsonValue::Array(Arc::new(array));
}
}
RecursedValue::Object { partial, next_key } => {
let partial = Arc::get_mut(partial).expect("sole writer");
loop {
let result = match peek {
Peek::True => parser.consume_true().map(|()| JsonValue::Bool(true)),
Expand All @@ -459,30 +474,22 @@ fn take_value_recursive<'j, 's>(
.consume_string::<StringDecoder>(tape, allow_partial.allow_trailing_str())
.map(|s| JsonValue::Str(create_cow(s))),
Peek::Array => {
let array = Arc::new(SmallVec::new());
match parser.array_first() {
Ok(Some(first_peek)) => {
push_recursion!(first_peek, RecursedValue::Array(array));
push_recursion!(first_peek, RecursedValue::new_array());
// immediately jump to process the first value in the array
continue 'recursion;
}
Err(e) if !(partial_active && e.allowed_if_partial()) => return Err(e),
_ => (),
};
Ok(JsonValue::Array(array))
Ok(JsonValue::empty_array())
}
Peek::Object => {
let object = Arc::new(LazyIndexMap::new());
match parser.object_first::<StringDecoder>(tape) {
Ok(Some(first_key)) => match parser.peek() {
Ok(peek) => {
push_recursion!(
peek,
RecursedValue::Object {
partial: object,
next_key: create_cow(first_key)
}
);
push_recursion!(peek, RecursedValue::new_object(create_cow(first_key)));
continue 'recursion;
}
Err(e) if !(partial_active && e.allowed_if_partial()) => return Err(e),
Expand All @@ -491,7 +498,7 @@ fn take_value_recursive<'j, 's>(
Err(e) if !(partial_active && e.allowed_if_partial()) => return Err(e),
_ => (),
};
Ok(JsonValue::Object(object))
Ok(JsonValue::empty_object())
}
_ => parser
.consume_number::<NumberAny>(peek.into_inner(), allow_inf_nan)
Expand All @@ -518,10 +525,10 @@ fn take_value_recursive<'j, 's>(
match parser.peek() {
Ok(next_peek) => {
// object continuing
partial.insert(
partial.push((
std::mem::replace(next_key, create_cow(yet_another_key)),
value,
);
));
peek = next_peek;
continue;
}
Expand All @@ -536,8 +543,7 @@ fn take_value_recursive<'j, 's>(
let RecursedValue::Object { mut partial, next_key } = current_recursion else {
unreachable!("known to be in object recursion");
};

Arc::get_mut(&mut partial).expect("sole writer").insert(next_key, value);
partial.push((next_key, value));
partial
}
Err(e) if !(partial_active && e.allowed_if_partial()) => return Err(e),
Expand All @@ -549,7 +555,7 @@ fn take_value_recursive<'j, 's>(
}
};

break JsonValue::Object(object);
break JsonValue::Object(Arc::new(object));
}
}
};
Expand All @@ -565,7 +571,7 @@ fn take_value_recursive<'j, 's>(

value = match current_recursion {
RecursedValue::Array(mut array) => {
Arc::get_mut(&mut array).expect("sole writer").push(value);
array.push(value);
match parser.array_step() {
Ok(Some(next_peek)) => {
current_recursion = RecursedValue::Array(array);
Expand All @@ -574,10 +580,10 @@ fn take_value_recursive<'j, 's>(
Err(e) if !(partial_active && e.allowed_if_partial()) => return Err(e),
_ => (),
}
JsonValue::Array(array)
JsonValue::Array(Arc::new(array))
}
RecursedValue::Object { mut partial, next_key } => {
Arc::get_mut(&mut partial).expect("sole writer").insert(next_key, value);
partial.push((next_key, value));

match parser.object_step::<StringDecoder>(tape) {
Ok(Some(next_key)) => match parser.peek() {
Expand All @@ -595,7 +601,7 @@ fn take_value_recursive<'j, 's>(
_ => (),
}

JsonValue::Object(partial)
JsonValue::Object(Arc::new(partial))
}
}
};
Expand Down
Loading

0 comments on commit 7cff228

Please sign in to comment.