Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add allow_partial #1512

Merged
merged 21 commits into from
Nov 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
270 changes: 219 additions & 51 deletions benches/main.rs

Large diffs are not rendered by default.

17 changes: 16 additions & 1 deletion python/pydantic_core/_pydantic_core.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ class SchemaValidator:
from_attributes: bool | None = None,
context: Any | None = None,
self_instance: Any | None = None,
allow_partial: bool = False,
) -> Any:
"""
Validate a Python object against the schema and return the validated object.
Expand All @@ -110,6 +111,8 @@ class SchemaValidator:
[`info.context`][pydantic_core.core_schema.ValidationInfo.context].
self_instance: An instance of a model to set attributes on from validation; this is used when running
validation from the `__init__` method of a model.
allow_partial: Whether to allow partial validation; if `True` errors in the last element of sequences
and mappings are ignored.

Raises:
ValidationError: If validation fails.
Expand Down Expand Up @@ -143,6 +146,7 @@ class SchemaValidator:
strict: bool | None = None,
context: Any | None = None,
self_instance: Any | None = None,
allow_partial: bool = False,
) -> Any:
"""
Validate JSON data directly against the schema and return the validated Python object.
Expand All @@ -160,6 +164,8 @@ class SchemaValidator:
context: The context to use for validation, this is passed to functional validators as
[`info.context`][pydantic_core.core_schema.ValidationInfo.context].
self_instance: An instance of a model to set attributes on from validation.
allow_partial: Whether to allow partial validation; if `True` incomplete JSON will be parsed successfully
and errors in the last element of sequences and mappings are ignored.

Raises:
ValidationError: If validation fails or if the JSON data is invalid.
Expand All @@ -168,7 +174,14 @@ class SchemaValidator:
Returns:
The validated Python object.
"""
def validate_strings(self, input: _StringInput, *, strict: bool | None = None, context: Any | None = None) -> Any:
def validate_strings(
self,
input: _StringInput,
*,
strict: bool | None = None,
context: Any | None = None,
allow_partial: bool = False,
) -> Any:
"""
Validate a string against the schema and return the validated Python object.

Expand All @@ -181,6 +194,8 @@ class SchemaValidator:
If `None`, the value of [`CoreConfig.strict`][pydantic_core.core_schema.CoreConfig] is used.
context: The context to use for validation, this is passed to functional validators as
[`info.context`][pydantic_core.core_schema.ValidationInfo.context].
allow_partial: Whether to allow partial validation; if `True` errors in the last element of sequences
and mappings are ignored.

Raises:
ValidationError: If validation fails or if the JSON data is invalid.
Expand Down
4 changes: 2 additions & 2 deletions python/pydantic_core/core_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -2840,7 +2840,7 @@ def typed_dict_field(

Args:
schema: The schema to use for the field
required: Whether the field is required
required: Whether the field is required, otherwise uses the value from `total` on the typed dict
validation_alias: The alias(es) to use to find the field in the validation data
serialization_alias: The alias to use as a key when serializing
serialization_exclude: Whether to exclude the field when serializing
Expand Down Expand Up @@ -2916,7 +2916,7 @@ class MyTypedDict(TypedDict):
ref: optional unique identifier of the schema, used to reference the schema in other places
metadata: Any other information you want to include with the schema, not used by pydantic-core
extra_behavior: The extra behavior to use for the typed dict
total: Whether the typed dict is total
total: Whether the typed dict is total, otherwise uses `typed_dict_total` from config
populate_by_name: Whether the typed dict should populate by name
serialization: Custom serialization schema
"""
Expand Down
8 changes: 8 additions & 0 deletions src/errors/line_error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,14 @@ impl ValLineError {
self.error_type = error_type;
self
}

/// Returns the first item of this error's location, or `None` when the location is empty.
pub fn first_loc_item(&self) -> Option<&LocItem> {
    // Location items are stored in reverse order, so the first item is the final list entry.
    if let Location::List(items) = &self.location {
        items.last()
    } else {
        None
    }
}
}

#[cfg_attr(debug_assertions, derive(Debug))]
Expand Down
2 changes: 1 addition & 1 deletion src/errors/location.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ use crate::lookup_key::{LookupPath, PathItem};

/// Used to store individual items of the error location, e.g. a string for key/field names
/// or a number for array indices.
#[derive(Clone)]
#[derive(Clone, Eq, PartialEq)]
#[cfg_attr(debug_assertions, derive(Debug))]
pub enum LocItem {
/// string type key, used to identify items from a dict or anything that implements `__getitem__`
Expand Down
5 changes: 5 additions & 0 deletions src/input/input_abstract.rs
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,8 @@ pub trait ValidatedDict<'py> {
&'a self,
consumer: impl ConsumeIterator<ValResult<(Self::Key<'a>, Self::Item<'a>)>, Output = R>,
) -> ValResult<R>;
/// Returns the last key of the dict, or `None` if no last key is available.
///
/// Used in partial mode to check all errors occurred in the last value.
fn last_key(&self) -> Option<Self::Key<'_>>;
}

/// For validations from a list
Expand Down Expand Up @@ -276,6 +278,9 @@ impl<'py> ValidatedDict<'py> for Never {
) -> ValResult<R> {
unreachable!()
}
fn last_key(&self) -> Option<Self::Key<'_>> {
unreachable!()
davidhewitt marked this conversation as resolved.
Show resolved Hide resolved
}
}

impl<'py> ValidatedList<'py> for Never {
Expand Down
4 changes: 4 additions & 0 deletions src/input/input_json.rs
Original file line number Diff line number Diff line change
Expand Up @@ -516,6 +516,10 @@ impl<'py, 'data> ValidatedDict<'py> for &'_ JsonObject<'data> {
) -> ValResult<R> {
Ok(consumer.consume_iterator(LazyIndexMap::iter(self).map(|(k, v)| Ok((k.as_ref(), v)))))
}

/// Returns the final key produced by the object's key iterator, or `None` when there are no keys.
fn last_key(&self) -> Option<Self::Key<'_>> {
    let final_key = self.keys().last()?;
    Some(final_key.as_ref())
}
}

impl<'a, 'py, 'data> ValidatedList<'py> for &'a JsonArray<'data> {
Expand Down
15 changes: 15 additions & 0 deletions src/input/input_python.rs
Original file line number Diff line number Diff line change
Expand Up @@ -823,6 +823,21 @@ impl<'py> ValidatedDict<'py> for GenericPyMapping<'_, 'py> {
Self::GetAttr(obj, _) => Ok(consumer.consume_iterator(iterate_attributes(obj)?)),
}
}

/// Returns the last key of the underlying input, or `None` when it cannot be determined.
///
/// Any error raised while calling `keys()` or iterating its result is discarded and
/// reported as `None`.
fn last_key(&self) -> Option<Self::Key<'_>> {
    match self {
        // no last key is reported for this variant
        Self::GetAttr(..) => None,
        Self::Dict(py_dict) => py_dict.keys().iter().last(),
        // see https://github.com/pydantic/pydantic-core/pull/1512#discussion_r1826057970
        Self::Mapping(py_mapping) => {
            let keys_obj = py_mapping.call_method0(intern!(py_mapping.py(), "keys")).ok()?;
            let key_iter = keys_obj.iter().ok()?;
            key_iter.last()?.ok()
        }
    }
}
}

/// Container for all the collections (sized iterable containers) types, which
Expand Down
8 changes: 8 additions & 0 deletions src/input/input_string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -303,4 +303,12 @@ impl<'py> ValidatedDict<'py> for StringMappingDict<'py> {
.map(|(key, val)| Ok((StringMapping::new_key(key)?, StringMapping::new_value(val)?))),
))
}

/// Returns the last key of the wrapped dict converted with `StringMapping::new_key`,
/// or `None` when the dict has no keys or the conversion fails.
fn last_key(&self) -> Option<Self::Key<'_>> {
    let final_key = self.0.keys().iter().last()?;
    StringMapping::new_key(final_key).ok()
}
}
22 changes: 15 additions & 7 deletions src/input/return_enums.rs
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,9 @@ pub(crate) fn validate_iter_to_vec<'py>(
) -> ValResult<Vec<PyObject>> {
let mut output: Vec<PyObject> = Vec::with_capacity(capacity);
let mut errors: Vec<ValLineError> = Vec::new();
for (index, item_result) in iter.enumerate() {

for (index, is_last_partial, item_result) in state.enumerate_last_partial(iter) {
state.allow_partial = is_last_partial;
let item = item_result.map_err(|e| any_next_error!(py, e, max_length_check.input, index))?;
match validator.validate(py, item.borrow_input(), state) {
Ok(item) => {
Expand All @@ -137,9 +139,11 @@ pub(crate) fn validate_iter_to_vec<'py>(
}
Err(ValError::LineErrors(line_errors)) => {
max_length_check.incr()?;
errors.extend(line_errors.into_iter().map(|err| err.with_outer_location(index)));
if fail_fast {
break;
if !is_last_partial {
errors.extend(line_errors.into_iter().map(|err| err.with_outer_location(index)));
if fail_fast {
return Err(ValError::LineErrors(errors));
}
}
}
Err(ValError::Omit) => (),
Expand Down Expand Up @@ -197,7 +201,9 @@ pub(crate) fn validate_iter_to_set<'py>(
fail_fast: bool,
) -> ValResult<()> {
let mut errors: Vec<ValLineError> = Vec::new();
for (index, item_result) in iter.enumerate() {

for (index, is_last_partial, item_result) in state.enumerate_last_partial(iter) {
state.allow_partial = is_last_partial;
let item = item_result.map_err(|e| any_next_error!(py, e, input, index))?;
match validator.validate(py, item.borrow_input(), state) {
Ok(item) => {
Expand All @@ -220,13 +226,15 @@ pub(crate) fn validate_iter_to_set<'py>(
}
}
Err(ValError::LineErrors(line_errors)) => {
errors.extend(line_errors.into_iter().map(|err| err.with_outer_location(index)));
if !is_last_partial {
errors.extend(line_errors.into_iter().map(|err| err.with_outer_location(index)));
}
}
Err(ValError::Omit) => (),
Err(err) => return Err(err),
}
if fail_fast && !errors.is_empty() {
break;
return Err(ValError::LineErrors(errors));
}
}

Expand Down
2 changes: 1 addition & 1 deletion src/serializers/fields.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ impl SerField {
}
}

pub fn get_key_py<'py>(&'py self, py: Python<'py>, extra: &Extra) -> &Bound<'py, PyAny> {
pub fn get_key_py<'py>(&self, py: Python<'py>, extra: &Extra) -> &Bound<'py, PyAny> {
if extra.by_alias {
if let Some(ref alias_py) = self.alias_py {
return alias_py.bind(py);
Expand Down
4 changes: 2 additions & 2 deletions src/url.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ impl PyUrl {
pub fn py_new(py: Python, url: &Bound<'_, PyAny>) -> PyResult<Self> {
let schema_obj = SCHEMA_DEFINITION_URL
.get_or_init(py, || build_schema_validator(py, "url"))
.validate_python(py, url, None, None, None, None)?;
.validate_python(py, url, None, None, None, None, false)?;
schema_obj.extract(py)
}

Expand Down Expand Up @@ -225,7 +225,7 @@ impl PyMultiHostUrl {
pub fn py_new(py: Python, url: &Bound<'_, PyAny>) -> PyResult<Self> {
let schema_obj = SCHEMA_DEFINITION_MULTI_HOST_URL
.get_or_init(py, || build_schema_validator(py, "multi-host-url"))
.validate_python(py, url, None, None, None, None)?;
.validate_python(py, url, None, None, None, None, false)?;
schema_obj.extract(py)
}

Expand Down
3 changes: 3 additions & 0 deletions src/validators/arguments.rs
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,9 @@ impl Validator for ArgumentsValidator {
input: &(impl Input<'py> + ?Sized),
state: &mut ValidationState<'_, 'py>,
) -> ValResult<PyObject> {
// this validator does not yet support partial validation, disable it to avoid incorrect results
state.allow_partial = false;

let args = input.validate_args()?;

let mut output_args: Vec<PyObject> = Vec::with_capacity(self.positional_params_count);
Expand Down
3 changes: 3 additions & 0 deletions src/validators/dataclass.rs
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,9 @@ impl Validator for DataclassArgsValidator {
input: &(impl Input<'py> + ?Sized),
state: &mut ValidationState<'_, 'py>,
) -> ValResult<PyObject> {
// this validator does not yet support partial validation, disable it to avoid incorrect results
state.allow_partial = false;

let args = input.validate_dataclass_args(&self.dataclass_name)?;

let output_dict = PyDict::new_bound(py);
Expand Down
3 changes: 3 additions & 0 deletions src/validators/definitions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,9 @@ impl Validator for DefinitionRefValidator {
input: &(impl Input<'py> + ?Sized),
state: &mut ValidationState<'_, 'py>,
) -> ValResult<PyObject> {
// this validator does not yet support partial validation, disable it to avoid incorrect results
state.allow_partial = false;

self.definition.read(|validator| {
let validator = validator.unwrap();
if let Some(id) = input.as_python().map(py_identity) {
Expand Down
16 changes: 9 additions & 7 deletions src/validators/dict.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,8 @@ where
let output = PyDict::new_bound(self.py);
let mut errors: Vec<ValLineError> = Vec::new();

for item_result in iterator {
for (_, is_last_partial, item_result) in self.state.enumerate_last_partial(iterator) {
self.state.allow_partial = false;
let (key, value) = item_result?;
let output_key = match self.key_validator.validate(self.py, key.borrow_input(), self.state) {
Ok(value) => Some(value),
Expand All @@ -124,19 +125,20 @@ where
Err(ValError::Omit) => continue,
Err(err) => return Err(err),
};
self.state.allow_partial = is_last_partial;
let output_value = match self.value_validator.validate(self.py, value.borrow_input(), self.state) {
Ok(value) => Some(value),
Ok(value) => value,
Err(ValError::LineErrors(line_errors)) => {
for err in line_errors {
errors.push(err.with_outer_location(key.clone()));
if !is_last_partial {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok I'm feeling very baited to refactor these error combiners now 😂

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Opened #1517 as a spike to start down that road.

errors.extend(line_errors.into_iter().map(|err| err.with_outer_location(key.clone())));
}
None
continue;
}
Err(ValError::Omit) => continue,
Err(err) => return Err(err),
};
if let (Some(key), Some(value)) = (output_key, output_value) {
output.set_item(key, value)?;
if let Some(key) = output_key {
output.set_item(key, output_value)?;
}
}

Expand Down
7 changes: 5 additions & 2 deletions src/validators/generator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,9 @@ impl Validator for GeneratorValidator {
input: &(impl Input<'py> + ?Sized),
state: &mut ValidationState<'_, 'py>,
) -> ValResult<PyObject> {
// this validator does not yet support partial validation, disable it to avoid incorrect results
state.allow_partial = false;

let iterator = input.validate_iter()?.into_static();
let validator = self.item_validator.as_ref().map(|v| {
InternalValidator::new(
Expand Down Expand Up @@ -279,7 +282,7 @@ impl InternalValidator {
self_instance: self.self_instance.as_ref().map(|data| data.bind(py)),
cache_str: self.cache_str,
};
let mut state = ValidationState::new(extra, &mut self.recursion_guard);
let mut state = ValidationState::new(extra, &mut self.recursion_guard, false);
state.exactness = self.exactness;
let result = self
.validator
Expand Down Expand Up @@ -314,7 +317,7 @@ impl InternalValidator {
self_instance: self.self_instance.as_ref().map(|data| data.bind(py)),
cache_str: self.cache_str,
};
let mut state = ValidationState::new(extra, &mut self.recursion_guard);
let mut state = ValidationState::new(extra, &mut self.recursion_guard, false);
state.exactness = self.exactness;
let result = self.validator.validate(py, input, &mut state).map_err(|e| {
ValidationError::from_val_error(
Expand Down
12 changes: 8 additions & 4 deletions src/validators/json.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
use jiter::FloatMode;
use pyo3::intern;
use pyo3::prelude::*;
use pyo3::types::PyDict;

use jiter::{JsonValue, PartialMode, PythonParse};
use jiter::{FloatMode, JsonValue, PartialMode, PythonParse};

use crate::errors::{ErrorType, ErrorTypeDefaults, ValError, ValLineError, ValResult};
use crate::input::{EitherBytes, Input, InputType, ValidationMatch};
Expand Down Expand Up @@ -60,7 +59,8 @@ impl Validator for JsonValidator {
let json_bytes = json_either_bytes.as_slice();
match self.validator {
Some(ref validator) => {
let json_value = JsonValue::parse(json_bytes, true).map_err(|e| map_json_err(input, e, json_bytes))?;
let json_value = JsonValue::parse_with_config(json_bytes, true, state.allow_partial)
.map_err(|e| map_json_err(input, e, json_bytes))?;
let mut json_state = state.rebind_extra(|e| {
e.input_type = InputType::Json;
});
Expand All @@ -70,7 +70,11 @@ impl Validator for JsonValidator {
let parse_builder = PythonParse {
allow_inf_nan: true,
cache_mode: state.cache_str(),
partial_mode: PartialMode::Off,
partial_mode: if state.allow_partial {
PartialMode::TrailingStrings
} else {
PartialMode::Off
},
catch_duplicate_keys: false,
float_mode: FloatMode::Float,
};
Expand Down
Loading
Loading