From 682cbd4f887d583dbd957b246a29635e98ccf975 Mon Sep 17 00:00:00 2001 From: Dmitry Dygalo Date: Wed, 9 Oct 2024 12:38:02 +0200 Subject: [PATCH] fix: Incomplete eager reference resolving Signed-off-by: Dmitry Dygalo --- CHANGELOG.md | 4 ++ crates/jsonschema-py/CHANGELOG.md | 4 ++ crates/jsonschema-referencing/src/registry.rs | 33 ++++++++++--- crates/jsonschema-referencing/src/uri.rs | 2 +- crates/jsonschema/src/keywords/ref_.rs | 46 +++++++++++++++++++ 5 files changed, 81 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 83febcf4..3a1def73 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ ## [Unreleased] +### Fixed + +- Incomplete external reference resolution. + ## [0.24.0] - 2024-10-20 ### Added diff --git a/crates/jsonschema-py/CHANGELOG.md b/crates/jsonschema-py/CHANGELOG.md index 8bf472e9..72ae0caa 100644 --- a/crates/jsonschema-py/CHANGELOG.md +++ b/crates/jsonschema-py/CHANGELOG.md @@ -2,6 +2,10 @@ ## [Unreleased] +### Fixed + +- Incomplete external reference resolution. + ## [0.24.0] - 2024-10-20 ### Added diff --git a/crates/jsonschema-referencing/src/registry.rs b/crates/jsonschema-referencing/src/registry.rs index 3647b83a..f35463a5 100644 --- a/crates/jsonschema-referencing/src/registry.rs +++ b/crates/jsonschema-referencing/src/registry.rs @@ -407,16 +407,32 @@ fn process_resources( } // Retrieve external resources for uri in external.drain() { - if !resources.contains_key(&uri) { + let mut fragmentless = uri.clone(); + fragmentless.set_fragment(None); + if !resources.contains_key(&fragmentless) { let retrieved = retriever - .retrieve(&uri.borrow()) - .map_err(|err| Error::unretrievable(uri.as_str(), Some(err)))?; + .retrieve(&fragmentless.borrow()) + .map_err(|err| Error::unretrievable(fragmentless.as_str(), Some(err)))?; let resource = Arc::new(Resource::from_contents_and_specification( retrieved, default_draft, )?); - resources.insert(uri.clone(), Arc::clone(&resource)); - queue.push_back((uri, resource)); + resources.insert(fragmentless.clone(), Arc::clone(&resource)); + if let Some(fragment) = uri.fragment() { + // The original `$ref` could have a fragment that points to a place that won't + // be discovered via the regular sub-resources discovery. Therefore we need to + // explicitly check it + if let Some(resolved) = resource.contents().pointer(fragment.as_str()) { + queue.push_back(( + uri, + Arc::new(Resource::from_contents_and_specification( + resolved.clone(), + default_draft, + )?), + )); + } + } + queue.push_back((fragmentless, resource)); } } } @@ -452,8 +468,11 @@ fn collect_external_resources( // Reference has already been seen return Ok(()); } - let mut resolved = uri::resolve_against(&base.borrow(), reference)?; - resolved.set_fragment(None); + let resolved = if reference.contains('#') && base.has_fragment() { + uri::resolve_against(&uri::DEFAULT_ROOT_URI.borrow(), reference)? + } else { + uri::resolve_against(&base.borrow(), reference)? + }; collected.insert(resolved); } } diff --git a/crates/jsonschema-referencing/src/uri.rs b/crates/jsonschema-referencing/src/uri.rs index 08d54c97..5ed59781 100644 --- a/crates/jsonschema-referencing/src/uri.rs +++ b/crates/jsonschema-referencing/src/uri.rs @@ -40,7 +40,7 @@ pub fn from_str(uri: &str) -> Result, Error> { } } -static DEFAULT_ROOT_URI: Lazy> = +pub(crate) static DEFAULT_ROOT_URI: Lazy> = Lazy::new(|| Uri::parse("json-schema:///".to_string()).expect("Invalid URI")); pub type EncodedString = EStr; diff --git a/crates/jsonschema/src/keywords/ref_.rs b/crates/jsonschema/src/keywords/ref_.rs index 905f21fa..9a91e91a 100644 --- a/crates/jsonschema/src/keywords/ref_.rs +++ b/crates/jsonschema/src/keywords/ref_.rs @@ -215,6 +215,7 @@ pub(crate) fn compile_recursive_ref<'a>( #[cfg(test)] mod tests { use crate::tests_util; + use referencing::{Draft, Retrieve, Uri}; use serde_json::{json, Value}; use test_case::test_case; @@ -403,4 +404,49 @@ mod tests { ); } } + + #[test] + fn test_resolving_finds_references_in_referenced_resources() { + let schema = json!({"$ref": "/indirection#/baz"}); + + struct MyRetrieve; + + impl Retrieve for MyRetrieve { + fn retrieve( + &self, + uri: &Uri<&str>, + ) -> Result> { + match uri.path().as_str() { + "/indirection" => Ok(json!({ + "$id": "/indirection", + "baz": { + "$ref": "/types#/foo" + } + })), + "/types" => Ok(json!({ + "$id": "/types", + "foo": { + "$ref": "#/bar" + }, + "bar": { + "type": "integer" + } + })), + _ => panic!("Not found"), + } + } + } + + let validator = match crate::options() + .with_draft(Draft::Draft201909) + .with_retriever(MyRetrieve) + .build(&schema) + { + Ok(validator) => validator, + Err(error) => panic!("{error}"), + }; + + assert!(validator.is_valid(&json!(2))); + assert!(!validator.is_valid(&json!(""))); + } }