From 971a0911e6ba5a82176dd765fd457abf31ec4e9e Mon Sep 17 00:00:00 2001 From: odow Date: Fri, 23 Aug 2024 11:58:53 +1200 Subject: [PATCH 1/5] Fixes and improvements for the 23.1.0 test schema --- src/validation.jl | 53 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 40 insertions(+), 13 deletions(-) diff --git a/src/validation.jl b/src/validation.jl index a049236..86fb7e7 100644 --- a/src/validation.jl +++ b/src/validation.jl @@ -117,6 +117,28 @@ _resolve_refs(schema, explored_refs = Any[]) = schema # Default fallback _validate(::Any, ::Any, ::Val, ::Any, ::String) = nothing +# JSON treats == between Bool and Number differently to Julia, so: +# false != 0 +# true != 1 +# 0 == 0.0 +# 1.0 == 1 +_isequal(x, y) = x == y + +_isequal(::Bool, ::Number) = false + +_isequal(::Number, ::Bool) = false + +_isequal(x::Bool, y::Bool) = x == y + +function _isequal(x::Vector, y::Vector) + return length(x) == length(y) && all(_isequal.(x, y)) +end + +function _isequal(x::Dict, y::Dict) + return Set(keys(x)) == Set(keys(y)) && + all(_isequal(v, y[k]) for (k, v) in x) +end + ### ### Core JSON Schema ### @@ -471,6 +493,7 @@ _is_type(::Any, ::Val) = false _is_type(::Array, ::Val{:array}) = true _is_type(::Bool, ::Val{:boolean}) = true _is_type(::Integer, ::Val{:integer}) = true +_is_type(x::Float64, ::Val{:integer}) = isinteger(x) _is_type(::Real, ::Val{:number}) = true _is_type(::Nothing, ::Val{:null}) = true _is_type(::Missing, ::Val{:null}) = true @@ -482,7 +505,7 @@ _is_type(::Bool, ::Val{:integer}) = false # 6.1.2 function _validate(x, schema, ::Val{:enum}, val, path::String) - if !any(x == v for v in val) + if !any(_isequal(x, v) for v in val) return SingleIssue(x, path, "enum", val) end return @@ -490,7 +513,7 @@ end # 6.1.3 function _validate(x, schema, ::Val{:const}, val, path::String) - if x != val + if !_isequal(x, val) return SingleIssue(x, path, "const", val) end return @@ -605,7 +628,7 @@ function _validate( x::String, schema, ::Val{:maxLength}, - val::Integer, + val::Union{Integer,Float64}, path::String, ) if length(x) > val @@ -619,7 +642,7 @@ function _validate( x::String, schema, ::Val{:minLength}, - val::Integer, + val::Union{Integer,Float64}, path::String, ) if length(x) < val @@ -651,7 +674,7 @@ function _validate( x::AbstractVector, schema, ::Val{:maxItems}, - val::Integer, + val::Union{Integer,Float64}, path::String, ) if length(x) > val @@ -665,7 +688,7 @@ function _validate( x::AbstractVector, schema, ::Val{:minItems}, - val::Integer, + val::Union{Integer,Float64}, path::String, ) if length(x) < val @@ -682,11 +705,15 @@ function _validate( val::Bool, path::String, ) - # It isn't sufficient to just compare allunique on x, because Julia treats 0 == false, - # but JSON distinguishes them. - y = [(xx, typeof(xx)) for xx in x] - if val && !allunique(y) - return SingleIssue(x, path, "uniqueItems", val) + if !val + return + end + # TODO(odow): O(n^2) here. But probably not too bad, because there shouldn't + # be a large x. + for i in eachindex(x), j in eachindex(x) + if i != j && _isequal(x[i], x[j]) + return SingleIssue(x, path, "uniqueItems", val) + end end return end @@ -704,7 +731,7 @@ function _validate( x::AbstractDict, schema, ::Val{:maxProperties}, - val::Integer, + val::Union{Integer,Float64}, path::String, ) if length(x) > val @@ -718,7 +745,7 @@ function _validate( x::AbstractDict, schema, ::Val{:minProperties}, - val::Integer, + val::Union{Integer,Float64}, path::String, ) if length(x) < val From f7eee4fb528866e16e98e0cc5deeb6ba7d04574d Mon Sep 17 00:00:00 2001 From: odow Date: Fri, 23 Aug 2024 12:19:27 +1200 Subject: [PATCH 2/5] Update --- src/schema.jl | 6 +++++- src/validation.jl | 3 ++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/schema.jl b/src/schema.jl index 343a065..23edf29 100644 --- a/src/schema.jl +++ b/src/schema.jl @@ -36,7 +36,11 @@ function update_id(uri::URIs.URI, s::String) end function get_element(schema, path::AbstractString) - for element in split(path, "/"; keepempty = false) + elements = split(path, "/"; keepempty = true) + if isempty(first(elements)) + popfirst!(elements) + end + for element in elements schema = _recurse_get_element(schema, unescape_jpath(String(element))) end return schema diff --git a/src/validation.jl b/src/validation.jl index 86fb7e7..ac9de73 100644 --- a/src/validation.jl +++ b/src/validation.jl @@ -531,7 +531,8 @@ function _validate( val::Number, path::String, ) - if !isapprox(x / val, round(x / val)) + y = x / val + if !isfinite(y) || !isapprox(y, round(y)) return SingleIssue(x, path, "multipleOf", val) end return From 8abd5e77355ab103f3353bf04b3c9898817a46e2 Mon Sep 17 00:00:00 2001 From: odow Date: Fri, 23 Aug 2024 15:31:59 +1200 Subject: [PATCH 3/5] Update --- src/schema.jl | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/src/schema.jl b/src/schema.jl index 23edf29..ce1708d 100644 --- a/src/schema.jl +++ b/src/schema.jl @@ -28,9 +28,16 @@ function update_id(uri::URIs.URI, s::String) delete!(els, :uri) els[:fragment] = id2.fragment if !isempty(id2.path) - oldpath = match(r"^(.*/).*$", uri.path) - els[:path] = - oldpath === nothing ? id2.path : oldpath.captures[1] * id2.path + if startswith(id2.path, "/") # Absolute path + els[:path] = id2.path + else # Relative path + old_path = match(r"^(.*/).*$", uri.path) + if old_path === nothing + els[:path] = id2.path + else + els[:path] = old_path.captures[1] * id2.path + end + end end return URIs.URI(; els...) end @@ -106,13 +113,13 @@ function find_ref( end if !haskey(id_map, string(uri2)) # id_map doesn't have this key so, fetch the ref and add it to id_map. - id_map[string(uri2)] = if startswith(uri2.scheme, "http") + if startswith(uri2.scheme, "http") @info("fetching remote ref $(uri2)") - get_remote_schema(uri2).data + id_map[string(uri2)] = get_remote_schema(uri2).data else @assert is_file_uri @info("loading local ref $(uri2)") - Schema( + id_map[string(uri2)] = Schema( JSON.parsefile(uri2.path); parent_dir = dirname(uri2.path), ).data @@ -158,6 +165,8 @@ function resolve_refs!( # marking it as resolved. This should prevent infinite recursions caused by # self referencing. schema["\$ref"] = find_ref(uri, id_map, v, parent_dir) + elseif k == "enum" || k == "const" + continue # Don't unpack refs inside const and enum. else resolve_refs!(v, uri, id_map, parent_dir) end @@ -197,7 +206,10 @@ function build_id_map!( uri = update_id(uri, schema["\$id"]) id_map[string(uri)] = schema end - for value in values(schema) + for (k, value) in schema + if k == "enum" || k == "const" + continue + end build_id_map!(id_map, value, uri) end return From ab10a6451c76b4beba54d304e8c351432987f965 Mon Sep 17 00:00:00 2001 From: odow Date: Fri, 23 Aug 2024 16:14:52 +1200 Subject: [PATCH 4/5] Update --- src/schema.jl | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/src/schema.jl b/src/schema.jl index ce1708d..d3fdfff 100644 --- a/src/schema.jl +++ b/src/schema.jl @@ -119,10 +119,11 @@ function find_ref( else @assert is_file_uri @info("loading local ref $(uri2)") - id_map[string(uri2)] = Schema( + local_schema = Schema( JSON.parsefile(uri2.path); parent_dir = dirname(uri2.path), - ).data + ) + id_map[string(uri2)] = schema.data end end return get_element(id_map[string(uri2)], uri.fragment) @@ -150,6 +151,17 @@ function resolve_refs!( id_map::AbstractDict, parent_dir::String, ) + # This $ref has not been resolved yet (otherwise it would not be a String). + # We will replace the path string with the schema element pointed at, thus + # marking it as resolved. This should prevent infinite recursions caused by + # self referencing. We also unpack the $ref first so that fields like $id + # do not interfere with it. + ref = get(schema, "\$ref", nothing) + ref_unpacked = false + if ref isa String + schema["\$ref"] = find_ref(uri, id_map, ref, parent_dir) + ref_unpacked = true + end if haskey(schema, "id") && schema["id"] isa String # This block is for draft 4. uri = update_id(uri, schema["id"]) @@ -159,13 +171,9 @@ function resolve_refs!( uri = update_id(uri, schema["\$id"]) end for (k, v) in schema - if k == "\$ref" && v isa String - # This ref has not been resolved yet (otherwise it would not be a String). - # We will replace the path string with the schema element pointed at, thus - # marking it as resolved. This should prevent infinite recursions caused by - # self referencing. - schema["\$ref"] = find_ref(uri, id_map, v, parent_dir) - elseif k == "enum" || k == "const" + if k == "\$ref" && ref_unpacked + continue # We've already unpacked this ref + elseif k in ("enum", "const") continue # Don't unpack refs inside const and enum. else resolve_refs!(v, uri, id_map, parent_dir) From 4b436dee2b4e8a90f3b7bf511847144432cd5cef Mon Sep 17 00:00:00 2001 From: odow Date: Fri, 23 Aug 2024 16:18:27 +1200 Subject: [PATCH 5/5] Update --- src/schema.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/schema.jl b/src/schema.jl index d3fdfff..1166133 100644 --- a/src/schema.jl +++ b/src/schema.jl @@ -123,7 +123,7 @@ function find_ref( JSON.parsefile(uri2.path); parent_dir = dirname(uri2.path), ) - id_map[string(uri2)] = schema.data + id_map[string(uri2)] = local_schema.data end end return get_element(id_map[string(uri2)], uri.fragment)