Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

trs/wip/json-schema #243

Draft
wants to merge 2 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
307 changes: 307 additions & 0 deletions schema/deploy/functions/validate_json_schema.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,307 @@
-- Deploy seattleflu/schema:functions/validate_json_schema to pg

begin;

set search_path to public;

/* Since we intend to stay on cloud database providers (Azure, AWS), we can't
* install custom Pg extensions. Luckily, this JSON Schema extension is
* written in PL/pgSQL and under an MIT-style license, which means we can
* vendor the definitions very easily ourselves.
* -trs, 9 April 2019
*/

/*
Based on <https://raw.githubusercontent.com/gavinwahl/postgres-json-schema/5a257e19a1569a77b82e9182b0b7d9fc8b6f6382/postgres-json-schema--0.1.0.sql>,
with modifications to mark the functions parallel-safe and to return null when
given null data.

Copyright (c) 2016, Gavin Wahl

Permission to use, copy, modify, and distribute this software and its
documentation for any purpose, without fee, and without a written agreement is
hereby granted, provided that the above copyright notice and this paragraph and
the following two paragraphs appear in all copies.

IN NO EVENT SHALL GAVIN WAHL BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING
OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF GAVIN WAHL HAS
BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

GAVIN WAHL SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND GAVIN WAHL
HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR
MODIFICATIONS.
*/

CREATE OR REPLACE FUNCTION _validate_json_schema_type(type text, data jsonb) RETURNS boolean AS $f$
BEGIN
IF type = 'integer' THEN
IF jsonb_typeof(data) != 'number' THEN
RETURN false;
END IF;
IF trunc(data::text::numeric) != data::text::numeric THEN
RETURN false;
END IF;
ELSE
IF type != jsonb_typeof(data) THEN
RETURN false;
END IF;
END IF;
RETURN true;
END;
$f$ LANGUAGE 'plpgsql' IMMUTABLE PARALLEL SAFE;


CREATE OR REPLACE FUNCTION validate_json_schema(schema jsonb, data jsonb, root_schema jsonb DEFAULT NULL) RETURNS boolean AS $f$
DECLARE
prop text;
item jsonb;
path text[];
types text[];
pattern text;
props text[];
BEGIN
IF data IS NULL THEN
RETURN NULL;
END IF;

IF root_schema IS NULL THEN
root_schema = schema;
END IF;

IF schema ? 'type' THEN
IF jsonb_typeof(schema->'type') = 'array' THEN
types = ARRAY(SELECT jsonb_array_elements_text(schema->'type'));
ELSE
types = ARRAY[schema->>'type'];
END IF;
IF (SELECT NOT bool_or(_validate_json_schema_type(type, data)) FROM unnest(types) type) THEN
RETURN false;
END IF;
END IF;

IF schema ? 'properties' THEN
FOR prop IN SELECT jsonb_object_keys(schema->'properties') LOOP
IF data ? prop AND NOT validate_json_schema(schema->'properties'->prop, data->prop, root_schema) THEN
RETURN false;
END IF;
END LOOP;
END IF;

IF schema ? 'required' AND jsonb_typeof(data) = 'object' THEN
IF NOT ARRAY(SELECT jsonb_object_keys(data)) @>
ARRAY(SELECT jsonb_array_elements_text(schema->'required')) THEN
RETURN false;
END IF;
END IF;

IF schema ? 'items' AND jsonb_typeof(data) = 'array' THEN
IF jsonb_typeof(schema->'items') = 'object' THEN
FOR item IN SELECT jsonb_array_elements(data) LOOP
IF NOT validate_json_schema(schema->'items', item, root_schema) THEN
RETURN false;
END IF;
END LOOP;
ELSE
IF NOT (
SELECT bool_and(i > jsonb_array_length(schema->'items') OR validate_json_schema(schema->'items'->(i::int - 1), elem, root_schema))
FROM jsonb_array_elements(data) WITH ORDINALITY AS t(elem, i)
) THEN
RETURN false;
END IF;
END IF;
END IF;

IF jsonb_typeof(schema->'additionalItems') = 'boolean' and NOT (schema->'additionalItems')::text::boolean AND jsonb_typeof(schema->'items') = 'array' THEN
IF jsonb_array_length(data) > jsonb_array_length(schema->'items') THEN
RETURN false;
END IF;
END IF;

IF jsonb_typeof(schema->'additionalItems') = 'object' THEN
IF NOT (
SELECT bool_and(validate_json_schema(schema->'additionalItems', elem, root_schema))
FROM jsonb_array_elements(data) WITH ORDINALITY AS t(elem, i)
WHERE i > jsonb_array_length(schema->'items')
) THEN
RETURN false;
END IF;
END IF;

IF schema ? 'minimum' AND jsonb_typeof(data) = 'number' THEN
IF data::text::numeric < (schema->>'minimum')::numeric THEN
RETURN false;
END IF;
END IF;

IF schema ? 'maximum' AND jsonb_typeof(data) = 'number' THEN
IF data::text::numeric > (schema->>'maximum')::numeric THEN
RETURN false;
END IF;
END IF;

IF COALESCE((schema->'exclusiveMinimum')::text::bool, FALSE) THEN
IF data::text::numeric = (schema->>'minimum')::numeric THEN
RETURN false;
END IF;
END IF;

IF COALESCE((schema->'exclusiveMaximum')::text::bool, FALSE) THEN
IF data::text::numeric = (schema->>'maximum')::numeric THEN
RETURN false;
END IF;
END IF;

IF schema ? 'anyOf' THEN
IF NOT (SELECT bool_or(validate_json_schema(sub_schema, data, root_schema)) FROM jsonb_array_elements(schema->'anyOf') sub_schema) THEN
RETURN false;
END IF;
END IF;

IF schema ? 'allOf' THEN
IF NOT (SELECT bool_and(validate_json_schema(sub_schema, data, root_schema)) FROM jsonb_array_elements(schema->'allOf') sub_schema) THEN
RETURN false;
END IF;
END IF;

IF schema ? 'oneOf' THEN
IF 1 != (SELECT COUNT(*) FROM jsonb_array_elements(schema->'oneOf') sub_schema WHERE validate_json_schema(sub_schema, data, root_schema)) THEN
RETURN false;
END IF;
END IF;

IF COALESCE((schema->'uniqueItems')::text::boolean, false) THEN
IF (SELECT COUNT(*) FROM jsonb_array_elements(data)) != (SELECT count(DISTINCT val) FROM jsonb_array_elements(data) val) THEN
RETURN false;
END IF;
END IF;

IF schema ? 'additionalProperties' AND jsonb_typeof(data) = 'object' THEN
props := ARRAY(
SELECT key
FROM jsonb_object_keys(data) key
WHERE key NOT IN (SELECT jsonb_object_keys(schema->'properties'))
AND NOT EXISTS (SELECT * FROM jsonb_object_keys(schema->'patternProperties') pat WHERE key ~ pat)
);
IF jsonb_typeof(schema->'additionalProperties') = 'boolean' THEN
IF NOT (schema->'additionalProperties')::text::boolean AND jsonb_typeof(data) = 'object' AND NOT props <@ ARRAY(SELECT jsonb_object_keys(schema->'properties')) THEN
RETURN false;
END IF;
ELSEIF NOT (
SELECT bool_and(validate_json_schema(schema->'additionalProperties', data->key, root_schema))
FROM unnest(props) key
) THEN
RETURN false;
END IF;
END IF;

IF schema ? '$ref' THEN
path := ARRAY(
SELECT regexp_replace(regexp_replace(path_part, '~1', '/'), '~0', '~')
FROM UNNEST(regexp_split_to_array(schema->>'$ref', '/')) path_part
);
-- ASSERT path[1] = '#', 'only refs anchored at the root are supported';
IF NOT validate_json_schema(root_schema #> path[2:array_length(path, 1)], data, root_schema) THEN
RETURN false;
END IF;
END IF;

IF schema ? 'enum' THEN
IF NOT EXISTS (SELECT * FROM jsonb_array_elements(schema->'enum') val WHERE val = data) THEN
RETURN false;
END IF;
END IF;

IF schema ? 'minLength' AND jsonb_typeof(data) = 'string' THEN
IF char_length(data #>> '{}') < (schema->>'minLength')::numeric THEN
RETURN false;
END IF;
END IF;

IF schema ? 'maxLength' AND jsonb_typeof(data) = 'string' THEN
IF char_length(data #>> '{}') > (schema->>'maxLength')::numeric THEN
RETURN false;
END IF;
END IF;

IF schema ? 'not' THEN
IF validate_json_schema(schema->'not', data, root_schema) THEN
RETURN false;
END IF;
END IF;

IF schema ? 'maxProperties' AND jsonb_typeof(data) = 'object' THEN
IF (SELECT count(*) FROM jsonb_object_keys(data)) > (schema->>'maxProperties')::numeric THEN
RETURN false;
END IF;
END IF;

IF schema ? 'minProperties' AND jsonb_typeof(data) = 'object' THEN
IF (SELECT count(*) FROM jsonb_object_keys(data)) < (schema->>'minProperties')::numeric THEN
RETURN false;
END IF;
END IF;

IF schema ? 'maxItems' AND jsonb_typeof(data) = 'array' THEN
IF (SELECT count(*) FROM jsonb_array_elements(data)) > (schema->>'maxItems')::numeric THEN
RETURN false;
END IF;
END IF;

IF schema ? 'minItems' AND jsonb_typeof(data) = 'array' THEN
IF (SELECT count(*) FROM jsonb_array_elements(data)) < (schema->>'minItems')::numeric THEN
RETURN false;
END IF;
END IF;

IF schema ? 'dependencies' THEN
FOR prop IN SELECT jsonb_object_keys(schema->'dependencies') LOOP
IF data ? prop THEN
IF jsonb_typeof(schema->'dependencies'->prop) = 'array' THEN
IF NOT (SELECT bool_and(data ? dep) FROM jsonb_array_elements_text(schema->'dependencies'->prop) dep) THEN
RETURN false;
END IF;
ELSE
IF NOT validate_json_schema(schema->'dependencies'->prop, data, root_schema) THEN
RETURN false;
END IF;
END IF;
END IF;
END LOOP;
END IF;

IF schema ? 'pattern' AND jsonb_typeof(data) = 'string' THEN
IF (data #>> '{}') !~ (schema->>'pattern') THEN
RETURN false;
END IF;
END IF;

IF schema ? 'patternProperties' AND jsonb_typeof(data) = 'object' THEN
FOR prop IN SELECT jsonb_object_keys(data) LOOP
FOR pattern IN SELECT jsonb_object_keys(schema->'patternProperties') LOOP
RAISE NOTICE 'prop %s, pattern %, schema %', prop, pattern, schema->'patternProperties'->pattern;
IF prop ~ pattern AND NOT validate_json_schema(schema->'patternProperties'->pattern, data->prop, root_schema) THEN
RETURN false;
END IF;
END LOOP;
END LOOP;
END IF;

IF schema ? 'multipleOf' AND jsonb_typeof(data) = 'number' THEN
IF data::text::numeric % (schema->>'multipleOf')::numeric != 0 THEN
RETURN false;
END IF;
END IF;

RETURN true;
END;
$f$ LANGUAGE 'plpgsql' IMMUTABLE PARALLEL SAFE;

/* End of third-party copyright. */

comment on function validate_json_schema is
'Validate a jsonb value against a JSON Schema (draft-04)';

commit;
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
-- Deploy seattleflu/schema:receiving/presence-absence/document-validation to pg
-- requires: receiving/presence-absence
-- requires: functions/validate_json_schema

-- This constraint is in a separate change from the table definition so that it
-- may be easily reworked with sqitch later.

begin;

alter table receiving.presence_absence
add constraint presence_absence_document_validates check (
validate_json_schema($$
{
"$schema": "http://json-schema.org/draft-07/schema#",
"$id": "https://seattleflu.org/_schema/presence-absence.json",
"type": "object",
"title": "presence/absence document",
"required": [
"calls"
],
"properties": {
"calls": {
"type": "array",
"title": "set of call results",
"minItems": 1,
"items": {
"type": "object",
"title": "individual call result for a (source, target) pair",
"required": [
"source",
"target",
"present"
],
"properties": {
"source": {
"type": "object",
"title": "source material characterized by this call",
"description": "details such as unique identifiers",
"minProperties": 1
},
"target": {
"type": "object",
"title": "known target material being probed for within the source material",
"description": "details such as unique identifiers",
"minProperties": 1
},
"present": {
"type": ["boolean", "null"],
"title": "result of presence/absence call",
"description": "present (true), absent (false), indeterminate (null)"
}
}
}
}
}
} $$,
document::jsonb
) is true
);

commit;
8 changes: 8 additions & 0 deletions schema/revert/functions/validate_json_schema.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- Revert seattleflu/schema:functions/validate_json_schema from pg

begin;

drop function validate_json_schema(jsonb, jsonb, jsonb);
drop function _validate_json_schema_type(text, jsonb);

commit;
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- Revert seattleflu/schema:receiving/presence-absence/document-validation from pg

begin;

alter table receiving.presence_absence
drop constraint presence_absence_document_validates;

commit;
2 changes: 2 additions & 0 deletions schema/sqitch.plan
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,5 @@ warehouse/identifier [warehouse/schema uuid-ossp] 2019-03-29T18:17:42Z Thomas Si
warehouse/identifier/triggers/barcode-default-from-uuid [warehouse/identifier] 2019-03-29T22:42:40Z Thomas Sibley <[email protected]> # Default warehouse.identifier.barcode from uuid
warehouse/identifier/triggers/barcode-distance-check [warehouse/identifier functions/hamming_distance] 2019-03-29T22:55:18Z Thomas Sibley <[email protected]> # Exclude new identifiers with barcodes too close to existing barcodes
receiving/presence-absence [receiving/schema] 2019-04-08T22:27:17Z Thomas Sibley <[email protected]> # Receiving table for presence/absence calls
functions/validate_json_schema 2019-04-08T23:49:59Z Thomas Sibley <[email protected]> # Function to validate a JSON document using a JSON Schema
receiving/presence-absence/document-validation [receiving/presence-absence functions/validate_json_schema] 2019-04-09T16:00:56Z Thomas Sibley <[email protected]> # Validate presence/absence document with a JSON Schema
Loading