Skip to content

Commit

Permalink
Format code and add docstrings
Browse files Browse the repository at this point in the history
  • Loading branch information
MartinHammarstedt committed Jan 29, 2024
1 parent eeaaccb commit c431634
Showing 1 changed file with 46 additions and 22 deletions.
68 changes: 46 additions & 22 deletions sparv/core/schema.py
Original file line number Diff line number Diff line change
@@ -1,30 +1,34 @@
"""
Functions for creating and validating JSON schemas.
"""
"""Functions for creating and validating JSON schemas."""

import itertools
import json
import re
from collections import defaultdict
from typing import DefaultDict, List, Optional, Tuple, Type, Union
from typing import DefaultDict, List, Optional, Sequence, Tuple, Type, Union

import typing_inspect

from sparv.api import Config, SparvErrorMessage
from sparv.core import registry

NO_COND = ((), ())


class BaseProperty:
    """Base class for other types of properties.

    The JSON schema fragment describing the property is stored in ``self.schema``.
    """

    def __init__(self, prop_type: Optional[str], allow_null: Optional[bool] = False, **kwargs) -> None:
        """Build the schema dictionary for this property.

        Args:
            prop_type: JSON schema type name. If None (or otherwise falsy), no "type" keyword is
                emitted and the schema consists of ``kwargs`` only.
            allow_null: If true and ``prop_type`` is set, the type becomes ``[prop_type, "null"]``.
            **kwargs: Additional schema keywords, copied into the schema as they are.
        """
        if not prop_type:
            # Untyped property: only the extra keywords are kept (allow_null has no effect here).
            self.schema = kwargs
            return

        self.schema = {
            "type": [prop_type, "null"] if allow_null else prop_type,
            # Unpacked last, so an explicit "type" keyword argument overrides the computed one.
            **kwargs,
        }


class Any(BaseProperty):
    """Class representing any type."""

    def __init__(self, **kwargs) -> None:
        """Create a property without a type restriction.

        Args:
            **kwargs: Schema keywords, forwarded unchanged to ``BaseProperty``.
        """
        # A prop_type of None makes BaseProperty use the keyword arguments alone as the schema.
        super().__init__(prop_type=None, **kwargs)


class String(BaseProperty):
"""Class representing a string."""
def __init__(
Expand All @@ -48,6 +52,7 @@ def __init__(
kwargs["maxLength"] = max_len
super().__init__("string", allow_null, **kwargs)


class Integer(BaseProperty):
"""Class representing an integer."""
def __init__(
Expand All @@ -62,6 +67,7 @@ def __init__(
kwargs["maximum"] = max_value
super().__init__("integer", **kwargs)


class Number(BaseProperty):
"""Class representing either a float or an integer."""
def __init__(
Expand All @@ -76,15 +82,21 @@ def __init__(
kwargs["maximum"] = max_value
super().__init__("number", **kwargs)


class Boolean(BaseProperty):
    """Class representing a boolean."""

    def __init__(self, **kwargs) -> None:
        """Create a property of JSON schema type "boolean".

        Args:
            **kwargs: Extra keyword arguments, forwarded unchanged to ``BaseProperty``.
        """
        super().__init__(prop_type="boolean", **kwargs)


class Null(BaseProperty):
    """Class representing a null value."""

    def __init__(self, **kwargs) -> None:
        """Create a property of JSON schema type "null".

        Args:
            **kwargs: Extra keyword arguments, forwarded unchanged to ``BaseProperty``.
        """
        super().__init__(prop_type="null", **kwargs)


class Array(BaseProperty):
"""Class representing an array of values."""
def __init__(
self,
items: Optional[Type[Union[String, Integer, Number, Boolean, Null, Any, "Array", "Object"]]] = None,
Expand All @@ -94,8 +106,13 @@ def __init__(
kwargs["items"] = items().schema
super().__init__("array", **kwargs)


class Object:
def __init__(self, additional_properties: Union[dict, bool] = True, description: Optional[str] = None, **kwargs):
"""Class representing an object."""
def __init__(
self, additional_properties: Union[dict, bool] = True, description: Optional[str] = None,
**kwargs
):
if additional_properties is False or isinstance(additional_properties, dict):
kwargs["additionalProperties"] = additional_properties
if description:
Expand Down Expand Up @@ -124,8 +141,9 @@ def add_property(
prop_obj: Union[List, Union[String, Integer, Number, "Object", Any]],
required: bool = False,
condition: Optional[Tuple[Tuple["Object", ...], Tuple["Object", ...]]] = None
):
if condition and not condition == NO_COND:
) -> "Object":
"""Add a property to the object."""
if condition and condition != NO_COND:
self.allof[condition].append((name, prop_obj))
else:
self.properties[name] = prop_obj
Expand All @@ -134,7 +152,8 @@ def add_property(
return self

@property
def schema(self):
def schema(self) -> dict:
"""Return JSON schema for current object and its children as a dictionary."""
prop_schemas = {}
for name, prop_obj in self.properties.items():
if isinstance(prop_obj, list):
Expand Down Expand Up @@ -177,9 +196,12 @@ def schema(self):
self.obj_schema["allOf"] = conditionals
return self.obj_schema


class JsonSchema(Object):
"""Class representing a JSON schema."""

def __init__(self):
def __init__(self) -> None:
"""Initialize the JSON schema."""
super().__init__(**{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://spraakbanken.gu.se/sparv/schema.json",
Expand All @@ -189,11 +211,12 @@ def __init__(self):
"unevaluatedProperties": False
})

def to_json(self):
def to_json(self) -> str:
"""Return the JSON schema as a string."""
return json.dumps(self.schema, indent=2)


def get_class_from_type(t):
def get_class_from_type(t: Type) -> Type:
"""Get JSON schema class from Python type."""
types = {
str: String,
Expand Down Expand Up @@ -254,16 +277,16 @@ def handle_object(

for combination in combinations:
if no_cond:
combination = (NO_COND,) + combination
combination = (NO_COND, *combination)

child_obj = Object(additional_properties=False, description=description)

for cond in combination:
for subkey, prop in children[cond]:
child_obj.add_property(subkey, prop, required=is_condition)
positive_conds = tuple(set(cc for c in combination for cc in c[0] or (None,)))
positive_conds = tuple({cc for c in combination for cc in c[0] or (None,)})
negative_conds = tuple(
set(cc for c in conds if c != NO_COND for cc in c[0] if cc not in positive_conds)
{cc for c in conds if c != NO_COND for cc in c[0] if cc not in positive_conds}
)

if not set(positive_conds).intersection(set(negative_conds)):
Expand All @@ -274,7 +297,9 @@ def handle_object(
prop, condition = handle_property(structure[key]["_cfg"])
except ValueError:
full_key = f"{parent_name}.{key}" if parent_name else key
raise ValueError(f"Unsupported datatype for '{full_key}': '{structure[key]['_cfg'].datatype}'")
raise ValueError(
f"Unsupported datatype for '{full_key}': '{structure[key]['_cfg'].datatype}'"
) from None

conditionals[(condition, ())].append((key, prop))

Expand All @@ -293,8 +318,7 @@ def handle_object(
def handle_property(
cfg: Config
) -> Tuple[Union[BaseProperty, List[BaseProperty]], Tuple[Object, ...]]:
"""
Handle a property and its conditions.
"""Handle a property and its conditions.
Args:
cfg: A Config object
Expand Down Expand Up @@ -357,7 +381,7 @@ def handle_property(
elif cfg_datatype is None:
datatype = Any(**kwargs)
else:
raise ValueError()
raise ValueError
datatypes.append(datatype)

if cfg.conditions:
Expand Down Expand Up @@ -391,7 +415,7 @@ def validate(cfg: dict, schema: dict) -> None:
"""Validate a Sparv config using JSON schema."""
import jsonschema

def build_path_string(path):
def build_path_string(path: Sequence) -> str:
parts = []
for part in path:
if isinstance(part, str):
Expand All @@ -408,7 +432,7 @@ def build_path_string(path):
# Rephrase messages about unexpected keys
unknown_key = re.search(r"properties are not allowed \('(.+)' was unexpected", e.message)
if unknown_key:
full_path = ".".join(list(e.absolute_path) + [unknown_key.group(1)])
full_path = ".".join([*list(e.absolute_path), unknown_key.group(1)])
msg.append(f"Unexpected key in config file: {full_path!r}")
else:
msg.append(e.message)
Expand All @@ -417,4 +441,4 @@ def build_path_string(path):
if "description" in e.schema:
msg.append(f"Description of config key: {e.schema['description']}")

raise SparvErrorMessage("\n".join(msg))
raise SparvErrorMessage("\n".join(msg)) from None

0 comments on commit c431634

Please sign in to comment.