Skip to content

Commit

Permalink
Merge pull request #1454 from Sage-Bionetworks/develop
Browse files Browse the repository at this point in the history
Schematic 24.7.1 Release
  • Loading branch information
andrewelamb authored Jul 2, 2024
2 parents 728e717 + 3f4185d commit 0d2527a
Show file tree
Hide file tree
Showing 39 changed files with 2,074 additions and 1,342 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ jobs:
run: |
# ran only on certain files for now
# add here when checked
poetry run black schematic --check
poetry run black schematic tests schematic_api --check
#----------------------------------------------
# type checking/enforcement
Expand All @@ -101,7 +101,7 @@ jobs:
# add here when checked
# poetry run mypy --install-types --non-interactive
# add here when enforced
poetry run mypy --disallow-untyped-defs --install-types --non-interactive schematic/configuration/*.py schematic/exceptions.py schematic/help.py schematic/loader.py schematic/version.py schematic/visualization schematic/utils/
poetry run mypy --disallow-untyped-defs --install-types --non-interactive schematic/schemas/ schematic/configuration/ schematic/exceptions.py schematic/help.py schematic/loader.py schematic/version.py schematic/visualization schematic/utils/
#----------------------------------------------
# linting
Expand Down
10 changes: 9 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,12 @@ repos:
# pre-commit's default_language_version, see
# https://pre-commit.com/#top_level-default_language_version
language_version: python3.10
files: schematic/
files: ^(tests|schematic|schematic_api)/

- repo: https://github.com/pycqa/isort
rev: 5.13.2
hooks:
- id: isort
name: isort (python)
files: ^(tests|schematic|schematic_api)/
args: ["--profile", "black", "--filter-files"]
2 changes: 0 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -141,8 +141,6 @@ model:
# This section is for using google sheets with Schematic
google_sheets:
# The Synapse id of the Google service account credentials.
service_acct_creds_synapse_id: "syn25171627"
# Path to the Google service account credentials file, either absolute or relative to this file
service_acct_creds: "schematic_service_account_creds.json"
# When doing google sheet validation (regex match) with the validation rules.
Expand Down
2 changes: 0 additions & 2 deletions config_example.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,6 @@ model:

# This section is for using google sheets with Schematic
google_sheets:
# The Synapse id of the Google service account credentials.
service_acct_creds_synapse_id: "syn25171627"
# Path to the Google service account credentials file, either absolute or relative to this file
service_acct_creds: "schematic_service_account_creds.json"
# When doing google sheet validation (regex match) with the validation rules.
Expand Down
1,595 changes: 866 additions & 729 deletions poetry.lock

Large diffs are not rendered by default.

10 changes: 6 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,9 @@ graphviz = "^0.20.0"
inflection = "^0.5.1"
jsonschema = "^4.0.0"
networkx = ">=2.2.8"
numpy = "^1.21.1"
numpy = "^1.26.4"
oauth2client = "^4.1.0" # Specified because of bug in version ^4.0.0
pandas = "^2.0.0"
pandas = "^2.2.2"
pygsheets = "^2.0.4"
PyYAML = "^6.0.0"
rdflib = "^6.0.0"
Expand All @@ -67,16 +67,18 @@ dateparser = "^1.1.4"
pandarallel = "^1.6.4"
schematic-db = {version = "0.0.41", extras = ["synapse"]}
pyopenssl = {version = "^23.0.0", optional = true}
typing-extensions = "<4.6.0"
dataclasses-json = "^0.6.1"
pydantic = "^1.10.4"
connexion = {extras = ["swagger-ui"], version = "^2.8.0", optional = true}
Flask = {version = "2.1.3", optional = true}
Flask-Cors = {version = "^3.0.10", optional = true}
uWSGI = {version = "^2.0.21", optional = true}
Jinja2 = {version = ">2.11.3", optional = true}
jaeger-client = {version = "^4.8.0", optional = true}
flask-opentracing = {version="^2.0.0", optional = true}

[tool.poetry.extras]
api = ["connexion", "Flask", "Flask-Cors", "Jinja2", "pyopenssl"]
api = ["connexion", "Flask", "Flask-Cors", "Jinja2", "pyopenssl", "jaeger-client", "flask-opentracing"]
aws = ["uWSGI"]


Expand Down
8 changes: 0 additions & 8 deletions schematic/configuration/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,14 +164,6 @@ def model_location(self) -> str:
"""
return self._model_config.location

@property
def service_account_credentials_synapse_id(self) -> str:
"""
Returns:
str: The Synapse id of the Google service account credentials.
"""
return self._google_sheets_config.service_acct_creds_synapse_id

@property
def service_account_credentials_path(self) -> str:
"""
Expand Down
3 changes: 0 additions & 3 deletions schematic/configuration/dataclasses.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,12 +124,10 @@ class GoogleSheetsConfig:
strict_validation: Controls google sheet validation (regex match) against the validation rules.
True alerts the user and does not allow entry of bad values.
False warns the user but allows the entry onto the sheet.
service_acct_creds_synapse_id: The Synapse id of the Google service account credentials.
service_acct_creds: Path to the Google service account credentials,
either absolute or relative to this file
"""

service_acct_creds_synapse_id: str = "syn25171627"
service_acct_creds: str = "schematic_service_account_creds.json"
strict_validation: bool = True

Expand All @@ -151,7 +149,6 @@ def validate_string_is_not_empty(cls, value: str) -> str:
raise ValueError(f"{value} is an empty string")
return value

@validator("service_acct_creds_synapse_id")
@classmethod
def validate_synapse_id(cls, value: str) -> str:
"""Check if string is a valid synapse id
Expand Down
13 changes: 13 additions & 0 deletions schematic/manifest/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,10 @@
from schematic.configuration.configuration import CONFIG
from schematic.utils.google_api_utils import export_manifest_drive_service

from opentelemetry import trace

logger = logging.getLogger(__name__)
tracer = trace.get_tracer("Schematic")


class ManifestGenerator(object):
Expand Down Expand Up @@ -1289,6 +1291,7 @@ def _gather_all_fields(self, fields, json_schema):
)
return required_metadata_fields

@tracer.start_as_current_span("ManifestGenerator::get_empty_manifest")
def get_empty_manifest(
self,
strict: Optional[bool],
Expand Down Expand Up @@ -1334,6 +1337,7 @@ def _get_missing_columns(self, headers_1: list, headers_2: list) -> list:
"""
return set(headers_1) - set(headers_2)

@tracer.start_as_current_span("ManifestGenerator::set_dataframe_by_url")
def set_dataframe_by_url(
self,
manifest_url: str,
Expand Down Expand Up @@ -1425,6 +1429,7 @@ def map_annotation_names_to_display_names(
# Use the above dictionary to rename columns in question
return annotations.rename(columns=label_map)

@tracer.start_as_current_span("ManifestGenerator::get_manifest_with_annotations")
def get_manifest_with_annotations(
self, annotations: pd.DataFrame, strict: Optional[bool] = None
) -> Tuple[ps.Spreadsheet, pd.DataFrame]:
Expand Down Expand Up @@ -1465,6 +1470,7 @@ def get_manifest_with_annotations(

return manifest_url, manifest_df

@tracer.start_as_current_span("ManifestGenerator::export_sheet_to_excel")
def export_sheet_to_excel(
self, title: str = None, manifest_url: str = None, output_location: str = None
) -> str:
Expand Down Expand Up @@ -1514,6 +1520,7 @@ def export_sheet_to_excel(

return output_excel_file_path

@tracer.start_as_current_span("ManifestGenerator::_handle_output_format_logic")
def _handle_output_format_logic(
self,
output_format: str = None,
Expand Down Expand Up @@ -1570,6 +1577,7 @@ def _handle_output_format_logic(
return dataframe

@staticmethod
@tracer.start_as_current_span("ManifestGenerator::create_single_manifest")
def create_single_manifest(
path_to_data_model: str,
graph_data_model: nx.MultiDiGraph,
Expand Down Expand Up @@ -1623,6 +1631,7 @@ def create_single_manifest(
return result

@staticmethod
@tracer.start_as_current_span("ManifestGenerator::create_manifests")
def create_manifests(
path_to_data_model: str,
data_types: list,
Expand Down Expand Up @@ -1751,6 +1760,7 @@ def create_manifests(

return all_results

@tracer.start_as_current_span("ManifestGenerator::get_manifest")
def get_manifest(
self,
dataset_id: str = None,
Expand Down Expand Up @@ -1998,6 +2008,9 @@ def _format_new_excel_column(self, worksheet, new_column_index: int, col: str):
)
return worksheet

@tracer.start_as_current_span(
"ManifestGenerator::populate_existing_excel_spreadsheet"
)
def populate_existing_excel_spreadsheet(
self, existing_excel_path: str = None, additional_df: pd.DataFrame = None
):
Expand Down
4 changes: 4 additions & 0 deletions schematic/models/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,12 @@
from schematic.utils.df_utils import load_df

from schematic.models.validate_manifest import validate_all
from opentelemetry import trace

logger = logging.getLogger(__name__)

tracer = trace.get_tracer("Schematic")


class MetadataModel(object):
"""Metadata model wrapper around schema.org specification graph.
Expand Down Expand Up @@ -317,6 +320,7 @@ def populateModelManifest(
manifestPath, emptyManifestURL, return_excel=return_excel, title=title
)

@tracer.start_as_current_span("MetadataModel::submit_metadata_manifest")
def submit_metadata_manifest( # pylint: disable=too-many-arguments, too-many-locals
self,
manifest_path: str,
Expand Down
8 changes: 5 additions & 3 deletions schematic/schemas/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import logging
import time
import re
from typing import get_args
from typing import get_args, Optional, Any

import click
import click_log # type: ignore
Expand All @@ -29,7 +29,7 @@
# invoke_without_command=True -> the group callback runs even when no sub-command is given,
# so the help text is not shown automatically; pass a help flag to display it
@click.group(context_settings=CONTEXT_SETTINGS, invoke_without_command=True)
def schema(): # use as `schematic model ...`
def schema() -> None: # use as `schematic model ...`
"""
Sub-commands for Schema related utilities/methods.
"""
Expand Down Expand Up @@ -59,7 +59,9 @@ def schema(): # use as `schematic model ...`
metavar="<OUTPUT_PATH>",
help=query_dict(schema_commands, ("schema", "convert", "output_jsonld")),
)
def convert(schema, data_model_labels, output_jsonld):
def convert(
schema: Any, data_model_labels: DisplayLabelType, output_jsonld: Optional[str]
) -> None:
"""
Running CLI to convert data model specification in CSV format to
data model in JSON-LD format.
Expand Down
28 changes: 20 additions & 8 deletions schematic/schemas/data_model_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import networkx as nx # type: ignore
import graphviz # type: ignore
from opentelemetry import trace

from schematic.schemas.data_model_edges import DataModelEdges
from schematic.schemas.data_model_nodes import DataModelNodes
Expand All @@ -20,22 +21,27 @@
from schematic.utils.viz_utils import visualize
from schematic.utils.validate_utils import rule_in_rule_list

logger = logging.getLogger(__name__)


logger = logging.getLogger(__name__)
tracer = trace.get_tracer("Schematic")


class DataModelGraphMeta: # pylint: disable=too-few-public-methods
"""DataModelGraphMeta"""

_instances: dict = {}

def __call__(cls, *args: Any, **kwargs: Any): # pylint: disable=no-self-argument
def __call__( # pylint: disable=no-self-argument
cls, *args: Any, **kwargs: Any
) -> Any:
"""
Possible changes to the value of the `__init__` argument do not affect
the returned instance.
"""
if cls not in cls._instances:
instance = super().__call__(*args, **kwargs) # pylint: disable=no-member
instance = super().__call__(*args, **kwargs) # type: ignore # pylint: disable=no-member
cls._instances[cls] = instance
return cls._instances[cls]

Expand Down Expand Up @@ -83,6 +89,7 @@ def __init__(
)
self.graph = self.generate_data_model_graph()

@tracer.start_as_current_span("DataModelGraph::generate_data_model_graph")
def generate_data_model_graph(self) -> nx.MultiDiGraph:
"""
Generate NetworkX Graph from the Relationships/attributes dictionary, the graph is built
Expand Down Expand Up @@ -295,14 +302,14 @@ def get_component_node_validation_rules(
manifest_component: str,
node_label: Optional[str] = None,
node_display_name: Optional[str] = None,
) -> list[str]:
) -> list:
"""Get validation rules for a given node and component.
Args:
manifest_component: str, manifest component display name that the node belongs to.
node_label: str, Label of the node you would want to get the validation rules for.
node_display_name: str, node display name for the node being queried.
Returns:
validation_rules: list[str], validation rules list for a given node and component.
validation_rules: list, validation rules list for a given node and component.
"""
# get any additional validation rules associated with this node (e.g. can this node
# be mapped to a list of other nodes)
Expand All @@ -312,11 +319,14 @@ def get_component_node_validation_rules(

# Parse the validation rules per component if applicable
if node_validation_rules and isinstance(node_validation_rules, dict):
node_validation_rules = extract_component_validation_rules(
node_validation_rules_list = extract_component_validation_rules(
manifest_component=manifest_component,
validation_rules_dict=node_validation_rules,
validation_rules_dict=node_validation_rules, # type: ignore
)
return node_validation_rules
else:
assert isinstance(node_validation_rules, list)
node_validation_rules_list = node_validation_rules
return node_validation_rules_list

def get_component_requirements(
self,
Expand Down Expand Up @@ -830,7 +840,9 @@ def find_child_classes(self, schema_class: str) -> list:
Returns:
list of children to the schema_class.
"""
return unlist(list(self.graph.successors(schema_class)))
child_classes = unlist(list(self.graph.successors(schema_class)))
assert isinstance(child_classes, list)
return child_classes

def find_class_specific_properties(self, schema_class: str) -> list[str]:
"""Find properties specifically associated with a given class
Expand Down
Loading

0 comments on commit 0d2527a

Please sign in to comment.