-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathcollection.py
222 lines (190 loc) · 7.85 KB
/
collection.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
"""
This module provides the Collection class, which can be used to
create a collection of openMINDS metadata nodes.
The collection can be saved to and loaded from disk, in JSON-LD format.
"""
import json
import os
from .registry import lookup_type
from .base import Link
class Collection:
    """
    A collection of metadata nodes that can be saved to
    and loaded from disk.

    Nodes are stored in the `nodes` dict, keyed by their `id`.
    Iterating over the collection yields the nodes themselves,
    and `len()` gives the number of stored nodes.

    Args
    ----
    *nodes (LinkedMetadata):
        Nodes to store in the collection when creating it.
        Child nodes that are referenced from the explicitly
        listed nodes will also be added.
    """

    def __init__(self, *nodes):
        self.nodes = {}
        self.add(*nodes)

    def __len__(self):
        return len(self.nodes)

    def __iter__(self):
        return iter(self.nodes.values())

    def add(self, *nodes):
        """
        Add one or more metadata nodes to the collection.

        Child nodes that are referenced from the explicitly
        listed nodes will also be added.
        """
        for node in nodes:
            self._add_node(node)

    def _add_node(self, node):
        """
        Store `node` and everything reachable through `.links`.

        Nodes without an id are given a blank node identifier.
        The traversal is an explicit-stack, pre-order depth-first walk,
        so identifiers are handed out in the same order as a recursive
        walk would, but reference cycles and very deep chains of links
        cannot exhaust the interpreter's recursion limit.
        """
        visited = set()  # id() of the objects already handled in this call
        pending = [node]
        while pending:
            current = pending.pop()
            if id(current) in visited:
                continue
            visited.add(id(current))
            if current.id is None:
                current.id = self._get_blank_node_identifier()
            self.nodes[current.id] = current
            # reversed, so that the first link is the next one popped
            pending.extend(reversed(list(current.links)))

    def _get_blank_node_identifier(self):
        """
        Return a blank node identifier that is not yet used in this collection.

        See https://www.w3.org/TR/json-ld11/#identifying-blank-nodes
        A zero-padded counter is used to make testing and debugging easier
        (it might be easier just to use uuids, however).
        """
        identifier = len(self.nodes)
        candidate = f"_:{identifier:06d}"
        # Nodes that arrive with a pre-assigned blank id (e.g. loaded from a file
        # or shared with another collection) may already occupy this slot;
        # skip ahead rather than silently overwriting them.
        while candidate in self.nodes:
            identifier += 1
            candidate = f"_:{identifier:06d}"
        return candidate

    def _sort_nodes_by_id(self):
        """Rebuild `self.nodes` so that its entries are ordered by node id."""
        sorted_nodes = dict(sorted(self.nodes.items()))
        self.nodes = sorted_nodes

    def save(self, path, individual_files=False, include_empty_properties=False):
        """
        Save the node collection to disk in JSON-LD format.

        Args
        ----
        path (str):
            either a file or a directory into which the metadata will be written.
        individual_files (bool):
            if False (default), save the entire collection into a single file.
            if True, `path` must be a directory, and each node is saved into a
            separate file within that directory.
        include_empty_properties (bool):
            passed through unchanged to each node's `to_jsonld()` method.

        Returns
        -------
        A list of the file paths created.

        Raises
        ------
        OSError:
            if a single file is requested but `path` is an existing directory,
            or if individual files are requested but `path` is not a directory.
        """
        # Context used when the collection is empty; otherwise the context is
        # chosen from the type of the last node visited in the loop below.
        data_context = {"@vocab": "https://openminds.om-i.org/props/"}

        # in case a user has added additional child nodes _after_ adding the parent node to the collection
        # we first re-add all child nodes to the collection.
        # This is probably not the most elegant or fast way to do this, but it is simple and robust.
        # (we iterate over a snapshot, since `_add_node` may grow `self.nodes`)
        for node in tuple(self.nodes.values()):
            if node.type_.startswith("https://openminds.ebrains.eu/"):
                data_context = {"@vocab": "https://openminds.ebrains.eu/vocab/"}
            else:
                data_context = {"@vocab": "https://openminds.om-i.org/props/"}
            for linked_node in node.links:
                self._add_node(linked_node)

        # Now we can actually save the nodes
        if not individual_files:
            if os.path.exists(path):
                if not os.path.isfile(path):
                    raise OSError(f"Cannot create file {path} because a directory with that name already exists.")
            else:
                parent_dir = os.path.dirname(path)
                if parent_dir:
                    os.makedirs(parent_dir, exist_ok=True)
            self._sort_nodes_by_id()
            data = {
                "@context": data_context,
                "@graph": [
                    node.to_jsonld(
                        embed_linked_nodes=False, include_empty_properties=include_empty_properties, with_context=False
                    )
                    for node in self
                ],
            }
            with open(path, "w", encoding="utf-8") as fp:
                json.dump(data, fp, indent=2)
            output_paths = [path]
        else:
            if not os.path.exists(path):
                os.makedirs(path, exist_ok=True)
            if not os.path.isdir(path):
                raise OSError(
                    f"If saving to multiple files, `path` must be a directory. path={path}, pwd={os.getcwd()}"
                )
            self._sort_nodes_by_id()
            output_paths = []
            for node in self:
                if node.id.startswith("http"):
                    file_identifier = node.uuid
                else:
                    assert node.id.startswith("_:"), f"Unexpected node identifier: {node.id}"
                    # drop the "_:" blank node prefix to get a usable file name
                    file_identifier = node.id[2:]
                file_path = os.path.join(path, f"{file_identifier}.jsonld")
                # serialize before opening, so a failure does not leave an empty file behind
                data = node.to_jsonld(embed_linked_nodes=False, include_empty_properties=include_empty_properties)
                with open(file_path, "w", encoding="utf-8") as fp:
                    json.dump(data, fp, indent=2)
                output_paths.append(file_path)
        return output_paths

    @staticmethod
    def _node_from_jsonld(item, **lookup_kwargs):
        """
        Build a node from a single JSON-LD dict.

        Items with an "@type" are turned into an instance of the class returned
        by `lookup_type` (which receives `lookup_kwargs`); untyped items are
        only accepted as `Link`s to instances outside this collection, i.e. their
        "@id" must start with "http", otherwise ValueError is raised.
        """
        if "@type" in item:
            cls = lookup_type(item["@type"], **lookup_kwargs)
            return cls.from_jsonld(item)
        # allow links to metadata instances outside this collection
        if not item["@id"].startswith("http"):
            raise ValueError("Local nodes must have @type specified")
        return Link(item["@id"])

    def load(self, *paths):
        """
        Load openMINDS metadata from one or more JSON-LD files.

        `*paths` may contain either:

        1) a single directory, in which case
           all JSON-LD files at the top level of this directory will be loaded
           (but without descending into subdirectories)
        2) one or more JSON-LD files, which will all be loaded.

        Files are read as UTF-8. Once all files have been read, `Link`
        attributes are replaced by the loaded nodes where possible.
        """
        if len(paths) == 1 and os.path.isdir(paths[0]):
            data_dir = paths[0]
            json_paths = [
                os.path.join(data_dir, item)
                for item in os.listdir(data_dir)
                if os.path.splitext(item)[1] in (".json", ".jsonld")
            ]
        else:
            json_paths = paths

        for path in json_paths:
            assert os.path.isfile(path), f"Not a file: {path}"
            with open(path, "r", encoding="utf-8") as fp:
                data = json.load(fp)
            if "@graph" in data:
                # the vocabulary IRI tells us which openMINDS version the file was written with
                if data["@context"]["@vocab"].startswith("https://openminds.ebrains.eu/"):
                    version = "v3"
                else:
                    version = "latest"
                for item in data["@graph"]:
                    self.add(self._node_from_jsonld(item, version=version))
            else:
                # NOTE(review): no version is passed here, so `lookup_type` uses its
                # default - confirm that this is intended for single-node v3 files.
                self.add(self._node_from_jsonld(data))
        self._resolve_links()

    def _resolve_links(self):
        """Replace `Link` attributes with typed Nodes where possible"""
        for node in self.nodes.values():
            node._resolve_links(self.nodes)

    def validate(self, ignore=None):
        """
        Check whether all constraints are satisfied.

        Arguments:
            ignore: an optional list of check types that should be ignored
                    ("required", "type", "multiplicity")

        Returns a dict containing information about any validation failures,
        keyed by node id. Nodes without failures are not included.
        """
        all_failures = {}
        for node in self:
            failures = node.validate(ignore=ignore)
            if failures:
                all_failures[node.id] = failures
        return all_failures

    @property
    def is_valid(self):
        """True if `validate()` reports no failures for any node."""
        return not self.validate()