Skip to content

Commit 0f1515a

Browse files
author
Binh Vu
committed
fix bug in parsing drepr model twice, and allow dump version 2
1 parent 6be1b39 commit 0f1515a

File tree

5 files changed

+98
-10
lines changed

5 files changed

+98
-10
lines changed

pydrepr/drepr/models/drepr.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,17 @@
1-
from collections import OrderedDict, defaultdict
1+
from collections import OrderedDict
22
from dataclasses import dataclass, asdict
33
from enum import Enum
44
from io import StringIO
5-
from typing import List, Dict, Any, NamedTuple, Optional
5+
from typing import List, Dict, Any, Optional
66

77
import ujson
88
from ruamel.yaml import YAML
99

10-
from drepr.models.parse_v2 import ReprV2Parser
1110
from drepr.utils.validator import Validator, InputError
1211
from .align import Alignment, RangeAlignment, AlignmentType, ValueAlignment, AlignedStep
1312
from .attr import Attr
1413
from .parse_v1 import ReprV1Parser
14+
from .parse_v2 import ReprV2Parser
1515
from .preprocessing import Preprocessing, PMap, PFilter, RMap, PSplit
1616
from .resource import Resource, CSVProp
1717
from .sm import SemanticModel, DataNode, ClassNode, LiteralNode
@@ -162,15 +162,15 @@ def is_valid(self):
162162
if self.sm.is_rel_iri(node.label):
163163
prefix = node.label.split(":", 1)[0]
164164
assert prefix in self.sm.prefixes, f"Unknown prefix `{prefix}` of the " \
165-
f"ontology class {node.label}"
165+
f"ontology class {node.label}"
166166
for edge in self.sm.edges.values():
167167
if self.sm.is_rel_iri(edge.label):
168168
prefix = edge.label.split(":", 1)[0]
169169
assert prefix in self.sm.prefixes, f"Unknown prefix `{prefix}` of the " \
170-
f"ontology predicate {edge.label}"
170+
f"ontology predicate {edge.label}"
171171

172172
def to_lang_format(self, simplify: bool = True, use_json_path: bool = False) -> dict:
173-
return ReprV1Parser.dump(self, simplify, use_json_path)
173+
return ReprV2Parser.dump(self, simplify, use_json_path)
174174

175175
def to_lang_yml(self, simplify: bool = True, use_json_path: bool = False) -> str:
176176
model = self.to_lang_format(simplify, use_json_path)

pydrepr/drepr/models/parse_v2/__init__.py

+86-1
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,18 @@
1+
from collections import defaultdict
2+
from dataclasses import asdict
3+
from typing import List
4+
15
from drepr.models.parse_v2.path_parser import PathParserV2
26
from drepr.utils.validator import *
7+
from ..align import AlignmentType, RangeAlignment
38

49
from ..parse_v1.align_parser import AlignParser
510
from ..parse_v1.attr_parser import AttrParser
611
from ..parse_v1.preprocessing_parser import PreprocessingParser
712
from ..parse_v1.resource_parser import ResourceParser
813

914
from .sm_parser import SMParser
10-
from ..sm import SemanticModel
15+
from ..sm import SemanticModel, ClassNode, DataNode, LiteralNode
1116

1217

1318
class ReprV2Parser:
@@ -57,3 +62,83 @@ def parse(cls, raw: dict):
5762
sm = None
5863

5964
return DRepr(resources, preprocessing, attrs, aligns, sm)
65+
66+
@classmethod
def dump(cls, drepr: 'DRepr', simplify: bool = True, use_json_path: bool = False):
    """Serialize a D-REPR model into the version-2 language format.

    Args:
        drepr: the model to serialize. Its resources, preprocessing steps,
            attributes, alignments and semantic model are read.
        simplify: accepted for interface compatibility with the version-1
            dumper; this implementation does not use it.
        use_json_path: forwarded to ``path.to_lang_format`` of every
            preprocessing step and attribute.

    Returns:
        An ``OrderedDict`` with the keys ``version``, ``resources``,
        ``preprocessing``, ``attributes``, ``alignments`` and
        ``semantic_model``, in that order.

    Raises:
        KeyError: if a data or literal node has no incoming edge.
    """
    # Imported locally so this method does not rely on the wildcard import of
    # the validator module to bring OrderedDict into scope.
    from collections import OrderedDict

    version = '2'
    sm = OrderedDict()

    # Each class node is keyed as "<label>:<n>", where n counts the class nodes
    # seen so far that share the same label (starting from 1).
    class_counter = defaultdict(int)
    class_ids: Dict[str, str] = {}
    for node in drepr.sm.nodes.values():
        if isinstance(node, ClassNode):
            class_counter[node.label] += 1
            class_ids[node.node_id] = f"{node.label}:{class_counter[node.label]}"
            sm[class_ids[node.node_id]] = OrderedDict([
                ("properties", []),
                ("static_properties", []),
                ("links", []),
            ])

    # Index the first incoming edge of every node once (in edge iteration
    # order) instead of scanning all edges again for each node.
    incoming_edge = {}
    for edge in drepr.sm.edges.values():
        incoming_edge.setdefault(edge.target_id, edge)

    for node in drepr.sm.nodes.values():
        if isinstance(node, DataNode):
            edge = incoming_edge[node.node_id]
            if node.data_type is not None:
                prop = (edge.label, node.attr_id, node.data_type.value)
            else:
                prop = (edge.label, node.attr_id)
            sm[class_ids[edge.source_id]]['properties'].append(prop)

        if isinstance(node, LiteralNode):
            # The previous code iterated `drepr.sm.edges` directly here, which
            # yields the mapping's keys rather than the edge objects.
            edge = incoming_edge[node.node_id]
            if node.data_type is not None:
                prop = (edge.label, node.value, node.data_type.value)
            else:
                prop = (edge.label, node.value)
            sm[class_ids[edge.source_id]]['static_properties'].append(prop)

    for edge in drepr.sm.edges.values():
        source = drepr.sm.nodes[edge.source_id]
        target = drepr.sm.nodes[edge.target_id]
        if isinstance(source, ClassNode) and isinstance(target, ClassNode):
            sm[class_ids[edge.source_id]]['links'].append(
                (edge.label, class_ids[edge.target_id]))
        if edge.is_subject:
            sm[class_ids[edge.source_id]]['subject'] = target.attr_id

    sm['prefixes'] = drepr.sm.prefixes

    preprocessing: List[dict] = []
    for prepro in drepr.preprocessing:
        step = OrderedDict([("type", prepro.type.value)])
        for k, v in asdict(prepro.value).items():
            step[k] = v
        # Replace the dataclass dump of the path with its language format.
        step["path"] = prepro.value.path.to_lang_format(use_json_path)
        preprocessing.append(step)

    resources = OrderedDict()
    for res in drepr.resources:
        res_conf = OrderedDict([("type", res.type.value)])
        if res.prop is not None:
            for k, v in asdict(res.prop).items():
                res_conf[k] = v
        resources[res.id] = res_conf

    attributes = OrderedDict()
    for attr in drepr.attrs:
        attributes[attr.id] = OrderedDict([
            ("resource_id", attr.resource_id),
            ("path", attr.path.to_lang_format(use_json_path)),
            ("unique", attr.unique),
            ("sorted", attr.sorted.value),
            ("value_type", attr.value_type.value),
            ("missing_values", attr.missing_values),
        ])

    alignments = []
    for align in drepr.aligns:
        if isinstance(align, RangeAlignment):
            alignments.append(OrderedDict([
                ("type", AlignmentType.range.value),
                ("source", align.source),
                ("target", align.target),
                ("aligned_dims", [
                    OrderedDict([
                        ("source", step.source_idx),
                        ("target", step.target_idx),
                    ]) for step in align.aligned_steps
                ]),
            ]))
        else:
            # Every alignment that is not a range alignment is dumped as a
            # value alignment.
            alignments.append(OrderedDict([
                ("type", AlignmentType.value.value),
                ("source", align.source),
                ("target", align.target),
            ]))

    return OrderedDict([
        ("version", version),
        ("resources", resources),
        ("preprocessing", preprocessing),
        ("attributes", attributes),
        ("alignments", alignments),
        ("semantic_model", sm),
    ])

pydrepr/drepr/models/parse_v2/sm_parser.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import copy
12
import re
23

34
from drepr.utils.validator import Validator, InputError
@@ -34,8 +35,10 @@ class SMParser:
3435
def parse(cls, sm: dict) -> SemanticModel:
3536
nodes = {}
3637
edges = {}
37-
38+
# shallow copy
39+
sm = copy.copy(sm)
3840
prefixes = sm.pop('prefixes', {})
41+
3942
trace0 = f"Parsing `prefixes` of the semantic model"
4043
Validator.must_be_dict(prefixes, trace0)
4144
for prefix, uri in prefixes.items():

pydrepr/drepr/version.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
__version__ = "2.9.2" # ___PKG_VERSION___: DO NOT MODIFY the version here. Update it via version_manager.py!
1+
__version__ = "2.9.3" # ___PKG_VERSION___: DO NOT MODIFY the version here. Update it via version_manager.py!
22
__engine_version__ = "1.0.8" # ___PKG_VERSION___: DO NOT MODIFY the version here. Update it via version_manager.py!
33
__engine_release_tag__ = "2.7" # ___PKG_VERSION___: DO NOT MODIFY the version here. Update it via version_manager.py!

version_manager.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
# ================================================================================
66
# Note: update the version by changing variables before the `=...=` line
7-
DREPR_PYLIB_VESRION = "2.9.2"
7+
DREPR_PYLIB_VESRION = "2.9.3"
88
DREPR_ENGINE_VERSION = "1.0.8"
99
# this tag marks the release which contains the pre-built engine in it.
1010
DREPR_ENGINE_RELEASE_TAG = "2.7"

0 commit comments

Comments
 (0)