Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sortinfo #25

Merged
merged 8 commits into from
Jul 3, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 10 additions & 6 deletions delphin/cli/profile_to_rdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,11 +58,15 @@ def __cli_parse__(args):

# open Test Suite and start conversion
ts = itsdb.TestSuite(path)
logger.info(f"Converting {len(ts['result'])} analysis of {len(ts['item'])} sentences from {args.profile}")

# logger.info(f"Converting {len(ts['result'])} analysis of {len(ts['item'])} sentences from {args.profile}")
logger.log(30,f"Converting {len(ts['result'])} analysis of {len(ts['item'])} sentences from {args.profile}")

# The tsql takes some time to be processed:
logger.info(f"Loading the profile")
for (parse_id, result_id, text, mrs_string) in tsql.select('parse-id result-id i-input mrs', ts):
# logger.info(f"Loading the profile")
logger.log(30,f"Loading the profile")
profile_data = tsql.select('parse-id result-id i-input mrs', ts)
logger.log(30,f"Converting the profile")
for (parse_id, result_id, text, mrs_string) in profile_data:
logger.info(f"Converting the result {result_id} of sentence {parse_id}")
m = simplemrs.decode(mrs_string)

Expand All @@ -83,9 +87,9 @@ def __cli_parse__(args):
text=text)

# serializes results
logger.info(f"Serializing results to {args.output}")
logger.log(30,f"Serializing results to {args.output}")
graph.serialize(destination=args.output, format=args.format)
logger.info(f"DONE")
logger.log(30,f"DONE")

# except PyDelphinSyntaxError as e:
# logger.exception(e)
Expand Down
35 changes: 20 additions & 15 deletions delphin/rdf/_dmrs_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
DELPH = Namespace("http://www.delph-in.net/schema/")
POS = Namespace("http://www.delph-in.net/schema/pos#")

def __nodes_to_rdf__(d, graph, dmrsi, NODES):
def __nodes_to_rdf__(d, graph, dmrsi, NODES, namespace):
"""
Creates nodes of variables and nodes specifying their properties.

Expand All @@ -26,15 +26,18 @@ def __nodes_to_rdf__(d, graph, dmrsi, NODES):
representation.

NODES - the URI namespace dedicated to nodes.

namespace - the string namespace of a result of the profile.
"""
for i in range(len(d.nodes)):
node = d.nodes[i]
nodeIRI = NODES["{}".format(node.id)] # this used to be i, but links cannot be built that way. Revisit.
nodePredIRI = nodeIRI + "#predicate"
nodePredIRI = URIRef(f"{namespace}predicate-{node.id}")
nodeSortInfoIRI = URIRef(f"{namespace}sortinfo-{node.id}")

#putting its id
graph.add((nodeIRI, DMRS.hasId, Literal(node.id)))
graph.add((nodeIRI, RDFS.label, Literal("{}<{},{}>".format(node.predicate,node.cfrom,node.cto))))
graph.add((nodeIRI, RDFS.label, Literal(f"{node.predicate}<{node.cfrom},{node.cto}>")))

#Instantiate the Node and putting into the DMRS
graph.add((nodeIRI, RDF.type, DMRS.Node))
Expand All @@ -47,7 +50,7 @@ def __nodes_to_rdf__(d, graph, dmrsi, NODES):
elif delphin.predicate.is_abstract(node.predicate):
graph.add((nodePredIRI, RDF.type, DELPH.AbstractPredicate))
else:
graph.add((nodePredIRI, RDF.type, DMRS.Predicate))
graph.add((nodePredIRI, RDF.type, DELPH.Predicate))
print("An invalid predicate")

if splittedPredicate[0] is not None:
Expand All @@ -61,15 +64,17 @@ def __nodes_to_rdf__(d, graph, dmrsi, NODES):
graph.add((nodeIRI, DELPH.hasPredicate, nodePredIRI))
graph.add((nodePredIRI, DELPH.predText, Literal(delphin.predicate.normalize(node.predicate))))

# links
# lnk
if node.cfrom is not None:
graph.add((nodeIRI, DELPH.cfrom, Literal(node.cfrom)))
if node.cto is not None:
graph.add((nodeIRI, DELPH.cto, Literal(node.cto)))

#properties / sortinfo
graph.add((nodeIRI, DELPH.hasSortInfo, nodeSortInfoIRI))
graph.add((nodeSortInfoIRI, RDF.type, DELPH.SortInfo))
for prop, val in node.properties.items():
graph.add((nodeIRI, ERG[prop.lower()], Literal(val.lower())))
graph.add((nodeSortInfoIRI, ERG[prop.lower()], Literal(val.lower())))

#type:
if node.type is not None:
Expand All @@ -78,7 +83,7 @@ def __nodes_to_rdf__(d, graph, dmrsi, NODES):

# carg
if node.carg is not None:
graph.add((nodeIRI, DELPH.carg, Literal(node.carg)))
graph.add((nodeSortInfoIRI, DELPH.carg, Literal(node.carg)))


def __links_to_rdf__(d, graph, dmrsi, NODES, LINKS):
Expand Down Expand Up @@ -107,8 +112,8 @@ def __links_to_rdf__(d, graph, dmrsi, NODES, LINKS):
graph.add((dmrsi, DMRS.hasLink, linkIRI))

# the directions
graph.add((linkIRI, DMRS.hasFrom, NODES["{}".format(link.start)]))
graph.add((linkIRI, DMRS.hasTo, NODES["{}".format(link.end)]))
graph.add((linkIRI, DMRS.hasFrom, NODES[f"{link.start}"]))
graph.add((linkIRI, DMRS.hasTo, NODES[f"{link.end}"]))

# adding roles and posts and creating (just to make sure, maybe remove the last one)
graph.add((linkIRI, DMRS.hasRole, DMRS[link.role.lower()]))
Expand All @@ -117,7 +122,7 @@ def __links_to_rdf__(d, graph, dmrsi, NODES, LINKS):
graph.add((DMRS[link.role.lower()], RDF.type, DMRS.Role))


def dmrs_to_rdf(d, prefix: str, identifier, iname="dmrsi#dmrs", graph=None, out=None, text=None, format="turtle"):
def dmrs_to_rdf(d, prefix: str, identifier, iname="dmrs", graph=None, out=None, text=None, format="turtle"):
"""
Parses a pydelphin DMRS into RDF representation.

Expand All @@ -132,7 +137,7 @@ def dmrs_to_rdf(d, prefix: str, identifier, iname="dmrsi#dmrs", graph=None, out=
same text admits various mrs interpretations.

iname - the dmrs instance name (the dmrs as RDF node name)
to be used. As default, it is "dmrsi#dmrs".
to be used. As default, it is "dmrs".

graph - an rdflib graph. If given, uses it to store the
dmrs as RDF representation.
Expand All @@ -148,13 +153,13 @@ def dmrs_to_rdf(d, prefix: str, identifier, iname="dmrsi#dmrs", graph=None, out=
if type(identifier) == list:
identifier = "/".join(identifier)

namespace = prefix + "/" + identifier + "/"
namespace = prefix + "/" + identifier + "#"

#creating the instance URI and the namespaces
dmrsi = URIRef(namespace + iname)
graph.add((dmrsi, RDF.type, DMRS.DMRS))
NODES = Namespace(namespace + "nodes/")
LINKS = Namespace(namespace + "links/")
NODES = Namespace(namespace + "node-")
LINKS = Namespace(namespace + "link-")

#creating the prefixes of the output
graph.bind("dmrs", DMRS)
Expand All @@ -163,7 +168,7 @@ def dmrs_to_rdf(d, prefix: str, identifier, iname="dmrsi#dmrs", graph=None, out=
graph.bind("pos", POS)

#Creating RDF triples
__nodes_to_rdf__(d, graph, dmrsi, NODES)
__nodes_to_rdf__(d, graph, dmrsi, NODES, namespace)
#Adding top
graph.add((dmrsi, DMRS['hasTop'], NODES["{}".format(d.top)]))
#Adding index
Expand Down
24 changes: 14 additions & 10 deletions delphin/rdf/_eds_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
DELPH = Namespace("http://www.delph-in.net/schema/")
POS = Namespace("http://www.delph-in.net/schema/pos#")

def __nodes_to_rdf__(e, graph, edsi, NODES):
def __nodes_to_rdf__(e, graph, edsi, NODES, namespace):
"""
Creates nodes of variables and nodes specifying their properties.

Expand All @@ -26,10 +26,13 @@ def __nodes_to_rdf__(e, graph, edsi, NODES):
edsi - The URI of the EDS instance being parsed.

NODES - the URI namespace dedicated to nodes.

namespace - the string namespace of a result of the profile.
"""
for node in e.nodes:
nodeIRI = NODES[node.id]
nodePredIRI = NODES[node.id + "#predicate"]
nodePredIRI = URIRef(f"{namespace}predicate-{node.id}")
nodeSortInfoIRI = URIRef(f"{namespace}sortinfo-{node.id}")

#Instantiate the Node
graph.add((nodeIRI, RDF.type, EDS.Node))
Expand Down Expand Up @@ -68,12 +71,13 @@ def __nodes_to_rdf__(e, graph, edsi, NODES):
graph.add((nodeIRI, RDF.type, DELPH[node.type]))

# properties
graph.add((nodeIRI, DELPH.hasSortInfo, nodeSortInfoIRI))
graph.add((nodeSortInfoIRI, RDF.type, DELPH.SortInfo))
for prop in node.properties.items():
graph.add((nodeIRI, ERG[prop[0].lower()], Literal(prop[1].lower())))

graph.add((nodeSortInfoIRI, ERG[prop[0].lower()], Literal(prop[1].lower())))
# carg
if node.carg:
graph.add((nodeIRI, DELPH.carg, Literal(node.carg)))
graph.add((nodeSortInfoIRI, DELPH.carg, Literal(node.carg)))


def __edges_to_rdf__(e, graph, NODES):
Expand All @@ -92,7 +96,7 @@ def __edges_to_rdf__(e, graph, NODES):



def eds_to_rdf(e, prefix: str, identifier, iname="edsi#eds", graph=None, out=None, text=None, format="turtle"):
def eds_to_rdf(e, prefix: str, identifier, iname="eds", graph=None, out=None, text=None, format="turtle"):
"""
Parses a pydelphin EDS into RDF representation.

Expand All @@ -107,7 +111,7 @@ def eds_to_rdf(e, prefix: str, identifier, iname="edsi#eds", graph=None, out=Non
same text admits various eds interpretations.

iname - the eds instance name (the eds as RDF node name)
to be used. As default, it is "edsi#eds".
to be used. As default, it is "eds".

graph - an rdflib graph. If given, uses it to store the
mrs as RDF representation.
Expand All @@ -122,12 +126,12 @@ def eds_to_rdf(e, prefix: str, identifier, iname="edsi#eds", graph=None, out=Non
if type(identifier) == list:
identifier = "/".join(identifier)

namespace = prefix + "/" + identifier + "/"
namespace = prefix + "/" + identifier + "#"

#creating the instance URI and the namespace of nodes
edsi = URIRef(namespace + iname)
graph.add((edsi, RDF.type, EDS.EDS))
NODES = Namespace(namespace + "nodes/")
NODES = Namespace(namespace + "node-")

#creating the prefixes of the output
graph.bind("eds", EDS)
Expand All @@ -136,7 +140,7 @@ def eds_to_rdf(e, prefix: str, identifier, iname="edsi#eds", graph=None, out=Non
graph.bind("pos", POS)

#Creating the RDF triples
__nodes_to_rdf__(e, graph, edsi, NODES)
__nodes_to_rdf__(e, graph, edsi, NODES, namespace)
#Adding top
graph.add((edsi, DELPH['hasTop'], NODES[e.top]))
__edges_to_rdf__(e, graph, NODES)
Expand Down
31 changes: 17 additions & 14 deletions delphin/rdf/_mrs_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,10 @@ def _vars_to_rdf(m, graph, VARS):
# adding the properties of the variables
for props in v[1].items():
graph.add((VARS[v[0]], ERG[props[0].lower()], Literal(props[1])))
#maybe it won't be harmful to reassure that the property is defined in ERG, but it'll be like that for now.
# it would not hurt to verify that the property is defined in ERG, but it stays like this for now.
else:
print("Invalid predicate")
def _rels_to_rdf(m, graph, mrsi, RELS, VARS):
def _rels_to_rdf(m, graph, mrsi, RELS, VARS, namespace):
"""
Describes EPs "RELS" in an MRS-RDF format

Expand All @@ -53,12 +53,14 @@ def _rels_to_rdf(m, graph, mrsi, RELS, VARS):
mrsi: the mrs instance name (the MRS as RDF node name)
RELS: the URI namespace dedicated to EPs
VARS: the URI namespace dedicated to variables
namespace - the string namespace of a result of the profile.
"""

for rel in range(len(m.rels)):
mrs_rel = m.rels[rel]
rdf_rel = RELS["EP{rel}".format(rel=rel)] #maybe label EPs in a different manner is better because they aren't ordered.
pred_rel = RELS["EP{rel}#predicate".format(rel=rel)] #revise
rdf_rel = RELS["{rel}".format(rel=rel)] #maybe label EPs in a different manner is better because they aren't ordered.
pred_rel = URIRef(f"{namespace}predicate-{rel}")
sortinfo_rel = URIRef(f"{namespace}sortinfo-{rel}")

graph.add((mrsi, MRS.hasEP, rdf_rel))
graph.add((rdf_rel, RDF.type, MRS.ElementaryPredication))
Expand Down Expand Up @@ -92,17 +94,18 @@ def _rels_to_rdf(m, graph, mrsi, RELS, VARS):
graph.add((rdf_rel, DELPH.cto, Literal(mrs_rel.cto))) #integer

# parse arguments

graph.add((rdf_rel, DELPH.hasSortInfo, sortinfo_rel))
graph.add((sortinfo_rel, RDF.type, DELPH.SortInfo))
for hole, arg in mrs_rel.args.items():
#if hole == "ARG0": continue
# arg_type = type(eval(arg.title()))
# ?

# mrs variables as arguments
if hole.lower() != "carg" :
graph.add((rdf_rel, MRS[hole.lower()], VARS[arg]))
graph.add((sortinfo_rel, MRS[hole.lower()], VARS[arg]))
else :
graph.add((rdf_rel, DELPH.carg, Literal(arg)))
graph.add((sortinfo_rel, DELPH.carg, Literal(arg)))


def _hcons_to_rdf(m, graph, mrsi, HCONS, VARS):
Expand Down Expand Up @@ -164,7 +167,7 @@ def mrs_to_rdf(
m:delphin.mrs._mrs.MRS,
prefix:str,
identifier:Union[str, list],
iname:str ="mrsi#mrs",
iname:str ="mrs",
graph:rdflib.graph.Graph=None,
text:str=None) -> rdflib.graph.Graph:
"""
Expand All @@ -191,13 +194,13 @@ def mrs_to_rdf(
identifier = "/".join(identifier)

# creating the namespaces for this MRS instance
namespace = prefix + "/" + identifier + "/"
namespace = prefix + "/" + identifier + "#"
mrsi = URIRef(namespace + iname)
graph.add((mrsi, RDF.type, MRS.MRS))
VARS = Namespace(namespace + "variables/")
RELS = Namespace(namespace + "rels/")
HCONS = Namespace(namespace + "hcons/")
ICONS = Namespace(namespace + "icons/")
VARS = Namespace(namespace + "variables-")
RELS = Namespace(namespace + "EP-")
HCONS = Namespace(namespace + "hcons-")
ICONS = Namespace(namespace + "icons-")

# creating the prefixes of the output
graph.bind("mrs", MRS)
Expand All @@ -207,7 +210,7 @@ def mrs_to_rdf(

# creating the RDF triples
_vars_to_rdf(m, graph, VARS)
_rels_to_rdf(m, graph, mrsi, RELS, VARS)
_rels_to_rdf(m, graph, mrsi, RELS, VARS, namespace)
_hcons_to_rdf(m, graph, mrsi, HCONS, VARS)
_icons_to_rdf(m, graph, mrsi, ICONS, VARS)
# adding top
Expand Down
4 changes: 2 additions & 2 deletions vocabularies/erg.ttl
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ erg:ind

erg:pt
a rdf:Property ;
rdfs:subPropertyOf delph:x ;
rdfs:domain delph:Identifier ;
rdfs:subPropertyOf delph:hasPropertyValue ;
rdfs:domain delph:x ;
rdfs:range rdfs:Literal ;
rdfs:comment "A property that states the pronoun type"@en-us.

Expand Down
14 changes: 12 additions & 2 deletions vocabularies/semstructs.ttl
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@ delph:AbstractPredicate
rdfs:subClassOf delph:Predicate;
rdfs:comment "The class of the semantic predicates that represents the overt words in a sentence"@en-us .

delph:SortInfo
a rdfs:Class ;
rdfs:comment "The class for nodes which represent the morphosemantic information of a predication"@en-us .

delph:Pos
a rdf:Class ;

Expand Down Expand Up @@ -99,6 +103,12 @@ delph:hasPredication
rdfs:range delph:Predication ;
rdfs:comment "A property that links a semantic structure to one of its predications"@en-us.

delph:hasSortInfo
a rdf:Property ;
rdfs:domain delph:Predication ;
rdfs:range delph:SortInfo ;
rdfs:comment "A property that links a predication to its information"@en-us.

delph:hasPredicate
a rdf:Property ;
rdfs:domain delph:Predication ;
Expand Down Expand Up @@ -144,9 +154,9 @@ delph:cto

delph:hasPropertyValue
a rdf:Property ;
rdfs:domain delph:Identifier ;
rdfs:domain delph:SortInfo ;
rdfs:range rdfs:Literal ;
rdfs:comment "A general property to link an identifier to a morphosemantic property value"@en-us.
rdfs:comment "A general property to link a SortInfo node to a morphosemantic property value"@en-us.

delph:text
a rdf:Property ;
Expand Down