-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathattendees2vivo.py
147 lines (114 loc) · 5.08 KB
/
attendees2vivo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
#!/usr/bin/env python
"""
attendees2vivo.py -- Read Attendee data, make VIVO RDF
Perhaps this is a "sufficient" draft -- including the features needed for OpenVIVO.
If the person has an ORCID iD, we make a person entity with a vcard carrying their name; attendees without an ORCID iD currently produce no triples.
"""
from rdflib import Graph, Literal, Namespace, URIRef
from rdflib.namespace import RDFS, RDF
import logging
# Module metadata.
# NOTE(review): `__attendee__` is not a conventional module dunder; it may have
# been intended as `__author__` -- confirm before renaming.
__attendee__ = "Michael Conlon"
__copyright__ = "Copyright 2017 (c) Michael Conlon"
__license__ = "Apache License 2.0"
__version__ = "0.03"
# Constants
# URI prefixes for entities minted in the OpenVIVO namespace.
# uri_prefix, date_prefix and vcard_prefix are not referenced by the code
# visible in this file.
uri_prefix = 'http://openvivo.org/a/doi'
date_prefix = 'http://openvivo.org/a/date'
# Person URIs are built by appending the bare ORCID iD directly to this prefix
# (no separator), e.g. .../a/orcid0000-0000-0000-0000.
attendee_prefix = 'http://openvivo.org/a/orcid'
vcard_prefix = 'http://openvivo.org/a/vcard'
# Prefix used to build the ORCID iD URI that a person is linked to.
orcid_prefix = 'http://orcid.org/'
# RDF namespaces for the classes and predicates used when building triples.
VIVO = Namespace('http://vivoweb.org/ontology/core#')
FOAF = Namespace('http://xmlns.com/foaf/0.1/')
OBO = Namespace('http://purl.obolibrary.org/obo/')
VCARD = Namespace('http://www.w3.org/2006/vcard/ns#')
OWL = Namespace('http://www.w3.org/2002/07/owl#')
# Setup logging
# Installs the default root-logger configuration; no logger calls appear in the
# code visible in this file.
logging.basicConfig()
# Helper functions
def make_attendee(data_line):
    """
    Parse one tab-delimited registration line into an attendee dict.

    The line is expected to hold four tab-separated fields, in this order:
    full name, company, ORCID iD (bare, or as an orcid.org URL) and the
    opt-out answer.

    :param data_line: one line of the attendee file, with or without its
        trailing newline
    :return: a dict with the keys full_name ("Family, Given", or just the
        family name when there is no given name), company, orcid (the bare iD,
        or '' when none was supplied), optout, given_name, additional_name
        (always '') and family_name; or None when the attendee did not
        explicitly answer "no" to opting out, which includes blank lines and
        lines with fewer than four fields
    :raises ValueError: if a non-empty ORCID iD does not start with '0'
    """
    attendee = dict(zip(['full_name', 'company', 'orcid', 'optout'], data_line.split('\t')))

    # A blank or short line carries no opt-out answer. Only an explicit "no"
    # allows the attendee to be published, so treat a missing answer as
    # "skip" instead of failing with a KeyError.
    if 'optout' not in attendee:
        return None
    attendee['optout'] = attendee['optout'].strip('\n')
    if attendee['optout'].lower() != 'no':
        return None

    # split() without an argument collapses runs of whitespace, so doubled or
    # leading/trailing spaces do not leak into the name parts. Periods are
    # trimmed from each part ("Q." -> "Q") and parts left empty are dropped.
    trimmed = (part.strip('.') for part in attendee['full_name'].split())
    name_parts = [part for part in trimmed if part]

    # The first part is the given name and everything after it is the family
    # name; a lone part is treated as a family name.
    attendee['additional_name'] = ''
    if len(name_parts) > 1:
        attendee['given_name'] = name_parts[0]
        attendee['family_name'] = ' '.join(name_parts[1:])
    else:
        attendee['given_name'] = ''
        attendee['family_name'] = name_parts[0] if name_parts else ''

    # "Family, Given" -- without a dangling comma or trailing blank when there
    # is no given name.
    if attendee['given_name']:
        attendee['full_name'] = attendee['family_name'] + ', ' + attendee['given_name']
    else:
        attendee['full_name'] = attendee['family_name']

    # Reduce the ORCID field to the bare iD. The https form must be removed
    # before the shorter forms, otherwise "https://" would be left behind.
    orcid = attendee['orcid'].strip()
    for url_form in ('https://orcid.org/', 'http://orcid.org/', 'orcid.org/'):
        orcid = orcid.replace(url_form, '')
    attendee['orcid'] = orcid.strip('/')

    # Sanity check: anything that does not start with '0' is rejected.
    if attendee['orcid'] and attendee['orcid'][0] != '0':
        raise ValueError(attendee)

    print(attendee)
    return attendee
def make_attendee_rdf(attendee, event_uri):
    """
    Build the triples describing one attendee who has an ORCID iD.

    For such an attendee the graph holds a foaf:Person labelled with the full
    name, the link to the ORCID iD, a vcard carrying the name parts, and a
    "Registrant" role tying the person to the event. An attendee without an
    ORCID iD yields an empty graph.

    :param attendee: a dict containing the attendee's data; 'full_name',
        'given_name', 'family_name' and 'additional_name' are read when a
        non-empty 'orcid' is present
    :param event_uri: URIRef of the event the attendee registered for
    :return: a new Graph with the attendee's triples (empty without an orcid)
    """
    graph = Graph()

    # No ORCID iD, no person: hand back the empty graph.
    if 'orcid' not in attendee or len(attendee['orcid']) == 0:
        return graph

    print(attendee['orcid'])

    # The person, and the ORCID iD the person is identified by
    person = URIRef(attendee_prefix + attendee['orcid'])
    orcid_id = URIRef(orcid_prefix + attendee['orcid'])
    graph.add((person, RDF.type, FOAF.Person))
    graph.add((person, RDFS.label, Literal(attendee['full_name'].strip())))
    graph.add((orcid_id, RDF.type, OWL.Thing))
    graph.add((person, VIVO.orcidId, orcid_id))

    # The vcard and its name node hang off the person's URI
    vcard = URIRef(str(person) + '-vcard')
    vcard_name = URIRef(str(vcard) + '-name')
    graph.add((person, OBO.ARG_2000028, vcard))
    graph.add((vcard, RDF.type, VCARD.Individual))
    graph.add((vcard_name, RDF.type, VCARD.Name))
    graph.add((vcard, VCARD.hasName, vcard_name))

    # Only non-empty name parts are recorded on the vcard name
    name_predicates = (
        ('given_name', VCARD.givenName),
        ('family_name', VCARD.familyName),
        ('additional_name', VCARD.additionalName),
    )
    for key, predicate in name_predicates:
        if len(attendee[key]) > 0:
            graph.add((vcard_name, predicate, Literal(attendee[key])))

    # The role connects the person to the conference
    role = URIRef(str(person) + '-' + str(event_uri))
    graph.add((role, RDF.type, VIVO.ResearcherRole))
    graph.add((role, RDFS.label, Literal("Registrant")))
    graph.add((person, OBO.RO_0000053, role))
    graph.add((role, OBO.BFO_0000054, event_uri))

    return graph
# Main starts here
if __name__ == '__main__':
    # Read attendees.txt (one tab-delimited attendee per line), build the
    # triples for every attendee who did not opt out, and write the combined
    # graph to attendees.rdf in N3 syntax.
    attendees_graph = Graph()
    event_uri = URIRef('http://openvivo.org/a/eventVIVO2017')
    count = 0  # attendees who did not opt out
    orcid_count = 0  # of those, the ones that produced triples

    # `with` closes the input even when make_attendee raises on a bad ORCID.
    # NOTE: 'rU' is the Python 2 universal-newlines mode; Python 3 does this by
    # default and Python 3.11+ rejects the 'U' flag, so use 'r' there.
    with open('attendees.txt', 'rU') as f:
        for line in f:
            conference_attendee = make_attendee(line)
            if conference_attendee is None:
                continue
            count += 1
            if count % 10 == 0:
                print(count)  # progress indicator
            attendee_graph = make_attendee_rdf(conference_attendee, event_uri)
            print(len(attendee_graph))
            # An empty graph means the attendee had no ORCID iD
            if len(attendee_graph) > 0:
                orcid_count += 1
                attendees_graph += attendee_graph

    # Single pre-formatted strings print the same under Python 2 and Python 3
    print("%d Attendees with ORCiD" % orcid_count)
    print("Write %d triples to file" % len(attendees_graph))

    with open('attendees.rdf', 'w') as triples_file:
        triples_file.write(attendees_graph.serialize(format='n3'))
        triples_file.write('\n')