forked from UtrechtUniversity/yoda-ruleset
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathjson_landing_page.py
345 lines (284 loc) · 11.7 KB
/
json_landing_page.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
# -*- coding: utf-8 -*-
"""Functions for transforming JSON to landingpage HTML."""
__copyright__ = 'Copyright (c) 2019-2023, Utrecht University'
__license__ = 'GPLv3, see LICENSE'
from datetime import datetime
import jinja2
from dateutil import parser
from util import *
def persistent_identifier_to_uri(identifier_scheme, identifier):
    """Transform a persistent identifier to URI.

    Supported identifier schemes are Handle, DOI, ORCID and URL.
    An identifier that already starts with 'http://' or 'https://'
    (compared case-insensitively) is returned unchanged, whatever the scheme.
    An unsupported scheme yields a fragment-only link ('#' + identifier).

    :param identifier_scheme: Schema of identifier to transform
    :param identifier:        Identifier to transform to URI

    :returns: URI of persistent identifier
    """
    # Identifier already is an URI.
    if identifier.lower().startswith(('https://', 'http://')):
        return identifier

    # Create a URI from the identifier scheme and identifier.
    if identifier_scheme == 'DOI':
        return "https://doi.org/{}".format(identifier)
    if identifier_scheme == 'ORCID':
        return "https://orcid.org/{}".format(identifier)
    if identifier_scheme == 'Handle':
        return "https://hdl.handle.net/{}".format(identifier)
    if identifier_scheme == 'URL':
        # A URL without a http(s) prefix is passed through as-is.
        return identifier

    # Unknown scheme: fall back to an in-page anchor.
    return "#{}".format(identifier)
def _schema_enum_names(json_schema, definition, values, decode=False):
    """Translate schema enum values to their display names.

    The schema definition is expected to hold two parallel lists, 'enum'
    and 'enumNames'; the name for a value is the 'enumNames' entry at the
    position where the value occurs in 'enum'.

    :param json_schema: Dict holding the metadata schema, read at ['definitions'][definition]
    :param definition:  Name of the schema definition to look in
    :param values:      Iterable of enum values to translate
    :param decode:      Decode byte strings in both schema lists as UTF-8 before matching

    :returns: List with the display name of each value, in input order

    :raises KeyError:   If the definition or one of its two lists is missing from the schema
    :raises ValueError: If a value does not occur in the 'enum' list
    """
    def to_text(item):
        # Only byte strings are decoded; text that is already decoded is left alone.
        return item.decode("utf-8") if isinstance(item, bytes) else item

    options = json_schema['definitions'][definition]

    enum_ids = options['enum']
    if decode:
        enum_ids = [to_text(item) for item in enum_ids]

    enum_names = options['enumNames']
    if decode:
        enum_names = [to_text(item) for item in enum_names]

    return [enum_names[enum_ids.index(value)] for value in values]


def json_landing_page_create_json_landing_page(callback, rodsZone, template_name, combiJsonPath, json_schema, baseDOI, versions):
    """Get the landing page of published YoDa metadata as a string.

    Mandatory metadata (Title, Description, License, Data_Access_Restriction,
    Data_Classification and the System fields) is read with plain indexing, so
    a missing key raises KeyError. Optional fields fall back to an empty value.

    :param callback:      Callback to rule Language
    :param rodsZone:      Zone name
    :param template_name: Name of landingpage template
    :param combiJsonPath: path to Yoda metadata JSON
    :param json_schema:   Dict holding entire contents of metadata.json for the category involved
    :param baseDOI:       Base DOI of the publication
    :param versions:      Dict containing all the versions of the publication

    :returns: Output HTML landing page

    :raises KeyError:   If a mandatory metadata field is missing
    :raises ValueError: If an enum value in the metadata does not occur in the schema
    """
    # Landing page creation is part of the publication process
    # Read user & system metadata from corresponding combi JSON file
    # (Python2) 'want_bytes=False': Do not encode embedded unicode strings as
    # UTF-8, as that will trip up jinja2.
    dictJsonData = jsonutil.read(callback, combiJsonPath, want_bytes=False)

    # Remove empty lists, empty dicts, or None elements
    # to prevent empty fields on landingpage.
    dictJsonData = jsonutil.remove_empty(dictJsonData)

    # Load the Jinja template.
    landingpage_template_path = '/' + rodsZone + '/yoda/templates/' + template_name
    template = data_object.read(callback, landingpage_template_path)

    # Enable autoescaping for all templates.
    # NOTE: autoescape is no longer an extension starting in jinja 2.9 (2017).
    # NOTE(review): newer Jinja releases appear to drop this extension module
    # altogether - confirm before upgrading Jinja.
    environment = jinja2.Environment(autoescape=True,
                                     extensions=['jinja2.ext.autoescape'])

    # When empty landing page, take a short cut: only the identifier is rendered.
    if template_name == 'emptylandingpage.html.j2':
        persistent_identifier_datapackage = dictJsonData['System']['Persistent_Identifier_Datapackage']
        tm = environment.from_string(template)
        return tm.render(persistent_identifier_datapackage=persistent_identifier_datapackage)

    # Gather all metadata.

    # Is this datapackage under embargo?
    # The end date is compared as a string against today's date
    # (assumes Embargo_End_Date is formatted as YYYY-MM-DD - TODO confirm).
    embargo_end_date = dictJsonData.get('Embargo_End_Date', None)
    no_active_embargo = True
    if embargo_end_date:
        no_active_embargo = (datetime.now().strftime('%Y-%m-%d') >= embargo_end_date)

    title = dictJsonData['Title']
    description = dictJsonData['Description']

    # Geo specific lab handling
    try:
        labs = _schema_enum_names(json_schema, 'optionsLabs', dictJsonData['Lab'])
    except KeyError:
        labs = []

    # Geo specific additional lab handling (not mandatory)
    additional_labs = dictJsonData.get('Additional_Lab', [])

    try:
        disciplines = _schema_enum_names(json_schema, 'optionsDiscipline', dictJsonData['Discipline'])
    except KeyError:
        disciplines = []

    version = dictJsonData.get('Version', '')

    # Convert just the language schemas to unicode to handle when a language
    # has non-ascii characters (like Volapük). The language variable must be
    # kept in unicode, otherwise the landing page fails to build.
    try:
        language = _schema_enum_names(json_schema, 'optionsISO639-1',
                                      [dictJsonData['Language']], decode=True)[0]
    except KeyError:
        language = ''

    try:
        datatype = _schema_enum_names(json_schema, 'optionsDataType',
                                      [dictJsonData['Data_Type']])[0]
    except KeyError:
        datatype = ''

    covered_geolocation_place = dictJsonData.get('Covered_Geolocation_Place', {})

    # Route all domain specific keywords to tag area of landingpage.
    # None of these fields is mandatory; the last three are geo hptlab specific.
    taggable_fields = ('Tag', 'Keyword', 'Apparatus', 'Main_Setting', 'Process_Hazard',
                       'Geological_Structure', 'Geomorphological_Feature', 'Material',
                       'Monitoring', 'Software', 'Measured_Property',
                       'Pore_Fluid', 'Ancillary_Equipment', 'Inferred_Deformation_Behaviour')
    taggable_values = [dictJsonData.get(field, []) for field in taggable_fields]
    all_taggables = taggable_values[0]
    for extra_values in taggable_values[1:]:
        all_taggables = all_taggables + extra_values

    # From core-2 and default-3 'Datapackage' is renamed to 'Resource'.
    # 'Related_Datapackage' is kept for backward compatibility with older
    # schema definitions; presence of the two is mutually exclusive.
    all_related_resources = (dictJsonData.get('Related_Resource', [])
                             + dictJsonData.get('Related_Datapackage', []))

    creators = dictJsonData.get('Creator', [])
    contributors = dictJsonData.get('Contributor', [])
    contacts = dictJsonData.get('Contact', [])
    funding_reference = dictJsonData.get('Funding_Reference', [])

    license_name = dictJsonData['License']
    data_access_restriction = dictJsonData['Data_Access_Restriction']
    data_classification = dictJsonData['Data_Classification']

    system_metadata = dictJsonData['System']
    persistent_identifier_datapackage = system_metadata['Persistent_Identifier_Datapackage']
    open_access_link = system_metadata['Open_access_Link']
    license_uri = system_metadata['License_URI']

    # Format last modified date and publication date.
    # Python 3: https://docs.python.org/3/library/datetime.html#datetime.date.fromisoformat
    timestamp_format = '%Y-%m-%d %H:%M:%S%z'
    last_modified_date = parser.parse(system_metadata['Last_Modified_Date']).strftime(timestamp_format)
    publication_date = parser.parse(system_metadata['Publication_Date']).strftime(timestamp_format)

    geolocations = dictJsonData.get('GeoLocation', {})

    # Collection name ILAB specific - part of default schemas
    collection_name = dictJsonData.get('Collection_Name', '')

    tm = environment.from_string(template)
    # Make the identifier-to-URI conversion callable from within the template.
    tm.globals['persistent_identifier_to_uri'] = persistent_identifier_to_uri

    return tm.render(
        title=title,
        description=description,
        datatype=datatype,
        labs=labs,
        additional_labs=additional_labs,
        disciplines=disciplines,
        version=version,
        language=language,
        tags=all_taggables,
        creators=creators,
        contributors=contributors,
        contacts=contacts,
        publication_date=publication_date,
        embargo_end_date=embargo_end_date,
        no_active_embargo=no_active_embargo,
        data_access_restriction=data_access_restriction,
        license=license_name,
        license_uri=license_uri,
        open_access_link=open_access_link,
        funding_reference=funding_reference,
        data_classification=data_classification,
        collection_name=collection_name,
        last_modified_date=last_modified_date,
        related_resources=all_related_resources,
        persistent_identifier_datapackage=persistent_identifier_datapackage,
        geolocations=geolocations,
        covered_geolocation_place=covered_geolocation_place,
        base_doi=baseDOI,
        all_versions=versions)