Skip to content

Commit

Permalink
Fixes #12
Browse files Browse the repository at this point in the history
  • Loading branch information
iosonopersia committed Apr 29, 2021
1 parent b1b3ae1 commit 4afe7a5
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 14 deletions.
15 changes: 13 additions & 2 deletions oc_ocdm/storer.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from oc_ocdm.support.query_utils import get_update_query

if TYPE_CHECKING:
from typing import Dict, List, Tuple, Any, Optional
from typing import Dict, List, Tuple, Any, Optional, Set
from rdflib import URIRef
from oc_ocdm.abstract_entity import AbstractEntity
from oc_ocdm.abstract_set import AbstractSet
Expand All @@ -44,7 +44,18 @@ class Storer(object):
def __init__(self, abstract_set: AbstractSet, repok: Reporter = None, reperr: Reporter = None,
context_map: Dict[str, Any] = None, default_dir: str = "_", dir_split: int = 0,
n_file_item: int = 1, output_format: str = "json-ld") -> None:
self.output_format: str = output_format
# Validate 'output_format' before storing it on the instance.
# We only accept format strings that:
#   1. are supported by rdflib
#   2. correspond to an output format which is effectively either NT or NQ
# The only exception to this rule is the 'json-ld' format, which is the default value of 'output_format'.
supported_formats: Set[str] = {'application/n-triples', 'ntriples', 'nt', 'nt11',
                               'application/n-quads', 'nquads', 'json-ld'}
if output_format not in supported_formats:
    # Interpolate the rejected argument, not the instance attribute: the attribute is only
    # assigned below, once validation has succeeded, so reading it here would mask the
    # intended ValueError with an AttributeError.
    raise ValueError(f"Given output_format '{output_format}' is not supported."
                     f" Available formats: {supported_formats}.")
self.output_format: str = output_format

self.dir_split: int = dir_split
self.n_file_item: int = n_file_item
self.default_dir: str = default_dir if default_dir != "" else "_"
Expand Down
31 changes: 19 additions & 12 deletions oc_ocdm/support/support.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,11 +269,6 @@ def find_paths(res: URIRef, base_dir: str, base_iri: str, default_dir: str, dir_
"""
string_iri: str = str(res)

if is_json:
format_string: str = ".json"
else:
format_string: str = ".ttl"

if is_dataset(res):
cur_dir_path: str = (base_dir + re.sub(r"^%s(.*)$" % base_iri, r"\1", string_iri))[:-1]
# In case of dataset, the file path is different from regular files, e.g.
Expand Down Expand Up @@ -305,49 +300,61 @@ def find_paths(res: URIRef, base_dir: str, base_iri: str, default_dir: str, dir_
subj_short_name: str = get_prov_subject_short_name(res)
short_name: str = get_short_name(res)
sub_folder: str = get_prov_subject_prefix(res)
file_extension: str = '.json' if is_json else '.nq'
if sub_folder == "":
sub_folder = default_dir
if sub_folder == "":
sub_folder = "_" # enforce default value

cur_dir_path: str = base_dir + subj_short_name + os.sep + sub_folder + \
os.sep + str(cur_split) + os.sep + str(cur_file_split) + os.sep + "prov"
cur_file_path: str = cur_dir_path + os.sep + short_name + format_string
cur_file_path: str = cur_dir_path + os.sep + short_name + file_extension
else: # regular bibliographic entity
short_name: str = get_short_name(res)
sub_folder: str = get_prefix(res)
file_extension: str = '.json' if is_json else '.nt'
if sub_folder == "":
sub_folder = default_dir
if sub_folder == "":
sub_folder = "_" # enforce default value

cur_dir_path: str = base_dir + short_name + os.sep + sub_folder + \
os.sep + str(cur_split)
cur_file_path: str = cur_dir_path + os.sep + str(cur_file_split) + format_string
cur_dir_path: str = base_dir + short_name + os.sep + sub_folder + os.sep + str(cur_split)
cur_file_path: str = cur_dir_path + os.sep + str(cur_file_split) + file_extension
# Enter here if no split is needed
elif dir_split == 0:
if "/prov/" in string_iri:
subj_short_name: str = get_prov_subject_short_name(res)
short_name: str = get_short_name(res)
sub_folder: str = get_prov_subject_prefix(res)
file_extension: str = '.json' if is_json else '.nq'
if sub_folder == "":
sub_folder = default_dir
if sub_folder == "":
sub_folder = "_" # enforce default value

cur_dir_path: str = base_dir + subj_short_name + os.sep + sub_folder + \
os.sep + str(cur_file_split) + os.sep + "prov"
cur_file_path: str = cur_dir_path + os.sep + short_name + format_string
cur_file_path: str = cur_dir_path + os.sep + short_name + file_extension
else:
short_name: str = get_short_name(res)
sub_folder: str = get_prefix(res)
file_extension: str = '.json' if is_json else '.nt'
if sub_folder == "":
sub_folder = default_dir
if sub_folder == "":
sub_folder = "_" # enforce default value

cur_dir_path: str = base_dir + short_name + os.sep + sub_folder
cur_file_path: str = cur_dir_path + os.sep + str(cur_file_split) + format_string
cur_file_path: str = cur_dir_path + os.sep + str(cur_file_split) + file_extension
# Enter here if the data is about a provenance agent, e.g. /corpus/prov/
else:
short_name: str = get_short_name(res)
prefix: str = get_prefix(res)
count: str = get_count(res)
file_extension: str = '.json' if is_json else '.nq'

cur_dir_path: str = base_dir + short_name
cur_file_path: str = cur_dir_path + os.sep + prefix + count + format_string
cur_file_path: str = cur_dir_path + os.sep + prefix + count + file_extension

return cur_dir_path, cur_file_path

Expand Down

0 comments on commit 4afe7a5

Please sign in to comment.