Skip to content

Commit

Permalink
Convert fields from text to id only once
Browse files Browse the repository at this point in the history
  • Loading branch information
Paurikova2 committed Oct 31, 2024
1 parent 5120f4d commit 045dabd
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 13 deletions.
11 changes: 8 additions & 3 deletions src/pump/_item.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ class items:
}],
]

ignored_mtd_fields = ["bitstream.redirectToURL"]
ignored_fields = ["bitstream.redirectToURL"]

def __init__(self,
item_file_str: str,
Expand Down Expand Up @@ -77,6 +77,7 @@ def __init__(self,
"withdrawn": [],
"not_imported": [],
}
self._ignored_fields_id = []

def __len__(self):
return len(self._items)
Expand Down Expand Up @@ -120,6 +121,10 @@ def import_to(self, cache_file, dspace, handles, metadatas, epersons, collection
Mapped tables: item, collection2item, workspaceitem, cwf_workflowitem,
metadata, handle
"""

# Convert the ignored field names (text) to field ids once, before importing
self._ignored_fields_id = metadatas.get_field_id_from_text(self.ignored_fields)

if "ws" in self._done:
_logger.info("Skipping workspace import")
else:
Expand Down Expand Up @@ -160,7 +165,7 @@ def _import_item(self, dspace, generic_item_d, item, handles, metadatas, eperson
'withdrawn': item['withdrawn']
}
i_meta = (metadatas.
value(items.TYPE, i_id, None, True, self.ignored_mtd_fields))
value(items.TYPE, i_id, None, True, self._ignored_fields_id))
if i_meta is not None:
data['metadata'] = i_meta

Expand Down Expand Up @@ -279,7 +284,7 @@ def _item_import_to(self, dspace, handles, metadatas, epersons, collections):
}

i_meta = metadatas.value(items.TYPE, i_id, None,
True, self.ignored_mtd_fields)
True, self._ignored_fields_id)
if i_meta:
data['metadata'] = i_meta

Expand Down
22 changes: 12 additions & 10 deletions src/pump/_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -536,6 +536,17 @@ def _get_key_v2(self, val):
key += '.' + field_js['qualifier']
return key

def get_field_id_from_text(self, fields: list):
    """
    Translate metadata field names into metadata field ids.

    Each name in `fields` is looked up in `self._v5_fields_name2id`. Names
    without a mapping, and ids for which `self.exists_field` is falsy, are
    dropped. The order of `fields` is preserved.

    Args:
        fields: Field names to translate, e.g. ["bitstream.redirectToURL"].
            `None` or an empty list yields an empty result.

    Returns:
        List of field ids that are both mapped and existing.
    """
    # Nothing to translate; also tolerates `None` so callers with an
    # optional ignore-list do not have to guard the call themselves.
    if not fields:
        return []

    name2id = self._v5_fields_name2id
    return [
        field_id
        for field in fields
        # `is not None` (not truthiness) so that a valid id of 0 is kept.
        if (field_id := name2id.get(field)) is not None
        and self.exists_field(field_id)
    ]

def value(self, res_type_id: int, res_id: int, text_for_field_id: int = None, log_missing: bool = True, ignored_mtd_fields: list = None):
"""
Get metadata value for dspace object.
Expand All @@ -552,20 +563,11 @@ def value(self, res_type_id: int, res_id: int, text_for_field_id: int = None, lo
log_miss(f'Metadata for [{res_id}] are missing in [{res_type_id}] type')
return None

# Check if filtered fields exist
exist_ignored_mtd_fields = [
field_id
for field in ignored_mtd_fields
if ignored_mtd_fields
and (field_id := self._v5_fields_name2id.get(field)) is not None
and self.exists_field(field_id)
]

vals = tp_values[res_id]

vals = [x for x in vals
if self.exists_field(x['metadata_field_id'])
and not (exist_ignored_mtd_fields and any(x['metadata_field_id'] == field for field in exist_ignored_mtd_fields))]
and not (ignored_mtd_fields and any(x['metadata_field_id'] == field for field in ignored_mtd_fields))]

if len(vals) == 0:
return {}
Expand Down

0 comments on commit 045dabd

Please sign in to comment.