From 61c35bd545b70fb8102b82465577a3efd7888bbf Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Fri, 3 Dec 2021 13:17:12 +0100 Subject: [PATCH 1/6] fix MODS name without roles, ht@kba #51 --- mets_mods2tei/api/mets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mets_mods2tei/api/mets.py b/mets_mods2tei/api/mets.py index 56359ba..b7a0eab 100644 --- a/mets_mods2tei/api/mets.py +++ b/mets_mods2tei/api/mets.py @@ -145,7 +145,7 @@ def __spur(self): person[name_part.get_type()] = name_part.get_valueOf_() # either author or editor - roles = name.get_role()[0].get_roleTerm() + roles = name.get_role()[0].get_roleTerm() if name.get_role() else [] # TODO: handle the complete set of allowed roles for role in roles: if role.get_valueOf_() == "edt": From 499c3ccc064c3e5b1c7c4f70dade68bed100f11a Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Fri, 3 Dec 2021 15:27:59 +0100 Subject: [PATCH 2/6] fallback to empty publicationStmt/date and encodingDesc if metsHdr is missing --- mets_mods2tei/api/mets.py | 26 ++++++++++++++++++++------ mets_mods2tei/api/tei.py | 8 +++++--- 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/mets_mods2tei/api/mets.py b/mets_mods2tei/api/mets.py index b7a0eab..351f93f 100644 --- a/mets_mods2tei/api/mets.py +++ b/mets_mods2tei/api/mets.py @@ -237,12 +237,26 @@ def __spur(self): # # metsHdr header = self.mets.get_metsHdr() - - # encoding date - self.encoding_date = header.get_CREATEDATE().isoformat() - - # encoding description - self.encoding_desc = list(filter(lambda x: x.get_OTHERTYPE() == "SOFTWARE", header.get_agent()))[0].get_name() + if header: + # encoding date + self.encoding_date = header.get_CREATEDATE() + # encoding description + self.encoding_desc = [agent.get_name() + for agent in header.get_agent() + if agent.get_TYPE() == "OTHER" and agent.get_OTHERTYPE() == "SOFTWARE"] + else: + self.encoding_date = None + self.encoding_desc = None + + if self.encoding_date: + self.encoding_date = self.encoding_date.isoformat() + else: + self.logger.error("Found no @CREATEDATE for publicationStmt/date") + if self.encoding_desc: + self.encoding_desc = self.encoding_desc[0] # or -1? + # what about agent.get_OTHERROLE() and agent.get_note()? + else: + self.logger.error("Found no mets:agent for encodingDesc") # # location of manuscript diff --git a/mets_mods2tei/api/tei.py b/mets_mods2tei/api/tei.py index 741e72b..6a4d260 100644 --- a/mets_mods2tei/api/tei.py +++ b/mets_mods2tei/api/tei.py @@ -462,15 +462,17 @@ def add_encoding_date(self, date): publication_stmt = self.tree.xpath('//tei:publicationStmt', namespaces=ns)[0] encoding_date = etree.SubElement(publication_stmt, "%sdate" % TEI) encoding_date.set("type", "publication") - encoding_date.text = date + if date: + encoding_date.text = date def set_encoding_description(self, creator): """ Set some details on the encoding of the digital edition """ encoding_desc = self.tree.xpath('//tei:encodingDesc', namespaces=ns)[0] - encoding_desc_details = etree.SubElement(encoding_desc, "%sp" % TEI) - encoding_desc_details.text = "Encoded with the help of %s." % creator + if creator: + encoding_desc_details = etree.SubElement(encoding_desc, "%sp" % TEI) + encoding_desc_details.text = "Encoded with the help of %s." % creator def add_repository(self, repository): """ From 8984b1b0c52c2981a55cf53f375c1db877e5409d Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Fri, 3 Dec 2021 17:10:26 +0100 Subject: [PATCH 3/6] get_text_in_line: append HYP content if available --- mets_mods2tei/api/alto.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/mets_mods2tei/api/alto.py b/mets_mods2tei/api/alto.py index e1a2cec..4da9af9 100644 --- a/mets_mods2tei/api/alto.py +++ b/mets_mods2tei/api/alto.py @@ -92,7 +92,11 @@ def get_text_in_line(self, line): Returns the ALTO-encoded text . :param Element line: The line to extract the text from. """ - return " ".join(element.get("CONTENT") for element in line.xpath("./alto:String", namespaces=ns)) + text = " ".join(element.get("CONTENT") for element in line.xpath("./alto:String", namespaces=ns)) + hyp = line.find("alto:HYP", namespaces=ns) + if hyp is not None: + text += hyp.get("CONTENT") + return text def __compute_fuzzy_distance(self, text1, text2): """ From 7b136c8603587c5bd3c323f467e645aebc57637c Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Fri, 3 Dec 2021 17:11:06 +0100 Subject: [PATCH 4/6] log to stderr instead of stdout (to prevent mixing with TEI) --- README.md | 2 +- mets_mods2tei/scripts/mets_mods2tei.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 63df7e0..2df14b2 100644 --- a/README.md +++ b/README.md @@ -118,5 +118,5 @@ including the extracted information from the MODS part of the METS. Example: - mm2tei "https://digital.slub-dresden.de/oai/?verb=GetRecord&metadataPrefix=mets&identifier=oai:de:slub-dresden:db:id-453779263" + mm2tei "https://digital.slub-dresden.de/oai/?verb=GetRecord&metadataPrefix=mets&identifier=oai:de:slub-dresden:db:id-453779263" > tei.xml diff --git a/mets_mods2tei/scripts/mets_mods2tei.py b/mets_mods2tei/scripts/mets_mods2tei.py index 28a3bdc..35e4b1e 100644 --- a/mets_mods2tei/scripts/mets_mods2tei.py +++ b/mets_mods2tei/scripts/mets_mods2tei.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- from __future__ import absolute_import +import sys import os import logging import click @@ -19,7 +20,7 @@ def cli(mets, ocr, text_group, log_level): # # logging level - logging.basicConfig(level=logging.getLevelName(log_level)) + logging.basicConfig(level=logging.getLevelName(log_level), stream=sys.stderr) # # interpret mets argument From 6545b162aacbcc173fe03c1e093fc914b9f87f68 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Fri, 3 Dec 2021 18:20:27 +0100 Subject: [PATCH 5/6] improve makefile --- Makefile | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index fdc422a..3750276 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,6 @@ # Python interpreter. Default: '$(PYTHON)' -PYTHON = python +PYTHON ?= python +PIP ?= pip # BEGIN-EVAL makefile-parser --make-help Makefile @@ -7,12 +8,16 @@ help: @echo "" @echo " Targets" @echo "" + @echo " install Install this package" + @echo " deps Install dependencies only" + @echo " deps-test Install dependencies for testing only" @echo " test Run all unit tests" @echo " coverage Run coverage tests" @echo "" @echo " Variables" @echo "" @echo " PYTHON Python interpreter. Default: '$(PYTHON)'" + @echo " PIP Python packager. Default: '$(PIP)'" # END-EVAL @@ -20,7 +25,16 @@ help: # Tests # -.PHONY: test coverage +.PHONY: install test coverage deps deps-test + +install: + $(PIP) install . + +deps: + $(PIP) install -r requirements.txt + +deps-test: + $(PIP) install -r requirements-test.txt # Run all unit tests test: From 711025a833d46f4e9c0ab8b065dc25e9c72ab803 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Fri, 3 Dec 2021 18:47:57 +0100 Subject: [PATCH 6/6] improve CI --- .circleci/config.yml | 45 +++++++++++++++++++++++++++++++++++++++----- 1 file changed, 40 insertions(+), 5 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 30a331e..668e893 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1,19 +1,54 @@ # Python CircleCI 2.1 configuration file # for mets-mods2tei # -# Check https://circleci.com/docs/2.1/language-python/ for more details +# Check https://circleci.com/docs/2.0/language-python/ for more details # version: 2.1 orbs: codecov: codecov/codecov@1.0.5 jobs: - build: + test: + parameters: + version: + type: string docker: - - image: python:3.6 + - image: circleci/python:<< parameters.version >> working_directory: ~/repo steps: - checkout - - run: pip install -r requirements-test.txt - - run: pip install . + - run: make deps deps-test + - run: make install + - run: make test - run: make coverage - codecov/upload + pypi: + docker: + - image: circleci/python:3.6 + working_directory: ~/repo + steps: + - checkout + - setup_remote_docker + - run: make install + - run: python setup.py sdist + - run: | + pip install cibuildwheel + cibuildwheel --output-dir dist + - store_artifacts: + path: dist/ + destination: artifacts + # later: upload to PyPI... + +workflows: + version: 2 + test-all: + jobs: + - test: + matrix: + parameters: + version: [3.5.10, 3.6.15, 3.7.12, 3.8.12, 3.9.9] + deploy: + jobs: + - pypi: + filters: + branches: + only: master