Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Simple fixes #56

Merged
merged 6 commits into from
Dec 20, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 40 additions & 5 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
@@ -1,19 +1,54 @@
# Python CircleCI 2.1 configuration file
# for mets-mods2tei
#
# Check https://circleci.com/docs/2.1/language-python/ for more details
# Check https://circleci.com/docs/2.0/language-python/ for more details
#
version: 2.1
bertsky marked this conversation as resolved.
Show resolved Hide resolved
orbs:
codecov: codecov/[email protected]
jobs:
build:
test:
parameters:
version:
type: string
docker:
- image: python:3.6
- image: circleci/python:<< parameters.version >>
working_directory: ~/repo
steps:
- checkout
- run: pip install -r requirements-test.txt
- run: pip install .
- run: make deps deps-test
- run: make install
- run: make test
- run: make coverage
- codecov/upload
pypi:
docker:
- image: circleci/python:3.6
working_directory: ~/repo
steps:
- checkout
- setup_remote_docker
- run: make install
- run: python setup.py sdist
- run: |
pip install cibuildwheel
cibuildwheel --output-dir dist
- store_artifacts:
path: dist/
destination: artifacts
# later: upload to PyPI...

workflows:
version: 2
test-all:
jobs:
- test:
matrix:
parameters:
version: [3.5.10, 3.6.15, 3.7.12, 3.8.12, 3.9.9]
deploy:
jobs:
- pypi:
filters:
branches:
only: master
18 changes: 16 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,26 +1,40 @@
# Python interpreter. Default: '$(PYTHON)'
PYTHON = python
PYTHON ?= python
PIP ?= pip

# BEGIN-EVAL makefile-parser --make-help Makefile

help:
@echo ""
@echo " Targets"
@echo ""
@echo " install Install this package"
@echo " deps Install dependencies only"
@echo " deps-test Install dependencies for testing only"
@echo " test Run all unit tests"
@echo " coverage Run coverage tests"
@echo ""
@echo " Variables"
@echo ""
@echo " PYTHON Python interpreter. Default: '$(PYTHON)'"
@echo " PIP Python packager. Default: '$(PIP)'"

# END-EVAL

#
# Tests
#

.PHONY: test coverage
.PHONY: install test coverage deps deps-test

install:
$(PIP) install .

deps:
$(PIP) install -r requirements.txt

deps-test:
$(PIP) install -r requirements-test.txt

# Run all unit tests
test:
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -118,5 +118,5 @@ including the extracted information from the MODS part of the METS.

Example:

mm2tei "https://digital.slub-dresden.de/oai/?verb=GetRecord&metadataPrefix=mets&identifier=oai:de:slub-dresden:db:id-453779263"
mm2tei "https://digital.slub-dresden.de/oai/?verb=GetRecord&metadataPrefix=mets&identifier=oai:de:slub-dresden:db:id-453779263" > tei.xml

6 changes: 5 additions & 1 deletion mets_mods2tei/api/alto.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,11 @@ def get_text_in_line(self, line):
Returns the ALTO-encoded text.
:param Element line: The line to extract the text from.
"""
return " ".join(element.get("CONTENT") for element in line.xpath("./alto:String", namespaces=ns))
text = " ".join(element.get("CONTENT") for element in line.xpath("./alto:String", namespaces=ns))
hyp = line.find("alto:HYP", namespaces=ns)
if hyp is not None:
text += hyp.get("CONTENT")
return text

def __compute_fuzzy_distance(self, text1, text2):
"""
Expand Down
28 changes: 21 additions & 7 deletions mets_mods2tei/api/mets.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ def __spur(self):
person[name_part.get_type()] = name_part.get_valueOf_()

# either author or editor
roles = name.get_role()[0].get_roleTerm()
roles = name.get_role()[0].get_roleTerm() if name.get_role() else []
# TODO: handle the complete set of allowed roles
for role in roles:
if role.get_valueOf_() == "edt":
Expand Down Expand Up @@ -237,12 +237,26 @@ def __spur(self):
#
# metsHdr
header = self.mets.get_metsHdr()

# encoding date
self.encoding_date = header.get_CREATEDATE().isoformat()

# encoding description
self.encoding_desc = list(filter(lambda x: x.get_OTHERTYPE() == "SOFTWARE", header.get_agent()))[0].get_name()
if header:
# encoding date
self.encoding_date = header.get_CREATEDATE()
# encoding description
self.encoding_desc = [agent.get_name()
for agent in header.get_agent()
if agent.get_TYPE() == "OTHER" and agent.get_OTHERTYPE() == "SOFTWARE"]
else:
self.encoding_date = None
self.encoding_desc = None

if self.encoding_date:
self.encoding_date = self.encoding_date.isoformat()
else:
self.logger.error("Found no @CREATEDATE for publicationStmt/date")
if self.encoding_desc:
self.encoding_desc = self.encoding_desc[0] # or -1?
# what about agent.get_OTHERROLE() and agent.get_note()?
else:
self.logger.error("Found no mets:agent for encodingDesc")

#
# location of manuscript
Expand Down
8 changes: 5 additions & 3 deletions mets_mods2tei/api/tei.py
Original file line number Diff line number Diff line change
Expand Up @@ -462,15 +462,17 @@ def add_encoding_date(self, date):
publication_stmt = self.tree.xpath('//tei:publicationStmt', namespaces=ns)[0]
encoding_date = etree.SubElement(publication_stmt, "%sdate" % TEI)
encoding_date.set("type", "publication")
encoding_date.text = date
if date:
encoding_date.text = date

def set_encoding_description(self, creator):
"""
Set some details on the encoding of the digital edition
"""
encoding_desc = self.tree.xpath('//tei:encodingDesc', namespaces=ns)[0]
encoding_desc_details = etree.SubElement(encoding_desc, "%sp" % TEI)
encoding_desc_details.text = "Encoded with the help of %s." % creator
if creator:
encoding_desc_details = etree.SubElement(encoding_desc, "%sp" % TEI)
encoding_desc_details.text = "Encoded with the help of %s." % creator

def add_repository(self, repository):
"""
Expand Down
3 changes: 2 additions & 1 deletion mets_mods2tei/scripts/mets_mods2tei.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import

import sys
import os
import logging
import click
Expand All @@ -19,7 +20,7 @@ def cli(mets, ocr, text_group, log_level):

#
# logging level
logging.basicConfig(level=logging.getLevelName(log_level))
logging.basicConfig(level=logging.getLevelName(log_level), stream=sys.stderr)

#
# interpret mets argument
Expand Down