Skip to content

Merge branch 'main' of https://github.com/OCR-D/gt_structure_text #61

Merge branch 'main' of https://github.com/OCR-D/gt_structure_text

Merge branch 'main' of https://github.com/OCR-D/gt_structure_text #61

Workflow file for this run

name: gtrepo

# Triggers: a pushed tag matching the filter below, or a manual run from the
# Actions UI with an optional tag name.
on:
  push:
    tags:
      - 'v[0-9]+.[0-9]+.[0-9]+'
  workflow_dispatch:
    inputs:
      tag-name:
        description: Name of the release tag

# Every `run:` step in this workflow is executed with bash.
defaults:
  run:
    shell: bash
jobs:
  # Single job: validates the repository layout, renders the metadata and
  # overview files with Saxon/XSLT, builds the release archives, publishes a
  # release, commits generated files to main and deploys `ghout` to gh-pages.
  build:
    name: analyse and make Bagit
    runs-on: ubuntu-latest
    permissions:
      checks: write
      # Needed for the `git push` steps, the release upload and the
      # gh-pages deployment further down.
      contents: write
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      # --- Resolve TAG_NAME (exactly one of the next two steps runs) -------
      - name: Using tag name from ref name
        if: github.event.inputs.tag-name == ''
        run: echo "TAG_NAME=$GITHUB_REF_NAME" >> "$GITHUB_ENV"
      - name: Using tag name from input param
        if: github.event.inputs.tag-name != ''
        env:
          # Pass the user-supplied input through an environment variable
          # instead of expanding it inside the script text, so that its
          # content can never be interpreted as shell code.
          INPUT_TAG_NAME: ${{ github.event.inputs.tag-name }}
        run: echo "TAG_NAME=$INPUT_TAG_NAME" >> "$GITHUB_ENV"

      # --- Tooling -----------------------------------------------------------
      # Later steps invoke `saxon-he-12.3.jar` from the working directory.
      - name: download and install Saxon
        run: |
          wget https://github.com/Saxonica/Saxon-HE/releases/download/SaxonHE12-3/SaxonHE12-3J.zip
          unzip SaxonHE12-3J.zip
          rm SaxonHE12-3J.zip
      - name: install jq
        # -y: the runner is non-interactive, never wait for a confirmation.
        run: sudo apt-get install -y jq
      - name: install XSL stylesheets
        run: |
          git clone https://github.com/tboenig/gt-repo-scripts.git
          mv gt-repo-scripts/scripts scripts/
          rm -r gt-repo-scripts
      - name: install megalevelrules.xml
        run: |
          git clone --branch gh-pages --single-branch https://github.com/OCR-D/gt-MufiLevelRules.git
          mv gt-MufiLevelRules/rules/megalevelrules.xml scripts/megalevelrules.xml
          rm -r gt-MufiLevelRules

      # --- Metadata conversion and structure check ---------------------------
      - name: convert metadata from YAML to JSON
        # NOTE(review): `@master` tracks a moving branch; consider pinning to
        # a release tag or commit SHA.
        uses: mikefarah/yq@master
        with:
          cmd: yq -o=json METADATA.yml > METADATA.json
      # In all Saxon calls below the stylesheet is also passed as its own
      # source document (-s: equals -xsl:).
      - name: check repo directory structure
        run: |
          mkdir ghout
          java -jar saxon-he-12.3.jar -xsl:scripts/gt-overview_unitTest.xsl \
          output=unitTest1 \
          -s:scripts/gt-overview_unitTest.xsl -o:ghout/pathtest.md
      - name: test result
        # The report must exist; a non-empty report is printed and fails the
        # step (`false`).
        run: |
          test -e ghout/pathtest.md
          if test -s ghout/pathtest.md; then \
          cat ghout/pathtest.md; false; fi
      - name: install GT Labelling docs
        run: git clone https://github.com/tboenig/gt-guidelines.git
      - name: make output directories
        run: mkdir metadata_out ocrdzip_out
      - name: move README to readme_old/
        run: bash scripts/readmefolder.sh
      - name: make readme.xml
        run: bash scripts/xreadme.sh

      # --- Generated views (written to ghout/, deployed to gh-pages) --------
      - name: transform METADATA and make GT-Overview
        run: |
          java -jar saxon-he-12.3.jar -xsl:scripts/gt-overview_metadata.xsl \
          output=METADATA repoBase=${{ env.TAG_NAME }} repoName=$GITHUB_REPOSITORY bagitDumpNum=$GITHUB_RUN_NUMBER releaseTag=${{ env.TAG_NAME }} \
          -s:scripts/gt-overview_metadata.xsl -o:ghout/metadata.md
      - name: make compressed table view
        run: |
          java -jar saxon-he-12.3.jar -xsl:scripts/gt-overview_metadata.xsl \
          output=TABLE repoBase=${{ env.TAG_NAME }} repoName=$GITHUB_REPOSITORY \
          -s:scripts/gt-overview_metadata.xsl -o:ghout/table.md
      - name: detailed table view
        run: |
          java -jar saxon-he-12.3.jar -xsl:scripts/gt-overview_metadata.xsl \
          output=OVERVIEW repoBase=${{ env.TAG_NAME }} repoName=$GITHUB_REPOSITORY \
          -s:scripts/gt-overview_metadata.xsl -o:ghout/overview.md
      - name: leveling the volume and documents
        run: |
          java -jar saxon-he-12.3.jar -xsl:scripts/gt-level_parser.xsl \
          repoName=$GITHUB_REPOSITORY \
          -s:scripts/gt-level_parser.xsl -o:ghout/overview-level.md
      - name: generate mets.sh
        # The generated script is listed and printed to the job log; it is
        # executed later by the "make validMets" step.
        run: |
          java -jar saxon-he-12.3.jar -xsl:scripts/gt-overview_metadata.xsl \
          output=METS repoBase=${{ env.TAG_NAME }} repoName=$GITHUB_REPOSITORY \
          -s:scripts/gt-overview_metadata.xsl -o:scripts/mets.sh
          ls -l scripts/mets.sh
          cat scripts/mets.sh
      - name: generate Metadata JSON file
        run: |
          java -jar saxon-he-12.3.jar -xsl:scripts/gt-overview_metadata.xsl \
          output=METAJSON repoBase=${{ env.TAG_NAME }} repoName=$GITHUB_REPOSITORY bagitDumpNum=$GITHUB_RUN_NUMBER releaseTag=${{ env.TAG_NAME }} \
          -s:scripts/gt-overview_metadata.xsl -o:metadata_out/metadata_l.json
      - name: pretty-print JSON file
        run: |
          jq '.' metadata_out/metadata_l.json > metadata_out/metadata.json
          cp metadata_out/metadata.json ghout/
          rm metadata_out/metadata_l.json
      - name: generate README
        run: |
          java -jar saxon-he-12.3.jar -xsl:scripts/gt-overview_metadata.xsl \
          output=README repoBase=${{ env.TAG_NAME }} repoName=$GITHUB_REPOSITORY \
          -s:scripts/gt-overview_metadata.xsl -o:README.md
      - name: generate METADATA_htr_united.yml
        # No -o: is given here; the file committed further down is
        # presumably written by the stylesheet itself.
        run: |
          java -jar saxon-he-12.3.jar -xsl:scripts/gt-metadata_htr_united.xsl repoName=${{ github.event.repository.name }} \
          -s:scripts/gt-metadata_htr_united.xsl
      - name: generate METS Volume File
        run: |
          java -jar saxon-he-12.3.jar -xsl:scripts/gt-overview_metadata.xsl \
          output=METSvolume repoBase=${{ env.TAG_NAME }} repoName=$GITHUB_REPOSITORY bagitDumpNum=$GITHUB_RUN_NUMBER releaseTag=${{ env.TAG_NAME }} \
          -s:scripts/gt-overview_metadata.xsl -o:metadata_out/mets.xml
      - name: generate release download list
        run: |
          java -jar saxon-he-12.3.jar -xsl:scripts/gt-overview_metadata.xsl \
          output=download repoBase=${{ env.TAG_NAME }} repoName=$GITHUB_REPOSITORY bagitDumpNum=$GITHUB_RUN_NUMBER releaseTag=${{ env.TAG_NAME }} \
          -s:scripts/gt-overview_metadata.xsl -o:ghout/download.txt
      - name: delete fileGrp DEFAULT
        run: |
          java -jar saxon-he-12.3.jar -xsl:scripts/gt-overview_metadata.xsl \
          output=METSdefault repoBase=${{ env.TAG_NAME }} repoName=$GITHUB_REPOSITORY bagitDumpNum=$GITHUB_RUN_NUMBER releaseTag=${{ env.TAG_NAME }} \
          -s:scripts/gt-overview_metadata.xsl
      - name: generate CITATION.cff
        run: |
          java -jar saxon-he-12.3.jar -xsl:scripts/gt-overview_metadata.xsl \
          output=CITATION repoBase=${{ env.TAG_NAME }} repoName=$GITHUB_REPOSITORY bagitDumpNum=$GITHUB_RUN_NUMBER releaseTag=${{ env.TAG_NAME }} \
          -s:scripts/gt-overview_metadata.xsl -o:rawCITATION.cff
      - name: pretty-print CITATION.cff
        # NOTE(review): `@master` tracks a moving branch; consider pinning to
        # a release tag or commit SHA.
        uses: mikefarah/yq@master
        with:
          cmd: |
            yq -I4 rawCITATION.cff > CITATION.cff
            rm rawCITATION.cff
      - name: symlink metadata as index
        # A relative symlink target is resolved against the directory that
        # contains the link, so the target must be given relative to ghout/.
        # (`ghout/metadata.md` would point at ghout/ghout/metadata.md.)
        run: ln -s metadata.md ghout/index.md

      # --- METS validation and BagIt packaging -------------------------------
      - name: ensure valid METS
        run: bash -ex scripts/data_mets.sh
      - name: install ocrd and bagit
        run: |
          # Refresh the package index first so the install does not fail on
          # outdated package URLs.
          sudo apt-get update
          sudo apt-get install -y python3 imagemagick libgeos-dev
          pip install -U pip 'setuptools>=61'
          pip install ocrd
          ocrd --version
      - name: make validMets
        run: bash -ex scripts/mets.sh
      - name: make bagit
        run: bash scripts/data_structure.sh
      - name: copy CSS styles, Javascript and Markdown config files
        run: |
          cp scripts/table_hide.css ghout/
          cp scripts/levelparser.css ghout/
          cp scripts/lang.js ghout/
          cp scripts/_config.yml ghout/

      # --- Release -------------------------------------------------------------
      - name: add metadata files to release assets
        # NOTE(review): `@master` tracks a moving branch; consider pinning to
        # a release tag or commit SHA.
        uses: thedoctor0/zip-release@master
        with:
          filename: metadata-v${{ github.run_number }}.zip
          path: 'metadata_out'
      - name: copy metadata.zip to ocrdzip_out
        run: cp metadata-v${{ github.run_number }}.zip ocrdzip_out/
      - name: upload release assets
        uses: ncipollo/release-action@v1
        if: env.TAG_NAME != ''
        with:
          allowUpdates: true
          artifacts: 'ocrdzip_out/*.zip'
          artifactContentType: application/zip
          body: |
            <dl>
            <dt>Version:</dt>
            <dd>${{ env.TAG_NAME }}</dd>
            <dt>Info:</dt>
            <dd>
            To make use of Ground Truth, please download the provided zip files.<br/>
            The 'ocrd.zip' files are ocr-d-bagit files.<br/>
            The 'metadata-v${{ github.run_number }}.zip' file contains metadata for the Ground Truth corpus in both METS and JSON format.<br/>
            The 'mets.xml' file enumerates all the documents and BagIt files contained within.<br/>
            The bagits correspond to the <a href="https://ocr-d.de/de/spec/ocrd_zip.html">OCR-D Bagit Spec</a>.<br/>
            The source-code-zip and source-code-tar.gz files only provide metadata, citations, license and readme information.<br/>
            If you want to use the source files, please clone the repository.
            </dd>
            </dl>
          name: Release ${{ github.run_number }}_${{ env.TAG_NAME }}
          omitNameDuringUpdate: true
          tag: ${{ env.TAG_NAME }}
          token: ${{ secrets.GITHUB_TOKEN }}

      # --- Commit generated files back to main --------------------------------
      # Each step commits one generated file; `|| echo` keeps the step green
      # when the file did not change and there is nothing to commit.
      - name: commit README
        run: |
          git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com"
          git config --local user.name "github-actions[bot]"
          git add README.md
          git commit -m "[Automatic] Update readme files" || echo "Nothing to update"
          git push origin HEAD:main
      - name: commit METADATA_htr_united.yml
        run: |
          git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com"
          git config --local user.name "github-actions[bot]"
          git add ${{ github.event.repository.name }}_METADATA_htr_united.yml
          git commit -m "[Automatic] Update METADATA_htr_united.yml files" || echo "Nothing to update"
          git push origin HEAD:main
      - name: commit CITATION.cff
        run: |
          git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com"
          git config --local user.name "github-actions[bot]"
          git add CITATION.cff
          git commit -m "[Automatic] Update CITATION.cff files" || echo "Nothing to update"
          git push origin HEAD:main

      # --- Publish the overview pages -----------------------------------------
      - name: deploy GT Overview to GitHub Pages branch 🚀
        uses: JamesIves/github-pages-deploy-action@v4
        with:
          branch: gh-pages  # The branch the action should deploy to.
          folder: ghout  # The folder the action should deploy.