Skip to content

Commit

Permalink
initial implementation of release script (preparation for v4.0) #205
Browse files Browse the repository at this point in the history
  • Loading branch information
matyaskopp committed Jan 18, 2024
1 parent cbfbbd9 commit ea119ef
Show file tree
Hide file tree
Showing 15 changed files with 1,983 additions and 522 deletions.
54 changes: 54 additions & 0 deletions src/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@




# Directory make is running in. Simple assignment (:=) expands the
# `pwd` subshell once at parse time instead of on every reference.
PWD := $(shell pwd)
# Directory the `release` target writes its output into.
DATA-RELEASE := $(PWD)/data/release/
# Local copy of the sample input data.
SAMPLE-DATA-IN := $(PWD)/data/sample-in/
# Input data read by `release`; defaults to the local sample and can be
# overridden on the command line (make release DATA-IN=/some/dir/).
DATA-IN := $(SAMPLE-DATA-IN)
# Source the sample is fetched from with rsync/scp.
SAMPLE-DATA-SOURCE := parczech:/opt/data/data-ParlaMint3.1-FRESH/




# Build the release data: create the output directory, run
# tools/ParCzech-finalize.xsl through the transformer command stored in
# $(s), then copy pdt-fslib.xml into the ParCzech.TEI.ana output folder.
# The stylesheet receives the output directory, the consolidated
# listPerson/listOrg files, the taxonomies directory and type=TEI.ana as
# parameters; the last argument is the consolidated ParCzech.ana.xml.
# Declared phony because `release` is a command name, not a file to build.
.PHONY: release
release:
	mkdir -p $(DATA-RELEASE)/
	$(s) -xsl:tools/ParCzech-finalize.xsl \
	    outDir=$(DATA-RELEASE)/ \
	    inListPerson=$(DATA-IN)/parczech.tei.ana/consolidated/ParCzech-listPerson.xml \
	    inListOrg=$(DATA-IN)/parczech.tei.ana/consolidated/ParCzech-listOrg.xml \
	    inTaxonomiesDir=$(PWD)/metadater/taxonomies/ \
	    type=TEI.ana \
	    $(DATA-IN)/parczech.tei.ana/consolidated/ParCzech.ana.xml
	cp ./tei2teitok/pdt-fslib.xml $(DATA-RELEASE)/ParCzech.TEI.ana/





# Remove every parczech.tei.* directory from the local sample.
# Phony: this is a command, not a file. The $(if ...) line aborts when
# SAMPLE-DATA-IN is empty so the glob can never resolve against "/".
# `-f` lets the target succeed when the sample is already clean.
.PHONY: DEV-clean-sample-for-release
DEV-clean-sample-for-release:
	$(if $(strip $(SAMPLE-DATA-IN)),,$(error SAMPLE-DATA-IN is empty))
	rm -rf $(SAMPLE-DATA-IN)/parczech.tei.*
# Populate the local sample with both variants (raw and ana).
.PHONY: DEV-prepare-sample-for-release \
        DEV-prepare-sample-for-release-raw \
        DEV-prepare-sample-for-release-ana
DEV-prepare-sample-for-release: DEV-prepare-sample-for-release-raw DEV-prepare-sample-for-release-ana

# Static pattern rule; the stem $* is the variant name (raw or ana).
# Steps:
#   1. create the variant directory in the local sample;
#   2. rsync the files lying directly in the remote consolidated/ folder
#      (--exclude='*/' skips its subdirectories);
#   3. delete every xi:include child of teiCorpus except the first and the
#      last one from the local ParCzech.*xml file(s);
#   4. list the href of each remaining include (getcomponentincludes) and
#      scp exactly those component files from the remote source;
#   5. run the variant specific -fix target.
# printf emits one file name per line, so `xargs -I` handles each matched
# file separately even when the glob matches more than one.
# $(MAKE) is used for the recursion so that -n, -j and command line
# variables are passed on to the sub-make.
DEV-prepare-sample-for-release-raw DEV-prepare-sample-for-release-ana: DEV-prepare-sample-for-release-%:
	mkdir -p $(SAMPLE-DATA-IN)/parczech.tei.$*
	rsync -a --exclude='*/' $(SAMPLE-DATA-SOURCE)/parczech.tei.$*/consolidated/ $(SAMPLE-DATA-IN)/parczech.tei.$*/consolidated/
	@echo "INFO: [$*] sync files in root folder"
	xmlstarlet edit --inplace \
	    --delete "/_:teiCorpus/xi:include[not(position() = 1 or position() = last() )]" \
	    $(SAMPLE-DATA-IN)/parczech.tei.$*/consolidated/ParCzech.*xml
	@echo "INFO: [$*] sync component files"
	printf '%s\n' $(SAMPLE-DATA-IN)/parczech.tei.$*/consolidated/ParCzech.*xml \
	    | xargs ${getcomponentincludes} \
	    | xargs -I {} scp $(SAMPLE-DATA-SOURCE)/parczech.tei.$*/consolidated/{} $(SAMPLE-DATA-IN)/parczech.tei.$*/consolidated/{}
	$(MAKE) DEV-prepare-sample-for-release-$*-fix

# Variant specific hooks run as the last step of
# DEV-prepare-sample-for-release-raw / -ana. Both are currently empty
# placeholders; they are declared phony because they never produce a file.
.PHONY: DEV-prepare-sample-for-release-raw-fix DEV-prepare-sample-for-release-ana-fix

DEV-prepare-sample-for-release-raw-fix: # raw specific issues

DEV-prepare-sample-for-release-ana-fix: # ana specific issues


###################
# Tool command lines. These are recursively expanded variables, so they can
# be defined after the rules whose recipes use them.
# JM is not set in this part of the file; presumably extra JVM options
# supplied by the caller - TODO confirm.

# Argument list handed to `xargs`: for each input file (%) run a Saxon
# query that selects the href attribute of every child of the root element
# whose local name is "include", then strip the href="..." wrapper with sed
# so only the attribute value is left.
getcomponentincludes = -I % java -cp /usr/share/java/saxon.jar net.sf.saxon.Query -xi:off \!method=adaptive -qs:'/*/*[local-name()="include"]/@href' -s:% |sed 's/^ *href="//;s/"//'
# Runs the Jing jar.
j = java ${JM} -jar /usr/share/java/jing.jar
# Runs the Saxon jar.
s = java ${JM} -jar /usr/share/java/saxon.jar
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<?xml version="1.0" encoding="utf-8"?>
<taxonomy xmlns="http://www.tei-c.org/ns/1.0" xml:id="NER.cnec2.0">
<taxonomy xmlns="http://www.tei-c.org/ns/1.0" xml:id="ParCzech-taxonomy-NER.cnec2.0.ana" xml:lang="mul">
<desc xml:lang="en">
<term>CNEC 2.0 Named Entities</term>
</desc>
Expand Down
9 changes: 9 additions & 0 deletions src/metadater/taxonomies/ParCzech-taxonomy-meeting.parts.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- TEI taxonomy ParCzech-taxonomy-meeting.parts: defines the single category parla.agenda, with Czech and English descriptions. -->
<taxonomy xmlns="http://www.tei-c.org/ns/1.0" xml:id="ParCzech-taxonomy-meeting.parts" xml:lang="mul">
<desc xml:lang="cs"><term>Bod</term></desc>
<desc xml:lang="en"><term>Agenda</term></desc>
<category xml:id="parla.agenda">
<catDesc xml:lang="cs"><term>Bod jednání</term></catDesc>
<catDesc xml:lang="en"><term>Agenda</term>: topic discussed during sitting</catDesc>
</category>
</taxonomy>
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<?xml version="1.0" encoding="utf-8"?>
<taxonomy xmlns="http://www.tei-c.org/ns/1.0" xml:id="parla.links">
<taxonomy xmlns="http://www.tei-c.org/ns/1.0" xml:id="ParCzech-taxonomy-parla.links" xml:lang="mul">
<desc xml:lang="cs">
<term>Druhy odkazů</term>
</desc>
Expand Down
21 changes: 21 additions & 0 deletions src/metadater/taxonomies/ParlaMint-taxonomy-NER.ana.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- TEI taxonomy ParlaMint-taxonomy-NER.ana: named-entity categories PER, LOC, ORG and MISC, each with an English and a Czech term. -->
<taxonomy xmlns="http://www.tei-c.org/ns/1.0" xml:id="ParlaMint-taxonomy-NER.ana" xml:lang="mul">
<desc xml:lang="en"><term>Named entities</term></desc>
<desc xml:lang="cs"><term>Jmenné entity</term></desc>
<category xml:id="PER">
<catDesc xml:lang="en"><term>person</term></catDesc>
<catDesc xml:lang="cs"><term>osoba</term></catDesc>
</category>
<category xml:id="LOC">
<catDesc xml:lang="en"><term>location</term></catDesc>
<catDesc xml:lang="cs"><term>místo</term></catDesc>
</category>
<category xml:id="ORG">
<catDesc xml:lang="en"><term>organization</term></catDesc>
<catDesc xml:lang="cs"><term>organizace</term></catDesc>
</category>
<category xml:id="MISC">
<catDesc xml:lang="en"><term>miscellaneous</term></catDesc>
<catDesc xml:lang="cs"><term>různé</term></catDesc>
</category>
</taxonomy>
Loading

0 comments on commit ea119ef

Please sign in to comment.