From ea119ef3b9a08c4fdaaf462d93e2f580c01750ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maty=C3=A1=C5=A1=20Kopp?= Date: Thu, 18 Jan 2024 09:27:10 +0100 Subject: [PATCH] initial implementation of release script (preparation for v4.0) #205 --- src/Makefile | 54 + ... => ParCzech-taxonomy-NER.cnec2.0.ana.xml} | 2 +- .../ParCzech-taxonomy-meeting.parts.xml | 9 + ....xml => ParCzech-taxonomy-parla.links.xml} | 2 +- .../taxonomies/ParlaMint-taxonomy-NER.ana.xml | 21 + .../ParlaMint-taxonomy-UD-SYN.ana.xml | 1115 +++++++++++++++++ .../ParlaMint-taxonomy-parla.legislature.xml | 138 ++ .../ParlaMint-taxonomy-speaker_types.xml | 18 + src/metadater/taxonomies/taxonomy-NER.xml | 42 - src/metadater/taxonomies/taxonomy-UD-SYN.xml | 147 --- .../taxonomies/taxonomy-meeting.parts.xml | 15 - .../taxonomies/taxonomy-parla.legislature.xml | 274 ---- .../taxonomies/taxonomy-speaker_types.xml | 28 - src/metadater/tei_parczech.xml | 28 +- src/tools/ParCzech-finalize.xsl | 612 +++++++++ 15 files changed, 1983 insertions(+), 522 deletions(-) create mode 100644 src/Makefile rename src/metadater/taxonomies/{taxonomy-NER.cnec2.0.xml => ParCzech-taxonomy-NER.cnec2.0.ana.xml} (98%) create mode 100644 src/metadater/taxonomies/ParCzech-taxonomy-meeting.parts.xml rename src/metadater/taxonomies/{taxonomy-parla.links.xml => ParCzech-taxonomy-parla.links.xml} (84%) create mode 100644 src/metadater/taxonomies/ParlaMint-taxonomy-NER.ana.xml create mode 100644 src/metadater/taxonomies/ParlaMint-taxonomy-UD-SYN.ana.xml create mode 100644 src/metadater/taxonomies/ParlaMint-taxonomy-parla.legislature.xml create mode 100644 src/metadater/taxonomies/ParlaMint-taxonomy-speaker_types.xml delete mode 100644 src/metadater/taxonomies/taxonomy-NER.xml delete mode 100644 src/metadater/taxonomies/taxonomy-UD-SYN.xml delete mode 100644 src/metadater/taxonomies/taxonomy-meeting.parts.xml delete mode 100644 src/metadater/taxonomies/taxonomy-parla.legislature.xml delete mode 100644 
src/metadater/taxonomies/taxonomy-speaker_types.xml create mode 100644 src/tools/ParCzech-finalize.xsl diff --git a/src/Makefile b/src/Makefile new file mode 100644 index 0000000..82de3a7 --- /dev/null +++ b/src/Makefile @@ -0,0 +1,54 @@ + + + + +PWD=$(shell pwd) +DATA-RELEASE=$(PWD)/data/release/ +SAMPLE-DATA-IN=$(PWD)/data/sample-in/ +DATA-IN=$(SAMPLE-DATA-IN) +SAMPLE-DATA-SOURCE=parczech:/opt/data/data-ParlaMint3.1-FRESH/ + + + + +release: + mkdir -p $(DATA-RELEASE)/ + $s -xsl:tools/ParCzech-finalize.xsl \ + outDir=$(DATA-RELEASE)/ \ + inListPerson=$(DATA-IN)/parczech.tei.ana/consolidated/ParCzech-listPerson.xml \ + inListOrg=$(DATA-IN)/parczech.tei.ana/consolidated/ParCzech-listOrg.xml \ + inTaxonomiesDir=$(PWD)/metadater/taxonomies/ \ + type=TEI.ana \ + $(DATA-IN)/parczech.tei.ana/consolidated/ParCzech.ana.xml + cp ./tei2teitok/pdt-fslib.xml $(DATA-RELEASE)/ParCzech.TEI.ana/ + + + + + +DEV-clean-sample-for-release: + rm -r $(SAMPLE-DATA-IN)/parczech.tei.* +DEV-prepare-sample-for-release: DEV-prepare-sample-for-release-raw DEV-prepare-sample-for-release-ana + +DEV-prepare-sample-for-release-raw DEV-prepare-sample-for-release-ana: DEV-prepare-sample-for-release-%: + mkdir -p $(SAMPLE-DATA-IN)/parczech.tei.$* || : + rsync -a --exclude='*/' $(SAMPLE-DATA-SOURCE)/parczech.tei.$*/consolidated/ $(SAMPLE-DATA-IN)/parczech.tei.$*/consolidated/ + @echo "INFO: [$*] sync files in root folder" + xmlstarlet edit --inplace \ + --delete "/_:teiCorpus/xi:include[not(position() = 1 or position() = last() )]" \ + $(SAMPLE-DATA-IN)/parczech.tei.$*/consolidated/ParCzech.*xml + @echo "INFO: [$*] sync component files" + echo $(SAMPLE-DATA-IN)/parczech.tei.$*/consolidated/ParCzech.*xml \ + | xargs ${getcomponentincludes} \ + | xargs -I {} scp $(SAMPLE-DATA-SOURCE)/parczech.tei.$*/consolidated/{} $(SAMPLE-DATA-IN)/parczech.tei.$*/consolidated/{} + make DEV-prepare-sample-for-release-$*-fix + +DEV-prepare-sample-for-release-raw-fix: # raw specific issues + 
+DEV-prepare-sample-for-release-ana-fix: # ana specific issues + + +###################x +s = java $(JM) -jar /usr/share/java/saxon.jar +j = java $(JM) -jar /usr/share/java/jing.jar +getcomponentincludes = -I % java -cp /usr/share/java/saxon.jar net.sf.saxon.Query -xi:off \!method=adaptive -qs:'/*/*[local-name()="include"]/@href' -s:% |sed 's/^ *href="//;s/"//' diff --git a/src/metadater/taxonomies/taxonomy-NER.cnec2.0.xml b/src/metadater/taxonomies/ParCzech-taxonomy-NER.cnec2.0.ana.xml similarity index 98% rename from src/metadater/taxonomies/taxonomy-NER.cnec2.0.xml rename to src/metadater/taxonomies/ParCzech-taxonomy-NER.cnec2.0.ana.xml index 7a1ed08..b034edb 100644 --- a/src/metadater/taxonomies/taxonomy-NER.cnec2.0.xml +++ b/src/metadater/taxonomies/ParCzech-taxonomy-NER.cnec2.0.ana.xml @@ -1,5 +1,5 @@ - + CNEC 2.0 Named Entities diff --git a/src/metadater/taxonomies/ParCzech-taxonomy-meeting.parts.xml b/src/metadater/taxonomies/ParCzech-taxonomy-meeting.parts.xml new file mode 100644 index 0000000..a92a6b2 --- /dev/null +++ b/src/metadater/taxonomies/ParCzech-taxonomy-meeting.parts.xml @@ -0,0 +1,9 @@ + + + Bod + Agenda + + Bod jednání + Agenda: topic discussed during sitting + + diff --git a/src/metadater/taxonomies/taxonomy-parla.links.xml b/src/metadater/taxonomies/ParCzech-taxonomy-parla.links.xml similarity index 84% rename from src/metadater/taxonomies/taxonomy-parla.links.xml rename to src/metadater/taxonomies/ParCzech-taxonomy-parla.links.xml index 204a632..74eea20 100644 --- a/src/metadater/taxonomies/taxonomy-parla.links.xml +++ b/src/metadater/taxonomies/ParCzech-taxonomy-parla.links.xml @@ -1,5 +1,5 @@ - + Druhy odkazů diff --git a/src/metadater/taxonomies/ParlaMint-taxonomy-NER.ana.xml b/src/metadater/taxonomies/ParlaMint-taxonomy-NER.ana.xml new file mode 100644 index 0000000..640f88a --- /dev/null +++ b/src/metadater/taxonomies/ParlaMint-taxonomy-NER.ana.xml @@ -0,0 +1,21 @@ + + + Named entities + Jmenné entity + + person + osoba + + + location 
+ místo + + + organization + organizace + + + miscellaneous + různé + + diff --git a/src/metadater/taxonomies/ParlaMint-taxonomy-UD-SYN.ana.xml b/src/metadater/taxonomies/ParlaMint-taxonomy-UD-SYN.ana.xml new file mode 100644 index 0000000..a7598a5 --- /dev/null +++ b/src/metadater/taxonomies/ParlaMint-taxonomy-UD-SYN.ana.xml @@ -0,0 +1,1115 @@ + + + UD syntactic relations: All defined syntactic relations of the Universal Dependendencies project + + acl: clausal modifier of noun (adnominal clause) + + acl:adv: adverbs acting as amod + + + acl:attr: attributive adnominal clause + + + acl:cleft: clefted phrase modifier + + + acl:fixed: fixed clausal modifier + + + acl:inf: acl:inf + + + acl:relat: relational adnominal clause + + + acl:relcl: relative clause modifier + + + acl:subj: clausal modifier of noun (adnominal clause): the noun is the subject + + + acl:tmod: clausal modifier of noun of time (adnominal clause) + + + acl:tonp: nounization + + + + advcl: adverbial clause modifier + + advcl:abs: ablativus absolutus + + + advcl:cau: adverb with causative modality + + + advcl:cleft: clefted adverbial clause modifier + + + advcl:cmp: comparative adverbial clause modifier of an adjective or adverb + + + advcl:cmpr: comparative clause + + + advcl:cond: conditional adverbial clause modifier + + + advcl:coverb: adverbial coverb phrase + + + advcl:eval: advcl with evaluative modality + + + advcl:lcl: advcl with locative to modality + + + advcl:lto: advcl with locative to modality + + + advcl:mcl: adverbial clause with modal modality + + + advcl:objective: adverbial purpose clause modifier + + + advcl:pred: adverbial secondary predication + + + advcl:relcl: relative clause modifier of the clause + + + advcl:svc: adverbial infinitive + + + advcl:tcl: temporal adverbial clause + + + + advmod: adverbial modifier + + advmod:adj: adverbial modifier is an adjective + + + advmod:arg: adverbial complement + + + advmod:cau: adverb with causative modality + + + advmod:cmp: 
comparative modifier of an adjective or adverb + + + advmod:deg: advmod with degree modality + + + advmod:det: adverbial modification by a determiner + + + advmod:df: duration or frequency adverbial modifier + + + advmod:dir: direction adverbial modifier + + + advmod:emph: emphasizing word, intensifier + + + advmod:eval: advmod with evaluative modality + + + advmod:fixed: fixed adverbial modifier + + + advmod:foc: advmod with focus modality + + + advmod:freq: advmod with frequentative modality + + + advmod:lfrom: advmod with source locative from modality + + + advmod:lmod: locative adverbial modifier + + + advmod:lmp: advmod with locative multipoint modality + + + advmod:loc: an adverbial local marker + + + advmod:locy: adverbial modifier where + + + advmod:lto: advmod with locative to modality + + + advmod:mmod: advmod with modal modality + + + advmod:mode: adverbial modifier + + + advmod:neg: adverbial polarity (negative) modifier + + + advmod:obl: contracted advmod and oblique nominal + + + advmod:que: question suffix + + + advmod:tfrom: adverbial modifier since when + + + advmod:tlocy: adverbial modifier when + + + advmod:tmod: advmod with temporal modality + + + advmod:to: adverbial modifier where to + + + advmod:tto: adverbial modifier till when + + + + amod: adjectival modifier + + amod:att: adjectival modifier + + + amod:attlvc: participle in a light-verb construction + + + amod:flat: adjectival part of a named entity + + + + appos: appositional modifier + + appos:nmod: An appositional modifier + + + appos:trans: nominal translation pair + + + + aux: auxiliary + + aux:aff: auxiliary affix + + + aux:aspect: aspect auxiliary + + + aux:caus: causative auxiliary + + + aux:clitic: mobile inflection auxiliary + + + aux:cnd: conditional auxiliary + + + aux:ex: existentials as auxiliary + + + aux:exhort: exhortative auxiliary + + + aux:imp: imperative marker auxiliary + + + aux:nec: necessative auxiliary + + + aux:neg: negative auxiliary + + + aux:opt: optative 
auxiliary + + + aux:part: auxiliary particle + + + aux:pass: passive auxiliary + + + aux:pot: potential auxiliary + + + aux:q: question auxiliary + + + aux:tense: tense auxiliary + + + + case: case marking + + case:acc: accusative case marker + + + case:adv: case marking to form adverbs + + + case:det: preposition with determiner + + + case:gen: genitive case marker + + + case:loc: postpositional localizer + + + case:pred: predicative particles + + + case:voc: vocative particle + + + + cc: coordinating conjunction + + cc:nc: coordinated conjunct : non coordonant + + + cc:preconj: preconjunct + + + + ccomp: clausal complement + + ccomp:cleft: required clausal dependent of the pronoun _to_ + + + ccomp:obj: clausal complement (object) + + + ccomp:obl: clausal complement (non-object) + + + ccomp:pmod: clausal prepositional object + + + ccomp:pred: clausal complement (predicative) + + + ccomp:relcl: double pronoun construction or free relative acting as object + + + ccomp:reported: reported speech from active verb of saying or digression in discursive form + + + + clf: classifier + + clf:det: classifier used as determiner + + + + compound: compound + + compound:a: adjective compound + + + compound:adj: adjective and adjective compound + + + compound:affix: construct state modification + + + compound:amod: Noun and adjective compound + + + compound:apr: adjective and particle compound + + + compound:atov: adjective and verb compound + + + compound:dir: directional verb compound + + + compound:ext: extent and descriptive verb compound + + + compound:lvc: light verb construction + + + compound:nn: noun compound modifier + + + compound:preverb: relation between verb and preverb + + + compound:pron: Noun and pronoun compound + + + compound:prt: phrasal verb particle + + + compound:quant: verb-quantifier compound + + + compound:redup: reduplicated compounds + + + compound:smixut: construct state modification + + + compound:svc: serial verb compounds + + + compound:verbnoun: 
verb and noun compound + + + compound:vmod: noun-verb compound + + + compound:vo: verb-object compound + + + compound:vv: verb-verb compound + + + compound:z: compound with Z (POS tag) + + + + conj: conjunct + + conj:expl: explicative conjunct + + + conj:extend: open/incomplete conjunct + + + conj:redup: reduplicated conjunction + + + conj:svc: coordination of serial verbs + + + + cop: copula + + cop:expl: copula with expletive subject + + + cop:locat: copula with a locative predicate + + + cop:outer: outer copula + + + cop:own: copula for posessive clauses + + + + csubj: clausal subject + + csubj:asubj: clausal subject: Adjective subject + + + csubj:cleft: relative clause modifier + + + csubj:cop: clausal copular subject + + + csubj:outer: outer clause clausal subject + + + csubj:pass: clausal passive subject + + + csubj:pred: clausal subject of a secondary predicate (enhanced dependency) + + + csubj:relcl: double pronoun construction or free relative acting as subject + + + csubj:reported: reported speech from passive verb of saying + + + csubj:vsubj: clausal subject: Adjective subject + + + + dep: unspecified dependency + + dep:aff: affix unspecified dependency + + + dep:agr: agreement clitic + + + dep:alt: alternative gender suffix + + + dep:ana: anaphoric prefix dependency + + + dep:aux: derivational auxiliary dependency + + + dep:comp: unspecified dependency + + + dep:conj: conjunct + + + dep:cop: derivational copular dependency + + + dep:der: derivational suffix + + + dep:emo: emotional root dependency + + + dep:infl: inflectional dependency + + + dep:mark: derivational marker dependency + + + dep:mod: modifier underspecified for the syntactic category of its head + + + dep:pos: postural root dependency + + + dep:repeat: repeating of word + + + dep:ss: status suffix + + + + det: determiner + + det:adj: article of a prearticulated adjective + + + det:clf: determiner classifier + + + det:noun: article of a prearticulated noun + + + det:numgov: pronominal 
quantifier governing the case of the noun + + + det:nummod: pronominal quantifier agreeing in case with the noun + + + det:pmod: pronoun determiner + + + det:poss: possessive determiner + + + det:predet: predeterminer + + + det:pron: article of a prearticulated pronoun + + + det:rel: relative determiner + + + + discourse: discourse element + + discourse:conn: discourse connective marker + + + discourse:emo: emoticons, emojis + + + discourse:filler: filler sound in spoken data + + + discourse:intj: interjection + + + discourse:q: discourse particle for questions + + + discourse:sp: sentence particle + + + + dislocated: dislocated elements + + dislocated:advcl: dislocated adverbial clause + + + dislocated:ccomp: dislocated complement clause + + + dislocated:cleft: cleft constructions that lack a copula + + + dislocated:csubj: dislocated clausal subject + + + dislocated:nsubj: dislocated nominal subject + + + dislocated:obj: dislocated object + + + dislocated:obl: dislocated oblique argument + + + dislocated:subj: dislocated subject + + + dislocated:vo: dislocated object of verb-object compound + + + + expl: expletive + + expl:comp: expletive + + + expl:impers: impersonal expletive + + + expl:pass: reflexive pronoun used in reflexive passive + + + expl:poss: possessively used reflexive clitic + + + expl:pv: reflexive clitic with an inherently reflexive verb + + + expl:subj: expletive subject + + + + fixed: fixed multiword expression + + + flat: flat multiword expression + + flat:abs: clausal absolutive + + + flat:date: flat multiword expression: date + + + flat:dist: distributive + + + flat:foreign: foreign words + + + flat:gov: partitive-like appositional element + + + flat:name: names + + + flat:num: compound number + + + flat:number: flat multiword expression: number + + + flat:range: range + + + flat:redup: flat multiword expression: alliterative expressions + + + flat:repeat: repetition + + + flat:sibl: siblings + + + flat:time: flat multiword expression: time + 
+ + flat:title: parts of a title + + + flat:vv: serial verbs + + + + goeswith: goes with + + + iobj: indirect object + + iobj:agent: agentive indirect object + + + iobj:appl: applied indirect object in applicative construction + + + iobj:patient: patient object of a non-actor/patient-focused verb + + + + list: list + + + mark: marker + + mark:adv: manner adverbializer + + + mark:advmod: adverbial modifier confusable with a subordination marker + + + mark:aff: affix marker + + + mark:pcomp: marker for the purpose clause + + + mark:plur: independently written plural suffix + + + mark:prt: particle + + + mark:q: question particle + + + mark:rel: adjectival, relativizer, and nominalizer 的 DE + + + + neg: negation modifier + + + nmod: nominal modifier + + nmod:agent: agent of verbnouns in _cael_ constructions + + + nmod:appos: nominal modifier apposition + + + nmod:arg: nominal modifier used as an argument + + + nmod:att: nominal modifier without postposition + + + nmod:attlvc: object nominal in a nominalized light-verb construction + + + nmod:attr: attributive nominal modifier + + + nmod:bahuv: nominal bahuvriihi modifier + + + nmod:cau: nominal modifier indicating the causee of a causative predicate + + + nmod:comp: comparative modifier of an adjective or adverb + + + nmod:det: determinative + + + nmod:flat: nominal part of a named entity + + + nmod:gen: genitive modifier + + + nmod:gobj: genitive object + + + nmod:gsubj: genitive subject + + + nmod:lfrom: advmod with locative to modality + + + nmod:lmod: nominal with locative modality + + + nmod:npmod: noun phrase as adverbial modifier + + + nmod:obj: nominative object + + + nmod:part: nominal modifier indicating part-whole relations + + + nmod:poss: possessive nominal modifier + + + nmod:pred: predicative expression + + + nmod:prep: prepositional pronouns + + + nmod:prp: proprietive modifier of a noun + + + nmod:relat: relational nominal modifier + + + nmod:subj: nominative subject + + + nmod:tmod: temporal modifier 
+ + + + nsubj: nominal subject + + nsubj:advmod: fused subject pronoun and adverb + + + nsubj:aff: nominal subject affix + + + nsubj:bfoc: nominal subject of a beneficiary-focused verb + + + nsubj:caus: causative nominal subject + + + nsubj:cleft: nominal residual subject of a cleft sentence + + + nsubj:cop: nominal copular subject + + + nsubj:expl: expletive subject + + + nsubj:ifoc: nominal subject of an instrumental-focused verb + + + nsubj:lfoc: nominal subject of a locative-focused verb + + + nsubj:lvc: subject in a light-verb construction + + + nsubj:nc: non-canonical nominal subject + + + nsubj:nn: nominal subject: the predicate is noun + + + nsubj:obj: fused subject and object pronoun + + + nsubj:outer: outer clause nominal subject + + + nsubj:pass: passive nominal subject + + + nsubj:periph: postposed subject + + + nsubj:pred: subject of a secondary predicate (enhanced dependency) + + + nsubj:quasi: quasi subject + + + nsubj:x: controlling subject (enhanced dependency) + + + nsubj:xsubj: The subject xcomp is the complement + + + + nummod: numeric modifier + + nummod:det: numeric determiner + + + nummod:entity: numeric modifier governed by a noun + + + nummod:flat: numeral part of a named entity + + + nummod:gov: numeric modifier governing the case of the noun + + + + obj: object + + obj:advmod: fused adverb and object pronoun + + + obj:advneg: fused negation and object pronoun + + + obj:agent: agentive object + + + obj:appl: applied object in applicative construction + + + obj:cau: direct object of an intransitive causative verb + + + obj:caus: agentive object in causative construction + + + obj:lvc: object in a light-verb construction + + + obj:obl: fused oblique and object pronoun + + + obj:periph: preposed object + + + + obl: oblique nominal + + obl:ad: oblique adjunct + + + obl:adj: oblique nominal: Auxiliary nouns for adjectives + + + obl:adv: oblique nominal for adverbs + + + obl:advmod: adverbial modifier confusable with an oblique dependent + + + 
obl:agent: agent modifier + + + obl:appl: applied oblique argument in non-canonical applicative construction + + + obl:arg: oblique argument + + + obl:cau: oblique with causative modality + + + obl:cmp: standard-of-comparison modifier of an adjective or adverb + + + obl:cmpr: comparative oblique argument + + + obl:comp: oblique nominal with other preposition + + + obl:dat: dative argument + + + obl:freq: oblique with frequentative modality + + + obl:inst: oblique instrument + + + obl:iobj: oblique nominal for verb means "to give" + + + obl:lfrom: obl with source locative from modality + + + obl:lmod: locative modifier + + + obl:lmp: obl with locative to modality + + + obl:lto: obl with locative to modality + + + obl:lvc: oblique nominal in a light-verb construction + + + obl:mcl: obl indicating manner + + + obl:mod: oblique modifier + + + obl:npmod: noun phrase as adverbial modifier + + + obl:obj: NP-related object + + + obl:orphan: adpositional dependent with the elided noun + + + obl:own: owner in possessive existential clauses + + + obl:patient: object in ZOENG construction + + + obl:pmod: prepositional object + + + obl:poss: oblique modifier specifying possessor + + + obl:prep: prepositional pronouns + + + obl:sentcon: sentence-initial discourse connective + + + obl:smod: oblique spatial modifier + + + obl:subj: NP-related subject + + + obl:tmod: temporal modifier + + + obl:with: oblique nominal to answer "with whom" + + + + orphan: orphan + + orphan:missing: textual gap in the source + + + + parataxis: parataxis + + parataxis:appos: paratactic apposition + + + parataxis:conj: juxtaposed clause + + + parataxis:coord: coordinating parataxis + + + parataxis:deletion: loosely connected clause because of deletion + + + parataxis:discourse: paratactic discourse + + + parataxis:dislocated: parataxis:dislocated + + + parataxis:hashtag: paratactic hashtag + + + parataxis:insert: parenthetical clause or comment + + + parataxis:mod: parataxis:mod + + + parataxis:newsent: 
new sentence attached to a node in the previous sentence + + + parataxis:nsubj: paratactic nominal subject + + + parataxis:obj: direct speech + + + parataxis:parenth: parataxis parenthesical + + + parataxis:rel: relative clause for clauses + + + parataxis:rep: reported speech + + + parataxis:reporting: interjected verb of saying + + + parataxis:restart: loosely connected clause because of restart + + + parataxis:rt: retweets + + + parataxis:sentence: sentence + + + parataxis:trans: translation pair + + + parataxis:url: URLs + + + + punct: punctuation + + + reparandum: overridden disfluency + + + root: root + + + vocative: vocative + + vocative:cl: clausal vocative + + + vocative:mention: Twitter mentions + + + + xcomp: open clausal complement + + xcomp:adj: open clausal complement for adjective + + + xcomp:cleft: required open dependent of the pronoun _to_ + + + xcomp:dir: open clausal complement for adjective + + + xcomp:ds: clausal complement with different subject + + + xcomp:obj: infinitival objects + + + xcomp:pred: predicate + + + xcomp:relcl: double pronoun construction or free relative acting as complement clause + + + xcomp:subj: infinitival and adverbial subjects + + + xcomp:vcomp: open clausal complement for adjective + + + + diff --git a/src/metadater/taxonomies/ParlaMint-taxonomy-parla.legislature.xml b/src/metadater/taxonomies/ParlaMint-taxonomy-parla.legislature.xml new file mode 100644 index 0000000..f055d55 --- /dev/null +++ b/src/metadater/taxonomies/ParlaMint-taxonomy-parla.legislature.xml @@ -0,0 +1,138 @@ + + + Legislature + Zákonodárná moc + + Geo-political or administrative units + Geo-politické nebo administrativní jednotky + + Supranational legislature + Mezinárodní legislatura + + + National legislature + Národní legislatura + + + Regional legislature + Regionální legislatura + + + Local legislature + Místní legislatura + + + + Organization + Organizace + + Chambers + Komory + + Unicameralism + Jednokomorový parlament + + + Bicameralism + 
Dvoukomorový parlament + + Upper house + Horní komora + + + Lower house + Dolní komora + + + + Multicameralism + Vícekomorový parlament + + Chamber + Komora + + + + + Committee + Komise + + Standing committee + Stálá komise + + + Special committee + Zvláštní komise + + + Committee of inquiry + Vyšetřovací komise + + + + + Legislative period: term of the parliament between general elections. + Legislativní perioda + + Legislative session: the period of time in which a legislature is convened for purpose of lawmaking, usually being one of two or more smaller divisions of the entire time between two elections. A session is a meeting or series of connected meetings devoted to a single order of business, program, agenda, or announced purpose. + Legislativní schůze + + Meeting: Each meeting may be a separate session or part of a group of meetings constituting a session. The session/meeting may take one or more days. + Schůze + + Types of meetings + Druhy schůzí + + Regular meeting + Běžná schůze + + + Special meeting + Zvláštní schůze + + Extraordinary meeting + Mimořádná schůze + + + Urgent meeting + Urgentní schůze + + + Ceremonial meeting + Slavnostní schůze + + + Commemorative meeting + Pamětní schůze + + + Public presentation of opinions + Veřejná prezentace možností + + + Committee meeting + Schůze výboru + + + + Continued meeting + Pokračování schůze + + + Public meeting + Veřejná schůze + + + Executive meeting + Výkonná schůze + + + + Sitting: sitting day + Sezení + + + + + + diff --git a/src/metadater/taxonomies/ParlaMint-taxonomy-speaker_types.xml b/src/metadater/taxonomies/ParlaMint-taxonomy-speaker_types.xml new file mode 100644 index 0000000..f00dfdd --- /dev/null +++ b/src/metadater/taxonomies/ParlaMint-taxonomy-speaker_types.xml @@ -0,0 +1,18 @@ + + + Types of speakers + Druhy řečníků + + Chairperson: chairman of a sitting + Předsedající: předsedá zasedání + + + Regular: a regular speaker at a sitting + Poslanec: poslanec nebo člen vlády + + + Guest: a 
guest speaker at a sitting + Host: hostující řečník na sezení + + + diff --git a/src/metadater/taxonomies/taxonomy-NER.xml b/src/metadater/taxonomies/taxonomy-NER.xml deleted file mode 100644 index d20db03..0000000 --- a/src/metadater/taxonomies/taxonomy-NER.xml +++ /dev/null @@ -1,42 +0,0 @@ - - - - Named entities - - - Jmenné entity - - - - person - - - osoba - - - - - location - - - místo - - - - - organization - - - organizace - - - - - miscellaneous - - - různé - - - diff --git a/src/metadater/taxonomies/taxonomy-UD-SYN.xml b/src/metadater/taxonomies/taxonomy-UD-SYN.xml deleted file mode 100644 index cd8be79..0000000 --- a/src/metadater/taxonomies/taxonomy-UD-SYN.xml +++ /dev/null @@ -1,147 +0,0 @@ - - - - UD syntactic relations - - - acl: Clausal modifier of noun (adjectival clause) - - - acl:relcl: Relative clause modifier - - - advcl: Adverbial clause modifier - - - advmod: Adverbial modifier - - - advmod:emph: Emphasizing word, intensifier - - - amod: Adjectival modifier - - - appos: Appositional modifier - - - aux: Auxiliary - - - aux:pass: Passive auxiliary - - - case: Case marking - - - cc: Coordinating conjunction - - - ccomp: Clausal complement - - - cc:preconj: Preconjunct - - - compound: Compound - - - conj: Conjunct - - - cop: Copula - - - csubj: Clausal subject - - - csubj:pass: Clausal passive subject - - - dep: Unspecified dependency - - - det: Determiner - - - det:numgov: Pronominal quantifier governing the case of the noun - - - det:nummod: Pronominal quantifier agreeing in case with the noun - - - discourse: Discourse element - - - expl: Expletive - - - expl:pass: Reflexive pronoun used in reflexive passive - - - expl:pv: Reflexive clitic with an inherently reflexive verb - - - fixed: Fixed multiword expression - - - flat: Flat multiword expression - - - flat:foreign: Flat multiword expression: foreign - - - flat:name: Flat name - - - iobj: Indirect object - - - mark: Marker - - - nmod: Nominal modifier - - - nsubj: Nominal subject - - - 
nsubj:pass: Passive nominal subject - - - nummod: Numeric modifier - - - nummod:gov: Numeric modifier governing the case of the noun - - - obj: Object - - - obl: Oblique nominal - - - obl:arg: Oblique argument - - - orphan: Orphan - - - parataxis: Parataxis - - - punct: Punctuation - - - reparandum: Overridden disfluency (here used for program mistakes!) - - - root: Root - - - vocative: Vocative - - - xcomp: Open clausal complement - - diff --git a/src/metadater/taxonomies/taxonomy-meeting.parts.xml b/src/metadater/taxonomies/taxonomy-meeting.parts.xml deleted file mode 100644 index 2462cac..0000000 --- a/src/metadater/taxonomies/taxonomy-meeting.parts.xml +++ /dev/null @@ -1,15 +0,0 @@ - - - - Bod - - - Agenda - - - - Bod jednání - - Agenda: topic discussed during sitting - - diff --git a/src/metadater/taxonomies/taxonomy-parla.legislature.xml b/src/metadater/taxonomies/taxonomy-parla.legislature.xml deleted file mode 100644 index c2c71a6..0000000 --- a/src/metadater/taxonomies/taxonomy-parla.legislature.xml +++ /dev/null @@ -1,274 +0,0 @@ - - - - Legislature - - - Zákonodárná moc - - - - Geo-political or administrative units - - - Geo-politické nebo administrativní jednotky - - - - Supranational legislature - - - Mezinárodní legislatura - - - - - National legislature - - - Národní legislatura - - - - - Regional legislature - - - Regionální legislatura - - - - - Local legislature - - - Místní legislatura - - - - - - Organization - - - Organizace - - - - Chambers - - - Komory - - - - Unicameralism - - - Jednokomorový parlament - - - - - Bicameralism - - - Dvoukomorový parlament - - - - Upper house - - - Horní komora - - - - - Lower house - - - Dolní komora - - - - - - Multicameralism - - - Vícekomorový parlament - - - - Chamber - - - Komora - - - - - - - Committee - - - Komise - - - - Standing committee - - - Stálá komise - - - - - Special committee - - - Zvláštní komise - - - - - Committee of inquiry - - - Vyšetřovací komise - - - - - - - Legislative period: term 
of the parliament between general elections. - - Legislativní perioda - - - - Legislative session: the period of time in which a legislature is convened for - purpose of lawmaking, usually being one of two or more smaller divisions of the entire time between two elections. - A session is a meeting or series of connected meetings devoted to a single order of business, program, agenda, or - announced purpose. - - Legislativní schůze - - - - Meeting: Each meeting may be a separate session or part of a group of meetings - constituting a session. The session/meeting may take one or more days. - - Schůze - - - - Types of meetings - - - Druhy schůzí - - - - Regular meeting - - - Běžná schůze - - - - - Special meeting - - - Zvláštní schůze - - - - Extraordinary meeting - - - Mimořádná schůze - - - - - Urgent meeting - - - Urgentní schůze - - - - - Ceremonial meeting - - - Slavnostní schůze - - - - - Commemorative meeting - - - Pamětní schůze - - - - - Public presentation of opinions - - - Veřejná prezentace možností - - - - - Committee meeting - - - Schůze výboru - - - - - - Continued meeting - - - Pokračování schůze - - - - - Public meeting - - - Veřejná schůze - - - - - Executive meeting - - - Výkonná schůze - - - - - - Sitting: sitting day - - Sezení - - - - - - diff --git a/src/metadater/taxonomies/taxonomy-speaker_types.xml b/src/metadater/taxonomies/taxonomy-speaker_types.xml deleted file mode 100644 index 0071d2c..0000000 --- a/src/metadater/taxonomies/taxonomy-speaker_types.xml +++ /dev/null @@ -1,28 +0,0 @@ - - - - Types of speakers - - - Druhy řečníků - - - - Chairperson: chairman of a sitting - - Předsedající: předsedá zasedání - - - - Regular: a regular speaker at a sitting - - Poslanec: poslanec nebo člen vlády - - - - Guest: a guest speaker at a sitting - - Host: hostující řečník na sezení - - diff --git a/src/metadater/tei_parczech.xml b/src/metadater/tei_parczech.xml index 2930e54..1198a43 100644 --- a/src/metadater/tei_parczech.xml +++ 
b/src/metadater/tei_parczech.xml @@ -353,12 +353,12 @@ - - + + - - + + @@ -369,10 +369,10 @@ - + - + @@ -381,10 +381,10 @@ - + - + @@ -597,10 +597,10 @@ - + - + @@ -712,10 +712,10 @@ - + - + @@ -724,10 +724,10 @@ - + - + diff --git a/src/tools/ParCzech-finalize.xsl b/src/tools/ParCzech-finalize.xsl new file mode 100644 index 0000000..196646b --- /dev/null +++ b/src/tools/ParCzech-finalize.xsl @@ -0,0 +1,612 @@ + + + + + + + + + + + + + + 4.0 + http://hdl.handle.net/11234/1-5360 + czech-pdt-ud-2.10-220711 + czech-cnec2.0-200831 + + + + LINDAT/CLARIAH-CZ: Digitální výzkumná infrastruktura pro jazykové technologie, umění a humanitní vědy + LINDAT/CLARIAH-CZ: Digital Research Infrastructure for Language Technologies, Arts and Humanities + www.lindat.cz + + + + + + + + + ParlaMint-taxonomy-parla.legislature.xml + ParlaMint-taxonomy-speaker_types.xml + + + + ParCzech-taxonomy-parla.links.xml + ParCzech-taxonomy-meeting.parts.xml + + ParlaMint-taxonomy-UD-SYN.ana.xml + ParlaMint-taxonomy-NER.ana.xml + ParCzech-taxonomy-NER.cnec2.0.ana.xml + + + + + + + / + + / + + + .ana.xml + .xml + + invalid type param: allowed values are 'TEI' and 'TEI.ana' + + + + + + + + + .ana + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + INFO: processing root + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + debateSection + commentSection + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Parlament České republiky, Poslanecká sněmovna + Parliament of the Czech Republic, Chamber of Deputies + https://www.psp.cz/eknih/ + + + + + + + + +

No correction of source texts was performed.

+
+ +

Text has not been normalised, except for spacing.

+
+ +

No end-of-line hyphens were present in the source.

+
+ +

Quotation marks have been left in the text and are not explicitly marked up.

+
+ +

The texts are segmented into utterances (speeches) and segments (corresponding to paragraphs in the source transcription).

+
+
+ + + + + + + + + + + + + + + + + + + + + + + +

Private URIs with this prefix point to elements giving their name. In this document they are simply local references into the UD-SYN taxonomy categories in the corpus root TEI header.

+
+ +

Feature-structure elements definition of the Czech Positional Tags

+
+ +

Taxonomy for named entities (cnec2.0)

+
+
+ + + + POS tagging, lemmatization and dependency parsing done with UDPipe 2 (http://ufal.mff.cuni.cz/udpipe/2) with model + + + + Named entity recognition done with NameTag 2 (http://ufal.mff.cuni.cz/nametag/2) with model (http://hdl.handle.net/11234/1-3443). Postprocessing: nested named entities have been merged into four categories (PER, LOC, ORG, MISC). + + +
+
+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + čeština + angličtina + Czech + English + + + + + + + + + words + words + slov + + + + + + + + speeches + speeches + promluv + + + + + + + + + + + + cs + + + + + + en + + + + + + + + + + http://www.tei-c.org/ns/1.0 + + + + + + + + + + + + Matyáš Kopp + Data retrieval + TEI XML corpus encoding + + Linguistic annotation + + + + + + + LINDAT/CLARIAH-CZ: Digitální výzkumná infrastruktura pro jazykové technologie, umění a humanitní vědy + LINDAT/CLARIAH-CZ: Digital Research Infrastructure for Language Technologies, Arts and Humanities + + + + + + Parlament České republiky - Poslanecká sněmovna + Sněmovní 176/4 + Praha + Czech Republic + + + + + + + + + + + + + + + + + + + + + + https://creativecommons.org/publicdomain/zero/1.0/ +

This work is licensed under the CC0 1.0 Universal (CC0 1.0) Public Domain Dedication.

+
+ +
+
+ + + +

ParCzech is a project on compiling Czech parliamentary data into annotated corpora.

+
+
+ + + + + + + + + + + + + +