-
Notifications
You must be signed in to change notification settings - Fork 52
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
6d2d038
commit 7d358e2
Showing
326 changed files
with
633,369 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,201 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<teiCorpus xmlns="http://www.tei-c.org/ns/1.0" | ||
xml:lang="en" | ||
xml:id="ParlaMint-AT-en.ana" | ||
corresp="../ParlaMint-AT.TEI.ana/ParlaMint-AT.ana.xml"> | ||
<teiHeader> | ||
<fileDesc> | ||
<titleStmt> | ||
<title xml:lang="de" type="main">Österreichisches Parlamentskorpus ParlaMint-AT-en [ParlaMint-en.ana SAMPLE]</title> | ||
<title xml:lang="en" type="main">Austrian parliamentary corpus ParlaMint-AT-en [ParlaMint-en.ana SAMPLE]</title> | ||
<title xml:lang="de" type="sub">Stenographische Protokolle der Plenarsitzungen des Österreichischen Nationalrats, XX. Gesetzgebungsperiode - XXVII. Gesetzgebungsperiode (1996 - 2022)</title> | ||
<title xml:lang="en" type="sub">Shorthand records of the plenary sittings of the National Council of the Austrian parliament, terms 20 - terms 27 (1996 - 2022)</title> | ||
<meeting n="27" corresp="#NR" ana="#parla.lower #parla.term #NR.XXVII"/> | ||
<meeting n="26" corresp="#NR" ana="#parla.lower #parla.term #NR.XXVI"/> | ||
<meeting n="25" corresp="#NR" ana="#parla.lower #parla.term #NR.XXV"/> | ||
<meeting n="24" corresp="#NR" ana="#parla.lower #parla.term #NR.XXIV"/> | ||
<meeting n="23" corresp="#NR" ana="#parla.lower #parla.term #NR.XXIII"/> | ||
<meeting n="22" corresp="#NR" ana="#parla.lower #parla.term #NR.XXII"/> | ||
<meeting n="21" corresp="#NR" ana="#parla.lower #parla.term #NR.XXI"/> | ||
<meeting n="20" corresp="#NR" ana="#parla.lower #parla.term #NR.XX"/> | ||
<respStmt> | ||
<persName ref="https://orcid.org/0000-0002-8111-5584">Hannes Pirker</persName> | ||
<persName ref="https://orcid.org/0000-0003-2436-0361">Daniel Schopper</persName> | ||
<persName ref="https://orcid.org/0000-0002-1631-4560">Tanja Wissik</persName> | ||
<resp xml:lang="de">Projektplanung und Methode</resp> | ||
<resp xml:lang="en">Project set-up and methodology</resp> | ||
</respStmt> | ||
<respStmt> | ||
<persName>Hannes Pirker</persName> | ||
<resp xml:lang="de">Datenbeschaffung, Korpuskodierung in TEI und automatische linguistische Annotation</resp> | ||
<resp xml:lang="en">Data retrieval, TEI corpus encoding and automatic linguistic annotation</resp> | ||
</respStmt> | ||
<respStmt> | ||
<persName>Daniel Schopper</persName> | ||
<resp xml:lang="de">XSLT Transformationen</resp> | ||
<resp xml:lang="en">XSLT transformations</resp> | ||
</respStmt> | ||
<respStmt> | ||
<persName>Martin Kirnbauer</persName> | ||
<resp xml:lang="de">Einige der manuellen Korrekturen</resp> | ||
<resp xml:lang="en">Some of the manual curation</resp> | ||
</respStmt> | ||
<respStmt> | ||
<persName>Tanja Wissik</persName> | ||
<resp xml:lang="de">Metadaten und Übersetzung</resp> | ||
<resp xml:lang="en">Metadata and translation</resp> | ||
</respStmt> | ||
<respStmt> | ||
<persName>Taja Kuzman</persName> | ||
<persName>Nikola Ljubešić</persName> | ||
<resp xml:lang="en">Machine translation to English and linguistic analysis of the translation</resp> | ||
</respStmt> | ||
<funder> | ||
<orgName xml:lang="de">CLARIN-ERIC</orgName> | ||
<orgName xml:lang="en">CLARIN-ERIC (Common Language Resources and Technology Infrastructure—European Research Infrastructure Consortium)</orgName> | ||
<ref target="https://www.clarin.eu/">www.clarin.eu</ref> | ||
</funder> | ||
<funder> | ||
<orgName xml:lang="de">ÖAW (Österreichische Akademie der Wissenschaften)</orgName> | ||
<orgName xml:lang="en">ÖAW (Austrian Academy of Sciences)</orgName> | ||
<ref target="https://www.oeaw.ac.at/">www.oeaw.ac.at</ref> | ||
</funder> | ||
</titleStmt> | ||
<editionStmt> | ||
<edition>3.0</edition> | ||
</editionStmt> | ||
<extent><!--These numbers do not reflect the size of the sample!--> | ||
<measure unit="speeches" quantity="227991" xml:lang="en">227,991 speeches</measure> | ||
<measure unit="words" quantity="63932213" xml:lang="en">63,932,213 words</measure> | ||
</extent> | ||
<publicationStmt> | ||
<publisher> | ||
<orgName xml:lang="de">Die CLARIN Forschungsinfrastruktur</orgName> | ||
<orgName xml:lang="en">The CLARIN research infrastructure</orgName> | ||
<ref target="https://www.clarin.eu/">www.clarin.eu</ref> | ||
</publisher> | ||
<idno type="URI" subtype="handle">http://hdl.handle.net/11356/1810</idno> | ||
<availability status="free"> | ||
<licence>http://creativecommons.org/licenses/by/4.0/</licence> | ||
<p xml:lang="de">Dieses Werk ist lizenziert unter der <ref target="http://creativecommons.org/licenses/by/4.0/">Creative Commons Namensnennung 4.0 International Lizenz (CC BY 4.0)</ref>.</p> | ||
<p xml:lang="en">This work is licensed under the <ref target="http://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution 4.0 International License</ref>.</p> | ||
</availability> | ||
<date when="2023-06-24">2023-06-24</date> | ||
</publicationStmt> | ||
<sourceDesc> | ||
<bibl> | ||
<title type="main" xml:lang="de">Stenographische Protokolle der Plenarsitzungen des Nationalrats der Republik Österreich</title> | ||
<title type="main" xml:lang="en">Shorthand records of the plenary sittings of the National Council of the Austrian parliament</title> | ||
<publisher>Parlamentsdirektion</publisher> | ||
<idno type="URI" subtype="parliament">https://www.parlament.gv.at/PAKT/STPROT</idno> | ||
<date from="1996-01-15" to="2022-05-19">15.01.1996 - 19.05.2022</date> | ||
</bibl> | ||
</sourceDesc> | ||
</fileDesc> | ||
<encodingDesc> | ||
<projectDesc> | ||
<p xml:lang="en"> | ||
<ref target="https://www.clarin.eu/content/parlamint">ParlaMint</ref> is a project that aims to (1) create a multilingual set of comparable corpora of parliamentary proceedings uniformly encoded according to the <ref target="https://clarin-eric.github.io/ParlaMint/">ParlaMint encoding guidelines</ref>, covering the period from 2015 to mid-2022; (2) add linguistic annotations to the corpora and machine-translate them to English; (3) make the corpora available through concordancers; and (4) build use cases in Political Sciences and Digital Humanities based on the corpus data.</p> | ||
<p xml:lang="de"> | ||
<ref target="https://www.clarin.eu/content/parlamint">ParlaMint</ref> | ||
</p> | ||
</projectDesc> | ||
<editorialDecl> | ||
<correction> | ||
<p>No correction of source texts was performed.</p> | ||
</correction> | ||
<normalization> | ||
<p>Text has not been normalised, except for spacing. Printed matter quoted in the protocols was removed.</p> | ||
</normalization> | ||
<hyphenation> | ||
<p>No end-of-line hyphens were present in the source.</p> | ||
</hyphenation> | ||
<quotation> | ||
<p>Quotation marks have been left in the text and are not explicitly marked up.</p> | ||
</quotation> | ||
<segmentation> | ||
<p>The texts are segmented into utterances (speeches) and segments (corresponding to paragraphs in the source transcription).</p> | ||
</segmentation> | ||
</editorialDecl> | ||
<tagsDecl><!--These numbers do not reflect the size of the sample!--> | ||
<namespace name="http://www.tei-c.org/ns/1.0"> | ||
<tagUsage gi="body" occurs="1197"/> | ||
<tagUsage gi="desc" occurs="346176"/> | ||
<tagUsage gi="div" occurs="1197"/> | ||
<tagUsage gi="gap" occurs="14864"/> | ||
<tagUsage gi="kinesic" occurs="248593"/> | ||
<tagUsage gi="name" occurs="2100135"/> | ||
<tagUsage gi="note" occurs="668625"/> | ||
<tagUsage gi="pb" occurs="116531"/> | ||
<tagUsage gi="pc" occurs="9280443"/> | ||
<tagUsage gi="s" occurs="3919672"/> | ||
<tagUsage gi="seg" occurs="662401"/> | ||
<tagUsage gi="text" occurs="1197"/> | ||
<tagUsage gi="u" occurs="227991"/> | ||
<tagUsage gi="vocal" occurs="82719"/> | ||
<tagUsage gi="w" occurs="63932213"/> | ||
</namespace> | ||
</tagsDecl> | ||
<classDecl> | ||
<xi:include xmlns:xi="http://www.w3.org/2001/XInclude" | ||
href="ParlaMint-taxonomy-parla.legislature.xml"/> | ||
<xi:include xmlns:xi="http://www.w3.org/2001/XInclude" | ||
href="ParlaMint-taxonomy-speaker_types.xml"/> | ||
<xi:include xmlns:xi="http://www.w3.org/2001/XInclude" | ||
href="ParlaMint-taxonomy-subcorpus.xml"/> | ||
<xi:include xmlns:xi="http://www.w3.org/2001/XInclude" | ||
href="ParlaMint-taxonomy-NER.ana.xml"/> | ||
</classDecl> | ||
<appInfo> | ||
<application ident="EasyNMT" version="2.0"> | ||
<label>EasyNMT (OPUS-MT model)</label> | ||
<desc>Translation to English done with EasyNMT (<ref target="https://github.com/UKPLab/EasyNMT">https://github.com/UKPLab/EasyNMT</ref>) with OPUS-MT model gmw (<ref target="https://github.com/Helsinki-NLP/Opus-MT">https://github.com/Helsinki-NLP/Opus-MT</ref>)</desc> | ||
</application> | ||
<application ident="Stanza" version="1.5"> | ||
<label>Stanza</label> | ||
<desc>Tokenisation, PoS tagging, lemmatization, and NER annotation done with Stanza (<ref target="https://stanfordnlp.github.io/stanza/">https://stanfordnlp.github.io/stanza/</ref>) with the model for English. For NER the conll03 model with 4 NE classes was used.</desc> | ||
</application> | ||
</appInfo> | ||
</encodingDesc> | ||
<profileDesc> | ||
<settingDesc> | ||
<setting> | ||
<name type="city" xml:lang="de">Wien</name> | ||
<name type="city" xml:lang="en">Vienna</name> | ||
<name type="country" xml:lang="de" key="AT">Österreich</name> | ||
<name type="country" xml:lang="en" key="AT">Austria</name> | ||
<date from="1996-01-15" to="2022-05-19"/> | ||
</setting> | ||
</settingDesc> | ||
<textClass> | ||
<catRef scheme="#ParlaMint-taxonomy-parla.legislature" | ||
target="#parla.bi #parla.lower"/> | ||
</textClass> | ||
<particDesc> | ||
<xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="ParlaMint-AT-listOrg.xml"/> | ||
<xi:include xmlns:xi="http://www.w3.org/2001/XInclude" | ||
href="ParlaMint-AT-listPerson.xml"/> | ||
</particDesc> | ||
<langUsage> | ||
<language ident="de" xml:lang="de">Deutsch</language> | ||
<language ident="de" xml:lang="en">German</language> | ||
<language ident="en" xml:lang="de">Englisch</language> | ||
<language ident="en" xml:lang="en">English</language> | ||
</langUsage> | ||
</profileDesc> | ||
<revisionDesc> | ||
<change when="2023-06-24"> | ||
<name>Tomaž Erjavec</name>: Made sample.</change> | ||
<change when="2023-06-24">parlamint2release script: Fix some identifiable errors for the release.</change> | ||
<change when="2023-06-23"> | ||
<name>Tomaž Erjavec</name>: Generate TEI version of MTed corpus.</change> | ||
<change when="2023-06-24">parlamint-add-common-content script: Adding common content.</change> | ||
</revisionDesc> | ||
</teiHeader> | ||
<xi:include xmlns:xi="http://www.w3.org/2001/XInclude" | ||
href="ParlaMint-AT-en_2005-03-31-022-XXII-NRSITZ-00100.ana.xml"/> | ||
<xi:include xmlns:xi="http://www.w3.org/2001/XInclude" | ||
href="ParlaMint-AT-en_2014-09-24-025-XXV-NRSITZ-00042.ana.xml"/> | ||
<xi:include xmlns:xi="http://www.w3.org/2001/XInclude" | ||
href="ParlaMint-AT-en_2022-05-19-027-XXVII-NRSITZ-00159.ana.xml"/> | ||
</teiCorpus> |
3 changes: 3 additions & 0 deletions
3
Data/ParlaMint-AT/ParlaMint-AT-en_2005-03-31-022-XXII-NRSITZ-00100-meta.tsv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
ID Title Date Body Term Session Meeting Sitting Agenda Subcorpus Speaker_role Speaker_MP Speaker_Minister Speaker_party Speaker_party_name Party_status Speaker_name Speaker_gender Speaker_birth | ||
ParlaMint-AT_2005-03-31-022-XXII-NRSITZ-00100_d7e355 Sitting Number 100, Legislative period XXII, Thursday, 31. March 2005 2005-03-31 Lower house 22 100 Reference Chairperson MP - FPÖ Freiheitlicher Parlamentsklub Prinzhorn, Thomas M 1943 | ||
ParlaMint-AT_2005-03-31-022-XXII-NRSITZ-00100_d7e386 Sitting Number 100, Legislative period XXII, Thursday, 31. March 2005 2005-03-31 Lower house 22 100 Reference Chairperson MP - FPÖ Freiheitlicher Parlamentsklub Prinzhorn, Thomas M 1943 |
Oops, something went wrong.