diff --git a/sparv/modules/stanza/metadata.yaml b/sparv/modules/stanza/metadata.yaml index 6ef55a37..84d7bca9 100644 --- a/sparv/modules/stanza/metadata.yaml +++ b/sparv/modules/stanza/metadata.yaml @@ -1,4 +1,4 @@ -id: stanza-parent +id: stanza-parent-swe abstract: true language_codes: - swe @@ -27,7 +27,7 @@ created: 2020-12-07 updated: 2022-08-10 --- id: swe-pos-stanza-stanzamorph -parent: stanza-parent +parent: stanza-parent-swe name: swe: SUC-ordklasstaggning med Stanza eng: SUC part-of-speech tagging with Stanza @@ -89,7 +89,7 @@ description: [Sparv](https://spraakbanken.gu.se/sparv). --- id: swe-msd-stanza-stanzamorph-ufeats -parent: stanza-parent +parent: stanza-parent-swe name: swe: Morfologisk analys för svenska baserad på Stanza eng: Stanza-based morphological analysis for Swedish @@ -126,7 +126,7 @@ description: This analysis uses universal features, defined as part of the Universal Dependencies standard. --- id: swe-lemmatization-stanza-stanzalem -parent: stanza-parent +parent: stanza-parent-swe name: swe: SUC3-grundformanalys med Stanza eng: SUC3-citation form analysis with Stanza @@ -163,7 +163,7 @@ description: out-of-vocabulary tokens. --- id: swe-dependency-stanza-stanzasynt -parent: stanza-parent +parent: stanza-parent-swe name: swe: Dependensanalys med Stanza eng: Dependency analysis with Stanza @@ -197,3 +197,256 @@ description: eng: |- In 2020, the Stanza tool was trained and tested on TalbankenSBX (following MambaDep-style annotation) in order to create a high-quality analysis. Currently (in 2024), this is the default analysis for Swedish in Sparv +--- +id: stanza-parent-eng +abstract: true +language_codes: + - eng +standard_reference: '' +tool: "Stanza" +trained_on: '' +other_references: + - "Stanza: Peng Qi, Yuhao Zhang, Yuhui Zhang, Jason Bolton and Christopher D. Manning. 2020" + - "Stanza: A Python Natural Language Processing Toolkit for Many Human Languages. In Association for Computational Linguistics (ACL) System Demonstrations. 
2020" +evaluation_results: '' +model: Stanza standard model for English (https://stanfordnlp.github.io/stanza/models.html) +created: 2022-08-10 +updated: 2022-08-10 +--- +id: eng-pos-stanza +parent: stanza-parent-eng +name: + swe: Ordklasstaggning med Stanza för engelska + eng: Part-of-speech tagging with Stanza for English +short_description: + swe: Annotering av ordklasser (Penn Treebank-taggar) med Stanzas standardmodell för engelska + eng: Part-of-speech annotation with Penn Treebank tags with Stanza's standard model for English +task: part-of-speech tagging +keywords: + - pos-tagging + - stanza +tagset: "[Penn Treebank tagset](https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html)" +annotations: + - :stanza.pos +example_output: |- + ```xml + This + is + a + corpus + . + ``` +--- +id: eng-sentence-stanza +parent: stanza-parent-eng +name: + swe: Meningssegmentering för engelska med Stanza + eng: Sentence segmentation for English with Stanza +short_description: + swe: Meningssegmentering med Stanzas standardmodell för engelska + eng: Sentence segmentation with Stanza's standard model for English +task: sentence segmentation +keywords: + - sentence segmentation + - stanza +annotations: + - stanza.sentence +example_output: |- + ```xml + + This + is + a + corpus + . + + + It + contains + multiple + sentences + . + + + Here + comes + another + sentence + . + + ``` +--- +id: eng-tokenization-stanza +parent: stanza-parent-eng +name: + swe: Tokenisering för engelska med Stanza + eng: Tokenization for English with Stanza +short_description: + swe: Tokenisering med Stanzas standardmodell för engelska + eng: Tokenization with Stanza's standard model for English +task: tokenization +keywords: + - tokenization + - stanza +annotations: + - stanza.token +example_output: |- + ```xml + This + is + a + corpus + . 
+ ``` +--- +id: eng-lemmatization-stanza +parent: stanza-parent-eng +name: + swe: Lemmatisering för engelska med Stanza + eng: Lemmatization for English with Stanza +short_description: + swe: Lemmatisering med Stanzas standardmodell för engelska + eng: Lemmatization with Stanza's standard model for English +task: lemmatization +keywords: + - lemmatization + - stanza +annotations: + - :stanza.baseform +example_output: |- + ```xml + This + is + a + corpus + containing + some + words + . + ``` +--- +id: eng-dependency-stanza +parent: stanza-parent-eng +name: + swe: Dependensparsning för engelska med Stanza + eng: Dependency parsing for English with Stanza +short_description: + swe: Dependensparsning med Stanzas standardmodell för engelska + eng: Dependency parsing with Stanza's standard model for English +task: dependency parsing +keywords: + - dependency parsing + - stanza +tagset: "[UD](https://universaldependencies.org/en/dep/)" +annotations: + - :stanza.ref + - :stanza.dephead_ref + - :stanza.deprel +example_output: |- + ```xml + This + is + a + corpus + containing + some + words + . + ``` +--- +id: eng-namedentity-stanza +parent: stanza-parent-eng +name: + swe: Namnigenkänning för engelska med Stanza + eng: Named entity recognition for English with Stanza +short_description: + swe: Namnigenkänning (NER) med Stanzas standardmodell för engelska + eng: Named entity recognition with Stanza's standard model for English +task: named entity recognition +keywords: + - ner + - stanza +annotations: + - stanza.ne + - stanza.ne:stanza.ne_type +example_output: |- + ```xml + The + + Swedish + + chemist + + Alfred + Bernhard + Nobel + + was + born + on + + 21 + October + 1833 + + in + + Stockholm + + . + ``` +description: + swe: |- + Namnigenkänning (NER) gör det möjligt att märka upp namnentiteter (som t.ex. personnamn, organisationer, ortnamn) i + texten. + eng: |- + Named entity recognition (NER) enables the detection of named entities (e.g. 
personal names, organizations, + geographical locations) in the text. +--- +id: eng-pos-stanza-upos +parent: stanza-parent-eng +name: + swe: UD-ordklasstaggning med Stanza för engelska + eng: UD part-of-speech tagging with Stanza for English +short_description: + swe: Annotering av UD-ordklasser (Universal Dependencies) med Stanzas standardmodell för engelska + eng: Part-of-speech annotation with UD (Universal Dependencies) tags with Stanza's standard model for English +task: part-of-speech tagging +keywords: + - pos-tagging + - stanza +tagset: "[UD](https://universaldependencies.org/u/pos/)" +annotations: + - :stanza.upos +example_output: |- + ```xml + This + is + a + corpus + . + ``` +--- +id: eng-msd-stanza-ufeats +parent: stanza-parent-eng +name: + swe: Morfologisk analys för engelska baserad på Stanza + eng: Stanza-based morphological analysis for English +short_description: + swe: Morfologisk analys för engelska med universal features (UD) baserad på Stanza + eng: Stanza-based morphological analysis for English, using universal features (UD) +task: morphosyntactic tagging +keywords: + - msd + - stanza +tagset: "[UD](https://universaldependencies.org/u/feat/index.html)" +annotations: + - :stanza.ufeats +example_output: |- + ```xml + This + is + a + corpus + . + ```