From b4721be4d543e1086c2309e1c571d737d0385d2e Mon Sep 17 00:00:00 2001 From: xinru1414 Date: Wed, 1 Jun 2022 09:49:48 -0700 Subject: [PATCH] Ingested UDFestBR 2022 (#1966) * Added language tags * Updated workflow to cancel previous build checks and preview generations (publication remains not-canceled) * Moved from iso639 module to langcodes module (to support BCP-47) Co-authored-by: Matt Post --- .github/workflows/check-build.yml | 4 +- .github/workflows/preview.yml | 4 +- bin/anthology/papers.py | 6 +-- bin/requirements.txt | 2 +- data/xml/2022.udfestbr.xml | 74 +++++++++++++++++++++++++++++++ data/yaml/venues.yaml | 3 ++ 6 files changed, 87 insertions(+), 6 deletions(-) create mode 100644 data/xml/2022.udfestbr.xml diff --git a/.github/workflows/check-build.yml b/.github/workflows/check-build.yml index bfff7d8797..6f5783a7aa 100644 --- a/.github/workflows/check-build.yml +++ b/.github/workflows/check-build.yml @@ -4,7 +4,9 @@ on: pull_request # only run one at a time per branch -concurrency: check-build-${{ github.ref }} +concurrency: + group: check-build-${{ github.ref }} + cancel-in-progress: true jobs: check-build: diff --git a/.github/workflows/preview.yml b/.github/workflows/preview.yml index 74a5a229b7..8874ab9074 100644 --- a/.github/workflows/preview.yml +++ b/.github/workflows/preview.yml @@ -10,7 +10,9 @@ on: - '!master' # only run one at a time per branch -concurrency: preview-${{ github.ref }} +concurrency: + group: preview-${{ github.ref }} + cancel-in-progress: true jobs: preview: diff --git a/bin/anthology/papers.py b/bin/anthology/papers.py index 989478b4c2..f39a7b7750 100644 --- a/bin/anthology/papers.py +++ b/bin/anthology/papers.py @@ -15,7 +15,7 @@ # limitations under the License. 
from functools import cached_property -import iso639 +import langcodes import logging as log from .utils import ( @@ -293,7 +293,7 @@ def isbn(self): @property def langcode(self): - """Returns the ISO-639 language code, if present""" + """Returns the BCP47 language code, if present""" return self.attrib.get("language", None) @property @@ -301,7 +301,7 @@ def language(self): """Returns the language name, if present""" lang = None if self.langcode: - lang = iso639.languages.get(part3=self.langcode).name + lang = langcodes.Language.get(self.langcode).display_name() return lang def get(self, name, default=None): diff --git a/bin/requirements.txt b/bin/requirements.txt index 1096f39380..a05693cc85 100644 --- a/bin/requirements.txt +++ b/bin/requirements.txt @@ -4,7 +4,7 @@ citeproc-py-styles click docopt>=0.6.0 filetype -iso-639 +langcodes[data] latexcodec>=1.0.7 lxml>=4.2.0 nltk diff --git a/data/xml/2022.udfestbr.xml b/data/xml/2022.udfestbr.xml new file mode 100644 index 0000000000..07fdd24956 --- /dev/null +++ b/data/xml/2022.udfestbr.xml @@ -0,0 +1,74 @@ + + + + + Proceedings of the Universal Dependencies Brazilian Festival + Thiago Alexandre SalgueiroPardo + ArianiDi-Felippo + Norton TrevisanRoman + Association for Computational Linguistics +
Fortaleza, Brazil
+ march + 2022 + 2022.udfestbr-1 + + + 2022.udfestbr-1.0 + udfestbr-2022-procedings + + + Anotação de textos não canônicos: um estudo exploratorio de Grande sertão: veredas pelas dependências universais + pt-BR + Andre V. L.Coneglian + Ana Luisa A. R.Guimarães + Thiago CastroFerreira + Adriana S.Pagano + 1–11 + 2022.udfestbr-1.1 + coneglian-etal-2022-anotacao + + + Polishing the gold – how much revision do we need in treebanks? + ElvisSouza + ClaudiaFreitas + 1–11 + 2022.udfestbr-1.2 + souza-freitas-2022-polishing + + + Que simples que nada: a anotação da palavra que em corpus de <fixed-case>UD</fixed-case> + pt-BR + Magali S.Duran + HeloisaOliveira + ClarissaScandarolli + 1–11 + 2022.udfestbr-1.3 + duran-etal-2022-que + + + Shallow parsing of <fixed-case>P</fixed-case>ortuguese texts annotated under <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies + Guilherme MartinianoOliveira + Paulo BerlangaNeto + Evandro Eduardo SeronRuiz + 1–8 + 2022.udfestbr-1.4 + oliveira-etal-2022-shallow + + + Still on arguments and adjuncts: the status of the indirect object and the adverbial adjunct relations in <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies for <fixed-case>P</fixed-case>ortuguese + ElvisSouza + ClaudiaFreitas + 1–10 + 2022.udfestbr-1.5 + souza-freitas-2022-still + + + <fixed-case>UDC</fixed-case>oncord: A Concordancer for <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies Treebanks + Lucas Gabriel MendesMiranda + Thiago Alexandre SalgueiroPardo + 1–10 + 2022.udfestbr-1.6 + miranda-pardo-2022-udconcord + +
+
 diff --git a/data/yaml/venues.yaml b/data/yaml/venues.yaml index fc1c8c4571..198fb3e14d 100644 --- a/data/yaml/venues.yaml +++ b/data/yaml/venues.yaml @@ -1104,6 +1104,9 @@ tutorials: name: 'Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing: Tutorial Abstracts' +udfestbr: + acronym: UDFestBR + name: Universal Dependencies Brazilian Festival udw: acronym: UDW name: Universal Dependencies Workshop