Skip to content

Commit

Permalink
Ingested UDFestBR 2022 (acl-org#1966)
Browse files Browse the repository at this point in the history
* Added language tags
* Updated workflows to cancel superseded build checks and preview generations (publication runs are still not canceled)
* Moved from iso639 module to langcodes module (to support BCP-47)

Co-authored-by: Matt Post <[email protected]>
  • Loading branch information
xinru1414 and mjpost authored Jun 1, 2022
1 parent b23a8bf commit b4721be
Show file tree
Hide file tree
Showing 6 changed files with 87 additions and 6 deletions.
4 changes: 3 additions & 1 deletion .github/workflows/check-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@ on:
pull_request

# only run one at a time per branch
concurrency: check-build-${{ github.ref }}
concurrency:
group: check-build-${{ github.ref }}
cancel-in-progress: true

jobs:
check-build:
Expand Down
4 changes: 3 additions & 1 deletion .github/workflows/preview.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@ on:
- '!master'

# only run one at a time per branch
concurrency: preview-${{ github.ref }}
concurrency:
group: preview-${{ github.ref }}
cancel-in-progress: true

jobs:
preview:
Expand Down
6 changes: 3 additions & 3 deletions bin/anthology/papers.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
# limitations under the License.

from functools import cached_property
import iso639
import langcodes
import logging as log

from .utils import (
Expand Down Expand Up @@ -293,15 +293,15 @@ def isbn(self):

@property
def langcode(self):
"""Returns the ISO-639 language code, if present"""
"""Returns the BCP47 language code, if present"""
return self.attrib.get("language", None)

@property
def language(self):
"""Returns the language name, if present"""
lang = None
if self.langcode:
lang = iso639.languages.get(part3=self.langcode).name
lang = langcodes.Language.get(self.langcode).display_name()
return lang

def get(self, name, default=None):
Expand Down
2 changes: 1 addition & 1 deletion bin/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ citeproc-py-styles
click
docopt>=0.6.0
filetype
iso-639
langcodes[data]
latexcodec>=1.0.7
lxml>=4.2.0
nltk
Expand Down
74 changes: 74 additions & 0 deletions data/xml/2022.udfestbr.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
<?xml version='1.0' encoding='UTF-8'?>
<collection id="2022.udfestbr">
<volume id="1" ingest-date="2022-05-31">
<meta>
<booktitle>Proceedings of the <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies <fixed-case>B</fixed-case>razilian Festival</booktitle>
<editor><first>Thiago Alexandre Salgueiro</first><last>Pardo</last></editor>
<editor><first>Ariani</first><last>Di-Felippo</last></editor>
<editor><first>Norton Trevisan</first><last>Roman</last></editor>
<publisher>Association for Computational Linguistics</publisher>
<address>Fortaleza, Brazil</address>
<month>March</month>
<year>2022</year>
<url hash="0ab16ffb">2022.udfestbr-1</url>
</meta>
<frontmatter>
<url hash="79abac39">2022.udfestbr-1.0</url>
<bibkey>udfestbr-2022-procedings</bibkey>
</frontmatter>
<paper id="1">
<title>Anotação de textos não canônicos: um estudo exploratorio de Grande sertão: veredas pelas dependências universais</title>
<language>pt-BR</language>
<author><first>Andre V. L.</first><last>Coneglian</last></author>
<author><first>Ana Luisa A. R.</first><last>Guimarães</last></author>
<author><first>Thiago Castro</first><last>Ferreira</last></author>
<author><first>Adriana S.</first><last>Pagano</last></author>
<pages>1–11</pages>
<url hash="8398b865">2022.udfestbr-1.1</url>
<bibkey>coneglian-etal-2022-anotacao</bibkey>
</paper>
<paper id="2">
<title>Polishing the gold – how much revision do we need in treebanks?</title>
<author><first>Elvis</first><last>Souza</last></author>
<author><first>Claudia</first><last>Freitas</last></author>
<pages>1–11</pages>
<url hash="c4709f4a">2022.udfestbr-1.2</url>
<bibkey>souza-freitas-2022-polishing</bibkey>
</paper>
<paper id="3">
<title>Que simples que nada: a anotação da palavra que em corpus de <fixed-case>UD</fixed-case></title>
<language>pt-BR</language>
<author><first>Magali S.</first><last>Duran</last></author>
<author><first>Heloisa</first><last>Oliveira</last></author>
<author><first>Clarissa</first><last>Scandarolli</last></author>
<pages>1–11</pages>
<url hash="b44fa3d0">2022.udfestbr-1.3</url>
<bibkey>duran-etal-2022-que</bibkey>
</paper>
<paper id="4">
<title>Shallow parsing of <fixed-case>P</fixed-case>ortuguese texts annotated under <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies</title>
<author><first>Guilherme Martiniano</first><last>Oliveira</last></author>
<author><first>Paulo Berlanga</first><last>Neto</last></author>
<author><first>Evandro Eduardo Seron</first><last>Ruiz</last></author>
<pages>1–8</pages>
<url hash="4ff0f086">2022.udfestbr-1.4</url>
<bibkey>oliveira-etal-2022-shallow</bibkey>
</paper>
<paper id="5">
<title>Still on arguments and adjuncts: the status of the indirect object and the adverbial adjunct relations in <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies for <fixed-case>P</fixed-case>ortuguese</title>
<author><first>Elvis</first><last>Souza</last></author>
<author><first>Claudia</first><last>Freitas</last></author>
<pages>1–10</pages>
<url hash="86c474fe">2022.udfestbr-1.5</url>
<bibkey>souza-freitas-2022-still</bibkey>
</paper>
<paper id="6">
<title><fixed-case>UDC</fixed-case>oncord: A Concordancer for <fixed-case>U</fixed-case>niversal <fixed-case>D</fixed-case>ependencies Treebanks</title>
<author><first>Lucas Gabriel Mendes</first><last>Miranda</last></author>
<author><first>Thiago Alexandre Salgueiro</first><last>Pardo</last></author>
<pages>1–10</pages>
<url hash="be1662f6">2022.udfestbr-1.6</url>
<bibkey>miranda-pardo-2022-udconcord</bibkey>
</paper>
</volume>
</collection>
3 changes: 3 additions & 0 deletions data/yaml/venues.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1104,6 +1104,9 @@ tutorials:
name: 'Proceedings of the 59th Annual Meeting of the Association for Computational
Linguistics and the 11th International Joint Conference on Natural Language Processing:
Tutorial Abstracts'
udfestbr:
acronym: UDFestBR
name: Universal Dependencies Brazilian Festival
udw:
acronym: UDW
name: Universal Dependencies Workshop
Expand Down

0 comments on commit b4721be

Please sign in to comment.