From 8e5d4db699524d40c6f52e125d81382b9c765a2b Mon Sep 17 00:00:00 2001 From: xinru1414 Date: Mon, 27 Sep 2021 09:42:45 -0400 Subject: [PATCH] KONVENS-2021 Ingestion (#1581) * ingested KONVENS 2021 * adding url for konvens in venues.yaml * updated ingest.py to ingest the full-volume PDF * ingested KONVENS full-volume pdf * updated script to use f string to find the book.pdf path --- bin/ingest.py | 5 +- data/xml/2021.konvens.xml | 267 ++++++++++++++++++++++++++++++++++++++ data/yaml/venues.yaml | 4 + 3 files changed, 274 insertions(+), 2 deletions(-) create mode 100644 data/xml/2021.konvens.xml diff --git a/bin/ingest.py b/bin/ingest.py index 41c92209c5..f14947bb63 100755 --- a/bin/ingest.py +++ b/bin/ingest.py @@ -291,8 +291,9 @@ def disambiguate_name(node, anth_id): if not os.path.exists(pdfs_dest_dir): os.makedirs(pdfs_dest_dir) - # copy the book from the top-level proceedings/ dir, named "book.pdf" - book_src_path = os.path.join(meta["path"], "book.pdf") + # copy the book from the top-level proceedings/ dir, named "VENUE-year.pdf" + book_path = f'cdrom/{venue_name.upper()}-{year}.pdf' + book_src_path = os.path.join(meta["path"], book_path) book_dest_path = None if os.path.exists(book_src_path) and not args.dry_run: book_dest_path = ( diff --git a/data/xml/2021.konvens.xml b/data/xml/2021.konvens.xml new file mode 100644 index 0000000000..b813956329 --- /dev/null +++ b/data/xml/2021.konvens.xml @@ -0,0 +1,267 @@ + + + + + Proceedings of the 17th Conference on Natural Language Processing (KONVENS 2021) + KilianEvang + LauraKallmeyer + RainerOsswald + JakubWaszczuk + TorstenZesch + KONVENS 2021 Organizers +
Düsseldorf, Germany
+ 6--9 September + 2021 + 2021.konvens-1 + + + 2021.konvens-1.0 + konvens-2021-natural + + + The Impact of Word Embeddings on Neural Dependency Parsing + BenediktAdelmann + WolfgangMenzel + HeikeZinsmeister + 1–13 + 2021.konvens-1.1 + adelmann-etal-2021-impact + + + Benchmarking down-scaled (not so large) pre-trained language models + MatthiasAßenmacher + PatrickSchulze + ChristianHeumann + 14–27 + 2021.konvens-1.2 + assenmacher-etal-2021-benchmarking + + + <fixed-case>A</fixed-case>rgue<fixed-case>BERT</fixed-case>: How To Improve <fixed-case>BERT</fixed-case> Embeddings for Measuring the Similarity of Arguments + MaikeBehrendt + StefanHarmeling + 28–36 + 2021.konvens-1.3 + behrendt-harmeling-2021-arguebert + + + How Hateful are Movies? A Study and Prediction on Movie Subtitles + Niklasvon Boguszewski + SanaMoin + AnirbanBhowmick + Seid MuhieYimam + ChrisBiemann + 37–48 + 2021.konvens-1.4 + von-boguszewski-etal-2021-hateful + + + Emotion Recognition under Consideration of the Emotion Component Process Model + FelixCasel + AmelieHeindl + RomanKlinger + 49–61 + 2021.konvens-1.5 + casel-etal-2021-emotion + + + Identifikation von Vorkommensformen der Lemmata in Quellenzitaten frühneuhochdeutscher Lexikoneinträge + StefanieDipper + Jan ChristianSchaffert + 62–72 + 2021.konvens-1.6 + dipper-schaffert-2021-identifikation + + + Emotion Stimulus Detection in <fixed-case>G</fixed-case>erman News Headlines + Bao Minh DoanDang + LauraOberländer + RomanKlinger + 73–85 + 2021.konvens-1.7 + dang-etal-2021-emotion + + + Lexicon-based Sentiment Analysis in <fixed-case>G</fixed-case>erman: Systematic Evaluation of Resources and Preprocessing Techniques + JakobFehle + ThomasSchmidt + ChristianWolff + 86–103 + 2021.konvens-1.8 + fehle-etal-2021-lexicon + + + Definition Extraction from Mathematical Texts on Graph Theory in <fixed-case>G</fixed-case>erman and <fixed-case>E</fixed-case>nglish + TheresaKruse + FritzKliche + 104–113 + 2021.konvens-1.9 + kruse-kliche-2021-definition + + + Extraction and Normalization of Vague Time Expressions in <fixed-case>G</fixed-case>erman + UlrikeMay + KarolinaZaczynska + JuliánMoreno-Schneider + GeorgRehm + 114–126 + 2021.konvens-1.10 + may-etal-2021-extraction + + + Automatic Phrase Recognition in Historical <fixed-case>G</fixed-case>erman + KatrinOrtmann + 127–136 + 2021.konvens-1.11 + ortmann-2021-automatic + + + Automatically Identifying Online Grooming Chats Using <fixed-case>CNN</fixed-case>-based Feature Extraction + SvenjaPreuß + Luna PiaBley + TabeaBayha + VivienDehne + AlessaJordan + SophieReimann + FinaRoberto + Josephine RomyZahm + HannaSiewerts + DirkLabudde + MichaelSpranger + 137–146 + 2021.konvens-1.12 + preuss-etal-2021-automatically + + + Who is we? Disambiguating the referents of first person plural pronouns in parliamentary debates + InesRehbein + JosefRuppenhofer + JulianBernauer + 147–158 + 2021.konvens-1.13 + rehbein-etal-2021-disambiguating + + + Examining the Effects of Preprocessing on the Detection of Offensive Language in <fixed-case>G</fixed-case>erman Tweets + SebastianReimann + DanielDakota + 159–169 + 2021.konvens-1.14 + reimann-dakota-2021-examining + + + Neural End-to-end Coreference Resolution for <fixed-case>G</fixed-case>erman in Different Domains + FynnSchröder + Hans OleHatzel + ChrisBiemann + 170–181 + 2021.konvens-1.15 + schroder-etal-2021-neural + + + How to Estimate Continuous Sentiments From Texts Using Binary Training Data + SandraWankmüller + ChristianHeumann + 182–192 + 2021.konvens-1.16 + wankmuller-heumann-2021-estimate + + + forum<fixed-case>BERT</fixed-case>: Topic Adaptation and Classification of Contextualized Forum Comments in <fixed-case>G</fixed-case>erman + AyushYadav + BenjaminMilde + 193–202 + 2021.konvens-1.17 + yadav-milde-2021-forumbert + + + Robustness of end-to-end Automatic Speech Recognition Models – A Case Study using Mozilla <fixed-case>D</fixed-case>eep<fixed-case>S</fixed-case>peech + AashishAgarwal + TorstenZesch + 203–207 + 2021.konvens-1.18 + agarwal-zesch-2021-robustness + + + Effects of Layer Freezing on Transferring a Speech Recognition System to Under-resourced Languages + OnnoEberhard + TorstenZesch + 208–212 + 2021.konvens-1.19 + eberhard-zesch-2021-effects + + + <fixed-case>D</fixed-case>e<fixed-case>I</fixed-case>n<fixed-case>S</fixed-case>tance: Creating and Evaluating a <fixed-case>G</fixed-case>erman Corpus for Fine-Grained Inferred Stance Detection + AnneGöhring + ManfredKlenner + SophiaConrad + 213–217 + 2021.konvens-1.20 + gohring-etal-2021-deinstance + + + Combining text and vision in compound semantics: Towards a cognitively plausible multimodal model + AbhijeetGupta + FritzGünther + IngoPlag + LauraKallmeyer + StefanConrad + 218–222 + 2021.konvens-1.21 + gupta-etal-2021-combining + + + <fixed-case>M</fixed-case>ob<fixed-case>IE</fixed-case>: A <fixed-case>G</fixed-case>erman Dataset for Named Entity Recognition, Entity Linking and Relation Extraction in the Mobility Domain + LeonhardHennig + Phuc TranTruong + AleksandraGabryszak + 223–227 + 2021.konvens-1.22 + hennig-etal-2021-mobie + + + Automatically evaluating the conceptual complexity of <fixed-case>G</fixed-case>erman texts + FreyaHewett + ManfredStede + 228–234 + 2021.konvens-1.23 + hewett-stede-2021-automatically + + + <fixed-case>W</fixed-case>ord<fixed-case>G</fixed-case>uess: Using Associations for Guessing, Learning and Exploring Related Words + CennetOguz + AndréBlessing + JonasKuhn + Sabine Schulte ImWalde + 235–241 + 2021.konvens-1.24 + oguz-etal-2021-wordguess + + + Towards a balanced annotated Low <fixed-case>S</fixed-case>axon dataset for diachronic investigation of dialectal variation + JanineSiewert + YvesScherrer + JörgTiedemann + 242–246 + 2021.konvens-1.25 + siewert-etal-2021-towards + + + <fixed-case>G</fixed-case>erman Abusive Language Dataset with Focus on <fixed-case>COVID</fixed-case>-19 + MaximilianWich + SvenjaRäther + GeorgGroh + 247–252 + 2021.konvens-1.26 + wich-etal-2021-german + + + Comparing Contextual and Static Word Embeddings with Small Data + WeiZhou + JelkeBloem + 253–259 + 2021.konvens-1.27 + zhou-bloem-2021-comparing + +
+
diff --git a/data/yaml/venues.yaml b/data/yaml/venues.yaml index 65b88cc5a6..ab9695655f 100644 --- a/data/yaml/venues.yaml +++ b/data/yaml/venues.yaml @@ -489,6 +489,10 @@ knlp: acronym: knlp name: 'Knowledgeable NLP: Workshop on Integrating Structured Knowledge and Neural Networks for NLP' +konvens: + acronym: KONVENS + name: Conference on Natural Language Processing + url: https://konvens.org/site/ lantern: acronym: LANTERN name: 'The Workshop Beyond Vision and LANguage: inTEgrating Real-world kNowledge'