From 8e5d4db699524d40c6f52e125d81382b9c765a2b Mon Sep 17 00:00:00 2001
From: xinru1414 <xinru1414@gmail.com>
Date: Mon, 27 Sep 2021 09:42:45 -0400
Subject: [PATCH] KONVENS-2021 Ingestion (#1581)

* ingested KONVENS 2021

* adding url for konvens in venues.yaml

* updated ingest.py to ingest the full-volume PDF

* ingested KONVENS full-volume pdf

* updated script to use an f-string to find the book.pdf path
---
 bin/ingest.py             |   5 +-
 data/xml/2021.konvens.xml | 267 ++++++++++++++++++++++++++++++++++++++
 data/yaml/venues.yaml     |   4 +
 3 files changed, 274 insertions(+), 2 deletions(-)
 create mode 100644 data/xml/2021.konvens.xml
diff --git a/bin/ingest.py b/bin/ingest.py
index 41c92209c5..f14947bb63 100755
--- a/bin/ingest.py
+++ b/bin/ingest.py
@@ -291,8 +291,9 @@ def disambiguate_name(node, anth_id):
         if not os.path.exists(pdfs_dest_dir):
             os.makedirs(pdfs_dest_dir)
 
-        # copy the book from the top-level proceedings/ dir, named "book.pdf"
-        book_src_path = os.path.join(meta["path"], "book.pdf")
+        # copy the book from the cdrom/ subdir of the top-level proceedings/ dir, named "VENUE-year.pdf"
+        book_path = f'cdrom/{venue_name.upper()}-{year}.pdf'
+        book_src_path = os.path.join(meta["path"], book_path)
         book_dest_path = None
         if os.path.exists(book_src_path) and not args.dry_run:
             book_dest_path = (
diff --git a/data/xml/2021.konvens.xml b/data/xml/2021.konvens.xml
new file mode 100644
index 0000000000..b813956329
--- /dev/null
+++ b/data/xml/2021.konvens.xml
@@ -0,0 +1,267 @@
+<?xml version='1.0' encoding='UTF-8'?>
+<collection id="2021.konvens">
+  <volume id="1" ingest-date="2021-09-06">
+    <meta>
+      <booktitle>Proceedings of the 17th Conference on Natural Language Processing (KONVENS 2021)</booktitle>
+      <editor><first>Kilian</first><last>Evang</last></editor>
+      <editor><first>Laura</first><last>Kallmeyer</last></editor>
+      <editor><first>Rainer</first><last>Osswald</last></editor>
+      <editor><first>Jakub</first><last>Waszczuk</last></editor>
+      <editor><first>Torsten</first><last>Zesch</last></editor>
+      <publisher>KONVENS 2021 Organizers</publisher>
+      <address>Düsseldorf, Germany</address>
+      <month>6--9 September</month>
+      <year>2021</year>
+      <url hash="bf8a70a7">2021.konvens-1</url>
+    </meta>
+    <frontmatter>
+      <url hash="08ee717a">2021.konvens-1.0</url>
+      <bibkey>konvens-2021-natural</bibkey>
+    </frontmatter>
+    <paper id="1">
+      <title>The Impact of Word Embeddings on Neural Dependency Parsing</title>
+      <author><first>Benedikt</first><last>Adelmann</last></author>
+      <author><first>Wolfgang</first><last>Menzel</last></author>
+      <author><first>Heike</first><last>Zinsmeister</last></author>
+      <pages>1–13</pages>
+      <url hash="f1d106ed">2021.konvens-1.1</url>
+      <bibkey>adelmann-etal-2021-impact</bibkey>
+    </paper>
+    <paper id="2">
+      <title>Benchmarking down-scaled (not so large) pre-trained language models</title>
+      <author><first>Matthias</first><last>Aßenmacher</last></author>
+      <author><first>Patrick</first><last>Schulze</last></author>
+      <author><first>Christian</first><last>Heumann</last></author>
+      <pages>14–27</pages>
+      <url hash="1e75c015">2021.konvens-1.2</url>
+      <bibkey>assenmacher-etal-2021-benchmarking</bibkey>
+    </paper>
+    <paper id="3">
+      <title><fixed-case>A</fixed-case>rgue<fixed-case>BERT</fixed-case>: How To Improve <fixed-case>BERT</fixed-case> Embeddings for Measuring the Similarity of Arguments</title>
+      <author><first>Maike</first><last>Behrendt</last></author>
+      <author><first>Stefan</first><last>Harmeling</last></author>
+      <pages>28–36</pages>
+      <url hash="62033e0d">2021.konvens-1.3</url>
+      <bibkey>behrendt-harmeling-2021-arguebert</bibkey>
+    </paper>
+    <paper id="4">
+      <title>How Hateful are Movies? A Study and Prediction on Movie Subtitles</title>
+      <author><first>Niklas</first><last>von Boguszewski</last></author>
+      <author><first>Sana</first><last>Moin</last></author>
+      <author><first>Anirban</first><last>Bhowmick</last></author>
+      <author><first>Seid Muhie</first><last>Yimam</last></author>
+      <author><first>Chris</first><last>Biemann</last></author>
+      <pages>37–48</pages>
+      <url hash="b5f5fc74">2021.konvens-1.4</url>
+      <bibkey>von-boguszewski-etal-2021-hateful</bibkey>
+    </paper>
+    <paper id="5">
+      <title>Emotion Recognition under Consideration of the Emotion Component Process Model</title>
+      <author><first>Felix</first><last>Casel</last></author>
+      <author><first>Amelie</first><last>Heindl</last></author>
+      <author><first>Roman</first><last>Klinger</last></author>
+      <pages>49–61</pages>
+      <url hash="f8e39cb8">2021.konvens-1.5</url>
+      <bibkey>casel-etal-2021-emotion</bibkey>
+    </paper>
+    <paper id="6">
+      <title>Identifikation von Vorkommensformen der Lemmata in Quellenzitaten frühneuhochdeutscher Lexikoneinträge</title>
+      <author><first>Stefanie</first><last>Dipper</last></author>
+      <author><first>Jan Christian</first><last>Schaffert</last></author>
+      <pages>62–72</pages>
+      <url hash="d29f27dd">2021.konvens-1.6</url>
+      <bibkey>dipper-schaffert-2021-identifikation</bibkey>
+    </paper>
+    <paper id="7">
+      <title>Emotion Stimulus Detection in <fixed-case>G</fixed-case>erman News Headlines</title>
+      <author><first>Bao Minh Doan</first><last>Dang</last></author>
+      <author><first>Laura</first><last>Oberländer</last></author>
+      <author><first>Roman</first><last>Klinger</last></author>
+      <pages>73–85</pages>
+      <url hash="6b9cd292">2021.konvens-1.7</url>
+      <bibkey>dang-etal-2021-emotion</bibkey>
+    </paper>
+    <paper id="8">
+      <title>Lexicon-based Sentiment Analysis in <fixed-case>G</fixed-case>erman: Systematic Evaluation of Resources and Preprocessing Techniques</title>
+      <author><first>Jakob</first><last>Fehle</last></author>
+      <author><first>Thomas</first><last>Schmidt</last></author>
+      <author><first>Christian</first><last>Wolff</last></author>
+      <pages>86–103</pages>
+      <url hash="b8412d7c">2021.konvens-1.8</url>
+      <bibkey>fehle-etal-2021-lexicon</bibkey>
+    </paper>
+    <paper id="9">
+      <title>Definition Extraction from Mathematical Texts on Graph Theory in <fixed-case>G</fixed-case>erman and <fixed-case>E</fixed-case>nglish</title>
+      <author><first>Theresa</first><last>Kruse</last></author>
+      <author><first>Fritz</first><last>Kliche</last></author>
+      <pages>104–113</pages>
+      <url hash="992f9914">2021.konvens-1.9</url>
+      <bibkey>kruse-kliche-2021-definition</bibkey>
+    </paper>
+    <paper id="10">
+      <title>Extraction and Normalization of Vague Time Expressions in <fixed-case>G</fixed-case>erman</title>
+      <author><first>Ulrike</first><last>May</last></author>
+      <author><first>Karolina</first><last>Zaczynska</last></author>
+      <author><first>Julián</first><last>Moreno-Schneider</last></author>
+      <author><first>Georg</first><last>Rehm</last></author>
+      <pages>114–126</pages>
+      <url hash="5cb3dbfc">2021.konvens-1.10</url>
+      <bibkey>may-etal-2021-extraction</bibkey>
+    </paper>
+    <paper id="11">
+      <title>Automatic Phrase Recognition in Historical <fixed-case>G</fixed-case>erman</title>
+      <author><first>Katrin</first><last>Ortmann</last></author>
+      <pages>127–136</pages>
+      <url hash="83178c7c">2021.konvens-1.11</url>
+      <bibkey>ortmann-2021-automatic</bibkey>
+    </paper>
+    <paper id="12">
+      <title>Automatically Identifying Online Grooming Chats Using <fixed-case>CNN</fixed-case>-based Feature Extraction</title>
+      <author><first>Svenja</first><last>Preuß</last></author>
+      <author><first>Luna Pia</first><last>Bley</last></author>
+      <author><first>Tabea</first><last>Bayha</last></author>
+      <author><first>Vivien</first><last>Dehne</last></author>
+      <author><first>Alessa</first><last>Jordan</last></author>
+      <author><first>Sophie</first><last>Reimann</last></author>
+      <author><first>Fina</first><last>Roberto</last></author>
+      <author><first>Josephine Romy</first><last>Zahm</last></author>
+      <author><first>Hanna</first><last>Siewerts</last></author>
+      <author><first>Dirk</first><last>Labudde</last></author>
+      <author><first>Michael</first><last>Spranger</last></author>
+      <pages>137–146</pages>
+      <url hash="1e67294f">2021.konvens-1.12</url>
+      <bibkey>preuss-etal-2021-automatically</bibkey>
+    </paper>
+    <paper id="13">
+      <title>Who is we? Disambiguating the referents of first person plural pronouns in parliamentary debates</title>
+      <author><first>Ines</first><last>Rehbein</last></author>
+      <author><first>Josef</first><last>Ruppenhofer</last></author>
+      <author><first>Julian</first><last>Bernauer</last></author>
+      <pages>147–158</pages>
+      <url hash="fd54aa3b">2021.konvens-1.13</url>
+      <bibkey>rehbein-etal-2021-disambiguating</bibkey>
+    </paper>
+    <paper id="14">
+      <title>Examining the Effects of Preprocessing on the Detection of Offensive Language in <fixed-case>G</fixed-case>erman Tweets</title>
+      <author><first>Sebastian</first><last>Reimann</last></author>
+      <author><first>Daniel</first><last>Dakota</last></author>
+      <pages>159–169</pages>
+      <url hash="a369b8bf">2021.konvens-1.14</url>
+      <bibkey>reimann-dakota-2021-examining</bibkey>
+    </paper>
+    <paper id="15">
+      <title>Neural End-to-end Coreference Resolution for <fixed-case>G</fixed-case>erman in Different Domains</title>
+      <author><first>Fynn</first><last>Schröder</last></author>
+      <author><first>Hans Ole</first><last>Hatzel</last></author>
+      <author><first>Chris</first><last>Biemann</last></author>
+      <pages>170–181</pages>
+      <url hash="04b3285d">2021.konvens-1.15</url>
+      <bibkey>schroder-etal-2021-neural</bibkey>
+    </paper>
+    <paper id="16">
+      <title>How to Estimate Continuous Sentiments From Texts Using Binary Training Data</title>
+      <author><first>Sandra</first><last>Wankmüller</last></author>
+      <author><first>Christian</first><last>Heumann</last></author>
+      <pages>182–192</pages>
+      <url hash="b5fd8f48">2021.konvens-1.16</url>
+      <bibkey>wankmuller-heumann-2021-estimate</bibkey>
+    </paper>
+    <paper id="17">
+      <title>forum<fixed-case>BERT</fixed-case>: Topic Adaptation and Classification of Contextualized Forum Comments in <fixed-case>G</fixed-case>erman</title>
+      <author><first>Ayush</first><last>Yadav</last></author>
+      <author><first>Benjamin</first><last>Milde</last></author>
+      <pages>193–202</pages>
+      <url hash="88dab76c">2021.konvens-1.17</url>
+      <bibkey>yadav-milde-2021-forumbert</bibkey>
+    </paper>
+    <paper id="18">
+      <title>Robustness of end-to-end Automatic Speech Recognition Models – A Case Study using Mozilla <fixed-case>D</fixed-case>eep<fixed-case>S</fixed-case>peech</title>
+      <author><first>Aashish</first><last>Agarwal</last></author>
+      <author><first>Torsten</first><last>Zesch</last></author>
+      <pages>203–207</pages>
+      <url hash="b238647f">2021.konvens-1.18</url>
+      <bibkey>agarwal-zesch-2021-robustness</bibkey>
+    </paper>
+    <paper id="19">
+      <title>Effects of Layer Freezing on Transferring a Speech Recognition System to Under-resourced Languages</title>
+      <author><first>Onno</first><last>Eberhard</last></author>
+      <author><first>Torsten</first><last>Zesch</last></author>
+      <pages>208–212</pages>
+      <url hash="6e387d20">2021.konvens-1.19</url>
+      <bibkey>eberhard-zesch-2021-effects</bibkey>
+    </paper>
+    <paper id="20">
+      <title><fixed-case>D</fixed-case>e<fixed-case>I</fixed-case>n<fixed-case>S</fixed-case>tance: Creating and Evaluating a <fixed-case>G</fixed-case>erman Corpus for Fine-Grained Inferred Stance Detection</title>
+      <author><first>Anne</first><last>Göhring</last></author>
+      <author><first>Manfred</first><last>Klenner</last></author>
+      <author><first>Sophia</first><last>Conrad</last></author>
+      <pages>213–217</pages>
+      <url hash="b8a1e2ce">2021.konvens-1.20</url>
+      <bibkey>gohring-etal-2021-deinstance</bibkey>
+    </paper>
+    <paper id="21">
+      <title>Combining text and vision in compound semantics: Towards a cognitively plausible multimodal model</title>
+      <author><first>Abhijeet</first><last>Gupta</last></author>
+      <author><first>Fritz</first><last>Günther</last></author>
+      <author><first>Ingo</first><last>Plag</last></author>
+      <author><first>Laura</first><last>Kallmeyer</last></author>
+      <author><first>Stefan</first><last>Conrad</last></author>
+      <pages>218–222</pages>
+      <url hash="050ed5c7">2021.konvens-1.21</url>
+      <bibkey>gupta-etal-2021-combining</bibkey>
+    </paper>
+    <paper id="22">
+      <title><fixed-case>M</fixed-case>ob<fixed-case>IE</fixed-case>: A <fixed-case>G</fixed-case>erman Dataset for Named Entity Recognition, Entity Linking and Relation Extraction in the Mobility Domain</title>
+      <author><first>Leonhard</first><last>Hennig</last></author>
+      <author><first>Phuc Tran</first><last>Truong</last></author>
+      <author><first>Aleksandra</first><last>Gabryszak</last></author>
+      <pages>223–227</pages>
+      <url hash="59a4cca9">2021.konvens-1.22</url>
+      <bibkey>hennig-etal-2021-mobie</bibkey>
+    </paper>
+    <paper id="23">
+      <title>Automatically evaluating the conceptual complexity of <fixed-case>G</fixed-case>erman texts</title>
+      <author><first>Freya</first><last>Hewett</last></author>
+      <author><first>Manfred</first><last>Stede</last></author>
+      <pages>228–234</pages>
+      <url hash="be4c9190">2021.konvens-1.23</url>
+      <bibkey>hewett-stede-2021-automatically</bibkey>
+    </paper>
+    <paper id="24">
+      <title><fixed-case>W</fixed-case>ord<fixed-case>G</fixed-case>uess: Using Associations for Guessing, Learning and Exploring Related Words</title>
+      <author><first>Cennet</first><last>Oguz</last></author>
+      <author><first>André</first><last>Blessing</last></author>
+      <author><first>Jonas</first><last>Kuhn</last></author>
+      <author><first>Sabine</first><last>Schulte im Walde</last></author>
+      <pages>235–241</pages>
+      <url hash="aa63f3eb">2021.konvens-1.24</url>
+      <bibkey>oguz-etal-2021-wordguess</bibkey>
+    </paper>
+    <paper id="25">
+      <title>Towards a balanced annotated Low <fixed-case>S</fixed-case>axon dataset for diachronic investigation of dialectal variation</title>
+      <author><first>Janine</first><last>Siewert</last></author>
+      <author><first>Yves</first><last>Scherrer</last></author>
+      <author><first>Jörg</first><last>Tiedemann</last></author>
+      <pages>242–246</pages>
+      <url hash="2fbca907">2021.konvens-1.25</url>
+      <bibkey>siewert-etal-2021-towards</bibkey>
+    </paper>
+    <paper id="26">
+      <title><fixed-case>G</fixed-case>erman Abusive Language Dataset with Focus on <fixed-case>COVID</fixed-case>-19</title>
+      <author><first>Maximilian</first><last>Wich</last></author>
+      <author><first>Svenja</first><last>Räther</last></author>
+      <author><first>Georg</first><last>Groh</last></author>
+      <pages>247–252</pages>
+      <url hash="ec5fb607">2021.konvens-1.26</url>
+      <bibkey>wich-etal-2021-german</bibkey>
+    </paper>
+    <paper id="27">
+      <title>Comparing Contextual and Static Word Embeddings with Small Data</title>
+      <author><first>Wei</first><last>Zhou</last></author>
+      <author><first>Jelke</first><last>Bloem</last></author>
+      <pages>253–259</pages>
+      <url hash="593195b4">2021.konvens-1.27</url>
+      <bibkey>zhou-bloem-2021-comparing</bibkey>
+    </paper>
+  </volume>
+</collection>
diff --git a/data/yaml/venues.yaml b/data/yaml/venues.yaml
index 65b88cc5a6..ab9695655f 100644
--- a/data/yaml/venues.yaml
+++ b/data/yaml/venues.yaml
@@ -489,6 +489,10 @@ knlp:
   acronym: knlp
   name: 'Knowledgeable NLP: Workshop on Integrating Structured Knowledge and Neural
     Networks for NLP'
+konvens:
+  acronym: KONVENS
+  name: Conference on Natural Language Processing
+  url: https://konvens.org/site/
 lantern:
   acronym: LANTERN
   name: 'The Workshop Beyond Vision and LANguage: inTEgrating Real-world kNowledge'