diff --git a/.github/workflows/quarto-publish.yml b/.github/workflows/quarto-publish.yml index 3552a56..466973a 100644 --- a/.github/workflows/quarto-publish.yml +++ b/.github/workflows/quarto-publish.yml @@ -86,6 +86,7 @@ jobs: to: html - name: JamPack + if: github.event_name != 'pull_request' run: | npx @divriots/jampack . diff --git a/submissions/427/_quarto.yml b/submissions/427/_quarto.yml new file mode 100644 index 0000000..ae6ece2 --- /dev/null +++ b/submissions/427/_quarto.yml @@ -0,0 +1,8 @@ +project: + type: manuscript + +manuscript: + article: index.qmd + +format: + html: default diff --git a/submissions/427/images/schema-pipeline.png b/submissions/427/images/schema-pipeline.png new file mode 100644 index 0000000..8f870cb Binary files /dev/null and b/submissions/427/images/schema-pipeline.png differ diff --git a/submissions/427/index.qmd b/submissions/427/index.qmd new file mode 100644 index 0000000..dab9735 --- /dev/null +++ b/submissions/427/index.qmd @@ -0,0 +1,149 @@ +--- +submission_id: 427 +categories: 'Session 6A' +title: On a solid ground. Building software for a 120-year-old research project applying modern engineering practices +author: + - name: Christian Sonder + orcid: 0009-0009-5702-7902 + email: christian.sonder@unisg.ch + affiliations: + - University of St. Gallen + - name: Bastian Politycki + orcid: 0000-0002-6308-2424 + email: bastian.politycki@unisg.ch + affiliations: + - University of St. Gallen +keywords: + - software engineering + - TEI-XML + - digital edition + - project organization +abstract: | + There is no doubt that the increasing use of digital methods and tools in the humanities opens up an almost infinite number of new possibilities. At the same time, it is becoming more and more clear that this creates new problems for the humanities. Many software solutions are often 'quick hacks'—changes to them are time-consuming, lead to errors, and the sustainability of the solution itself is overall questionable. Digital editing projects—which are mostly based on TEI-XML—face this challenge from the beginning: The 'TEI standard' is rather a loose collection of recommendations, which necessitates the development of a customized schema (a TEI subset) for project-specific data, so that the edition or encoding guidelines can be enforced and their compliance checked. These machine-readable rules must be supplemented by human-readable guidelines which document the fundamental philological decisions and can be used as a manual for the editors. + + The development of such a schema—and the associated workflows—becomes particularly relevant in the context of long-term projects, such as the Collection of Swiss Legal Sources (SLS). Changes to the schema require a continuous conversion of existing datasets. The contribution addresses how practices of modern software development, such as versioning or test-driven development (TDD), can be profitably used for humanities projects. It presents the entire workflow beginning with the creation of a modularized schema for a complex text corpus, which includes texts in German, French, Latin, Italian and Romansh from the 6th to the 18th century, up to the automated verification and publication of the documentation/schema. +key-points: + - Software development is increasingly important in digital humanities research projects, yet many struggle to implement modern engineering practices that enhance sustainability and speed up development. 
+  - Developing an XML schema for a scholarly edition project is challenging but can provide a solid foundation for the project when executed effectively.
+date: 07-25-2024
+bibliography: references.bib
+---
+
+## Introduction
+
+### General Problem Description
+
+Nowadays, software is a central component of every research project. Since the advent of the personal computer, digital tools have been used for a wide range of tasks, from simple text processing to machine-assisted recognition in all sorts of historical documents. Research projects, however, in particular those that produce digital scholarly editions, rarely rely solely on existing tools; they often create new ones. From the development or customization of their own data formats to the implementation of often complex web applications for presentation, it is not uncommon for the tools developed in this context to be 'quick hacks' rather than well-designed software projects.^[Carver et al. recently demonstrated this with a survey, which shows that many researchers developing software have never received training in software development, and best practices are often ignored. See @carver_survey_2022.] In many cases, this is not a problem at all, because the duration of research projects in the humanities is often rather short (e.g. between three and six years). Software developed in such a short amount of time must first and foremost achieve the project's goals, and therefore adaptation to other subjects or subsequent use is usually not intended. However, this becomes a problem if the corresponding research project is scheduled for a longer term, or if it is part of a series of projects depending on each other. In this case, quick solutions often become serious issues and are not really FAIR for either internal or external subsequent use. Not least for this reason, this phenomenon is the subject of discussion in the digital humanities community under the heading of *research software engineering*.^[Manuel Burghardt and Claudia Müller-Birn organised a workshop specifically on this topic at the 49th Annual Conference of the German Informatics Society, see @informatik_software_2019.] This paper describes practical experiences from the perspective of a long-term editorial project and explores opportunities for sustainable development practices by utilizing modern methods that have long been established outside the academic world.
+
+### The Swiss Law Sources
+
+The Collection of Swiss Law Sources (SLS) is a 120-year-old research project that publishes Swiss legal texts in German, French, Latin, Italian and Romansh from the 6th to the 18th century. The edited texts are published in a printed reference publication and in digital form.^[See @law_sources_foundation_of_the_swiss_lawyers_society_collection_2024 for the web presentation.] At the time of writing, ten edition projects are being carried out by 23 researchers in three languages throughout Switzerland: In French, volumes are to be published in the cantons of Geneva (1 vol.), Vaud (2 vols.), Neuchâtel (1 vol.) and Fribourg (1 vol.); in German, Valais (1 vol.), Lucerne (2 vols.), Schaffhausen (2 vols.), St. Gallen (1 vol.) and Graubünden (1 vol.); and in Italian, Ticino (1 vol.). Further edition projects are planned or have been applied for, while the overall project is scheduled to run for another ~50 years.
The entire technical infrastructure is provided and developed by the SLS core team, which consists of the project manager and two members of staff specializing in DH (the authors of this paper). This team is also responsible for coordinating the projects, processing the data, typesetting the printed volumes and digitally publishing the edited texts.
+
+In this context, the development of new software and the improvement of existing software is not only a technical challenge, but also an organizational one. Existing applications must run continuously to provide the researchers with the tools they need for their daily work (and to grant the users of the digital edition access to all information), while new requirements must be met on an ongoing basis, as each project deals with unique documents.
+
+### Sidenote on the evolution of the technical infrastructure of the SLS
+
+About 15 years ago the Law Sources Foundation, which stands behind the SLS, decided to retro-digitize the over one hundred volumes published up to that point. Since then, the results of these initial digitization efforts have been presented in a web application which, as a 'browsing machine', makes the results of the many years of editing work, previously locked between two book covers, available to a broad public. This also marked the start of the project's transition to a predominantly digital editing and working method. In these 15 years numerous (web) applications have been created: These include databases that collate information on historical entities (people, organizations, places and terms), a digital application that presents the transcriptions, now encoded in TEI-XML, in both a web and a print view, and a number of other tools used for the various tasks at hand. The ongoing nature of the project was one of the reasons why many of these applications were 'ad hoc solutions' or proofs of concept that were neither designed for long-term operation nor for integration—i.e. collaboration—with other tools. As a result, a rather diverse ecosystem of different technologies has developed on the data side as well as on the processing and presentation side.^[The edited texts themselves are available as PDF (the retro-digitized collection), TeX and FileMaker (transition phase) and TEI-XML (current projects). These are processed by scripts and applications in the programming languages Perl, OCaml, Python, JavaScript and XQuery. Relational as well as graph-based and document-oriented databases are used to store the entity data.]
+
+## Data as a solid ground: developing an XML Schema for a scholarly edition
+
+The foundation of a digital scholarly edition is undoubtedly the transcribed and annotated data, which is usually encoded in an XML format.^[There have been various discussions about what the key value of a digital scholarly edition is. Maybe it is the data (see @porter_what_2024) or the interface (see @van_zundert_what_2018). More recently, it has become increasingly clear that it could be both. Therefore, models have been developed which understand scholarly editions as a stack of data, the processing applied to it and the resulting presentation (see @neuber_digitale_2023, p. 71).] All our newly edited texts are encoded in XML, and as time permits all previously edited texts will be converted to this format. Therefore all further application layers, such as the web presentation or printed output, have to be based on these XML files according to the single source principle.
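+As a minimal illustration of the single source principle (a sketch with hypothetical file and stylesheet names, not the project's actual rendering code), one and the same TEI file can be turned into several output formats:
+
+```python
+from lxml import etree
+
+# One TEI-XML source file (hypothetical example file) ...
+source = etree.parse("transcription.xml")
+
+# ... rendered into several output formats, each by its own XSLT
+# stylesheet (stylesheet names are assumptions):
+for suffix, xslt_file in {"html": "tei2html.xsl", "tex": "tei2tex.xsl"}.items():
+    transform = etree.XSLT(etree.parse(xslt_file))
+    result = transform(source)
+    result.write_output(f"transcription.{suffix}")
+```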
+Over the last two decades, the guidelines of the Text Encoding Initiative (TEI)^[For details see @text_encoding_initiative_guidelines_2024.] have established themselves as the de facto standard for this markup work. These guidelines are primarily a broad collection of suggestions rather than a clear set of rules, necessitating a precise translation of philological concepts into a logical data model, specifically the creation of a TEI subset as an XML schema. The TEI itself offers a format called ODD (One Document Does it all) for creating an XML schema in a literate-programming fashion^[The term literate programming usually refers to a programming paradigm introduced by Donald E. Knuth. It describes an approach where programming is done in a human-readable style first. See @knuth_literate_1992.], which itself is TEI-XML.^[The ODD format is used in various contexts; e.g. the Deutsches Textarchiv (DTA) uses ODD files as the source for its TEI subset *DTABf*. See @haaf_enabling_2016.]
+
+A schema's main use case is validation, i.e. checking whether the XML data corresponds to certain structures and constraints. As a TEI subset it defines which components and elements provided by the TEI guidelines are used and how they are used, making it an important part of the editing concept itself. Validation against the schema ensures the consistency of the resulting data sets in an ongoing project and is necessary to continuously support and check the researchers during the transcription and annotation process. We therefore regard an XML schema as a key software component, although the development of a schema is typically not understood as software development in the true sense of the word. This is probably one of the reasons why most of the modern engineering practices we want to demonstrate are not yet applied in this field (at least as far as we know).
+
+### Four modern engineering practices and their application
+
+In order to deal with a complex situation as described above, the authors of this paper propose to make use of the following software engineering practices^[These principles have been described in various books by many authors; one of the most famous is the book *Clean code* by Robert C. @martin_clean_2009.]:
+
+- modular software development
+- test-driven development
+- semantic versioning
+- semiautomatic documentation
+
+The development of the XML schema used in our project serves as an example of how these practices can be utilized for digital humanities projects. In the context of the ongoing reworking of the SLS application landscape, we developed a test-based and modular workflow (see @fig-schema-pipeline) for the creation of a new schema, based on ODD files as input.^[The source code of this pipeline as well as the ODD sources are open source and can be found in the corresponding GitHub repository as well as on Zenodo. See @politycki_tei-xml_2024.]
+
+![Test and build pipeline of a modern schema development workflow](./images/schema-pipeline.png){#fig-schema-pipeline}
+
+### Modular software development
+
+If you download a sample ODD file from the TEI homepage^[The starting point for the creation of ODD files is usually a tool called Roma. See @text_encoding_initiative_roma_2024.] which contains all elements and components, such a file may be made up of 70,000 lines of code. Our ODD file—which is just a limited subset—still contains well over 20,000 lines of code. The first step to handle such a large and complex object is to split it into manageable pieces. For each TEI element we need, we created a separate file containing the element's specification. Common parts like attribute classes, data types or custom definitions that are used by multiple elements each went into their own files.
+
+A rather simple specification for the element `<pc>` may look like this:
+
+```xml
+<elementSpec ident="pc" mode="change" module="analysis">
+    <desc>Contains a punctuation mark, which is processed specially
+        considering linguistic regulations (for example, by adding a space).</desc>
+    <content>
+        <dataRef name="token" restriction="[;:?!]"/>
+    </content>
+    <attList>
+        <attDef ident="force" mode="delete"/>
+        <attDef ident="unit" mode="delete"/>
+        <attDef ident="pre" mode="delete"/>
+    </attList>
+</elementSpec>
+```
+
+This principle of atomicity enforces a clear structure, provides better maintainability, makes the files much easier to grasp and modify, and also reduces redundancy, because shared parts were refactored and can be used throughout the schema while being defined in one place. The downside, of course, is the need to compile all those files into one ODD in a separate step. But this is a small price to pay for the benefits.
+
+### Test-driven development (TDD)^[The term TDD usually refers to Kent Beck, who reintroduced this idea in the early 2000s. It describes a programming paradigm where tests are written before the actual code. See @alsaqqa_agile_2020, p. 255.]
+
+The second step was to define a set of tests for all element, attribute and datatype definitions.^[These tests would normally be set up before the concrete description in the ODD module is created, but we started with an already existing schema and decided to add the tests later on.] Each test set describes the expected behavior of a piece of the schema and consists of three components: a title of the test set, the markup being tested and the expected result, which can either be valid (`True`) or invalid (`False`). Each test set is executed and evaluated by a Python function which invokes an XML parser.
+
+The following tests describe the contents and attributes of the element `<pc>`.
+
+```python
+@pytest.mark.parametrize(
+    "name, markup, result",
+    [
+        (
+            "valid-pc",
+            "<pc>;</pc>",
+            True,
+        ),
+        (
+            "invalid-pc-with-wrong-char",
+            "<pc>-</pc>",
+            False,
+        ),
+        (
+            "invalid-pc-with-attribute",
+            "<pc unit='char'>;</pc>",
+            False,
+        ),
+    ],
+)
+def test_pc(
+    test_element_with_rng: RNG_test_function,
+    name: str,
+    markup: str,
+    result: bool,
+):
+    test_element_with_rng("pc", name, markup, result, False)
+```
+
+If each specification is coupled with one or more tests, it is guaranteed that individual changes to the schema will not compromise the overall functionality, and possible side effects may be detected early on. Such test cases are abstract enough to enable representative testing of the software components to be developed, but at the same time concrete enough to be readable for employees specializing in philology; thus they can be used as a means of communication between the digital humanities team and the philological or historical team. We can simply ask: Should this piece of XML be `True` or `False`?
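+The fixture `test_element_with_rng` is not shown above; a minimal sketch of what it might look like, assuming the ODD modules have been compiled into a single RelaxNG schema and using `lxml` for validation (the schema path and the wrapper template are assumptions), could be:
+
+```python
+from typing import Callable
+
+import pytest
+from lxml import etree
+
+# Assumed signature of the helper returned by the fixture.
+RNG_test_function = Callable[[str, str, str, bool, bool], None]
+
+TEI_NS = "http://www.tei-c.org/ns/1.0"
+
+
+@pytest.fixture
+def test_element_with_rng() -> RNG_test_function:
+    # Compile the RelaxNG schema built from the ODD modules
+    # (the path is an assumption).
+    relaxng = etree.RelaxNG(etree.parse("build/schema.rng"))
+
+    def _validate(element: str, name: str, markup: str,
+                  result: bool, wrap: bool) -> None:
+        # A real suite would embed the snippet at a valid position
+        # inside a complete TEI document; this wrapper is simplified.
+        if wrap:
+            markup = f"<p>{markup}</p>"
+        doc = etree.fromstring(f'<TEI xmlns="{TEI_NS}">{markup}</TEI>')
+        assert relaxng.validate(doc) is result, f"<{element}>: {name}"
+
+    return _validate
+```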
+### Semiautomatic documentation
+
+The schema has to be documented for those who use it to encode the files as well as for those who use the files for any other purpose. We decided to generate as much of this documentation as possible automatically, using Markdown as the markup language and a site generator called MkDocs.^[See @christie_mkdocs_2024.] Our documentation website^[See @law_sources_foundation_of_the_swiss_lawyers_society_transkriptionsrichtlinien_2024.] is constructed like this: a self-written Python program reads all parts of the schema, converts them to Markdown files and hands those to the MkDocs processor, which returns a static HTML webpage that can easily be accessed and searched.
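+Condensed to its core, such a generator can be quite small. The following is a minimal sketch, not the project's actual code; the file layout, the XPath expressions and the MkDocs invocation are assumptions:
+
+```python
+import subprocess
+from pathlib import Path
+
+from lxml import etree
+
+TEI_NS = {"tei": "http://www.tei-c.org/ns/1.0"}
+
+
+def spec_to_markdown(spec_file: Path) -> str:
+    # Pull the element name and its prose description out of one
+    # specification module (assumes one elementSpec per file).
+    tree = etree.parse(str(spec_file))
+    ident = tree.xpath("//tei:elementSpec/@ident", namespaces=TEI_NS)[0]
+    desc = " ".join(tree.xpath("//tei:desc//text()", namespaces=TEI_NS))
+    return f"# <{ident}>\n\n{desc.strip()}\n"
+
+
+def build_docs(spec_dir: Path, docs_dir: Path) -> None:
+    docs_dir.mkdir(exist_ok=True)
+    for spec_file in sorted(spec_dir.glob("*.xml")):
+        (docs_dir / f"{spec_file.stem}.md").write_text(
+            spec_to_markdown(spec_file), encoding="utf-8"
+        )
+    # Hand the generated pages over to MkDocs (expects a mkdocs.yml).
+    subprocess.run(["mkdocs", "build"], check=True)
+
+
+if __name__ == "__main__":
+    build_docs(Path("schema/elements"), Path("docs"))
+```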
+### Semantic versioning (SemVer and git)
+
+It is obvious that each change to the schema not only affects the XML files to be validated^[It may sometimes be necessary to convert them with XSLT to be valid against the newer version of the schema.], but also changes the documentation. For this purpose every release of the schema is versioned with git and is reflected in a new corresponding build of the documentation site. All versions of the schema are named in accordance with the principles of semantic versioning^[See @preston-werner_semantic_2023.], so a user of any XML file that has to be validated against our schema can see which versions are available and is able to read the documentation specific to any schema version.
+
+## A brief outlook
+
+Although our journey of refactoring has just begun, we are already seeing the benefits of the principles we have applied. If the ground you are standing on is solid, you can build on it. Currently, we are working on a multilingual translation of our schema from German as the main language into English, French and Italian, and we hope to enrich the schema with extensive examples from actual XML files. Furthermore, we are rewriting the existing rendering mechanisms (e.g. TEI to HTML), applying the same rules as described above. All in all, the work done and the costs incurred are already paying off.
+
+## References
+
+::: {#refs}
+:::
\ No newline at end of file
diff --git a/submissions/427/references.bib b/submissions/427/references.bib
new file mode 100644
index 0000000..eeda7a7
--- /dev/null
+++ b/submissions/427/references.bib
@@ -0,0 +1,180 @@
+
+@incollection{informatik_software_2019,
+  title = {Software Engineering in den Digital Humanities 2. Workshop der Fachgruppe Informatik und Digital Humanities ({InfDH})},
+  isbn = {978-3-88579-689-3},
+  series = {Proceedings},
+  shorttitle = {50 Jahre Gesellschaft für Informatik - Informatik für Gesellschaft Workshopbeiträge der 49. Jahrestagung der Gesellschaft für Informatik},
+  pages = {75},
+  number = {volume 295},
+  booktitle = {50 Jahre Gesellschaft für Informatik - Informatik für Gesellschaft Workshopbeiträge der 49. Jahrestagung der Gesellschaft für Informatik: 23.-26.9.2019, Kassel, Deutschland},
+  publisher = {Bonn Gesellschaft für Informatik e.V. [2019]},
+  author = {Burghardt, Manuel and Müller-Birn, Claudia},
+  editora = {{Informatik} and Draude, Claude and Lange, Martin and Sick, Bernhard and {Gesellschaft für Informatik}},
+  editoratype = {collaborator},
+  date = {2019},
+  note = {{OCLC}: 1163601901},
+}
+
+@online{text_encoding_initiative_guidelines_2024,
+  title = {Guidelines},
+  url = {https://tei-c.org/release/doc/tei-p5-doc/en/html/index.html},
+  titleaddon = {{TEI}: Text Encoding Initiative},
+  author = {{Text Encoding Initiative}},
+  urldate = {2024-07-25},
+  date = {2024},
+}
+
+@book{knuth_literate_1992,
+  location = {Stanford, Calif.},
+  title = {Literate programming},
+  isbn = {978-0-937073-80-3 978-0-937073-81-0},
+  series = {{CSLI} lecture notes},
+  pagetotal = {368},
+  number = {no. 
27}, + publisher = {Center for the Study of Language and Information}, + author = {Knuth, Donald Ervin}, + date = {1992}, + keywords = {Computer programming}, +} + +@article{haaf_enabling_2016, + title = {Enabling the Encoding of Manuscripts within the {DTABf}: Extension and Modularization of the Format}, + issn = {2162-5603}, + url = {http://journals.openedition.org/jtei/1650}, + doi = {10.4000/jtei.1650}, + shorttitle = {Enabling the Encoding of Manuscripts within the {DTABf}}, + issue = {Issue 10}, + journaltitle = {Journal of the Text Encoding Initiative}, + shortjournal = {jtei}, + author = {Haaf, Susanne and Thomas, Christian}, + urldate = {2024-07-16}, + date = {2016-12-07}, +} + +@online{law_sources_foundation_of_the_swiss_lawyers_society_collection_2024, + title = {Collection of Swiss Law Sources online}, + url = {https://editio.sls-online.ch}, + titleaddon = {editio}, + author = {{Law Sources Foundation of the Swiss Lawyers Society}}, + urldate = {2024-07-25}, + date = {2024}, +} + +@article{alsaqqa_agile_2020, + title = {Agile Software Development: Methodologies and Trends}, + volume = {14}, + issn = {1865-7923}, + url = {https://online-journals.org/index.php/i-jim/article/view/13269}, + doi = {10.3991/ijim.v14i11.13269}, + shorttitle = {Agile Software Development}, + pages = {246--270}, + number = {11}, + journaltitle = {International Journal of Interactive Mobile Technologies ({iJIM})}, + shortjournal = {Int. J. Interact. Mob. Technol.}, + author = {Alsaqqa, Samar and Sawalha, Samer and Abdel-Nabi, Heba}, + urldate = {2024-07-25}, + date = {2020-07-10}, + file = {Alsaqqa et al_2020_Agile Software Development.pdf:/Users/bastian/Zotero/storage/GSRPNDQJ/Alsaqqa et al_2020_Agile Software Development.pdf:application/pdf}, +} + +@online{text_encoding_initiative_roma_2024, + title = {Roma}, + url = {https://roma.tei-c.org}, + titleaddon = {{TEI}: Text Encoding Initiative}, + author = {{Text Encoding Initiative}}, + urldate = {2024-07-25}, + date = {2024}, +} + +@misc{politycki_tei-xml_2024, + title = {{TEI}-{XML} Schema der Sammlung Schweizerischer Rechtsquellen}, + url = {https://zenodo.org/records/10625840}, + doi = {10.5281/zenodo.10625840}, + version = {1.1.0}, + number = {https://zenodo.org/records/10625840}, + author = {Politycki, Bastian and Sonder, Christian and Sutter, Pascale}, + urldate = {2024-07-25}, + date = {2024}, +} + +@article{carver_survey_2022, + title = {A survey of the state of the practice for research software in the United States}, + volume = {8}, + rights = {https://creativecommons.org/licenses/by/4.0/}, + issn = {2376-5992}, + url = {https://peerj.com/articles/cs-963}, + doi = {10.7717/peerj-cs.963}, + abstract = {Research software is a critical component of contemporary scholarship. Yet, most research software is developed and managed in ways that are at odds with its long-term sustainability. This paper presents findings from a survey of 1,149 researchers, primarily from the United States, about sustainability challenges they face in developing and using research software. Some of our key findings include a repeated need for more opportunities and time for developers of research software to receive training. These training needs cross the software lifecycle and various types of tools. We also identified the recurring need for better models of funding research software and for providing credit to those who develop the software so they can advance in their careers. 
The results of this survey will help inform future infrastructure and service support for software developers and users, as well as national research policy aimed at increasing the sustainability of research software.}, + pages = {e963}, + journaltitle = {{PeerJ} Computer Science}, + author = {Carver, Jeffrey C. and Weber, Nic and Ram, Karthik and Gesing, Sandra and Katz, Daniel S.}, + urldate = {2024-07-25}, + date = {2022-05-05}, + langid = {english}, + file = {Carver et al_2022_A survey of the state of the practice for research software in the United States.pdf:/Users/bastian/Zotero/storage/6JRBU2NV/Carver et al_2022_A survey of the state of the practice for research software in the United States.pdf:application/pdf}, +} + +@incollection{van_zundert_what_2018, + location = {Norderstedt}, + title = {What Are You Trying to Say? The Interface as an Integral Element of Argument}, + pages = {3--33}, + booktitle = {Digital Scholarly Editions as Interfaces}, + author = {van Zundert, Joris and Andrews, Tara}, + date = {2018}, +} + +@online{porter_what_2024, + title = {What is an edition anyway? My Keynote for the Digital Scholarly Editions as Interfaces conference, University of Graz}, + url = {http://www.dotporterdigital.org/what-is-an-edition-anyway-my-keynote-for-the-digital-scholarly-editions-as-interfaces-conference-university-of-graz/}, + author = {Porter, Dot}, + date = {2024-07-25}, +} + +@incollection{neuber_digitale_2023, + location = {Berlin/Boston}, + title = {Der digitale Editionstext. Technologische Schichten, ‚editorischer Kerntext‘ und datenzentrierte Rezeption}, + volume = {55}, + series = {Beihefte zu editio}, + pages = {69--84}, + booktitle = {Der Text und seine (Re)Produktion}, + author = {Neuber, Frederike}, + editor = {Fröhlich, Niklas and Politycki, Bastian and Schäfer, Dirk and Sonder, Annkathrin}, + date = {2023}, +} + +@online{christie_mkdocs_2024, + title = {{MkDocs}. 
Project documentation with Markdown},
+  url = {https://www.mkdocs.org},
+  author = {Christie, Tom},
+  urldate = {2024-07-25},
+  date = {2024},
+}
+
+@online{law_sources_foundation_of_the_swiss_lawyers_society_transkriptionsrichtlinien_2024,
+  title = {Transkriptionsrichtlinien und Dokumentation},
+  url = {https://schema.ssrq-sds-fds.ch/latest/},
+  titleaddon = {{SSRQ} Dokumentation},
+  author = {{Law Sources Foundation of the Swiss Lawyers Society}},
+  urldate = {2024-07-25},
+  date = {2024},
+}
+
+@online{preston-werner_semantic_2023,
+  title = {Semantic Versioning 2.0.0},
+  url = {https://semver.org},
+  author = {Preston-Werner, Tom},
+  urldate = {2024-07-25},
+  date = {2023},
+}
+
+@collection{martin_clean_2009,
+  location = {Upper Saddle River, {NJ}},
+  title = {Clean code: a handbook of agile software craftsmanship},
+  isbn = {978-0-13-235088-4},
+  shorttitle = {Clean code},
+  pagetotal = {431},
+  publisher = {Prentice Hall},
+  editor = {Martin, Robert C.},
+  date = {2009},
+  keywords = {Agile software development, Computer software, Reliability},
+}
diff --git a/submissions/429/_quarto.yml b/submissions/429/_quarto.yml
new file mode 100644
index 0000000..ae6ece2
--- /dev/null
+++ b/submissions/429/_quarto.yml
@@ -0,0 +1,8 @@
+project:
+  type: manuscript
+
+manuscript:
+  article: index.qmd
+
+format:
+  html: default
diff --git a/submissions/429/index.qmd b/submissions/429/index.qmd
new file mode 100644
index 0000000..eab89f6
--- /dev/null
+++ b/submissions/429/index.qmd
@@ -0,0 +1,85 @@
+---
+submission_id: 429
+categories: 'Session 5A'
+title: 'Training engineering students through a digital humanities project: Techn’hom Time Machine'
+author:
+  - name: Cyril Lacheze
+    orcid: 0009-0003-7231-9774
+    email: cyril.lacheze@utbm.fr
+    affiliations:
+      - University of Technology of Belfort-Montbéliard. UTBM, CNRS, institut FEMTO-ST, F-90000 Belfort, France
+  - name: Marina Gasnier
+    orcid:
+    email: marina.gasnier@utbm.fr
+    affiliations:
+      - University of Technology of Belfort-Montbéliard. UTBM, CNRS, institut FEMTO-ST, F-90000 Belfort, France
+keywords:
+  - student participation
+  - 3D reconstruction
+  - spinning mill
+
+abstract: |
+  The Techn'hom Time Machine project aims to offer a digital reconstruction of a spinning mill, integrating buildings in their environment, machines, and activities. It involves engineering students from the Belfort-Montbéliard University of Technology in all aspects of the project. They are able to discover and practise software that they will have to use in their future professional activity (Revit, Catia, Blender, Unity…). Some students are able to discover entire conceptual fields that are rarely covered in their coursework, such as the notions of ontologies and RDF databases. A special relationship between history and digital technology underlies this work: students have a choice about which software to use, their results directly impact the evolution of the project, and they learn the importance of overall project organization. These students, unfamiliar with the humanities and the specific problems associated with them, are at the same time discovering these disciplines and their difficulties, thus opening up their perspectives.
+key-points:
+  - The Techn’hom Time Machine project aims to offer a virtual reality reconstruction of a former spinning mill in the city of Belfort (France), with its machines and activities.
+  - Students from the Belfort-Montbéliard University of Technology participate directly in the project by modeling buildings and machines, or by working on knowledge engineering.
+  - Their reports make it possible to identify the points that left the strongest impression on them, namely the discovery of the human sciences and their difficulties, as well as the acquisition of new technical and organizational skills.
+date: 07-26-2024
+bibliography: references.bib
+---
+
+## Introduction
+
+Part of the national Lab In Virtuo project (2021-2024), the Techn'hom Time Machine project, initiated in 2019 by the Belfort-Montbéliard University of Technology, aims to study and digitally restore the history of an industrial neighborhood, with teacher-researchers but also students as co-constructors [@Gasnier2014 ; @Gasnier2020, p. 293]. The project is thus located at the interface of pedagogy and research. The Techn'hom district was created after the Franco-Prussian War of 1870 with two companies from Alsace: the Société Alsacienne de Constructions Mécaniques, nowadays Alstom; and the Dollfus-Mieg et Compagnie (DMC) spinning mill, in operation from 1879 to 1959. The project aims to create a “Time Machine” of these industrial areas, beginning with the spinning mill. We seek to restore in four dimensions (including time) the buildings and the machines with their operation, but also to document and model sociability and know-how, down to gestures and feelings. The resulting “Sensory Realistic Intelligent Virtual Environment” should allow both researchers and the general public to virtually discover places and “facts” taking place in the industry, but also to interact with them or even make modifications.
+
+## Study and training areas
+
+The project is carried out within a technology university and, as such, is designed to include the participation of engineering students. They can apply and develop skills previously covered in a more basic way in their curriculum. This constitutes for the students an investment in the acquisition of skills that can subsequently be reused in their professional lives as engineers. In the current state, four main axes exist concerning the inclusion of students in the Techn’hom Time Machine project:
+
+* Modeling of industrial buildings on Revit;
+* Machine modeling on Catia;
+* Knowledge engineering with the construction of a data model, initially as a relational database, which has evolved into an RDF base built on standard ontologies;
+* Integration of those elements into the same virtual environment on Unity.
+
+Historical sources are crucial in all axes since many artifacts no longer exist, have been heavily modified and/or are inaccessible. Modeling is based on handwritten or printed writings, plans, iconography, and surviving heritage. This imposes a disciplinary opening on engineering students, who are untrained in the manipulation and analysis of such sources and who may feel distant from issues linked to the human and social sciences.
+
+## Project progress
+
+To date, thirty-two students have been included in the project. Each of the four axes was allocated between four and twelve students depending on opportunities and needs. In addition to the scientific contribution, the student reports make it possible to evaluate their point of view on this training, with all critical perspectives retained.
+
+### Modeling: the software question
+
+This axis has currently involved twelve students, and has led to the complete or partial modeling of six machines.
It involves reverse-engineering machines from very partial data, using software designed for the rendering of much more recent mechanisms. Students are assigned to work on small projects whose results are not necessarily directly usable. This offers the advantage of an exploratory and critical approach, by having a student take over the project of a previous one. Students were thus responsible for creating the model, but also for choosing the software used. The first machine modeled, a twisting machine, was the subject of two successive works, linked to a change in modeling software. The first student used Blender, directing his work “on the optimization of models rather than on precision” and “took the initiative to abandon coherence”, offering “parts very close to the base material from a visual point of view but absolutely not reliable from a measurement point of view” [@Bogacz2019, pp. 11-12]. The following year, a second group was tasked with restoring consistency in this model, but realized that their colleague's choices prevented such an achievement: the pieces were too inaccurate, and conversion to a kinematic CAD model was impossible [@Castagno2020, pp. 11, 13]. They therefore remade the model on Catia, without realistic textures. The team working on another machine proposed a different solution: on Catia, they “‘imagined’ missing parts”, paying attention to their mechanical coherence, while using Keyshot to obtain a more visually attractive final result [@Paulin2020, pp. 15-16]. This questioning also occurred with the integration of buildings and machines on Unity: the models produced by specialized software are each quite efficient, but too heavy and ill-optimized to all be integrated into the same simulation. Students working on this topic thus have to take the models and reduce them in order to optimize performance, losing a part of the precision [@Bozane2022, pp. 4-6, 10]. The freedom left to students regarding technical solutions thus made it possible, by allowing research and free experimentation, to identify the configurations most likely to meet the needs of the project as a whole.
+
+### Which data model?
+
+Similarly, “distributing” tests between students provided insights into the appropriate type of data model. The Techn’hom Time Machine project was initially supposed to rely on a “classic” relational database. The first student to work on setting up said database quickly realized that a historical database involves “a certain complexity in its design”, necessitating a table for abstract concepts “most difficult to define”, and a table for specifying the types of links between actors, without being able to specify in advance all possible types of relationships [@Garcia2020, pp. 20, 23]. In short, the student realized that, for a system as complex as a human society, a relational database quickly shows its limits. In fact, even if this first student still managed to create a relational database, the next two underlined its complexity: “the number of tables in the database makes reading difficult” [@Ruff2022, p. 7], and it was difficult to “precisely complete [it]” [@Marais2020, pp. 9, 16]. A fourth student, tasked with taking up the previous work to refine it and turn it into a functional application, concluded with the support of teacher-researchers that this database simply did not allow historical reality to be described precisely enough, and pointed to the need to use an RDF graph database [@Echard2023, pp. 15-16, 21]. This solution, which was actually adopted, therefore comes once again from a series of works allowing a self-critique of the entire project, helping to define effective solutions.
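+A minimal sketch of this flexibility, using Python's `rdflib` with a hypothetical namespace and example entities (not code from the student works), shows how a previously unforeseen type of relationship is simply another triple rather than a schema change:
+
+```python
+from rdflib import Graph, Literal, Namespace, RDF
+
+# Hypothetical project namespace; the actual base builds on standard ontologies.
+TTM = Namespace("https://example.org/ttm/")
+
+g = Graph()
+g.bind("ttm", TTM)
+
+# Actors and artifacts are nodes ...
+g.add((TTM.worker_1, RDF.type, TTM.Worker))
+g.add((TTM.twisting_machine, RDF.type, TTM.Machine))
+
+# ... and every kind of link is just a predicate: no new table is
+# needed to introduce a relationship type that was not planned for.
+g.add((TTM.worker_1, TTM.operates, TTM.twisting_machine))
+g.add((TTM.worker_1, TTM.employedBy, TTM.dmc_spinning_mill))
+g.add((TTM.twisting_machine, TTM.locatedIn, TTM.building_14))
+g.add((TTM.twisting_machine, TTM.inOperationSince, Literal(1879)))
+
+# SPARQL query: who operates a machine located in building 14?
+query = """
+SELECT ?worker WHERE {
+    ?worker ttm:operates ?machine .
+    ?machine ttm:locatedIn ttm:building_14 .
+}
+"""
+for row in g.query(query, initNs={"ttm": TTM}):
+    print(row.worker)
+```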
+## Reflective feedback from students
+
+Beyond these contributions to the scientific project, the program also aims to offer training to students. The point that emerges most clearly from the students' reports, before any technical consideration or skills acquisition, relates to the discovery of the human sciences and their methodologies.
+
+### Discovering human sciences
+
+Almost all of the students emphasize an initial dismay when faced with historical sources, which lack the quantity, precision and conciseness of information expected in an engineering context. Apart from a few immediately relevant sources, the mass of additional documentation, necessary to understand the machines' operation and context, is much more confusing and time-consuming to analyze, while offering mediocre quality of information. Students have “access to a lot of documents but little precision” [@Bogacz2019, pp. 6, 8], and historical documents often “do not provide as much information as [they] hoped” [@Castagno2020, p. 4]. Moreover, students note that, even with good sources, machines “remain much more complex” than the diagrams, and that no blueprints survive, which “does not allow the direct connection and understanding of each piece" [@Paulin2020, p. 8]. The same goes for buildings, with damaged or partial plans, forcing them to “make measurements on the plan to approximate distances” [@LeGuilly2022, p. 9].
+
+Despite this initial blockage, students developed solutions - starting with the awareness that historical models can never “exactly” reproduce past reality. The most important resource consisted of seeking complementary sources by themselves, like archive originals [@Marchal2021, p. 5], old films [@Castagno2020, p. 4], or “observations made on site” for buildings [@LeGuilly2022, p. 10]. For machines, two other valuable sources could be mobilized, via contacts obtained by the supervising teacher-researchers: dialogue with former workers about the machines' functioning and details [@Bogacz2019, p. 7; @Paulin2020, p. 9]; and visits to still-working spinning mills. Those experiences allowed them, according to their feedback, to better understand the machines, their operation but also their context, “allowing [them] to take a step back from the project” [@Bogacz2019, p. 7]. On the contrary, students working in the midst of the Covid pandemic regretted not having been able to have the same experience [@Paulin2020, p. 20]. Direct contact with historical elements also includes an emotional aspect highlighted by the students: “It was both a very interesting and very pleasant moment. Being able to see with our own eyes the machine that we were trying to reproduce computationally was a very enriching experience”; “The fact of visualizing in real life a machine that we had been modeling for several months is truly incredible” [@Bogacz2019, pp. 7, 16].
+
+This need to delve into sources implied for the students the discovery, through practice, of the ins and outs of human sciences research. Typically, with data modeling, working from real data brings a certain advantage: working from “concrete cases […] helped us to understand how to articulate [several] ontologies and thus develop a strategy to combine them effectively into a coherent whole” [@Echard2023, pp. 23, 32].
Likewise, for buildings, comparing sources led students to notice inconsistencies, and thus to “note the importance of reading all the archives and not just a few because errors may be present” [@Pic2020, pp. 3-4]. Some also emphasize the “difficulty of exploiting numerous bibliographic resources” in terms of synthesis capacities and working time [@Bogacz2019, p. 6; @Castagno2020, p. 15], but also the pleasure of “learning to read archives” [@Paulin2020, p. 20]. The novelty of the practice compared to the classic engineering curriculum is well summed up by one of the teams: “This type of task requires patience and a methodology completely different from what we are used to doing. The difficulty or even the impossibility of finding the desired information taught us to put ourselves in the shoes of a historian who must at certain times make hypotheses in order to continue his work.” [@Castagno2020, p. 15].
+
+### Project managing
+
+Whatever the students' specific project, it generally appeared to be a first in their training, positioning them as researchers over several months. This induced a “complete autonomy” [@Garcia2020, p. 8] underlined by all reports, often even before any gains in competence. One student, whose project was “the most significant project he had to carry out”, “learned the management” of his organization [@Bogacz2019, p. 16]. Another “learned to manage a project in [his] free time” [@LeGuilly2022, p. 10], and a third “learned to work efficiently and manage projects independently” [@Echard2023, pp. 9, 40]. Faced with complex and non-linear projects, students emphasize the “need to do a lot of research to use the right method to work correctly”, and to propose solutions on their own [@Marchal2021, pp. 9-10, 27]. The sheer volume of work is finally underlined, projects requiring “time to understand the documents, research into software functionalities as well as a considerable investment” [@Pic2020, p. 16]. Participation in the project can appear as “a first professional experience (...) The experience gained during the internship is immense” [@Garcia2020, p. 39].
+
+Beyond each individual work, some students also developed a reflection on the overall project. In particular, they suffered from a lack of communication with their predecessors on the same subject, “making the task more difficult” and leading to the risk of “wasting (...) time understanding what the other had already understood” [@Castagno2020, p. 15]. This experience led to an awareness of the importance of good communication and documentation. Students therefore suggested organizing “video conferences between old and new groups”, and that “each group [should] bring together important documents in a separate file” during project transitions. They applied the lesson to their own report, “explaining as best as possible what [they] had understood”, with concrete recommendations [@Castagno2020, pp. 15-16].
+
+## Conclusion
+
+Students' involvement in the Techn’hom Time Machine project leads to bidirectional enrichment. The project benefits from the possibility of distributed work and from multiple sources of proposals, making it possible to test several options in parallel on a given subject. Students deepen their knowledge of diverse software while being introduced to the human sciences and project management. Gains in technical skills are often implicit in the reports, obviously being an integral part of the expectations of any engineering school project.
The acquisition of more fundamental knowledge can also be identified, with the discovery of some entirely new technologies. An interest in the historical dimension is also mentioned, as well as the human contacts with researchers and workers. Finally, the very fact of participating in a digital humanities project, atypical in itself, appears as a source of satisfaction.
diff --git a/submissions/429/references.bib b/submissions/429/references.bib
new file mode 100644
index 0000000..bdf36a2
--- /dev/null
+++ b/submissions/429/references.bib
@@ -0,0 +1,125 @@
+@article{Gasnier2014,
+  author = "Gasnier, Marina",
+  title = "Territorialisation urbaine et processus de patrimonialisation : le cas de Techn'hom à Belfort (Franche-Comté)",
+  journal = "Annales de géographie",
+  year = 2014,
+  volume = "699",
+  pages = "1168--1192"
+}
+
+@article{Gasnier2020,
+  author = "Gasnier, Marina",
+  title = "Techn’hom Time Machine : un patrimoine industriel augmenté",
+  journal = "Artefact",
+  year = 2020,
+  volume = "12",
+  pages = "293--299"
+}
+
+@article{Laubé2021,
+  author = "Laubé, Sylvain",
+  title = "Maritimité et paysage culturel maritime : les apports du modèle « Any-artefact »",
+  journal = "Artefact",
+  year = 2021,
+  volume = "14",
+  pages = "293--316"
+}
+
+@inproceedings{Querrec2018,
+  author = "Querrec, Ronan and others",
+  title = "Lab in Virtuo: un Environnement Virtuel Intelligent pour l’histoire et le patrimoine des paysages culturels industriels",
+  booktitle = "Archéologie: imagerie numérique et 3D : actes du 3e séminaire scientifique et technique de l’Inrap, 26-27 juin 2018, Rennes",
+  year = 2018,
+  howpublished = "\url{https://sstinrap.hypotheses.org/487}"
+}
+
+@unpublished{Bogacz2019,
+  author = "Bogacz, Florent",
+  title = "Projet de restitution historique en 3D. Continu à retordre à anneaux au mouillé",
+  note = "project report, UTBM",
+  year = "2019"
+}
+
+@unpublished{Bozane2022,
+  author = "Bozane, Marius",
+  title = "Implémentation en VR du projet Techn’hom Time Machine",
+  note = "project report, UTBM",
+  year = "2022"
+}
+
+@unpublished{Castagno2020,
+  author = "Castagno, Nathan and Javourez, Maxence and Vigne, Pierre",
+  title = "Continu à retordre – rapport de projet",
+  note = "project report, UTBM",
+  year = "2020"
+}
+
+@unpublished{Echard2023,
+  author = "Echard, Noé",
+  title = "Ontologie et base de données pour le projet Techn’hom Time Machine",
+  note = "internship report, UTBM",
+  year = "2023"
+}
+
+@unpublished{Garcia2020,
+  author = "Garcia, Gabriel",
+  title = "Quelle base de données pour Techn’hom Time Machine ?",
+  note = "internship report, UTBM",
+  year = "2020"
+}
+
+@unpublished{LeGuilly2022,
+  author = "Le Guilly, Erwann",
+  title = "Reconstitution de bâtiment sur Revit",
+  note = "project report, UTBM",
+  year = "2022"
+}
+
+@unpublished{Marais2020,
+  author = "Marais, Maxence",
+  title = "Mémoire Techn’hom Time Machine",
+  note = "project report, UTBM",
+  year = "2020"
+}
+
+@unpublished{Marchal2021,
+  author = "Marchal, Jean-Baptiste",
+  title = "Techn’hom Time Machine",
+  note = "project report, UTBM",
+  year = "2021"
+}
+
+@unpublished{Michal2020,
+  author = "Michal, Noëlie and Chorques, Théo and Reimel, Tifaine",
+  title = "Modélisation et simulation thermique dynamique des anciens ateliers de retordage de la Filature DMC, Belfort",
+  note = "project report, UTBM",
+  year = "2020"
+}
+
+@unpublished{Michal2019,
+  author = "Michal, Noëlie and Reimel, Tifaine and Chorques, Théo",
+  title = "Audit énergétique. Bâtiment 14 Techn’hom (RDC), Belfort. 
Etude menée selon la démarche BIM",
+  note = "project report, UTBM",
+  year = "2019"
+}
+
+@unpublished{Paulin2020,
+  author = "Paulin, Antoine and Chambon, Rémi",
+  title = "Modélisation d’un dévidoir",
+  note = "project report, UTBM",
+  year = "2020"
+}
+
+@unpublished{Pic2020,
+  author = "Pic, Pierre",
+  title = "Rapport de projet : Reconstitution 3D du poste de transformation de l’usine DMC de Belfort",
+  note = "project report, UTBM",
+  year = "2020"
+}
+
+@unpublished{Ruff2022,
+  author = "Ruff, Guillaume",
+  title = "TTM Front End",
+  note = "project report, UTBM",
+  year = "2022"
+}
diff --git a/submissions/431/_quarto.yml b/submissions/431/_quarto.yml
new file mode 100644
index 0000000..ae6ece2
--- /dev/null
+++ b/submissions/431/_quarto.yml
@@ -0,0 +1,8 @@
+project:
+  type: manuscript
+
+manuscript:
+  article: index.qmd
+
+format:
+  html: default
diff --git a/submissions/431/images/Network analysis settings in nodegoat.png b/submissions/431/images/Network analysis settings in nodegoat.png
new file mode 100644
index 0000000..da99c2f
Binary files /dev/null and b/submissions/431/images/Network analysis settings in nodegoat.png differ
diff --git a/submissions/431/images/Network of Basel jurists 1460-1550.png b/submissions/431/images/Network of Basel jurists 1460-1550.png
new file mode 100644
index 0000000..9dab109
Binary files /dev/null and b/submissions/431/images/Network of Basel jurists 1460-1550.png differ
diff --git a/submissions/431/images/Places of activity of Basel jurists 1460-1550.png b/submissions/431/images/Places of activity of Basel jurists 1460-1550.png
new file mode 100644
index 0000000..c79e393
Binary files /dev/null and b/submissions/431/images/Places of activity of Basel jurists 1460-1550.png differ
diff --git a/submissions/431/images/Places of origin of students at the university of Basel 1460-1550.png b/submissions/431/images/Places of origin of students at the university of Basel 1460-1550.png
new file mode 100644
index 0000000..17d802b
Binary files /dev/null and b/submissions/431/images/Places of origin of students at the university of Basel 1460-1550.png differ
diff --git a/submissions/431/images/RAG Eingabemaske MS Access.JPG b/submissions/431/images/RAG Eingabemaske MS Access.JPG
new file mode 100644
index 0000000..ff779cb
Binary files /dev/null and b/submissions/431/images/RAG Eingabemaske MS Access.JPG differ
diff --git a/submissions/431/images/RAG nodegoat frontend for data collection.png b/submissions/431/images/RAG nodegoat frontend for data collection.png
new file mode 100644
index 0000000..915be3e
Binary files /dev/null and b/submissions/431/images/RAG nodegoat frontend for data collection.png differ
diff --git a/submissions/431/images/nodegoat text reconciliation settings.png b/submissions/431/images/nodegoat text reconciliation settings.png
new file mode 100644
index 0000000..399e0af
Binary files /dev/null and b/submissions/431/images/nodegoat text reconciliation settings.png differ
diff --git a/submissions/431/index.qmd b/submissions/431/index.qmd
new file mode 100644
index 0000000..51ff9e4
--- /dev/null
+++ b/submissions/431/index.qmd
@@ -0,0 +1,145 @@
+---
+submission_id: 431
+categories: 'Session 2A'
+title: "From manual work to artificial intelligence: developments in data literacy using the example of the Repertorium Academicum Germanicum (2001-2024)"
+author:
+  - name: Kaspar Gubler
+    orcid: 0000-0002-6627-5045
+    email: kaspar.gubler@unibe.ch
+    affiliations:
+      - University of Bern
+      - University of Krakow (Hector)
+
+keywords:
+  - Digital Prosopography
+  - Data Biographies
+  - Data visualisations
+  - Network analysis
+  - History of knowledge and science
+  - History of universities
+
+abstract: |
+  The Repertorium Academicum Germanicum (RAG) is a prosopographical research project dedicated to studying medieval scholars and their impact on society in Europe from 1250 to 1550. The RAG database contains approximately 62,000 scholars and 400,000 biographical entries across 26,000 locations, derived from university registers, academic sources, and general biographical records. As a pioneering project in digital prosopography, the RAG exemplifies the development of data competences over the last 20 years. The presentation will therefore highlight the methods, procedures and best practices used to date, as well as future approaches. What is special about the RAG is that the project not only collects data, but also analyses it in a targeted manner with a focus on data visualisations (maps, networks, time series). The RAG presents the results in its own series of publications [(RAG Forschungen)](https://vdf.ch/index.php?route=product/collection&language=de-DE&collection_id=35).
+
+key-points:
+  - The Repertorium Academicum Germanicum (RAG) focuses on the knowledge influence of medieval scholars in pre-modern Europe, creating a comprehensive research database.
+  - The RAG database, with data on 62,000 scholars, has advanced from manual to computer-aided and AI-assisted data collection and analysis.
+  - Technological advancements, including the use of nodegoat, have enhanced data management, collaboration, and accessibility, integrating AI for improved historical data analysis.
+date: 07-07-2024
+bibliography: references.bib
+---
+
+## Introduction
+
+The core data of the RAG is based on university registers. The registers usually contain the names and places of origin of the students as well as the date of enrolment. This data is enriched in the research database with biographical data on subjects studied, professional activities and written works. Since 2020, the RAG has been a sub-project of the umbrella project Repertorium Academicum (REPAC), which is carried out at the Historical Institute of the University of Bern. On the project and its developments, see [@gubler_hesse_schwinges2022]. Data skills in the RAG can be divided into data collection, data entry and data analysis. Different data skills are required in these three areas, and they have of course changed over time as a result of digitalisation. While compiling and analysing data has been simplified by computer-aided processes, the precise recording of data in the database still requires in-depth historical knowledge and human intelligence.
+
+## Project history
+
+The RAG started with a Microsoft Access database as a multi-user installation. In 2007, the switch was made to a client-server architecture, with MS Access continuing to serve as the front end and a Microsoft SQL Server added as the back end. This configuration had to be replaced in 2017, as regular software updates for the client and server had been neglected. As a result, it was no longer possible to update the MS Access client to the new architecture in good time, and the server, which was running on the outdated MS SQL Server 2005, increasingly posed a security risk. In addition, publishing the data on the internet was only possible to a limited extent, as a fragmented export from the MS SQL server to a MySQL database with a PHP front end was required.
+In 2017, it was therefore decided to switch to a new system [@gubler2020].
+
+![Fig. 1: Former frontend of the RAG project for data collection in MS Access 2003.](images/RAG Eingabemaske MS Access.JPG)
+
+Over one million data records on people, events, observations, locations, institutions, sources and literature were to be integrated in a database migration - a project that had previously been considered for years without success. After an evaluation of possible research environments, nodegoat was chosen [@vanBree_Kessels2013]. Nodegoat was a tip from a colleague who had attended a nodegoat workshop [@gubler2021]. With nodegoat, the RAG was able to implement the desired functions immediately:
+
+- Location-independent data collection thanks to a web-based front end.
+
+- Data visualisations (maps, networks, time series) are integrated directly into nodegoat, which means that exporting to other software is not necessary, but possible.
+
+- Research data can be published directly from nodegoat without the need to export it to other software.
+
+From then on, the RAG research team worked with nodegoat in a live environment in which the collected data can be made available on the Internet immediately after a brief review. This facilitated the exchange with the research community and the interested public and significantly increased the visibility of the research project. The database migration to nodegoat meant that the biographical details of around 10,000 people could be published for the first time, which had previously not been possible due to difficulties in exporting data from the MS SQL server. On 1 January 2018, the research teams at the universities in Bern and Giessen then began collecting data in nodegoat, starting with an extensive standardisation of the data. Thanks to a multi-change function in nodegoat, these standardisations could now be carried out efficiently by all users. Institutions where biographical events took place (e.g. universities, schools, cities, courts, churches, monasteries) were newly introduced.
+
+![Fig. 2: Frontend of the RAG project for data collection in nodegoat.](images/RAG nodegoat frontend for data collection.png)
+
+## Methodology
+
+These institutions were assigned to the corresponding events, which forms the basis for the project's method of analysis: analysing the data according to the criteria 'incoming' and 'outgoing' [@gubler2022]. The key questions here are: Which people, ideas or knowledge entered an institution or space?
+
+![Fig. 3: Incoming: Places of origin of students at the University of Basel 1460-1550, with the large dot in the centre as the city of Basel. Data: repac.ch, 07/2024.](images/Places of origin of students at the university of Basel 1460-1550.png)
+
+How was this knowledge shared and passed on there? Spaces are considered both as geographical locations and as knowledge spaces within networks of scholars. In addition, the written works of scholars are taken into account in order to document their knowledge. The people themselves are seen as knowledge carriers who acquire knowledge and pass it on. Consequently, the people are linked to their knowledge in the database using approaches from the history of knowledge [@steckel2015]. The methodology described can therefore be used not only to research the circulation of knowledge between individuals and institutions, but also to digitally reconstruct spheres of influence and knowledge, for example by discipline: spaces that were shaped by jurists, physicians or theologians. The map shows places and regions where a particularly large number of Basel jurists were active. The second graphic shows the network of the same group, with the famous Bonifacius Amerbach as a strong link in the centre. The network is laid out using a force-directed graph.
+
+![Fig. 4: Outgoing: Spheres of activity of jurists with a doctorate from the University of Basel 1460-1550. Data: repac.ch, 07/2024.](images/Places of activity of Basel jurists 1460-1550.png)
+
+![Fig. 5: Network: Jurists with a doctorate from the University of Basel 1460-1550. Data: repac.ch, 07/2024.](images/Network of Basel jurists 1460-1550.png)
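+The same incoming/outgoing logic can also be reproduced outside nodegoat on an export of the event data. A minimal sketch in Python (the CSV export and its column names are hypothetical):
+
+```python
+import pandas as pd
+
+# Hypothetical export of biographical events: one row per event, with
+# the person, the event type, the institution and the place involved.
+events = pd.read_csv("rag_events.csv")
+
+# Incoming: places of origin of people with an event at a given institution.
+basel_people = events.loc[
+    events["institution"] == "University of Basel", "person_id"
+].unique()
+incoming = (
+    events[
+        events["person_id"].isin(basel_people)
+        & (events["event_type"] == "origin")
+    ]
+    .groupby("place")["person_id"]
+    .nunique()
+    .sort_values(ascending=False)
+)
+
+# Outgoing: places of activity of people with a Basel degree.
+basel_graduates = events.loc[
+    (events["institution"] == "University of Basel")
+    & (events["event_type"] == "degree"),
+    "person_id",
+].unique()
+outgoing = (
+    events[
+        events["person_id"].isin(basel_graduates)
+        & (events["event_type"] == "activity")
+    ]
+    .groupby("place")["person_id"]
+    .nunique()
+    .sort_values(ascending=False)
+)
+
+print(incoming.head(10), outgoing.head(10), sep="\n\n")
+```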
The methodology described can therefore not only be used to research the circulation of knowledge between individuals and institutions, but also to digitally reconstruct spheres of influence and knowledge, for example by discipline: spaces that were shaped by jurists, physicians or theologians. The map shows places or regions where a particularly large number of Basel jurists were active. The second graphic shows the network of the same group, with the famous Bonifacius Amerbach as a strong link in the centre. The network is laid out as a force-directed graph. + +![Fig. 4: Outgoing: Spheres of activity of jurists with a doctorate from the University of Basel +1460-1550. Data: repac.ch, 07/2024.](images/Places of activity of Basel jurists 1460-1550.png) + +![Fig. 5: Network: Jurists with a doctorate from the University of Basel +1460-1550. Data: repac.ch, 07/2024.](images/Network of Basel jurists 1460-1550.png) + + +## Data literacy + +Students and researchers working on the RAG project can acquire important data skills. As noted above, a distinction can be made between the skills required to collect, enter and analyse the biographical data. Key learning content related to data entry for students working in the RAG project includes: + +- Basics of data modelling + +Basic knowledge of the use of digital research tools and platforms. Students learn how to design and adapt data structures in order to systematically enter, manage and analyse historical information. They understand how to define entities (such as people, places, events) and their relationships (see the sketch following this list). + +- Basics of data collection + +The collection of data in a historical project involves several steps and methods to ensure data consistency. In the project, students learn how to search and evaluate sources based on research questions and extract the relevant information. Both quantitative and qualitative approaches are considered in the methods of data collection. An SNSF Spark project provides an example of a quantitative approach to the dynamic ingestion of linked open data into a nodegoat environment [@gubler2021_1]. + +- Data entry and management + +Students acquire practical experience in entering and maintaining data within a digital research environment. Additionally, they learn to document workflows and data sources to ensure transparency and traceability. For effective data entry, both students and researchers must develop essential skills related to the extraction and evaluation of historical information. + +- Source criticism and information extraction + +The project's most challenging task is extracting relevant biographical information from sources and literature and systematically recording and documenting it in the database according to project-specific guidelines. The goal is to achieve the highest possible standardization to ensure data quality and consistency. Specifically, students must select life events from approximately 900 biographical categories to accurately record an event. These categories are divided into three major blocks: 1) personal data (birth, death, social and geographical origin, etc.), 2) academic data (specializations, degrees), and 3) professional activities. These encompass all potential fields of activity in both ecclesiastical and secular administration in the late Middle Ages. Collecting data and accurately evaluating information from sources and research literature is a demanding task that requires a solid knowledge of history and Latin.
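
A minimal sketch of such a data model in Python may help make this concrete. The class and field names below are illustrative assumptions only; the actual nodegoat data model of the RAG is far richer, with roughly 900 biographical categories.

```python
from dataclasses import dataclass, field

# Simplified, illustrative entities; the real RAG/nodegoat model is far richer.
@dataclass
class Place:
    name: str

@dataclass
class Institution:
    name: str
    kind: str        # e.g. "university", "school", "court", "monastery"
    location: Place

@dataclass
class Event:
    category: str    # one of the project's ~900 biographical categories
    year: int
    institution: Institution

@dataclass
class Person:
    name: str
    origin: Place
    events: list[Event] = field(default_factory=list)

# A fictional student matriculating at the University of Basel in 1460:
basel = Place("Basel")
university = Institution("University of Basel", "university", basel)
student = Person("Johannes Muster", origin=Place("Bern"))
student.events.append(Event("matriculation", 1460, university))
```

Even a toy model like this makes the core idea visible: biographical events link persons to institutions and places, which is exactly what enables the 'incoming' and 'outgoing' analyses described above.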
+ +Key learning content related to data analysis includes: + +- Learning how to query a database + +The use of filters and search functions for targeted data analysis requires a solid understanding of the data model, the data collection methodology, and the available content. In the future, AI tools for data analysis will also be used in the project, both for an initial overview of the data and, if necessary, for in-depth analysis. Such tools can help with data retrieval, as the data can be queried using natural-language prompts. + +- Geographical and temporal visualisations + +Use of GIS functionalities to create and analyse geographical maps. Visualisation of historical data on time axes to show chronological processes and changes. + +- Network analysis + +Knowing and applying methods for linking different data sets and for analysing networks and interactions between historical actors such as people, institutions and objects. The data can also be exported from nodegoat in order to evaluate it with other visualisation software, such as Gephi for network analysis. The graphic below shows the general settings in nodegoat for network analyses. + +![Fig. 6: General settings for network analyses in nodegoat.](images/Network analysis settings in nodegoat.png) + +- Interpretation of the digital findings (patterns, developments) + +The most important skill in the entire research process is, of course, the ability to interpret the results. The data is always interpreted against the background of the historical context. Without well-founded historical expertise, however, the data cannot provide in-depth insights for historical research, but at best enable superficial observations. It follows that when working with research data, a double source criticism must always take place: when obtaining the information from the sources (data collection) and when analysing the digital results obtained from the information (data interpretation). + +## Digitisation + +How have the described data competences changed since the start of the project in 2001? This question is linked to changes in the research infrastructure and the availability of digitised material (sources and literature), and to the question of how computer-aided automation, in particular artificial intelligence, has influenced and will continue to influence the practices of data collection, entry and analysis in the project, thereby expanding the epistemological framework. The most important factors in connection with digitalisation in general are: + +- Resources: The increasing availability of digitized texts, particularly through Google Books, has significantly transformed prosopographical and biographical research. Not only is a wealth of information more accessible today, but it can also be entered into databases more efficiently. Consequently, skills for digital research and information processing have had to be continuously adapted throughout the course of the project. + +- Tools: Since the start of the project, new software tools have significantly transformed the processes of collecting, extracting, entering, and analyzing information. The most substantial development has been in data analysis, which, thanks to advanced tools and user-friendly graphical interfaces, has become accessible to a wide range of researchers, no longer being limited to computer scientists. AI tools open up further potential here: large amounts of data can be analyzed in a short time using simple query languages.
However, when using AI, the results must be examined even more critically than with conventional data analysis. + +- Data analysis: The visualization of research data in historical studies has seen significant advancements. For instance, data can now be displayed on historical maps, within networks, or in time series, and dynamically over time using a time slider in a research environment like nodegoat. This has accelerated data analysis: tasks like creating a map, which took weeks in the early years of the RAG project, now take only a few minutes. + +- Interpretation of the data: The core method of historical scholarship, source criticism, has also evolved significantly. While it traditionally involved evaluating information from sources and literature, today it also requires the ability to analyze data visualizations and network representations derived from these sources. To adequately assess these digital findings, a thorough understanding of the data model, data context, and historical background is essential. Consequently, data analysis presents new challenges for historical research, necessitating advanced data competencies at multiple levels. + +- Collaboration: Web-based research environments have made collaboration much easier and more transparent. Teams are now able to follow each other's progress in real time, making the location of the work less important and communication smoother. + + +## Human and artificial intelligence + +Regarding data collection, entry, and analysis, artificial intelligence significantly impacts several, though not all, tasks within the RAG project. + +- Data collection: AI supports the rapid processing and pre-sorting of digital information used for data collection. For example, Transkribus is utilized to create OCR texts, which are then directly imported into nodegoat and matched with specific vocabularies using algorithms [@gubler2023]. This technology aids the RAG project by efficiently detecting references to students and scholars within large text corpora, significantly speeding up the identification and extraction process. + +![Fig. 7: Example settings for the algorithm for reconciling textual data in nodegoat.](images/nodegoat text reconciliation settings.png) + + +- Data entry: In this area, human intelligence remains crucial. In-depth specialist knowledge of the historical field under investigation is essential, particularly concerning the history of universities and knowledge in the European Middle Ages and the Renaissance. Due to the heterogeneous and often fragmented nature of the sources, AI cannot yet replicate this expertise. The nuanced understanding required to interpret historical events and their semantic levels still necessitates human insight. + +- Data analysis: While AI support for data entry is still limited, it is much greater for data analysis. The epistemological framework has expanded considerably not only in digital prosopography and digital biographical research, but in history in general. Exploratory data analysis in particular will become a key methodology in history through the application of AI. + +## Conclusion + +Since the 1990s, digital resources and tools have become increasingly prevalent in historical research. However, skills related to handling data remain underdeveloped in this field. This gap is not due to a lack of interest from students, but rather stems from a chronic lack of available training opportunities. 
This situation has gradually improved in recent years, with a growing number of courses and significant initiatives promoting digital history. +Nevertheless, the responsibility now lies with academic chairs to take a more proactive role in integrating a sustainable range of digital courses into the general history curriculum. It is crucial that data literacy becomes a fundamental component of the training of history students, particularly considering their future career prospects and the increasingly complex task of evaluating information, including the critical use of artificial intelligence methods, tools and results. This applies especially to the methodology of source criticism, which is now more important than ever in the evaluation of AI-generated results. In addition to formal teaching, more project-based learning should be offered to support students in acquiring digital skills. diff --git a/submissions/431/references.bib b/submissions/431/references.bib new file mode 100644 index 0000000..6646775 --- /dev/null +++ b/submissions/431/references.bib @@ -0,0 +1,93 @@ +@book{vanBree_Kessels2013, + title = {nodegoat: a web-based data management, network analysis \& visualisation environment, http://nodegoat.net from LAB1100, http://lab1100.com}, + shorttitle = {nodegoat data management}, + author = {van Bree, Pim and Kessels, Geert}, + date = {2013}, + howpublished = "\url{https://nodegoat.net}", + langid = {english}, + keywords = {data management, data visualisation, network analysis, research environment}, +} +@book{gubler_hesse_schwinges2022, + title = {Person und Wissen. Bilanz und Perspektiven (RAG Forschungen 4)}, + shorttitle = {Person und Wissen}, + author = {Gubler, Kaspar and Hesse, Christian and Schwinges, Rainer Christoph}, + date = {2022}, + publisher = {vdf, Zürich}, + doi = {10.3218/4114-9}, + langid = {german}, + keywords = {digital prosopography, data biographies}, +} +@book{gubler2020, + title = {Database Migration Case Study: Repertorium Academicum Germanicum (RAG)}, + shorttitle = {Database Migration}, + author = {Gubler, Kaspar}, + date = {2020}, + publisher = {histdata.hypotheses.org}, + doi = {10.58079/pldk}, + langid = {english}, + keywords = {database migration, case study, methodology}, +} +@book{gubler2021, + title = {The coffee break as a driver of science: Nodegoat @ Uni Bern (2017-2021)}, + shorttitle = {nodegoat @ Uni Bern}, + author = {Gubler, Kaspar}, + date = {2021}, + publisher = {histdata.hypotheses.org}, + doi = {10.58079/ple4}, + langid = {english}, + keywords = {nodegoat, database migration, data management, data visualisation, network analysis}, +} +@book{gubler2021_1, + title = {Data Ingestion Episode III – May the linked open data be with you}, + shorttitle = {Data ingestion}, + author = {Gubler, Kaspar}, + date = {2021}, + publisher = {histdata.hypotheses.org}, + doi = {10.58079/pldv}, + langid = {english}, + keywords = {nodegoat, data ingestion, data visualisation, network analysis}, +} +@book{gubler2023, + title = {Transkribus kombiniert mit nodegoat: Ein vielseitiges Werkzeug für Datenanalysen}, + shorttitle = {Transkribus + nodegoat}, + author = {Gubler, Kaspar}, + date = {2023}, + publisher = {histdata.hypotheses.org}, + doi = {10.58079/plex}, + langid = {german}, + keywords = {nodegoat, transkribus, data reconciliation, ocr, text mining}, +} +@book{gubler2022, + title = {Von Daten zu Informationen und Wissen. Zum Stand der Datenbank des Repertorium Academicum Germanicum, in: Kaspar Gubler, Christian Hesse, Rainer C.
Schwinges (Hrsg.): Person und Wissen. Bilanz und Perspektiven (RAG Forschungen 4)}, + shorttitle = {Informationen und Wissen}, + author = {Gubler, Kaspar}, + date = {2022}, + publisher = {vdf, Zürich}, + URL = "https://boris.unibe.ch/174773/2/Gubler__Von_Daten_zu_Informationen_und_Wissen.pdf", + langid = {german}, + keywords = {methodology, data model, data analysis}, +} +@book{gubler2022_1, + title = {Forschungsdaten vernetzen, harmonisieren und auswerten: Methodik und Umsetzung am Beispiel einer prosopographischen Datenbank mit rund 200.000 Studenten europäischer Universitäten (1200–1800), in: Oberdorf, Andreas (Hrsg.): Digital Turn und Historische Bildungsforschung. Bestandesaufnahme und Forschungsperspektiven}, + shorttitle = {Forschungsdaten vernetzen}, + author = {Gubler, Kaspar}, + date = {2022}, + publisher = {Bad Heilbrunn}, + doi = {10.35468/5952}, + langid = {german}, + keywords = {methodology, data harmonisation, data reconciliation, big data}, +} +@book{steckel2015, + title = {Wissensgeschichten. Zugänge, Probleme und Potentiale in der Erforschung mittelalterlicher Wissenskulturen, in: Akademische Wissenskulturen. Praktiken des Lehrens und Forschens vom Mittelalter bis zur Moderne, hg. v. Martin Kintzinger / Sita Steckel}, + shorttitle = {Wissensgeschichte}, + author = {Steckel, Sita}, + date = {2015}, + publisher = {Bern}, + URL = "https://repositorium.uni-muenster.de/document/miami/6532a89c-da39-4d14-9a28-f550471da4e7/steckel_2015_wissensgeschichte.pdf", + langid = {german}, + keywords = {history of knowledge, methodology}, +} + + + + diff --git a/submissions/438/_quarto.yml b/submissions/438/_quarto.yml new file mode 100644 index 0000000..ae6ece2 --- /dev/null +++ b/submissions/438/_quarto.yml @@ -0,0 +1,8 @@ +project: + type: manuscript + +manuscript: + article: index.qmd + +format: + html: default diff --git a/submissions/438/images/annotated_screenshot_focus_traps_n_treasure_gameplay.png b/submissions/438/images/annotated_screenshot_focus_traps_n_treasure_gameplay.png new file mode 100644 index 0000000..45bd856 Binary files /dev/null and b/submissions/438/images/annotated_screenshot_focus_traps_n_treasure_gameplay.png differ diff --git a/submissions/438/images/annotated_screenshot_overview_traps_n_treasure_gameplay.png b/submissions/438/images/annotated_screenshot_overview_traps_n_treasure_gameplay.png new file mode 100644 index 0000000..17ea76f Binary files /dev/null and b/submissions/438/images/annotated_screenshot_overview_traps_n_treasure_gameplay.png differ diff --git a/submissions/438/images/barbarian_amiga_in-game.png b/submissions/438/images/barbarian_amiga_in-game.png new file mode 100644 index 0000000..df473a5 Binary files /dev/null and b/submissions/438/images/barbarian_amiga_in-game.png differ diff --git a/submissions/438/images/barbarian_dos_in-game.png b/submissions/438/images/barbarian_dos_in-game.png new file mode 100644 index 0000000..8b5e8d1 Binary files /dev/null and b/submissions/438/images/barbarian_dos_in-game.png differ diff --git a/submissions/438/images/final_fight_1_arcade_in-game_enemy_appears.png b/submissions/438/images/final_fight_1_arcade_in-game_enemy_appears.png new file mode 100644 index 0000000..1a5123b Binary files /dev/null and b/submissions/438/images/final_fight_1_arcade_in-game_enemy_appears.png differ diff --git a/submissions/438/images/final_fight_1_arcade_in-game_walking_along.png b/submissions/438/images/final_fight_1_arcade_in-game_walking_along.png new file mode 100644 index 0000000..40121a4 Binary files /dev/null and
b/submissions/438/images/final_fight_1_arcade_in-game_walking_along.png differ diff --git a/submissions/438/index.qmd b/submissions/438/index.qmd new file mode 100644 index 0000000..4ea9902 --- /dev/null +++ b/submissions/438/index.qmd @@ -0,0 +1,84 @@ +--- +submission_id: 438 +categories: 'Session 4A' +title: A handful of pixels of blood +subtitle: Decoding early video game graphics with the FAVR ontology +author: + - name: Adrian Demleitner + orcid: 0000-0001-9918-7300 + email: adrian.demleitner@hkb.bfh.ch + affiliations: + - University of the Arts Bern + - University of Bern +keywords: + - Video Game Graphics + - Visual Analysis + - FAVR Framework + - 1980s-1990s Home Computers + - Video Game Studies +abstract: | + What is the purpose of blood splattering onto the screen in a video game? Does it serve functional value, or is it merely intended to shock the user? To this day, little progress has been made in studying video game graphics' material and structural conditions. They are not only narrative and aesthetic devices comparable to films but also hold functional information for players and are of an interactive nature [@fizekLudicGlassMaking2022; @gerlingScreenImagesInGame2023]. Classical visual analysis struggles to encompass video game images because its methods focus on analogue or time-based visual media [@arsenaultGameFAVRFramework2015]. How can blood spurting from a virtual body be analyzed? This research applies and extends the _Framework for the Analysis of Visual Representation in Video Games_ (FAVR) to enable and advance adequate research on video game images. +key-points: + - The study of video game graphics integrates narrative and aesthetic aspects with interactive and functional elements, differing significantly from classical visual media. + - The Framework for the Analysis of Visual Representation in Video Games (FAVR) provides a structured approach to analyze video game images through annotation, focusing on their formal, material, and functional aspects. + - The initial implementation of the FAVR framework as a linked open ontology for tools like Tropy has proven valuable in formally analyzing video game images and comparing aspects such as dynamic versus static image space, facilitating further digital and computational research. +date: 07-19-2024 +bibliography: references.bib +--- + +The 1980s marked the arrival of the home computer. Computing systems became affordable and were marketed to private consumers through state-supported programs and new economic opportunities [@haddonHomeComputerMaking1988; @williamsEarlyComputersEurope1976]. Early models, such as the ZX Spectrum[^1], the Texas Instruments TI-99/4A[^2], or the Atari[^3], quickly became popular in Europe and opened the door for digital technology to enter the home. This period also marks the advent of homebrew video game culture and newly emerging creative programming practices [@swalwellHomebrewGamingBeginnings2021; @albertsHackingEuropeComputer2014]. As part of this process, these early programmers not only had to figure out how to develop video games but also were among the first to incorporate graphics into video games. This created fertile grounds for a new array of video game genres and helped popularize video games as a mainstream medium. + +I’m researching graphics programming for video games from the 1980s and 1990s. The difference from other visual media lies in the amalgamation of computing and the expression of productive or creative intent by video game designers and developers.
The specifics of video game graphics are deeply rooted in how human ideas must be translated into instructions that a computer understands. This necessitates a mediation between the computer's pure logic and a playing person's phenomenological experience. In other words, the video game image is a specific type of interface that must handle a semiotic layer and offer functional affordances. I am interested in how early video game programmers worked with these interfaces, incorporating their own visual inspirations and attempting to work with the limited resources at hand. Besides critical source code analysis, I also extensively analyze formal aspects of video game images. For the latter, I depend on FAVR to properly describe and annotate images in datasets relevant to my inquiries. The framework explicitly deals with problems of analyzing video game graphics. It guides the annotation of images by their functional, material, and formal aspects and aids in analyzing narrativity and the rhetoric of aesthetic aspects [@arsenaultGameFAVRFramework2015]. + +The video game image also differs substantially from the image in animation or other software interfaces, to which it is often compared. Besides its interactivity, it also holds a double function of _telling_ the game as well as offering the affordances that let the player participate. It sits between animation and the software user interface, with added techno-visual dimensions such as resolution or frame rate. The concepts of the “ergodic animage” [@arsenaultGameFAVRFramework2015] and “algorithmic images” [@fizekLudicGlassMaking2022] aptly describe these video-game-related aspects. The two terms imply that video game images are of a calculated nature and don’t represent reality but construct one and display the operations of software. Further, these images only work when players participate in them. + +Classical visual analysis is limited in its ability to deal with video game images due to their visual and material diversity, as well as a disciplinary vocabulary that is not a good fit for video game graphics [@arsenaultGameFAVRFramework2015]. The same is true for the formal analysis of video game images through computer vision models, for example, towards object or image classifications. Neither general-purpose models nor models specialized on user interfaces can deal with the visual diversity of video game images and interfaces. FAVR fills this gap by explicitly concentrating on what is displayed on the screen rather than what these images convey. For FAVR, the image on the screen becomes a specific type of interface at the intersection of the game’s rules and mechanics and their visual mediation. + +While the framework can identify different game modes and the different functionalities those screens can encompass, more intricate details can escape an analysis. FAVR distinguishes between _tangible_, _intangible_, and _negative space_, as well as _agents_, _in-game_ and _off-game elements_, and _interfaces_. Whereas the aspect of space concerns the overall composition of the screen, the second set of attributes circumscribes the construction of the image. Intangible space, for example, is concerned with information relevant to gameplay but without the direct agency of the player. Examples are life bars or a display of the current score. As another example, off-game denotes decorative background elements. Since video game images are time-based and interactive, some of the relevant information only unfolds as animation or through player interaction. Further, not all visual mediations of the game's operations are represented as expected in software interfaces or classic visual compositions.
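
To make these categories more tangible, the following sketch shows how a FAVR-style annotation of a single frame might be structured as data. The field names and values are simplified assumptions for illustration only; the actual FAVR ontology and the Tropy templates discussed below are considerably richer.

```python
import json

# A hypothetical, simplified FAVR-style annotation of one frame;
# the real FAVR ontology and its Tropy templates are considerably richer.
annotation = {
    "game": "Barbarian (Palace Software Inc, 1987, Amiga)",
    "mode": "in-game",
    "spaces": {
        "tangible": ["fighting ground"],       # player has direct agency here
        "intangible": ["life bars", "score"],  # gameplay-relevant, no direct agency
        "negative": ["sky backdrop"],
    },
    "elements": {
        "agents": ["player character", "enemy barbarian"],
        "off_game": ["decorative background scenery"],
        "interfaces": ["status panel"],
    },
}

print(json.dumps(annotation, indent=2))
```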
![Fig. 1: Barbarian (Palace Software Inc, 1987, Amiga). Our character is attacking, and the blood spurting forth indicates that our hit was successful.](images/barbarian_amiga_in-game.png) + +![Fig. 2: Barbarian (Palace Software Inc, 1987, DOS). A similar scene from the game's DOS version. Technical limitations, such as a limited color palette, could make it difficult to port the same game to another system, raising questions about the tension between technology and design.](images/barbarian_dos_in-game.png) + +A simple example could be blood spurting from an agent, which can be any gameplay-relevant character on screen. The blood holds information relevant to the player, indicating that the character on screen got hurt, and may prompt a change in play behavior. Whereas a life bar can represent the player character's health, such indications are usually absent for enemies. Some video games also play with the distinction between in- and off-game planes. In Final Fight (Capcom, 1989, Arcade), our character walks from left to right through a raging city and, on the way, fights numerous enemies entering the screen from left and right. The off-game plane, the background, is composed of run-down houses and alleyways. At one point during the game, those houses’ doors start to open and spawn enemies as well. This upends the previously established convention of which visual information is relevant for gameplay in terms of interactive and decorative elements. + +![Fig. 3: Final Fight (Capcom, 1989, Arcade). Our character wrestles a pedestrian. The door on the right is closed and part of the background plane.](images/final_fight_1_arcade_in-game_walking_along.png) + +![Fig. 4: Final Fight (Capcom, 1989, Arcade). Another character emerges from the previously closed door. The black squares around the feet indicate that the technical implementation wasn't without problems.](images/final_fight_1_arcade_in-game_enemy_appears.png) + +Another relevant point regarding FAVR is its limitation to qualitative analysis and manual application. Since I am interested in a larger historical trajectory of video game images in the 1980s and 1990s, I need to leverage digital tools and computational methods to aid my research. I work with two image corpora in my research. A smaller corpus contains 1525 screenshots from 35 video games from Switzerland from 1987-1998. Acquiring a sufficient number of screenshots of old video games from Switzerland is difficult due to their low popularity. The corpus consists of video stills from _Let’s Plays_[^4] and screenshots from various video game databases. The second and larger corpus consists of 115’848 screenshots from 4316 video games and is solely sourced from the Mobygames database. Mobygames is one of the largest community-driven platforms for the collection of knowledge on video games. Its maintenance mainly by amateurs and video game enthusiasts is not without problems [@pfisterWarumWirEs2023]. There are open questions on data access, searchability, and, most importantly, completeness. Working with Mobygames makes it difficult to assess what will be missing from the dataset.
Despite these shortcomings, the work of the community behind such database platforms is of immense value to video game research. + +To leverage the potential of these corpora, I need to be able to apply FAVR in a formalized, digital manner. To that end, I created a linked open ontology that derives from and expands on FAVR [@demleitnerThgieFavrontologyAlpha2023]. It is based on CIDOC CRM and can be applied in Tropy or similar image-annotation tools by providing templates. Other video game-related ontologies were not suitable for the tasks at hand. Most of the better-developed ontologies, such as the Video Game Ontology (VGO), the Digital Game Ontology (DGO), and the Game Metadata and Citation Project (GAMECIP), concentrate on describing the contents of a game and are mostly abandoned [@martinoModelingVideoGame2023]. Interestingly, both the VGO and VideOWL try to be of benefit to the industry and game developers. By contrast, I’m mainly interested in the historical contextualization of video games and the practices of video game development. + +![Fig. 5: Tropy overview of an annotated still from Traps 'n' Treasures (Starbyte, 1993, Amiga).](images/annotated_screenshot_overview_traps_n_treasure_gameplay.png) + +![Fig. 6: Tropy showing the focus on an annotation in a still from Traps 'n' Treasures (Starbyte, 1993, Amiga).](images/annotated_screenshot_focus_traps_n_treasure_gameplay.png) + +So far, I have been able to formalize the framework’s aspects of game modes and space, as well as to create annotation templates building on those aspects. These were made for Tropy, a software that enables researchers to organize their visual material, properly describe the images with metadata, and create annotations. The templates currently allow video game images to be annotated with regard to the overall composition of the screen, its spaces, and game modes. The screenshots provided above demonstrate the annotation of an in-game screenshot of Traps ‘n’ Treasures (Starbyte, 1993, Amiga). Such an annotation allows comparison with other games, for example regarding the ratio of dynamic versus static image space. This ratio was an important factor in video game development, as dynamic image space required more resources. The annotations can then be exported as JSON[^5] and used in further analyses and digital methods. + +To be able to analyze large quantities of video game images towards their functionality as interfaces, digital methods need to be leveraged. Computer vision (CV) models are of limited help regarding this inquiry. CV models are generally trained to extract semantic value, focusing primarily on object classification [@kurfessArtificialIntelligence2003] or segmentation tasks [@xuApplicationImageSegmentation2024]. However, what is considered of semantic value typically does not include user interface elements, particularly in the specific context of video game images with their dual functionality. Image similarity cluster visualizations based on embeddings calculated using both classic convolutional neural network[^6] models like ResNet101 [@heDeepResidualLearning2015] and newer transformer[^7] models such as DINOv2 [@oquabDINOv2LearningRobust2024] have shown [@demleitnerThgieComingofageofthevideogameimageInitial2024] that these models are quite capable of recognizing what corresponds to game modes in FAVR, although they lack the ability to properly annotate the images on that level. This limitation is likely due to the visual diversity present in video game images, where narrative elements and the visual mediation of game mechanics coexist on a wide spectrum. Visual material annotated in Tropy with the FAVR ontology could potentially be used to train or fine-tune new models that are more adept at recognition in this domain.
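
As a rough illustration of how such embedding-based similarity clustering can be set up, the sketch below embeds screenshots with a pretrained ResNet101 from torchvision and projects the vectors into two dimensions. The file paths are placeholders, and the pipeline used in the cited experiments may differ in model choice and preprocessing.

```python
import numpy as np
import torch
from PIL import Image
from sklearn.decomposition import PCA
from torchvision import models, transforms

# Load a pretrained ResNet101 and replace its classification head with an
# identity, so the model returns the pooled 2048-dimensional features.
model = models.resnet101(weights=models.ResNet101_Weights.DEFAULT)
model.fc = torch.nn.Identity()
model.eval()

preprocess = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

def embed(path: str) -> np.ndarray:
    """Return an embedding vector for a single screenshot."""
    image = Image.open(path).convert("RGB")
    with torch.no_grad():
        return model(preprocess(image).unsqueeze(0)).squeeze(0).numpy()

# Placeholder paths; in practice these would be the corpus screenshots.
paths = ["screenshots/barbarian_amiga.png", "screenshots/final_fight.png"]
vectors = np.stack([embed(p) for p in paths])

# Project to 2D; plotted, visually similar frames (e.g. the same game
# mode) tend to cluster together.
coords = PCA(n_components=2).fit_transform(vectors)
print(coords)
```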
The _Framework for the Analysis of Visual Representation in Video Games_ is a welcome vantage point for my research inquiry. After translating the framework into a linked open ontology, further work is needed to refine and expand it to encompass more subtle aspects of video game interfaces. Whereas the ontology developed so far works on a formal level, I have yet to research to what extent FAVR can be leveraged for applications of distant viewing of larger video game image corpora. Despite being implemented only in a limited form so far, FAVR has proven to be a valuable tool for analyzing video game images with regard to their formal, discursive, and historical aspects. + +## Media List + +- Fig. 1-2: Screenshots from [Barbarian (1987) - MobyGames](https://www.mobygames.com/game/253/barbarian/), accessed July 09, 2024 +- Fig. 3-4: Screenshots of [Final Fight (Arcade) Playthrough - NintendoComplete - YouTube](https://www.youtube.com/watch?v=p8gYGfL_p2o), accessed July 09, 2024 +- Fig. 5-6: Screenshots provided by the author +- Barbarian (Palace Software Inc, 1987, Amiga, DOS) +- Final Fight (Capcom, 1989, Arcade) +- Traps 'n' Treasures (Starbyte, 1993, Amiga) + +[^1]: [ZX Spectrum](https://en.wikipedia.org/wiki/ZX_Spectrum), accessed May 13, 2024 +[^2]: [TI-99/4A](https://en.wikipedia.org/wiki/TI-99/4A), accessed May 13, 2024 +[^3]: [Atari 8-bit computers - Wikipedia](https://en.wikipedia.org/wiki/Atari_8-bit_computers), accessed July 12, 2024 +[^4]: [Let's Play - Wikipedia](https://en.wikipedia.org/wiki/Let%27s_Play), accessed July 09, 2024 +[^5]: [favr-ontology/examples/ball-raider-1987-main-gameplay.json](https://github.com/thgie/favr-ontology/blob/main/examples/ball-raider-1987-main-gameplay.json), accessed July 09, 2024 +[^6]: [Convolutional neural network - Wikipedia](https://en.wikipedia.org/wiki/Convolutional_neural_network), accessed July 19, 2024 +[^7]: [Transformer (deep learning architecture) - Wikipedia](https://en.wikipedia.org/wiki/Transformer_(deep_learning_architecture)), accessed July 19, 2024 + + diff --git a/submissions/438/references.bib b/submissions/438/references.bib new file mode 100644 index 0000000..93029db --- /dev/null +++ b/submissions/438/references.bib @@ -0,0 +1,217 @@ +@book{albertsHackingEuropeComputer2014, + title = {Hacking {{Europe}}: {{From Computer Cultures}} to {{Demoscenes}}}, + shorttitle = {Hacking {{Europe}}}, + editor = {Alberts, Gerard and Oldenziel, Ruth}, + date = {2014}, + series = {History of {{Computing}}}, + publisher = {Springer}, + location = {London}, + doi = {10.1007/978-1-4471-5493-8}, + url = {https://link.springer.com/10.1007/978-1-4471-5493-8}, + urldate = {2024-05-13}, + isbn = {978-1-4471-5492-1 978-1-4471-5493-8}, + langid = {english} +} + +@article{arsenaultGameFAVRFramework2015, + title = {The {{Game FAVR}}: {{A Framework}} for the {{Analysis}} of {{Visual Representation}} in {{Video Games}}}, + author = {Arsenault, Dominic and Côté, Pierre-Marc and Larochelle, Audrey}, + date = {2015}, + abstract = {This paper lays out a unified framework of the ergodic animage, the rule-based and interaction-driven part of visual representation in video games.
It is the end product of a three-year research project conducted by the INTEGRAE team, and is divided into three parts. Part 1 contextualizes the research on graphics and visuality within game studies, notably through the opposition between fiction and rules and the difficulties in finding common vocabulary to discuss key visual concepts such as perspective and point of view. Part 2 discusses a number of visual traditions through which we frame video game graphics (film, animation, art history, graphical projection and technical drawing), highlighting their relevance and shortcomings in addressing the long history of video games and the very different paradigms of 2D and 3D graphics. Part 3 presents the Game FAVR, a model that allows any game’s visual representation to be described and discussed through a common frame and vocabulary. The framework is presented in an accessible manner and is organized as a toolkit, with sample case studies, templates, and a flowchart for using the FAVR provided as an annex1, so that researchers and students can immediately start using it.}, + langid = {english}, + keywords = {Video-Game-Graphics,Video-Game-Studies,Visual-Studies,Visuality}, + file = {/home/adrian/Zotero/storage/32VD7ULH/FAVR-article.pdf;/home/adrian/Zotero/storage/59U2PCWH/Arsenault et al. - The Game FAVR A Framework for the Analysis of Vis.pdf} +} + +@software{demleitnerThgieComingofageofthevideogameimageInitial2024, + title = {Thgie/Coming-of-Age-of-the-Video-Game-Image: {{Initial Release}}}, + shorttitle = {Thgie/Coming-of-Age-of-the-Video-Game-Image}, + author = {Demleitner, Adrian}, + date = {2024-07-17}, + doi = {10.5281/zenodo.12755609}, + url = {https://zenodo.org/records/12755609}, + urldate = {2024-07-19}, + abstract = {Distant viewing the early history of the video game image.}, + organization = {Zenodo}, + version = {v1.0}, + file = {/home/adrian/Zotero/storage/J7WZWHCC/12755609.html} +} + +@software{demleitnerThgieFavrontologyAlpha2023, + title = {Thgie/Favr-Ontology: {{Alpha Release}}}, + shorttitle = {Thgie/Favr-Ontology}, + author = {Demleitner, Adrian}, + date = {2023-11-16}, + doi = {10.5281/zenodo.10142313}, + url = {https://zenodo.org/records/10142313}, + urldate = {2024-03-15}, + abstract = {Ontology based on FAVR, FAVR, a model to described digital game's visual representations.}, + organization = {Zenodo}, + version = {v1.0}, + file = {/home/adrian/Zotero/storage/KXVBUKCR/10142313.html} +} + +@article{fizekLudicGlassMaking2022, + title = {Through the {{Ludic Glass}}: {{Making Sense}} of {{Video Games}} as {{Algorithmic Spectacles}}}, + shorttitle = {Through the {{Ludic Glass}}}, + author = {Fizek, Sonia}, + date = {2022-04}, + journaltitle = {Game Studies}, + volume = {22}, + number = {2}, + issn = {1604-7982}, + url = {http://gamestudies.org/2202/articles/gap_fizek}, + urldate = {2023-01-22}, + abstract = {Video game analyses have historically focused on the human act of play or on the events resulting from the player’s act. Until recently, spectating has remained an analytical domain of film theory and visual arts. In game studies, this perspective has changed, with the arrival of the phenomenon of gameplay spectating and game streaming on a mass scale, and its leakage into academic as well as popular consciousness. How does the spectacle change the analytical perspective towards video games as objects of scholarly analysis and video gaming as reflective practice? 
In this paper, I will approach the video game as an algorithmic spectacle and propose an analytical perspective to study this phenomenon, reaching out to theories of moving (digital) image proposed by the philosopher Vilém Flusser and the filmmaker Harun Farocki.}, + keywords = {Algorithmic-Image,Critical-Code-Studies,Friedrich-Kittler,Operational-Image,Technical-Image,Video-Game-Graphics,Video-Game-Studies,Vilém-Flusser,Visuality}, + file = {/home/adrian/Zotero/storage/CJUY3GSB/Fizek_2022_Through the Ludic Glass.pdf} +} + +@book{gerlingScreenImagesInGame2023, + title = {Screen {{Images}}. {{In-Game Photography}}, {{Screenshot}}, {{Screencast}}}, + author = {Gerling, Winfried and Möring, Sebastian and Mutiis, Marco}, + date = {2023-05-31}, + doi = {10.55309/c3ie61k5}, + abstract = {[This book is open access - to read it follow the DOI link: https://doi.org/10.55309/c3ie61k5] This volume examines historical and contemporary image practices and phenomena, including screenshots, screen photography, screencasts and in-game photography. The individual chapters pose questions relating to the status, ontology and aesthetics of such practices and phenomena and also analyse their cultural and artistic significance. Artistic works explore these questions in the form of various image practices. The authors and artists investigate the potential for a new area of research at the intersection of a range of disciplines, such as media studies, media aesthetics, media history, image studies, photography theory, game studies, media art and game art. As one of the first publications to address these phenomena, this book speaks to a varied audience in the realms of media studies, game studies and cultural studies as well as to members of the general public interested in historical and contemporary practices associated with visual and digital media.}, + isbn = {978-3-86599-535-3}, + file = {/home/adrian/Zotero/storage/JTM7SWNV/Gerling et al. - 2023 - Screen Images. In-Game Photography, Screenshot, Sc.pdf;/home/adrian/Zotero/storage/L7RU9Y5K/gerling-et-al.-2022-screen-images-in-game-photography-screenshot-scr.pdf} +} + +@article{haddonHomeComputerMaking1988, + title = {The Home Computer: {{The}} Making of a Consumer Electronic}, + shorttitle = {The Home Computer}, + author = {Haddon, Leslie}, + date = {1988-01-01}, + journaltitle = {Science as Culture}, + volume = {1}, + number = {2}, + pages = {7--51}, + publisher = {Routledge}, + issn = {0950-5431}, + doi = {10.1080/09505438809526198}, + url = {https://doi.org/10.1080/09505438809526198}, + urldate = {2024-03-15}, + file = {/home/adrian/Zotero/storage/Q477ERFF/Haddon - 1988 - The home computer The making of a consumer electr.pdf} +} + +@online{heDeepResidualLearning2015, + title = {Deep {{Residual Learning}} for {{Image Recognition}}}, + author = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian}, + date = {2015-12-10}, + eprint = {1512.03385}, + eprinttype = {arXiv}, + eprintclass = {cs}, + doi = {10.48550/arXiv.1512.03385}, + url = {http://arxiv.org/abs/1512.03385}, + urldate = {2024-07-19}, + abstract = {Deeper neural networks are more difficult to train. We present a residual learning framework to ease the training of networks that are substantially deeper than those used previously. We explicitly reformulate the layers as learning residual functions with reference to the layer inputs, instead of learning unreferenced functions. 
We provide comprehensive empirical evidence showing that these residual networks are easier to optimize, and can gain accuracy from considerably increased depth. On the ImageNet dataset we evaluate residual nets with a depth of up to 152 layers---8x deeper than VGG nets but still having lower complexity. An ensemble of these residual nets achieves 3.57\% error on the ImageNet test set. This result won the 1st place on the ILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100 and 1000 layers. The depth of representations is of central importance for many visual recognition tasks. Solely due to our extremely deep representations, we obtain a 28\% relative improvement on the COCO object detection dataset. Deep residual nets are foundations of our submissions to ILSVRC \& COCO 2015 competitions, where we also won the 1st places on the tasks of ImageNet detection, ImageNet localization, COCO detection, and COCO segmentation.}, + pubstate = {prepublished}, + keywords = {Computer Science - Computer Vision and Pattern Recognition}, + file = {/home/adrian/Zotero/storage/7IWNT2PN/He et al. - 2015 - Deep Residual Learning for Image Recognition.pdf;/home/adrian/Zotero/storage/SPVFJRT6/1512.html} +} + +@incollection{kurfessArtificialIntelligence2003, + title = {Artificial {{Intelligence}}}, + booktitle = {Encyclopedia of {{Physical Science}} and {{Technology}} ({{Third Edition}})}, + author = {Kurfess, Franz J.}, + editor = {Meyers, Robert A.}, + date = {2003-01-01}, + pages = {609--629}, + publisher = {Academic Press}, + location = {New York}, + doi = {10.1016/B0-12-227410-5/00027-2}, + url = {https://www.sciencedirect.com/science/article/pii/B0122274105000272}, + urldate = {2024-07-19}, + isbn = {978-0-12-227410-7}, + file = {/home/adrian/Zotero/storage/988HKH5Q/B0122274105000272.html} +} + +@article{martinoModelingVideoGame2023, + title = {Modeling the {{Video Game Environment}}: The {{VideOWL Ontology}}}, + author = {Martino, Simone De and Nicolosi-Asmundo, Marianna and Rizzo, Stefano Angelo and Santamaria, Daniele Francesco}, + date = {2023}, + abstract = {The paper presents an ontology that models the constitutional elements of video games, including agents and artefacts. It also addresses the ever-changing works of game programmers, who strive to exceed the limits derived from the classification mechanisms based on labels and title genres. With respect to the state-of-the-art, the ontology additionally permits to infer the categories of a game through the features of its dynamics and mechanics, such as the point of view (POV) or game-play elements. Furthermore, it encompasses programmers, final users, software programs, and devices in its ontological model. The ontology aims at advancing the mechanisms for understanding and classify games based on their features, ultimately offering new insights and opportunities in the field of game development. In this context, the ontology can be leveraged to represent any agent in the video-game domain such as players, bots, related actions and game strategies.}, + langid = {english}, + file = {/home/adrian/Zotero/storage/IEJ4ME8E/Martino et al. 
- Modeling the Video Game Environment the VideOWL O.pdf} +} + +@online{oquabDINOv2LearningRobust2024, + title = {{{DINOv2}}: {{Learning Robust Visual Features}} without {{Supervision}}}, + shorttitle = {{{DINOv2}}}, + author = {Oquab, Maxime and Darcet, Timothée and Moutakanni, Théo and Vo, Huy and Szafraniec, Marc and Khalidov, Vasil and Fernandez, Pierre and Haziza, Daniel and Massa, Francisco and El-Nouby, Alaaeldin and Assran, Mahmoud and Ballas, Nicolas and Galuba, Wojciech and Howes, Russell and Huang, Po-Yao and Li, Shang-Wen and Misra, Ishan and Rabbat, Michael and Sharma, Vasu and Synnaeve, Gabriel and Xu, Hu and Jegou, Hervé and Mairal, Julien and Labatut, Patrick and Joulin, Armand and Bojanowski, Piotr}, + date = {2024-02-02}, + eprint = {2304.07193}, + eprinttype = {arXiv}, + eprintclass = {cs}, + doi = {10.48550/arXiv.2304.07193}, + url = {http://arxiv.org/abs/2304.07193}, + urldate = {2024-07-19}, + abstract = {The recent breakthroughs in natural language processing for model pretraining on large quantities of data have opened the way for similar foundation models in computer vision. These models could greatly simplify the use of images in any system by producing all-purpose visual features, i.e., features that work across image distributions and tasks without finetuning. This work shows that existing pretraining methods, especially self-supervised methods, can produce such features if trained on enough curated data from diverse sources. We revisit existing approaches and combine different techniques to scale our pretraining in terms of data and model size. Most of the technical contributions aim at accelerating and stabilizing the training at scale. In terms of data, we propose an automatic pipeline to build a dedicated, diverse, and curated image dataset instead of uncurated data, as typically done in the self-supervised literature. In terms of models, we train a ViT model (Dosovitskiy et al., 2020) with 1B parameters and distill it into a series of smaller models that surpass the best available all-purpose features, OpenCLIP (Ilharco et al., 2021) on most of the benchmarks at image and pixel levels.}, + pubstate = {prepublished}, + keywords = {Computer Science - Computer Vision and Pattern Recognition}, + file = {/home/adrian/Zotero/storage/AHH95UTH/Oquab et al. - 2024 - DINOv2 Learning Robust Visual Features without Su.pdf;/home/adrian/Zotero/storage/MBZSMA34/2304.html} +} + +@incollection{pfisterWarumWirEs2023, + title = {Warum wir es für eine gute Idee gehalten haben, eine DACH-Spieledatenbank aufzubauen}, + booktitle = {Game-Journalismus: Grundlagen – Themen – Spannungsfelder. Ein Handbuch}, + author = {Pfister, Eugen and Brandenburg, Aurelia and Demleitner, Adrian and Klausner, Lukas Daniel}, + editor = {Bigl, Benjamin and Stoppe, Sebastian}, + date = {2023}, + pages = {307--316}, + publisher = {Springer Fachmedien}, + location = {Wiesbaden}, + doi = {10.1007/978-3-658-42616-3_22}, + url = {https://doi.org/10.1007/978-3-658-42616-3_22}, + urldate = {2024-03-15}, + abstract = {Unser Werkstattbericht gibt Einblick in den Entstehungskontext sowie die zugrunde liegenden methodischen Überlegungen hinter der von den Autor*innen publizierten Spieledatenbank. Diese wurde kollaborativ erarbeitet und führt digitale Spiele, die in Deutschland, Österreich und der Schweiz bis zum Jahr 2000 entwickelt wurden. 
In diesem Bericht skizzieren wir neben unseren Ausgangsüberlegungen und den verschiedenen Arbeitsschritten bei der Realisierung außerdem auch, auf welcher Datenbasis die Datenbank aufgebaut und geprüft wurde, was die Ziele des Datenmodells sind und mit welchen Schwierigkeiten wir im Prozess der Erstellung konfrontiert waren. Hiernach ordnen wir den aktuellen Stand der Spieledatenbank ein und geben einen Ausblick auf die weiteren Pläne des Projekts.}, + isbn = {978-3-658-42616-3}, + langid = {ngerman}, + keywords = {Datenbank,Datenmodellierung,Digitalisierung,Game Studies,local history,Spielgeschichte,Werkstattbericht}, + file = {/home/adrian/Zotero/storage/JIQZA5RV/Pfister et al. - 2023 - Warum wir es für eine gute Idee gehalten haben, ei.pdf} +} + +@book{swalwellHomebrewGamingBeginnings2021, + title = {Homebrew {{Gaming}} and the {{Beginnings}} of {{Vernacular Digitality}}}, + author = {Swalwell, Melanie}, + date = {2021}, + publisher = {MIT}, + location = {Cambridge}, + url = {https://mitpress.mit.edu/9780262044776/homebrew-gaming-and-the-beginnings-of-vernacular-digitality/}, + urldate = {2023-05-23}, + abstract = {The overlooked history of an early appropriation of digital technology: the creation of games though coding and hardware hacking by microcomputer users.From ...}, + isbn = {978-0-262-04477-6}, + langid = {american}, + keywords = {¨,Amateur-Programming,BASIC,Certeau,Homebrew,Media-History,Media-Theory,Micro-Computers,Ordinary-Culture,Programming-Practices,Video-Game-Studies}, + file = {/home/adrian/Zotero/storage/954FKXUI/Homebrew Gaming and the Beginnings of Vernacular Digitality -- Melanie Swalwell -- Game Histories, 2021 -- The MIT Press -- 9780262044776 -- a1cb8f7738bd05c21d3fabda8c2aa257 -- Anna’s Archive.epub} +} + +@inproceedings{williamsEarlyComputersEurope1976, + title = {Early Computers in {{Europe}}}, + booktitle = {Proceedings of the {{June}} 7-10, 1976, National Computer Conference and Exposition}, + author = {Williams, Richard}, + date = {1976-06-07}, + series = {{{AFIPS}} '76}, + pages = {21--29}, + publisher = {Association for Computing Machinery}, + location = {New York, NY, USA}, + doi = {10.1145/1499799.1499804}, + url = {https://dl.acm.org/doi/10.1145/1499799.1499804}, + urldate = {2024-01-17}, + abstract = {This paper describes the early history of computers in Europe, notably in Germany, Holland, France, Italy and the Scandinavian countries as well as Great Britain. Of necessity, in such a short paper, information is given in a fairly short form, but the paper also includes a detailed description of the birth and foundation of the most successful first British commercial computer company---Leo Computers Limited, and this gives an insight into the thinking which lay behind British early computer development. 
Three appendices are included which give the names and addresses of the early computer manufacturers and sales organizations in Europe, and short notes on the early computers and calculators.}, + isbn = {978-1-4503-7917-5}, + file = {/home/adrian/Zotero/storage/YPYASLJ5/Williams_1976_Early computers in Europe.pdf} +} + +@article{xuApplicationImageSegmentation2024, + title = {Application of {{Image Segmentation Algorithms}} in {{Computer Vision}}}, + author = {Xu, Yuhang}, + date = {2024-04-10}, + journaltitle = {Frontiers in Computing and Intelligent Systems}, + shortjournal = {Frontiers in Computing and Intelligent Systems}, + volume = {7}, + pages = {17--20}, + doi = {10.54097/gq1s6737}, + abstract = {In the field of computer vision (CV), image segmentation technology, as a fundamental part, has a crucial impact on the accuracy of subsequent image processing tasks. Image segmentation is not only a crucial transitional step from image processing to image analysis, but also a hot and difficult research topic in the field of CV. Although significant progress has been made in the research of image segmentation algorithms, existing segmentation algorithms may still face challenges in certain specific scenarios due to the complexity and diversity of images, making it difficult to achieve ideal segmentation results. In recent years, the rapid development of deep learning (DL) technology has brought new breakthroughs to the field of image segmentation. DL models, especially Convolutional Neural Networks (CNNs), can capture semantic information of images more accurately by automatically learning feature representations in images, thereby achieving more precise image segmentation. This article delves into the research and application of image segmentation algorithms in CV, with a focus on the application of DL in the field of image segmentation. 
With the continuous development of advanced technologies such as DL, it is believed that image segmentation technology will play a greater role in more fields in the future.}, + file = {/home/adrian/Zotero/storage/PLIV6AN7/Xu - 2024 - Application of Image Segmentation Algorithms in Co.pdf} +} diff --git a/submissions/447/_quarto.yml b/submissions/447/_quarto.yml new file mode 100644 index 0000000..ae6ece2 --- /dev/null +++ b/submissions/447/_quarto.yml @@ -0,0 +1,8 @@ +project: + type: manuscript + +manuscript: + article: index.qmd + +format: + html: default diff --git a/submissions/447/images/Geovistory_components.png b/submissions/447/images/Geovistory_components.png new file mode 100644 index 0000000..85d0ae9 Binary files /dev/null and b/submissions/447/images/Geovistory_components.png differ diff --git a/submissions/447/index.qmd b/submissions/447/index.qmd new file mode 100644 index 0000000..bc84e31 --- /dev/null +++ b/submissions/447/index.qmd @@ -0,0 +1,126 @@ +--- +submission_id: 447 +categories: 'Session 3A' +title: Geovistory, a LOD Research Infrastructure for Historical Sciences +author: + - name: Stephen Hart + email: stephen.hart@unibe.ch + orcid: 0009-0003-6556-5512 + affiliations: + - Universität Bern + - name: Vincent Alamercery + email: vincent.alamercery@ens-lyon.fr + orcid: 0000-0001-5830-3192 + affiliations: + - ENS Lyon, LARHRA + - name: Francesco Beretta + email: francesco.beretta@cnrs.fr + orcid: 0000-0002-4389-4126 + affiliations: + - CNRS, LARHRA + - name: Djamel Ferhod + email: Djamel.Ferhod@ish-lyon.cnrs.fr + affiliations: + - CNRS, LARHRA + - name: Sebastian Flick + email: sebastian.flick@unibe.ch + affiliations: + - Universität Bern + - name: Tobias Hodel + email: tobias.hodel@unibe.ch + orcid: 0000-0002-2071-6407 + affiliations: + - Universität Bern + - name: David Knecht + email: david.knecht@kleiolab.ch + orcid: 0000-0001-5237-8337 + affiliations: + - Kleiolab GmbH + - name: Gaétan Muck + email: gaetan.muck@kleiolab.ch + affiliations: + - Kleiolab GmbH + - name: Alexandre Perraud + email: alexandre.perraud@cnrs.fr + affiliations: + - CNRS, LARHRA + - name: Morgane Pica + email: morgane.pica@ens-lyon.fr + orcid: 0000-0002-0981-4516 + affiliations: + - ENS Lyon, LARHRA + - name: Pierre Vernus + email: Pierre.VERNUS@msh-lse.fr + orcid: 0000-0002-9335-7070 + affiliations: + - MSH, LARHRA +keywords: + - Linked Open Data + - Research Infrastructure + - Semantic Web + - Ontology + - FAIR Data +abstract: This article explores the significance of the Geovistory platform in the context of the growing Open Science movement within the Humanities, particularly its role in facilitating the production and reuse of FAIR data. As funding agencies increasingly mandate the publication of research data in compliance with FAIR principles, researchers face the dual challenge of mastering new methodologies in data management and adapting to a digital research landscape. Geovistory provides a comprehensive research environment specifically designed to meet the needs of historians and humanists, offering intuitive tools for managing research data, establishing a collaborative Knowledge Graph, and enhancing scholarly communication. By integrating semantic methodologies in the development of a modular ontology, Geovistory fosters interoperability among research projects, enabling scholars to draw on a rich pool of shared information while maintaining control over their data. 
Additionally, the platform addresses the inherent complexities of historical information, allowing for the coexistence of diverse interpretations and facilitating nuanced digital analyses. Despite its promising developments, the Digital Humanities ecosystem faces challenges related to funding and collaboration. The article concludes that sustained investment and strengthened partnerships among institutions are essential for ensuring the longevity and effectiveness of initiatives like Geovistory, ultimately enriching the field of Humanities research. +date: 07-26-2024 +bibliography: references.bib +--- + +## Introduction + +The movement of Open Science has grown in importance in the Humanities, advocating for better accessibility of scientific research, especially in the form of the publication of research data [@unesco2023]. This has led funding agencies like the SNSF, the ANR, and Horizon Europe to ask research projects to publish their research data and metadata in line with the FAIR principles in public repositories (see for instance [@anr2023; @ec2023; @snsf2024]). Such requirements are putting pressure on researchers, who need to learn and understand the principles and standards of FAIR data and their impact on research data, and who must also acquire new methodologies and know-how, for example in data management and data science. + +At the same time, the accessibility of an increasing volume of interoperable, high-quality data and the new semantic methodologies might bring a paradigm change to the Humanities in the way knowledge is produced [@beretta2023; @feugere2015]. The utilization of Linked Open Data (LOD) grants scholars access to large volumes of interoperable and high-quality datasets, at a scale analogue methods cannot reach, fundamentally altering their approach to information. This enables scholars to pose novel research questions, marking a departure from traditional modes of inquiry and facilitating a broader range of analytical perspectives within academic discourse. Moreover, drawing upon semantic methodologies rooted in ontology engineering, scholars can effectively document the intricate complexities inherent in social and historical phenomena, enabling a nuanced representation essential to the Social Sciences and Humanities domains within their databases. This meticulous documentation not only reflects a sophisticated understanding of multifaceted realities but also empowers researchers to deepen the digital analysis of rich corpora. + +The transition from analogue to digital research methodologies does not come without challenges for researchers, necessitating the development of new tools and research infrastructures to support them in this evolution. There is a demand for user-friendly tools that abstract away the technical complexity, as well as for organisations that accompany projects and provide support in digital methodologies and strategies, helping scholars to better manage their data for computational analysis and information sharing. + +This is the goal of Geovistory. It is conceived as a virtual research and data publication environment designed to strengthen Open Research Data practices. Geovistory is developed for research projects in the Humanities and Social Sciences, whether in history, geography, literature or other related fields, according to the participatory method of "user experience design". It supports researchers with simple and easy-to-use interfaces and allows them to make their research accessible in an attractive way to people interested in history.
+ +## Geovistory as a Research Environment + +Geovistory aims to be a comprehensive research environment that accompanies scholars throughout the whole research cycle. Geovistory includes: +- The *Geovistory Toolbox*, which allows projects to manage and curate their research data. The Toolbox is freely accessible for all individual projects. Each research project works on its own data perspective but at the same time directly contributes to a joint knowledge graph. +- A joint *Data repository* that connects and links the different research projects under a unique and modular ontology, thus creating a large Knowledge Graph. +- The Geovistory *Publication platform* (<https://www.geovistory.org/>), where data are published using the RDF framework and can be accessed via the community page or project-specific webpages, with their graphical search tools, or via a SPARQL endpoint. +- An active *Community* to foster the exchange of information and know-how among researchers, users and technological experts. + +::: {#fig-geovistory-components} + +![The main components of the Geovistory ecosystem](images/Geovistory_components.png) + +::: + +As per the current terms of service, all data produced in the information layer of Geovistory are licensed under Creative Commons BY-SA 4.0. Initiated by KleioLab GmbH, the different infrastructure components are currently being developed jointly by LARHRA and the University of Bern, while other actors are welcome to join the Geovistory vision. All the web components, the publication platform and the Toolbox have been made available as open source. The LOD4HSS project, co-funded by swissuniversities, structures these efforts and aims at creating a larger community of users and supporters of this vision. + +## The aim of breaking information silos + +The goal of producing and publishing FAIR research data is to break the information silos that hinder the sharing and reuse of scientific data. However, achieving interoperability hinges on two critical components [@beretta2024a]: +- Firstly, the unambiguous identification of real-world entities (e.g., persons, places, concepts) with unique identifiers (e.g., URIs in Linked Open Data) and the establishment of links between identical entities across different projects (e.g., ensuring that the entity "Paris" is identified by the same URI in all projects); +- Secondly, the utilization of explicit ontologies that can be aligned across projects. Nevertheless, mapping between ontologies may prove challenging, or even unfeasible, particularly when divergent structural frameworks are employed (e.g., an event-centric ontology may have limited compatibility with an object-centric one). + +In Geovistory, those challenges are addressed by producing a unique Knowledge Graph that integrates the various projects. This requires each project to adhere to the Semantic Data for History and Social Sciences (SDHSS) ontology ecosystem, which includes a methodology of foundational ontological analysis based on the principles of OntoClean, from the domain of semantic engineering [@guarino2004], and on the high-level conceptual categories of the DOLCE ontology [@borgo2022]. This has been applied to the CIDOC CRM ontology, the ICOM standard for the Heritage domain, while extending it to include the social and mental realities crucial for documenting essential aspects of human history, like ownership, membership, collective beliefs, etc. [@beretta2024b].
On this basis, a standardised semantic methodology for the development of domain-oriented ontologies in different fields of the Humanities, such as archaeology, prosopography and geography, has been created. The SDHSS ontology ecosystem provides adaptability to the specificities of the various research projects while ensuring full interoperability among them. It is collaboratively managed in the ontome.net application, so that scholars and domain experts can participate in its development if interested. + +This shared Knowledge Graph streamlines the entity creation process by enabling users to navigate the graph, identify existing objects, and reuse them in their project using the same URIs for entity identification. By leveraging a common ontology ecosystem, users can not only easily identify and reuse information pertaining to specific entities but also ensure seamless integration and interoperability across projects within the Geovistory platform. + +## A modular system for managing complex HSS information + +Scholars in the Humanities grapple with intricate information, markedly more complex than that found in many other scientific disciplines. Historical sources, whether textual, oral, visual, or material, provide fragmented and biased glimpses into the past, necessitating contextualization and interpretation. Consequently, this dynamic can engender a considerable degree of uncertainty and discordance in the information, which needs to be meticulously documented. Any digital infrastructure or model employed must adeptly navigate this multifaceted information landscape and accommodate its inherent complexity. + +An inherent strength of Geovistory lies in its handling of the challenges associated with scientific information in the Humanities and Social Sciences domain. Noteworthy among these challenges are the nuanced, context-sensitive nature of information and its relation to different research agendas, wide variations in the meaning of the same terms, vocabulary complexities, competing views, and the gaps and fragmentation of the available information. These complexities are deftly managed through the application of the SDHSS methodology, which tends to limit the number of classes and properties in the ontology ecosystem, while inviting projects to develop and share rich collections of controlled vocabularies of concepts that enrich the data model according to the different research agendas and perspectives. + +Moreover, the project-partition of the Knowledge Graph within Geovistory enables users to repurpose existing information while also accommodating contradictory data, particularly when discrepancies are identified by researchers. Each project graph is stored within a designated dataset, maintaining its individual identity within the overarching Knowledge Graph. This approach allows for the coexistence and contextualization of disparate interpretations of facts, enhancing the platform's flexibility and adaptability to varying scholarly perspectives. It is the unique amalgamation of the Geovistory graph data model and its robust semantic enrichment capabilities that renders it particularly compelling for research within the Humanities and Social Sciences.
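+To make the project-partition concrete: in an RDF store organised along these lines, each project's statements can live in their own named graph, so the same entity URI may carry different, contextualised assertions in different projects. The following sketch is illustrative only (the endpoint address and entity URI are placeholders, not Geovistory's actual services) and uses the standard SPARQL 1.1 protocol from Python:
+
+```python
+import requests
+
+# Placeholder endpoint and entity URI -- substitute the real SPARQL
+# endpoint and an entity URI from the Knowledge Graph you are querying.
+ENDPOINT = "https://sparql.example.org/query"
+ENTITY = "https://example.org/entity/person_123"
+
+# Each project's statements are assumed to sit in their own named graph,
+# so grouping by graph shows which project asserts what about the entity.
+QUERY = f"""
+SELECT ?projectGraph ?p ?o
+WHERE {{
+  GRAPH ?projectGraph {{ <{ENTITY}> ?p ?o . }}
+}}
+LIMIT 100
+"""
+
+r = requests.get(
+    ENDPOINT,
+    params={"query": QUERY},
+    headers={"Accept": "application/sparql-results+json"},
+    timeout=30,
+)
+r.raise_for_status()
+for b in r.json()["results"]["bindings"]:
+    print(b["projectGraph"]["value"], b["p"]["value"], b["o"]["value"])
+```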
+ +## Integrating the DH ecosystem + +Operating within the framework of Linked Open Data principles entails establishing connections with disparate datasets housed in various open and online repositories or Knowledge Graphs, culminating in the creation of an inclusive and interconnected Web of Data—an accomplishment characterized as the fifth star of Tim Berners-Lee's Open Data scheme. As datasets interlink, they collectively form the Linked Open Data Cloud, wherein predominant repositories such as Wikidata or DBpedia, alongside authority files such as VIAF or the GND, assume pivotal roles as data hubs, enhancing the discoverability, contextualization, and citability of information. + +The Geovistory ecosystem applies those principles, actively engaging with the Digital Humanities landscape. It is connected dynamically to the information systems of producers of authority records (such as IdRef or the GND) and data repositories (such as Wikidata) with a view to interconnecting bibliographic information systems and scaling up to a large Knowledge Graph. Collaborative efforts include the establishment of a data exchange pipeline with the French Agence Bibliographique de l'Enseignement Supérieur (ABES), with ongoing initiatives to forge additional partnerships. + +Moreover, ensuring the long-term preservation of research data remains imperative, with initiatives to archive completed projects in the Zenodo repository and to explore potential collaborations with entities like DaSCH, OLOS, and Huma-Num for dynamic updates and data management; preliminary engagements have been initiated with DaSCH. + +## Conclusions and future perspectives + +Geovistory has been designed as a comprehensive research environment tailored by and for historians and humanists to address their needs in generating and utilizing FAIR data, thereby streamlining the research digitization process. As the utilization of Geovistory proliferates across more projects, the Knowledge Graph grows with increasingly enriched information, rendering the overall environment more advantageous for scholars, whether by providing reusable datasets or by enriching imported data. In this regard, Geovistory can be compared to a Wikidata dedicated to research endeavors, with the difference that projects retain full control over their data without a loss of semantic coherence throughout the graph. + +The forthcoming years mark a critical juncture for Geovistory, as the tools and infrastructures of the environment have recently transitioned into the public domain. This necessary change will ease collaboration with public institutions within Europe, but a greater share of public funding will be needed to ensure the sustainability of the ecosystem. + +Nonetheless, the Digital Humanities ecosystem remains unstable, owing to the lack of sustained funding for infrastructural initiatives by national funding agencies and the absence of cohesive coordination among institutions. To ameliorate this landscape, it is imperative to prioritize the establishment of robust collaborations and partnerships among the diverse tools and infrastructures in Switzerland and Europe. Leveraging the specialized expertise of each institution holds the promise of engendering a harmonized, synergistic and distributed environment conducive to scholarly pursuits.
diff --git a/submissions/447/references.bib b/submissions/447/references.bib new file mode 100644 index 0000000..9797685 --- /dev/null +++ b/submissions/447/references.bib @@ -0,0 +1,80 @@ +@misc{anr2023, + title = {La science ouverte}, + url = {https://anr.fr/fr/lanr/engagements/la-science-ouverte/}, + author = {ANR}, + year = {2023}, + note = {Accessed on July 19, 2024} +} +@article{beretta2023, + title = {Données ouvertes liées et recherche historique : un changement de paradigme}, + author = {Francesco Beretta}, + journal = {Humanités numériques}, + volume = {7}, + year = {2023}, + doi = {10.4000/revuehn.3349} +} +@article{beretta2024a, + title = {Données liées ouvertes et référentiels publics : un changement de paradigme pour la recherche en sciences humaines et sociales}, + author = {Francesco Beretta}, + journal = {Arabesques}, + volume = {112}, + year = {2024}, + pages = {26--27} +} +@incollection{beretta2024b, + title = {Semantic Data for Humanities and Social Sciences (SDHSS): an Ecosystem of CIDOC CRM Extensions for Research Data Production and Reuse}, + author = {Francesco Beretta}, + editor = {Thomas Riechert and Hartmut Beyer and Jennifer Blanke and Edgard Marx}, + booktitle = {Professorale Karrieremuster. Entwicklung einer wissenschaftlichen Methode zur Forschung auf online verfügbaren und verteilten Forschungsdatenbanken der Universitätsgeschichte}, + publisher = {International Handbooks on Information Systems}, + address = {Leipzig}, + year = {2024}, + pages = {73--101}, +} +@article{borgo2022, + title = {DOLCE: A descriptive ontology for linguistic and cognitive engineering}, + author = {Stefano Borgo and Roberta Ferrario and Aldo Gangemi and Nicola Guarino and Claudio Masolo and Daniele Porello and Emilio M. Sanfilippo and Laure Vieu}, + journal = {Applied Ontology}, + volume = {17}, + year = {2022}, + pages = {45--69}, +} +@misc{ec2023, + title = {Open Data, Software and Code Guidelines}, + url = {https://open-research-europe.ec.europa.eu/for-authors/data-guidelines#standardsandfair}, + author = {EC}, + year = {2023}, + note = {Accessed on July 19, 2024} +} +@article{feugere2015, + title = {Les bases de données en archéologie. De la révolution informatique au changement de paradigme}, + author = {Michel Feugère}, + journal = {Cahiers philosophiques}, + volume = {141}, + year = {2015}, + pages = {139--147}, +} +@incollection{guarino2004, + title = {An Overview of OntoClean}, + author = {Nicola Guarino and Christopher A. 
Welty}, + editor = {Steffen Staab and Rudi Studer}, + booktitle = {Handbook on Ontologies}, + publisher = {Springer}, + address = {Berlin and Heidelberg}, + year = {2004}, + pages = {151--171}, +} +@misc{snsf2024, + title = {Open Research Data}, + url = {https://www.snf.ch/en/dMILj9t4LNk8NwyR/topic/open-research-data}, + author = {SNSF}, + year = {2024}, + note = {Accessed on July 19, 2024} +} +@misc{unesco2023, + title = {UNESCO Recommendation on Open Science}, + url = {https://www.unesco.org/en/open-science/about?hub=686}, + author = {UNESCO}, + year = {2023}, + note = {Accessed on July 19, 2024} +} diff --git a/submissions/453/_quarto.yml b/submissions/453/_quarto.yml new file mode 100644 index 0000000..ae6ece2 --- /dev/null +++ b/submissions/453/_quarto.yml @@ -0,0 +1,8 @@ +project: + type: manuscript + +manuscript: + article: index.qmd + +format: + html: default diff --git a/submissions/453/images/cycle_en_anglais.jpg b/submissions/453/images/cycle_en_anglais.jpg new file mode 100644 index 0000000..146aef1 Binary files /dev/null and b/submissions/453/images/cycle_en_anglais.jpg differ diff --git a/submissions/453/index.qmd b/submissions/453/index.qmd new file mode 100644 index 0000000..bb62367 --- /dev/null +++ b/submissions/453/index.qmd @@ -0,0 +1,67 @@ +--- +submission_id: 453 +categories: 'Session 5A' +title: Contributing to a Paradigm Shift in Historical Research by Teaching Digital Methods to Master's Students +author: + - name: Francesco Beretta + orcid: 0000-0002-4389-4126 + email: francesco.beretta@cnrs.fr + affiliations: + - LARHRA UMR 5190 CNRS/Université de Lyon + - Université de Neuchâtel +keywords: + - teaching digital methodology + - paradigm shift + - open data reuse for research + +abstract: | + Over the last few decades, we have witnessed a major transformation in the digital resources available, with significant implications for society, the economy and research. In the social sciences, and history in particular, we can observe the provision of ever larger amounts of open research data and a growing number of data journals, as well as the development of educational resources aimed at strengthening the digital skills of researchers. Knowledge graphs and Linked Open Data make an exponentially growing number of resources easily accessible and raise the question of a paradigm shift for historical research. But this will only happen if digital methods are integrated into the training of new generations of historians, not just as tools but as part of new approaches to knowledge production, as a growing number of scholars and projects are realising. I taught a master's course in digital methods in history at the University of Lyon 3 for five years, and for the last four years I have been teaching at the University of Neuchâtel, which currently offers teaching in digital methods in the master's courses in Historical Sciences and in Regional Heritage and Digital Humanities. In this paper, I will present the structure of the threefold programme of my teaching: in the first semester, understanding the research cycle, setting up an information system and discovering the semantic web; in the second, learning data analysis and visualisation methods; in the third, applying the methods to one's own research agenda. I will also review the results obtained and provide some examples of completed Master's theses.
+ +date: 07-26-2024 +bibliography: references.bib +--- + +## Introduction + +Over the past few decades, we have witnessed a major transformation in the digital resources and methodologies available, particularly in the field of Artificial Intelligence (AI), with significant implications for society and the economy. As stated in the White Paper [*The Digital Turn in the Sciences and Humanities*](https://zenodo.org/records/4191345) by the German Research Foundation (DFG), the digital turn is bringing about three major changes in research: former analogue research practices are being realised with digital tools (transformative change); data-intensive technologies allow new research questions to be addressed (enabling change); digital technologies, especially AI methods, can even replace humans in parts of the research project (substitutive change). + +This phenomenon can also be observed in the human and social sciences (HSS), and even in history, and is particularly striking in the area of open data publication. On the one hand, data can be deposited in well-known, dedicated repositories, such as Zenodo, Nakala, DaSCH or DANS, and a growing number of data journals (e.g. the [Journal of Open Humanities Data](https://openhumanitiesdata.metajnl.com/)) publish papers dedicated to contextualising data production in order to facilitate its reuse. On the other hand, directly accessible data are available in the form of relational databases that can be queried (e.g. the [PRELIB project](https://mshb.huma-num.fr/prelib/)) or, using the RDF framework, in the form of Linked Open Data (e.g. the [Sphaera project](http://db.sphaera.mpiwg-berlin.mpg.de/resource/Start) or the [Geovistory collaborative platform](https://www.geovistory.org/)). We can thus observe that the digital transformation of research practices in HSS (transformative change) is leading to the production and publication of an exponentially growing wealth of information, making it possible to address new research questions (enabling change), in particular by applying AI methodologies in the context of new disciplines known under the label of [computational humanities](http://2024.computational-humanities-research.org/contact/) (substitutive change). + +## A paradigm shift + +This important transformation of historical research raises the question of a paradigm shift. This concept was used by Thomas Kuhn in 1962 in his book *The Structure of Scientific Revolutions* [@kuhn_structure_1962] to describe the intellectual structure of disciplines and to analyse the ruptures that lead to scientific revolutions. There are two essential elements to be considered: on the one hand, the paradigm consists of all the shared methods, practices and achievements that form the basis and structure of a disciplinary community; on the other hand, it includes, in its ancient, original sense, the teaching practices applied during education with the aim of enabling the acquisition of the skills essential to the practice of a discipline. Since the purpose of scientific activity is the production of knowledge, the paradigm enables students to learn the methods and rules that are legitimate within a disciplinary community. The digital turn thus raises the question of the transformation of methods and forms of knowledge production in the historical sciences, as can be seen from the publications of a growing number of scholars (e.g. the [Journal of Digital History](https://journalofdigitalhistory.org)).
+ +On the basis of this analysis, it seems essential to introduce training in digital methodologies and tools into the standard disciplinary curriculum of history, and not just in optional Digital Humanities minors. Since learning disciplinary tools is at the heart of the paradigm of a discipline, digital methodologies should be taught from the beginning of university studies, so that future generations of teachers, doctoral students, professors and researchers can make the transition to the new paradigm from within. This will make it possible to create a disciplinary community trained in the new methodologies, familiar with the issues from direct experience, and capable of defending the place of the historical sciences in the field of contemporary science and the digital society [@francesco_beretta_donnees_2023]. + +## Master's course in digital methodology for historical research + +These considerations stem not only from my work as a CNRS researcher who has spent the last fifteen years building collaborative information systems for research (symogih.org, ontome.net, geovistory.org) [@francesco_beretta_donnees_2024], in line with the vision that, as the DFG White Paper points out, "digital infrastructure is essential for research and must be built for long-term service", but also from ten years of experience in teaching digital methodology at bachelor and master level in history, first at the University of Lyon 3 and for the last four years at the University of Neuchâtel, which currently offers courses in digital methodology in the master's programmes in Historical Sciences and in Regional Heritage and Digital Humanities. + +But at this point an essential question arises: what should be taught to history students to help them make the most of the digital transition and build a new paradigm? Looking at recent handbooks, e.g. [@antenhofer_digital_2023; @doring_digital_2022; @schuster_routledge_2021], or at educational resources like the [programminghistorian.org](https://programminghistorian.org/en/) project, we can see a huge variety of approaches and areas of application of digital methods, and often the answer to the question depends on one's own field of research and experience. In this sense, I will not provide a somewhat abstract review of the literature and existing courses, but rather share some aspects of my own approach in the hope that they may be of some use or inspiration to others. + +My teaching at Master's level consists of a three-part programme: the first semester deals with understanding the research cycle in history, setting up an information system and discovering the semantic web; the second focuses on learning data analysis and visualisation methods using Python notebooks; the third is about applying the methods to the students' own research agenda. This teaching programme has two objectives, which correspond to the first two components of the digital transformation mentioned in the DFG White Paper: to learn a methodology suitable for the manual collection of information from sources, according to the best practices of computer science (transformative change); and to learn a pool of data analysis and visualisation methodologies, allowing the exploitation of the growing number of existing resources (enabling change).
These courses therefore provide students with basic skills, particularly in data analysis, which they can apply directly to their Master's thesis and, if they wish, continue on to computational research courses such as Machine Learning or Natural Language Processing (substitutive change). + +Since the aim of research is to produce knowledge, an analysis of the research process, conceptualised in terms of a research cycle, forms the basis of my courses. This choice underlines the iterative dimension that is specific to the scientific approach in general and also applies to the formulation and verification (or falsification) of hypotheses that is specific to the social sciences. + +::: {#fig-cycle} +![Cycle of knowledge production in historical disciplines](images/cycle_en_anglais.jpg){fig-align="center" width="800"} +::: + +In this context, knowledge is understood as the result of the analysis and interpretation of information. Information, for its part, is at the heart of the scientific process and can be defined as a representation of reality (the only datum being the world we observe), and more precisely as an identification and representation of the objects in the world (people, organisations, artefacts, etc.), their characteristics (physical properties of objects, education and income levels of people, opinions, etc.) and their relationships in time and space (membership in organisations, exchange of messages or goods, journeys, etc.). Knowledge can thus be defined as an interpretation of the world represented in the information collected: while the former is the result of scientific activity and is generally published in the form of books or articles, the latter should be understood as the most accurate possible approximation of the facts in words, making the information reusable for new research when shared in the form of digital open data according to the FAIR principles. + +As the diagram of the knowledge production cycle shows, all research must begin with the definition of a research agenda that fits within the horizon of existing knowledge, expressed in the literature, and that defines the methodology that will be adopted and the research questions to be answered. Zotero seems to be the best tool for this task, not only for storing bibliographical references, but also for enriching them with one's own notes and categories, and for connecting them to resources on the web, thus realising the first step of a digitally transformed research practice. On the basis of their line of inquiry, students must then select the relevant sources from the available mass in order to gather the information that will be analysed and serve as a basis for knowledge. They will have to decide what information will be systematically retained and how it will be conceptualised and produced. This raises the issue of the conceptual model and the choice of digital storage technology: while spreadsheets may be adequate if one is limited to systematically collecting a certain number of characteristics of a population of individuals of the same type, as soon as one wishes to record complex relationships between different objects (persons, organisations, artefacts, opinions, economic values, etc.) in space and time, it is essential to use a relational or graph-oriented database in order to capture the full wealth of the required information (a minimal sketch of such a schema is given below).
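+To give an idea of what such a first-semester information system can look like in practice, here is a minimal sketch of a SQLite schema for a prosopographical project, created from Python. The database, table and column names are illustrative assumptions, not those prescribed by the course:
+
+```python
+import sqlite3
+
+# Illustrative database name, echoing the sample 'astronomers' project.
+con = sqlite3.connect("astronomers.db")
+
+# A deliberately small relational model: persons and organisations as
+# objects, and a membership relation situating them in time.
+con.executescript("""
+CREATE TABLE IF NOT EXISTS person (
+    id           INTEGER PRIMARY KEY,
+    name         TEXT NOT NULL,
+    birth_year   INTEGER,
+    wikidata_uri TEXT UNIQUE   -- link to the LOD identifier, if known
+);
+CREATE TABLE IF NOT EXISTS organisation (
+    id   INTEGER PRIMARY KEY,
+    name TEXT NOT NULL
+);
+CREATE TABLE IF NOT EXISTS membership (
+    person_id       INTEGER NOT NULL REFERENCES person(id),
+    organisation_id INTEGER NOT NULL REFERENCES organisation(id),
+    start_year      INTEGER,
+    end_year        INTEGER
+);
+""")
+con.commit()
+con.close()
+```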
+ +Setting up such an information system is precisely the content of the teaching of the first semester, and I propose that students follow the example of the [teacher's own GitHub repository](https://github.com/Sciences-historiques-numeriques/astronomers/wiki) in order to document, in a dedicated GitHub repository and wiki, the progress of their research cycle. In other words, I adopt a kind of teaching by example, where the whole approach is documented in a sample project available on GitHub that can be imitated and applied to one's own subject, while endeavouring to go through all the proposed steps by creating one's own SQLite database, one's own analyses in Python, etc. + +To propose the simplest and most concrete use case, I adopt a prosopographical approach and invite students to search Wikipedia for the biographical records of a population that corresponds to their interests, for example political activists or fashion designers, while asking themselves some questions to which they would like to find answers. We then consider the Wikipedia biographical records for this population as sources and define a catalogue of information to be extracted that will lead to the creation of a conceptual model and an initial SQLite database. Students will thus acquire the basic elements for creating a simple, easy-to-manage information system, which will greatly facilitate the manual input of relatively complex information from the sources analysed (transformative change). + +Since it does not make sense to produce a lot of information manually in the context of this course, at this stage I take advantage of the DBpedia and Wikidata projects, which provide a wealth of information on the previously selected populations in the form of structured data published in RDF. Students therefore learn how to retrieve this information using the SPARQL language and import it into their SQLite database for refinement, thus discovering the process of re-using existing data, which can be considerable in volume, with thousands of individuals described and dozens of pieces of information about them (enabling change); a minimal sketch of such a retrieval is given below. + +This step marks the transition to the second semester, which begins with learning basic skills in Python and using Jupyter notebooks. To be able to analyse the information collected, it must be simplified and coded. It is at this stage that the research questions are introduced and a range of tools are applied to the information collected in the form of digital data: univariate and multivariate statistical analysis, network analysis, spatial representation, etc. Students discover a new notion of model, now in the statistical sense, that emerges from these analyses and has an eminently heuristic function, since the representations produced by analysis software always require critical discussion, contextualisation and interpretation. At the same time, these methods and digital tools make visible significant phenomena that would otherwise be impossible to see "with the naked eye", given the considerable volume and complexity of the information collected on the Semantic Web. + +At the end of the process, students formulate some possible answers to their research questions and document the results obtained in their repository wiki, accompanied by graphics resulting from the analysis.
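+As a minimal illustration of the retrieval step described above, the following sketch queries the public Wikidata SPARQL endpoint for a population of astronomers (mirroring the sample project; the QID and properties are illustrative and should be swapped for one's own population) and imports the results into the course's SQLite database:
+
+```python
+import sqlite3
+
+import pandas as pd
+import requests
+
+# Occupation (P106) = astronomer (Q11063), with date of birth (P569);
+# substitute the QID of the population you are actually studying.
+QUERY = """
+SELECT ?person ?personLabel ?birth WHERE {
+  ?person wdt:P106 wd:Q11063 ;
+          wdt:P569 ?birth .
+  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
+}
+LIMIT 500
+"""
+
+r = requests.get(
+    "https://query.wikidata.org/sparql",
+    params={"query": QUERY, "format": "json"},
+    headers={"User-Agent": "course-example/0.1 (student project)"},
+    timeout=60,
+)
+r.raise_for_status()
+rows = [
+    {
+        "wikidata_uri": b["person"]["value"],
+        "name": b["personLabel"]["value"],
+        "birth": b["birth"]["value"],
+    }
+    for b in r.json()["results"]["bindings"]
+]
+
+# Load the raw results into SQLite for manual refinement and later analysis.
+df = pd.DataFrame(rows)
+with sqlite3.connect("astronomers.db") as con:
+    df.to_sql("person_raw", con, if_exists="replace", index=False)
+```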
Students thus complete the research cycle by producing new knowledge in response to their initial research agenda, publishing online not only the results of their investigations, but also the database, the Python notebooks and the discussion of the analyses that led to their conclusions, thus learning in practice to undertake a reproducible scientific approach. The third semester is devoted to accompanying students who wish to realise their Master's thesis using the methods learned in the previous semesters. This is still an ongoing process in Neuchâtel, so in my paper I'll present some results from the master's theses written by students at the University of Lyon 3. + +## Results and discussion + +Over all these years, I have observed that if students invest some time in practising the exercises and follow the learning cycle in this kind of apprenticeship by example during the two semesters, they can achieve amazing results (e.g. [Militant.e.s pour le droits des femmes](https://github.com/AliaBrah/militants_droit_femmes/wiki) and [Fashion Designers](https://github.com/czeacach/fashion_designers/wiki)). But at the same time I have to admit that the learning curve is steep, because in just one year students learn the basics of conceptual modelling, SQL, SPARQL and Python, the essential concepts of various data analysis methods, as well as versioning with Git and putting data and notebooks online. On the one hand, a certain pedagogical investment is necessary, especially to support students who have less of a natural inclination towards digital technology. On the other hand, the more technical parts of this method, like GitHub versioning and Python, should be introduced at bachelor level. At the University of Neuchâtel, a brand-new minor in Digital Humanities has been introduced in the bachelor's programme, which will enable students who have taken it to benefit more from the master's courses. + +As far as the Master's thesis is concerned, it seems that the conceptual modelling and the setting up of a database for the input of information extracted from sources are the most useful elements, while the venture into collecting data available on the web as a basis for the Master's thesis does not yet seem attractive. However, there are exceptions, as shown by a work using the [Refuge Huguenot database](http://refuge-huguenot.ish-lyon.cnrs.fr/), which I will present in my paper. In conclusion, it seems that at the moment students who take this course can only reach the level of transformative change. But experience shows that it is only with the development of appropriate research infrastructure and the emergence of a wider community of digital disciplinary practices that we will be able to provide students with a context that allows them to achieve the enabling and substitutive changes, and thus bring about an effective paradigm shift. It is up to the new generations to make this happen.
\ No newline at end of file diff --git a/submissions/453/references.bib b/submissions/453/references.bib new file mode 100644 index 0000000..f13d3cb --- /dev/null +++ b/submissions/453/references.bib @@ -0,0 +1,66 @@ + +@book{antenhofer_digital_2023, + address = {Stuttgart}, + edition = {Erste Ausgabe}, + title = {Digital {Humanities} in den {Geschichtswissenschaften}}, + isbn = {978-3-8385-6116-5}, + language = {ger}, + publisher = {utb GmbH}, + author = {Antenhofer, Christina and Kühberger, Christoph and Strohmeyer, Arno}, + year = {2023}, + doi = {10.36198/9783838561165}, + } +@book{doring_digital_2022, + address = {Germany}, + edition = {1}, + series = {Studies in digital history and hermeneutics}, + title = {Digital {History}: {Konzepte}, {Methoden} und {Kritiken} {Digitaler} {Geschichtswissenschaft}}, + volume = {6}, + isbn = {978-3-11-075710-1}, + publisher = {De Gruyter}, + author = {Döring, Karoline Dominika and Haas, Stefan and König, Mareike and Wettlaufer, Jörg}, + year = {2022} +} + @article{francesco_beretta_donnees_2023, + title = {Données ouvertes liées et recherche historique : un changement de paradigme}, + issn = {2736-2337}, + url = {https://journals.openedition.org/revuehn/3349}, + doi = {10.4000/revuehn.3349}, + language = {fr}, + number = {7}, + urldate = {2023-07-26}, + journal = {Humanités numériques}, + author = {Beretta, Francesco}, + month = jul, + year = {2023}, + note = {Number: 7 +Publisher: Humanistica} +} +@article{francesco_beretta_donnees_2024, + title = {Données liées ouvertes et référentiels publics : un changement de paradigme pour la recherche en sciences humaines et sociales}, + doi = {10.35562/arabesques.3820}, + number = {112}, + journal = {Arabesques}, + author = {Beretta, Francesco}, + year = {2024}, + pages = {26--27} +} +@book{kuhn_structure_1962, + address = {Chicago}, + title = {The structure of scientific revolutions}, + language = {eng}, + publisher = {University of Chicago Press}, + author = {Kuhn, Thomas S.}, + year = {1962}, +} +@book{schuster_routledge_2021, + address = {London}, + edition = {1st ed.}, + series = {Routledge international handbooks}, + title = {Routledge international handbook of research methods in digital humanities}, + isbn = {978-0-429-67025-1}, + language = {eng}, + publisher = {Routledge}, + author = {Schuster, Kristen and Dunn, Stuart E.}, + year = {2021} + } \ No newline at end of file diff --git a/submissions/457/_quarto.yml b/submissions/457/_quarto.yml new file mode 100644 index 0000000..ae6ece2 --- /dev/null +++ b/submissions/457/_quarto.yml @@ -0,0 +1,8 @@ +project: + type: manuscript + +manuscript: + article: index.qmd + +format: + html: default diff --git a/submissions/457/index.qmd b/submissions/457/index.qmd new file mode 100644 index 0000000..ca216fb --- /dev/null +++ b/submissions/457/index.qmd @@ -0,0 +1,73 @@ +--- +submission_id: 457 +categories: 'Session 4B' +title: Towards Computational Historiographical Modeling +author: + - name: Michael Piotrowski + orcid: 0000-0003-3307-5386 + email: michael.piotrowski@unil.ch + affiliations: + - University of Lausanne +keywords: + - corpora + - concepts + - epistemology + - methodology + - theory of history +abstract: | + Digital corpora play an important, if not defining, role in digital history and may be considered one of the most obvious differences from traditional history. Corpora are essential for the use of computational methods and thus for the construction of computational historical models.
But beyond their technical necessity and their practical advantages, their epistemological impact is significant. While the traditional pre-digital corpus is often more of a potentiality, a mere "intellectual object," the objective of computational processing requires the corpus to be made explicit and thus turns it into a "material object." Far from being naturally given, corpora are constructed as models of a historical phenomenon and therefore have all the properties of models. Moreover, following Gaston Bachelard, I would argue that corpora actually construct the phenomenon they are supposed to represent; they should therefore be considered as phenomenotechnical devices. +date: 2024-08-14 +bibliography: references.bib +--- + +## Introduction + +When we look for epistemological differences between "traditional" and digital history, *corpora* stand out. Of course, historians have always created and studied collections of traces, in particular documents, but sometimes also other artifacts, and have built their narratives on the basis of these collections. This is a significant aspect of scholarship and in some sense constitutes the difference between historical and literary narratives: historical narratives are supposed to be grounded (in some way) in the historical facts represented by the respective corpus. + +Nevertheless, the relation between such a corpus and the narrative is traditionally rather unclear. Not only is the corpus necessarily incomplete (and uncertain), but it's typically only "virtual." As @Mayaffre2006 [20] puts it, in the humanities corpora traditionally tend to be potentialities rather than realities: one *could* go and consult a certain document in some archive, but this may only rarely be done, and the corpus may thus have never been anything but an "intellectual object." + +Machine-readable digital corpora---that is, what we mean by corpora today---have brought about major changes. Most of the time, it is their practical advantages that are highlighted: they are easier to store, they are (at least potentially) accessible from anywhere at any time, and they can be processed automatically. This, in turn, enables us to apply new types of analysis and thus to ask and study new research questions. What tends to be overlooked, though, is the epistemological impact of machine-readable corpora in history. The notion of corpus in digital history (and in digital humanities in general) is heavily influenced by the notion of corpus in computational linguistics: a large but finite collection of digital texts. @Mayaffre2006 [20] hints at the epistemological impact when he notes that, on the one hand, digitization dematerializes the *text* in that it is lifted from its previous support, but on the other hand, materializes the *corpus* more rigorously than before. + +This is, of course, a precondition for more rigorous types of analysis, notably computational analyses, and---eventually---the construction of computational historical models. However, this raises a number of epistemological and methodological questions. In computational linguistics, a corpus is essentially considered a statistical sample of language. Historical corpora typically differ from linguistic corpora in their relation to the research objects, the research questions, and the expected research findings. They also differ in the way they are constructed.
+ +While there is much discussion of individual methods and their appropriateness---and many common definitions as well as a large part of the criticism of DH are related to these methods---there is surprisingly little theoretical discussion of corpora. In a typical DH paper (or project proposal), just a few words are said about the corpus that was used, and most of it tends to concern its size and composition (*n* items of class *X*, *m* items of class *Y*, and so on) and the technical aspects of its construction (e.g., how it was scraped), if the authors did not use an existing corpus. The methods (algorithms, tools, etc.) used and the results achieved (and their interpretation and visualization) are typically discussed extensively, though. + +Given the central role of corpora in digital history, I think we need to study them and the roles they play in order to avoid the production of research that is formally rigorous but historically meaningless (or even nonsensical). + +## Corpora as Models + +As @Granger1967 notes, the goal of any science (natural or other) is to build *coherent and effective models of the phenomena they study*. + +Thus, and as I have argued before [@Piotrowski2018c], a corpus should be considered a model in the sense of Leo Apostel, who asserted that "*any subject using a system A that is neither directly nor indirectly interacting with a system B to obtain information about the system B*, is using A as a model for B" [@Apostel1961, 36, emphasis in original]. Creating a corpus thus means constructing a model, and modelers consequently have to answer questions such as: What is it that I am trying to model? In what respects is the model a reduction of it? And for whom and for what purpose am I creating the model? + +These are not new questions: every time historians select sources, they construct models, even before any detailed analysis. However, machine-readable corpora are not only potentially much larger than any material collection of sources---which is already not inconsequential---but also have important epistemological consequences. The larger and the more "complete" a corpus is, the greater the danger of succumbing to an "implicit essentialism" [@Mothon2010, 19] and of mistaking the model for the original, a fallacy that can frequently be observed in the field of culturomics [@Michel2011], when arguments are being made on the basis of the Google Books Ngram Corpus. + +The same then goes for any analysis of a corpus: if the corpus is "true," so must be the results of the analysis; if there is no evidence of something in the corpus, it did not exist. This allure is even greater when the analysis is done automatically and in particular using opaque quantitative methods: as the computational analysis is assumed to be completely objective, there seems to be no reason to question the results---they merely need to be interpreted, which leads us to some kind of "digital positivism." To rephrase Fustel de Coulanges [@Monod1889, 278], "[Ne m'applaudissez pas, ce n'est pas moi qui vous parle ; ce sont les données qui parlent par mes courbes.]{lang="fr"}" ("Do not applaud me; it is not I who speak to you; it is the data that speak through my curves.") + +However, as @Korzybski1933 [58] famously remarked, "A map is *not* the territory it represents, but, if correct, it has a *similar structure* to the territory, which accounts for its usefulness." An analysis of a corpus will *always* yield results; the crucial question is whether these can tell us anything about the original phenomenon the corpus aims to model.
The essential point, then, is that corpora are not naturally occurring but intentionally constructed. A corpus is *already* a model and thus not epistemologically neutral. A good starting point for dealing with this seems to be Gaston Bachelard's notion of *phenomenotechnique* [@Bachelard1968]. + +## Corpora as Phenomenotechnical Devices + +Bachelard originally developed this notion, which treats scientific instruments as "materialized theories," as a way to study the epistemology of modern physics, which goes far beyond what is directly observable. The humanities also and even primarily deal with phenomena that are not directly observable, but only through artifacts, in particular texts. They have thus also always constructed the objects of their studies through, for example, the categorization and selection of sources and the hermeneutic postulation and affirmation of phenomena. + +However, only the praxis has been codified to some extent as "best practices," such as source criticism. Theories---or perhaps better: models and metamodels, as the term "theory" has a somewhat different meaning in the humanities than in the sciences---are not formalized and are only suggested by the (natural language) narrative. What history (and the humanities in general) traditionally do not have is something that corresponds to the scientific instrument. + +This changes with digitalization and datafication: phenomena are now constructed and modeled through data and code, and, as in the sciences, the computational model takes on the role of the instrument and "sits in the center of the epistemic ensemble" [@Rheinberger2005, 320]. Corpora are then, methodologically speaking, phenomenotechnical devices: they form the basis of, and influence, how we build, understand, and research higher-level concepts---which at the same time underlie the construction of the corpus. In short: a corpus produces the phenomenon to be studied. As a model, it has Stachowiak's three characteristics of models: the *characteristic of mapping*, the *characteristic of shortening*, and the *characteristic of pragmatical model-function* [@Stachowiak1973, 131--133]. Note also that while a model does not have all properties of its corresponding original (the characteristic of shortening), it has *abundant attributes* [@Stachowiak1973, 155], i.e., attributes that are not present in the original. + +Statistics provides us with means to formally describe and analyze a specific subclass of models that are able to represent originals that have particular properties. However, the phenomena studied by the humanities generally do not have these properties, and we thus still lack adequate formal methods to describe them. + +## Conclusion + +I have tried to outline some of the background and the motivation for the project *Towards Computational Historiographical Modeling: Corpora and Concepts*, which is part of a larger research program. + +So far, digital history (and digital humanities more generally) has largely contented itself with borrowing methods from other fields and has developed little methodology of its own. The focus on "methods and tools" represents a major obstacle to the construction of computational models that could help us to obtain new insights into *humanities* research questions rather than just automating primarily quantitative processing---which is, without doubt, useful, but inherently limited, given that the research questions are ultimately qualitative.
+ +Regardless of the application domain, digital humanities research tends to rely heavily on *corpora*, i.e., curated collections of texts, images, music, or other types of data. However, both the epistemological foundations---the underlying concepts---and the epistemological implications have so far been largely ignored. I have proposed to consider corpora as *phenomenotechnical devices* [@Bachelard1968], like scientific instruments: corpora are, on the one hand, models of the phenomenon under study; on the other hand, the phenomenon is *constructed* through the corpus. + +We therefore need to study corpora as models to answer questions such as: How do corpora model and produce phenomena? What are commonalities and differences between different types of corpora? How can corpora-as-models be formally described in order to take their properties into account for research that makes use of them? + +The overall goal of the project is to contribute to theory formation in digital history and digital humanities, and to help us move from project-specific, often ad hoc, solutions to particular problems to a more general understanding of the issues at stake. + +## Acknowledgements {#acknowledgements .unnumbered} + +This research was supported by the Swiss National Science Foundation (SNSF) under grant no. 105211_204305. diff --git a/submissions/457/references.bib b/submissions/457/references.bib new file mode 100644 index 0000000..11c4684 --- /dev/null +++ b/submissions/457/references.bib @@ -0,0 +1,175 @@ +@article{Acerbi2013, + author = {Acerbi, Alberto and Lampos, Vasileios and Garnett, Philip + and Bentley, R. Alexander}, + title = {The Expression of Emotions in 20th Century Books}, + journal = {PLoS ONE}, + volume = {8}, + number = {3}, + pages = {e59030}, + date = {2013}, + doi = {10.1371/journal.pone.0059030}, + issn = {1932-6203}, + langid = {en-US} +} +@inproceedings{Apostel1961, + author = {Apostel, Leo}, + editor = {Freudenthal, Hans}, + publisher = {Reidel}, + title = {Towards the Formal Study of Models in the Non-Formal + Sciences}, + booktitle = {The concept and the role of the model in mathematics and + natural and social sciences}, + pages = {1-37}, + date = {1961}, + address = {Dordrecht}, + doi = {10.1007/978-94-010-3667-2_1}, + isbn = {978-94-010-3669-6}, + langid = {en-US}, + annote = {The contributions of this volume first appeared as special + issue of Synthese (Volume 12, Issue 2-3, September 1960) + https://link.springer.com/journal/11229/12/2/page/1} +} +@book{Bachelard1968, + author = {Bachelard, Gaston}, + publisher = {Les Presses universitaires de France}, + title = {Le nouvel esprit scientifique}, + edition = {10}, + date = {1968}, + address = {Paris}, + langid = {fr-FR} +} +@book{Bachelard2020, + author = {Bachelard, Gaston}, + editor = {Bontems, Vincent}, + publisher = {Les Presses universitaires de France}, + title = {Le nouvel esprit scientifique}, + edition = {1\textsuperscript{re} édition critique}, + date = {2020}, + address = {Paris}, + langid = {fr-FR} +} +@book{Granger1967, + author = {Granger, Gilles-Gaston}, + publisher = {Aubier-Montaigne}, + title = {Pensée formelle et sciences de l’homme}, + edition = {Nouvelle éd. 
augmentée d’une préface}, + date = {1967}, + address = {Paris}, + langid = {fr-FR} +} +@book{Korzybski1933, + author = {Korzybski, Alfred}, + publisher = {International Non-Aristotelian Library Publishing + Company}, + title = {Science and Sanity: {An} Introduction to Non-Aristotelian + Systems and General Semantics}, + date = {1933}, + address = {Lancaster, PA}, + url = {https://n2t.net/ark:/13960/t6c261n93}, + langid = {en-US} +} +@article{Michel2011, + author = {Michel, Jean-Baptiste and Shen, Yuan K. and Aiden, Aviva P. + and Veres, Adrian and Gray, Matthew K. and The Google Books Team and + Pickett, Joseph P. and Hoiberg, Dale and Clancy, Dan and Norvig, + Peter and Orwant, Jon and Pinker, Steven and Nowak, Martin A. and + Aiden, Erez L.}, + publisher = {American Association for the Advancement of Science}, + title = {Quantitative Analysis of Culture Using Millions of Digitized + Books}, + journal = {Science}, + volume = {331}, + number = {6014}, + pages = {176-182}, + date = {2011-01-14}, + doi = {10.1126/science.1199644}, + issn = {1095-9203}, + langid = {en-US}, + abstract = {We constructed a corpus of digitized texts containing + about 4\% of all books ever printed. Analysis of this corpus enables + us to investigate cultural trends quantitatively. We survey the vast + terrain of ’culturomics,’ focusing on linguistic and cultural + phenomena that were reflected in the English language between 1800 + and 2000. We show how this approach can provide insights about + fields as diverse as lexicography, the evolution of grammar, + collective memory, the adoption of technology, the pursuit of fame, + censorship, and historical epidemiology. Culturomics extends the + boundaries of rigorous quantitative inquiry to a wide array of new + phenomena spanning the social sciences and the humanities.} +} +@article{Monod1889, + author = {Monod, Gabriel}, + title = {M. Fustel de Coulanges}, + journal = {Revue historique}, + volume = {42}, + number = {2}, + pages = {277-285}, + date = {1889}, + url = {https://www.jstor.org/stable/40938008}, + langid = {fr-FR} +} +@book{Mothon2010, + author = {Mothon, Bernard}, + publisher = {Archétype82}, + title = {{Modélisation et vérité}}, + date = {2010}, + address = {Paris}, + isbn = {2915973318}, + langid = {fr-FR}, + abstract = {Un des grands paradoxes de la modernité est que, pour la + plupart de nos contemporains, la philosophie de la connaissance soit + passée à côté d’un fait épistémologique fondamental, à savoir le + rôle du processus de modélisation mathématique dans l’intelligence + scientifique des mathématiques appliquées. 
Tout le propos de ce + livre est de montrer qu'une compréhension lucide et conséquente de + la modélisation mathématique débouche nécessairement sur une + nouvelle attitude face à toute connaissance fondée sur un discours.} +} +@article{Piotrowski2018c, + author = {Piotrowski, Michael}, + title = {Historical Models and Serial Sources}, + journal = {Journal of European Periodical Studies}, + volume = {4}, + number = {1}, + pages = {8-18}, + date = {2019}, + doi = {10.21825/jeps.v4i1.10226}, + langid = {en-US} +} +@article{Rheinberger2005, + author = {Rheinberger, Hans-Jörg}, + title = {{{Gaston}} {{Bachelard}} and the Notion of “Phenomenotechnique”}, + journal = {Perspectives on Science}, + volume = {13}, + number = {3}, + pages = {313-328}, + date = {2005}, + doi = {10.1162/106361405774288026}, + issn = {1530-9274}, + langid = {en-US} +} +@book{Stachowiak1973, + author = {Stachowiak, Herbert}, + publisher = {Springer}, + title = {Allgemeine Modelltheorie}, + date = {1973}, + address = {Wien, New York}, + isbn = {3-211-81106-0}, + langid = {de-DE} +} +@inproceedings{Mayaffre2006, + author = {Mayaffre, Damon}, + editor = {Rastier, François and Ballabriga, Michel}, + publisher = {CALS-CPST}, + title = {Philologie et/ou herméneutique numérique: nouveaux concepts + pour de nouvelles pratiques?}, + booktitle = {Corpus en lettres et sciences sociales: des documents + numériques à l’interprétation. Actes du XXVII\textsuperscript{e} + Colloque d’Albi “Langages et signification”}, + pages = {15-25}, + date = {2006}, + eventdate = {2006-07-10/2006-07-14}, + url = {https://hal.science/hal-00551477}, + langid = {fr-FR} +} diff --git a/submissions/459/_quarto.yml b/submissions/459/_quarto.yml new file mode 100644 index 0000000..ae6ece2 --- /dev/null +++ b/submissions/459/_quarto.yml @@ -0,0 +1,8 @@ +project: + type: manuscript + +manuscript: + article: index.qmd + +format: + html: default diff --git a/submissions/459/index.qmd b/submissions/459/index.qmd new file mode 100644 index 0000000..4934638 --- /dev/null +++ b/submissions/459/index.qmd @@ -0,0 +1,66 @@ +--- +submission_id: 459 +categories: 'Session 2A' +title: Data Literacy and the Role of Libraries +author: + - name: Catrina Langenegger + orcid: 0000-0001-8875-2730 + email: c.langenegger@unibas.ch + affiliations: + - University of Basel + - name: Johanna Schüpbach + orcid: 0000-0002-0905-2056 + email: johanna.schuepbach@unibas.ch + affiliations: + - University of Basel + +keywords: + - Data Literacy + - Academic Libraries + - Digital Humanities + - Experience Report + +abstract: | + Libraries are finding their place in the field of data literacy, embracing both the opportunities and the challenges of supporting students and researchers in the Digital Humanities. Key aspects of this development are research data management, repositories, libraries as suppliers of data sets, digitisation and more. Over the past few years, the library has undertaken steps to become actively involved in teaching and to facilitate the basics of working with digital sources. The talk shares three experience reports of such endeavours undertaken by subject librarians of the Digital Humanities Work Group (AG DH) at the University Library Basel (UB). + +date: 07-26-2024 + +--- + +## Introduction + +More and more, libraries are becoming important institutions when it comes to teaching data literacy and the basics of Digital Humanities (DH) tools and methods, especially to undergraduates or other people new to the subject matter.
The Digital Humanities Work Group (AG DH), consisting of a selection of subject librarians from the University Library Basel (UB), has developed various formats to introduce students to these topics and continues to build and expand upon the available teaching elements in order to assemble customised lesson or workshop packages as needed. The aim of this talk is to share our experiences with the planning and teaching of three different course formats. These classes and workshops play, on the one hand, an important part in making the library's (historical) holdings and datasets visible and available for digital research; on the other hand, they are a means to engage with students and (early stage) researchers and to impart skills in the area of working with data at an easily accessible level. +As of today, there have been three distinct formats in which the AG DH has introduced students to data literacy and working with digitised historical sources: a full semester course (research seminar) that the AG DH devised in collaboration with a professor of Jewish and General History; a 90-minute session on data literacy and working with subject-specific datasets within the larger frame of an existing semester course on information, data, and media literacy; and, last but not least, another 90-minute session within a research seminar in literary studies to provide a brief introduction to DH and how it can be incorporated into further research on the seminar topic. + +## Research Seminar/Semester Course + +To this end, the AG DH organised a semester course in close collaboration with Prof. Dr. phil. Erik Petry, with whom it created and then co-taught a curriculum introducing various DH tools and methods to be tried out using the UB's holdings on the topic of the first Zionist Congresses in Basel. The course was attended by MA students from the subjects of History, Jewish Studies and Digital Humanities. This research seminar was designed to provide an introduction to digital methods. +We divided our course into different phases. A first introduction to work organisation, data management and data literacy was followed by sessions that combined the basics of the topic with introductions to digital methods. We focussed on different forms of sources: images, maps and text, with one session being dedicated to each type. This meant we could offer introductions to a broad spectrum of DH tools and methods such as digital storytelling and IIIF, geomapping and working with GIS, and transcription, text analysis and topic modelling. As a transition to the third phase of the project, we organised a session in which we presented various sources either from the University Library or from other institutions – the Basel-Stadt State Archives and the Jewish Museum Switzerland. The overall aim of the course was to enable students to apply their knowledge directly. To this end, they developed small projects in which they researched source material using digital methods and were able to visualise the results of their work. In the third phase of the course, students were given time to work on their own projects. In a block event at the end of the semester, the groups presented their projects and the status of their work. We were able to see for ourselves the students’ exciting approaches and well-realised projects. +The course was also a good experience for us subject librarians.
Above all, we benefited from the broad knowledge in our team as well as from the opportunity to gain new insights and experiences in select areas of DH. We particularly appreciated the good collaboration with Prof. Dr. Petry, who treated us as equal partners and experts. Despite the positive experience, this format is not sustainable: the effort involved in creating an entire semester course exceeds the resources available for offering similar semester courses on a regular basis. Nevertheless, for this pilot project of the AG DH, the effort was justified because the course allowed us to make our holdings visible and to see them used in research.
+
+## Data Literacy – a Session Within an Existing IDM Semester Course
+
+For the second format, the AG DH was approached by the organisers of the regular IDM (“Informations-, Daten- & Medienkompetenz”) semester courses at the University Library Basel. These semester courses are offered for select subject areas to teach students basic information, data and media literacy skills tailored to their subject. The AG DH was asked to come up with two 90-minute sessions to introduce the students to the basics of data literacy. After talking through the requirements with the course lead, the AG DH decided to collaborate with the colleagues from the Open Science Team, who would cover the first session, dedicated to Research Data Management and a more general introduction to the subject matter. Building on that, the AG DH covers the second session, tailoring it to the requirements of the subject area in question (e.g. art history, sociology, cultural anthropology or economics). Rather than by the whole group, these sessions are mainly prepared and taught by a member of the AG DH whose own subject specialty is closest to (or even the same as) that of the course's audience. This means that not all AG DH members are involved all the time, which makes the format more time- and work-efficient. Slides are, of course, liberally copied, pasted and reused. This ensures that not everyone has to do all the work while at the same time guaranteeing that everyone in the group has access to all the information (which can then be adapted to the subject area). Of course, these slides are continuously edited and brought up to date so as to reflect changes in the field.
+
+The goals of the session on subject-specific data literacy are for the students to:
+
+* know the relevant sources for obtaining (research) data and/or corpora for their projects,
+* understand the specifics of working with data as they pertain to the subject in question,
+* be able to assemble subject-specific (reused or collected) datasets and work with them (i.e. analyse and visualise them), and
+* know the people and contacts at the University Library who can help them with their further studies/research.
+
+A big challenge for these sessions is, of course, the sheer extent of working with data. It is impossible to teach every method or tool the students might need for their projects. Particularly in subjects like social anthropology, where almost everything and anything can be seen and collected as data, this session works mainly as a very broad overview of what is possible. The students are given an entry point, links, examples and an understanding of the different kinds of data they might encounter – e.g. texts and linguistic data, statistical data, geodata, image and audio(visual) data – but are required to then work their own way into what they will need for their own projects.
+
+Because this 90-minute session is only just enough to give a brief introduction to and overview of what data is and how one could work with subject-specific data, it is important to provide the students with enough links and contact addresses where they can find further assistance, such as the subject librarian or the AG DH. However, because the target audience are always students of one specific subject area, it is also easier to tailor the session to that particular subject. (All subject areas may request a semester course from the IDM team/the organisers.)
+This format has been a very positive experience in terms of collaboration – not only with the department of the subject in question but also with the colleagues organising the IDM semester course and the Open Science team.
+
+## Introduction to DH for a Research Seminar in English Literary Studies
+
+Lastly, we are also able to prepare bespoke inputs within the framework of a regular class. In this example, the idea for a collaboration came about through an informal talk with Prof. Dr. Ina Habermann and her assistant MA Stefanie Heeg from the University of Basel’s English Department, while they were planning a research seminar on early modern travel writing. Since the UB holds some of the texts discussed in its collections, I suggested teaching a session at the library where the students could examine the original printed books and then discuss introductory aspects of DH by juxtaposing them with the digitised versions of the same texts. Using these examples, the aim of this 90-minute session was to give the students an introduction to DH, metadata and authority files (in particular the GND) and – drawing on material used for the IDM session on data literacy – to show them what they can do with these digitised texts and how to work with them. Even though this session took place within the frame of a class in literary studies, the subject matter is closely related to historical research.
+While this session was also very dense in terms of content, hosting it at the UB with the books from the historical holdings ready to be examined in the classroom added a welcome touch of interactivity to the class. At the same time, preparing and teaching this session fulfils two intentions of the AG DH: first, to strengthen ties with the departments and let the researchers and teaching staff know that the UB has the competence and the people to help with and support basic DH needs; second, to highlight and showcase our (digitised) collections and holdings, and to familiarise students and researchers with the possibilities of working with them. In addition, the UB could present itself as a location that combines the historical dimension – the original texts – with a centre of competence in digital methods.
+
+## Conclusion
+
+These three different formats highlight some of the opportunities as well as the challenges the AG DH faces with regard to its work with and for students and researchers, and the experiences and feedback from these different formats shed important light on the role of the UB in the task of teaching skills in this field.
+Generally, it can be said that active involvement by the AG DH is needed to get into the teaching spaces, either by talking directly with professors and teaching staff and offering to contribute to their planned classes, or by getting involved in existing course formats like the IDM semester courses.
+
+It can thus be shown that libraries, in their function as reliable and long-lasting institutions, play a key role both in imparting knowledge and skills and as guardians of cultural property. We also want to highlight aspects that can still be improved. Above all, this concerns the awareness and attractiveness of such services as well as the cooperation with researchers and teachers from all subject areas that work digitally, and from history in particular.
+The questions that drive the AG DH are many and varied: What are the needs of researchers and students? What do you need from your university library? Where do you see possibilities for the library to support and raise awareness of working with historical documents?
diff --git a/submissions/465/_quarto.yml b/submissions/465/_quarto.yml
new file mode 100644
index 0000000..ae6ece2
--- /dev/null
+++ b/submissions/465/_quarto.yml
@@ -0,0 +1,8 @@
+project:
+  type: manuscript
+
+manuscript:
+  article: index.qmd
+
+format:
+  html: default
diff --git a/submissions/465/index.qmd b/submissions/465/index.qmd
new file mode 100644
index 0000000..5cfa09d
--- /dev/null
+++ b/submissions/465/index.qmd
@@ -0,0 +1,71 @@
+---
+submission_id: 465
+categories: 'Session 4B'
+title: On the Historiographic Authority of Machine Learning Systems
+author:
+  - name: Dominic Weber
+    orcid: 0000-0002-9265-3388
+    email: dominic.weber@unibe.ch
+    affiliations:
+      - University of Bern
+      - University of Basel
+keywords:
+  - Machine Learning
+  - Methodology
+  - Epistemology
+  - Facticity
+  - Evaluation
+abstract: |
+  The integration of Machine Learning in historical research has significantly altered the approach to sources, data and workflows. Historians now use Machine Learning applications such as Handwritten Text Recognition (HTR) and Natural Language Processing (NLP) to manage large corpora, enhancing research capabilities but also introducing challenges in combining machine-generated and manually created data without propagating errors. The reliability of machine-generated data is a central concern, paralleling issues found in traditional transcription and edition practices. The concept of factoids highlights the fragmentation and recontextualization of data in digital history. Evaluating Machine Learning systems, particularly through tools like CERberus for HTR, emphasises the need for qualitative error analysis to support historical research. The article proposes three strategic directions for digital history: defining clear needs to manage data pragmatically, enhancing transparency to improve data reuse and interoperability, and advancing data criticism and hermeneutics. These directions aim to refine the methods and practices of digital historians, ensuring that Machine Learning outputs are critically assessed and effectively integrated into historical scholarship.
+key-points:
+  - Integrating Machine Learning output in historical research requires meticulous evaluation.
+  - Factoids can provide a technique for the multifaceted representation of data points.
+  - Digital History requires new hermeneutical tools suitable for digital data and workflows.
+date: 07-23-2024
+bibliography: references.bib
+---
+
+## Introduction
+Over the last few years, Machine Learning applications have become increasingly popular in the humanities and social sciences in general, and therefore also in history. Handwritten Text Recognition (HTR) and various tasks of Natural Language Processing (NLP) are now commonly employed in a plethora of research projects of various sizes.
Even for PhD projects it is now feasible to research large corpora like serial legal sources, which could not be studied entirely by hand. This acceleration of research processes implies fundamental changes to how we think about sources, data, research and workflows.
+
+In history, Machine Learning systems are typically used to speed up the production of research data. As the output of these applications is never entirely accurate or correct, this raises the question of how historians can use machine-generated data together with manually created data without propagating errors and uncertainties to downstream tasks and investigations.
+
+## Facticity
+The question of the combined usability of machine-generated and manually generated data is also a question of the reliability or facticity of data. Data generated by humans are not necessarily complete and correct either, as they are a product of human perception. For example, creating transcriptions depends on the respective transcription guidelines and individual text understanding, which can lead to errors. However, we consider transcriptions by experts as correct and use them for historical research. This issue is even more evident in the field of editions. Even very old editions with methodological challenges are valued for their core content. Errors may exist, but they are largely accepted due to the expertise of the editors, treating the output as authorised. This pragmatic approach enables efficient historical research. Historians trust their ability to detect and correct errors during research.
+
+Francesco Beretta represents data, information, and knowledge as a pyramid: data form the base, historical information (created from data through conceptual models and critical methods) forms the middle, and historical knowledge (produced from historical information through theories, statistical models and heuristics) forms the top [@berettaDonneesOuvertesLiees2023, fig. 3]. Interestingly, however, he makes an important distinction regarding digital data: "Digital data does not belong to the epistemic layer of data, but to the layer of information, of which they are the information technical carrier" [Translation: DW. Original Text: "[L]es données numériques n’appartiennent pas à la strate épistémique des données, mais bien à celle de l’information dont elles constituent le support informatique.", @berettaDonneesOuvertesLiees2023, p. 18]
+
+Andreas Fickers adds that digitization transforms the nature of sources, affecting the concept of the original [@fickersUpdateFuerHermeneutik2020, p. 162]. Sources are preprocessed using HTR/OCR and various NLP strategies. The resulting digital data are already processed historical information. This shift from analog to digital means that what we extract from sources is not just given but constructed [@berettaDonneesOuvertesLiees2023, p. 26]. Analog historical research, which relies on handwritten archival documents, also depends on transcriptions or editions to conduct research pragmatically; and here, too, data becomes information. The main difference is that with the generation of digital data, the (often linear) structure of sources is typically dissolved in favour of a highly fragmented and hyperconnected structure [For hyperconnectivity see @fickersWhatDoesHistory2022, pp. 51-54; For the underlying concept of hypertextual systems see @landowHypertextCriticalTheory2006, pp.
53-58; for a more extensive discussion of digital representations of fragmented texts see @weberKlassifizierenVerknupfenAbbilden2021]. This is partly due to the way sources are processed into historical information using digital tools and methods, but it is inherently connected with issues of storing, retrieving, and presenting digital data -- in a very technical sense.
+
+The concept of *factoids*, introduced by Michele Pasin and John Bradley, is central to this argument. They define factoids as pieces of information about one or more persons in a primary source. Those factoids are then represented in a semantic network of subject-predicate-object triples [@pasinFactoidbasedProsopographyComputer2015, pp. 89-90]. This involves extracting statements from their original context, placing them in a new context, and outsourcing verification to later steps. Therefore, factoids can be contradictory. Francesco Beretta applies this idea to historical science, viewing the aggregation of factoids as a process aiming for the best possible approximation of facticity [@berettaDonneesOuvertesLiees2023, p. 20]. The challenge is to verify machine output sufficiently for historical research and to assess the usefulness of the factoid concept. Evaluating machine learning models and their outputs is crucial for this.
+
+## Qualifying Error Rates
+Evaluating the output of a machine learning system is not trivial. Models can be evaluated using various calculated scores, which is done continuously during the training process. However, these performance metrics are statistical measures that generally refer to the model and are based on a set of test data. Even the probabilities output by machine learning systems when applied to new data are purely computational figures, only partially suitable for quality assurance. This verification is further complicated by the potentially vast scale of the output. Therefore, historical science must find a pragmatic way to translate statistical evaluation metrics into qualitative statements and identify systematic sources of error.
+
+In automatic handwriting recognition, models are typically evaluated using the character error rate (CER) or word error rate (WER). These metrics only tell us the percentage of characters or words incorrectly recognised compared to a ground truth. They do not reveal the distribution of these errors, which is important when comparing automatic and manual transcriptions. For detailed HTR model evaluation, CERberus is being developed [@haverals2023cerberus]. This tool compares ground truth with HTR output from the same source. Instead of calculating just the character error rate, it breaks down the differences further. Errors are categorised into missing, excess, and incorrectly recognised characters. Additionally, a separate CER is calculated for all characters and Unicode blocks in the text, aggregated into confusion statistics that identify the most frequently confused characters. Confusion plots are generated to show the most common errors for each character. These metrics do not pinpoint specific errors but provide a more precise analysis of the model's behaviour. CERberus cannot evaluate entirely new HTR output without comparison text but is a valuable tool for Digital History, revealing which character forms are often confused and guiding model improvement or post-processing strategies.
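+
+To make the baseline metric concrete: a plain CER is simply the edit distance between the ground truth and the recognised text, normalised by the length of the ground truth. The following minimal Python sketch illustrates this general formula (an illustration only, not code from CERberus):
+
+```python
+def levenshtein(a: str, b: str) -> int:
+    """Minimum number of single-character edits turning a into b."""
+    prev = list(range(len(b) + 1))
+    for i, ca in enumerate(a, start=1):
+        curr = [i]
+        for j, cb in enumerate(b, start=1):
+            curr.append(min(prev[j] + 1,                 # deletion
+                            curr[j - 1] + 1,             # insertion
+                            prev[j - 1] + (ca != cb)))   # substitution
+        prev = curr
+    return prev[-1]
+
+def cer(ground_truth: str, prediction: str) -> float:
+    """Character error rate: edit distance normalised by reference length."""
+    return levenshtein(ground_truth, prediction) / len(ground_truth)
+
+print(round(cer("Stadtgericht", "Stadtgcricht"), 3))  # one substitution -> 0.083
+```
+
+Such an aggregate figure is exactly the starting point that CERberus then decomposes by error type and character.
+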
+In other machine learning applications, such as named entity recognition (NER), different metrics are important, requiring detailed error source analysis. Evaluating NER is more complex than HTR because it involves categorising longer text sections based on context. Precision (how many recognised positives are true positives) and recall (how many actual positives are recognised) are combined into the F1-score to indicate model performance. Fu et al. proposed evaluating NER with a set of eight annotation attributes influencing model performance. These attributes are divided into local properties (entity length, sentence length, unknown word density, entity density) and aggregated attributes (annotation consistency and frequency at the token and entity levels) [@fuInterpretableMultidatasetEvaluation2020, p. 3]. Buckets of data points on which a model performs particularly well or poorly are created and evaluated separately [@fuInterpretableMultidatasetEvaluation2020, p. 1]. This analysis identifies conditions affecting model performance, guiding further training steps and dataset expansion.
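+
+As a schematic illustration of this bucketing principle (with invented toy spans, not Fu et al.'s actual tooling, which is published at https://github.com/neulab/InterpretEval), precision, recall and F1 can be computed separately per bucket of one local attribute, here entity length:
+
+```python
+from collections import defaultdict
+
+# Invented toy data: gold and predicted entity spans per sentence,
+# each entity given as a (start, end, label) triple.
+gold = [{(0, 2, "PER"), (5, 6, "LOC")}, {(1, 4, "ORG")}]
+pred = [{(0, 2, "PER"), (5, 6, "ORG")}, {(1, 4, "ORG")}]
+
+def f1(tp: int, fp: int, fn: int) -> float:
+    prec = tp / (tp + fp) if tp + fp else 0.0
+    rec = tp / (tp + fn) if tp + fn else 0.0
+    return 2 * prec * rec / (prec + rec) if prec + rec else 0.0
+
+def bucket(entity) -> str:
+    """One local attribute from Fu et al.: entity length in tokens."""
+    start, end, _ = entity
+    return "short" if end - start <= 2 else "long"
+
+counts = defaultdict(lambda: [0, 0, 0])  # bucket -> [tp, fp, fn]
+for g, p in zip(gold, pred):
+    for ent in p & g:
+        counts[bucket(ent)][0] += 1   # true positive
+    for ent in p - g:
+        counts[bucket(ent)][1] += 1   # false positive
+    for ent in g - p:
+        counts[bucket(ent)][2] += 1   # false negative
+
+for name, (tp, fp, fn) in counts.items():
+    print(name, round(f1(tp, fp, fn), 2))  # short 0.5 / long 1.0
+```
+
+Reading per-bucket scores side by side makes visible, for instance, that a model handles short entities worse than long ones: the kind of qualitative statement an aggregate F1-score hides.
+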
The issue of reusability and interoperability of datasets from the first strategic direction can be at least partially mitigated by transparency. + +As Hodel et al. convincingly argued, it is extremely sensible and desirable for projects using HTR to publish their training data. This allows for gradual development towards models that can generalise as broadly as possible [@hodelGeneralModelsHandwritten2021, pp. 7-8]. If a CERberus error analysis is conducted for HTR that goes beyond the mere CER, it makes sense to publish this alongside the data and the model. With this information, it is easier to assess whether it might be worthwhile to include this dataset in one's own training material. Similarly, when NER models are published, an extended evaluation according to Fu et al. helps to better assess the performance of a model for one's own dataset. + +Pasin and Bradley, in their prosopographic graph database, indicate the provenance of each data point and who captured it [@pasinFactoidbasedProsopographyComputer2015, 91-92]. This principle could also be interesting for Digital History, by indicating in the metadata whether published research data was generated manually or by a machine, ideally with information about the model used and the annotating person for manually generated data. Models provide a confidence estimate with their prediction, indicating how likely the prediction is correct. The most probable prediction would be treated as the first factoid. The second or even third most probable prediction from the systems cloud provide additional factoids that can be incorporate into the source representation. These additional pieces of information can support the further research process by allowing inconsistencies and errors to be better assessed and balanced. + +### Direction 3: Data Criticism and Data Hermeneutics +The shift to digital history requires an evaluation and adjustment of our hermeneutic methods. This ongoing discourse is not new, and Torsten Hiltmann has identified three broad directions: first, the debate about extending source criticism to data, algorithms, and interfaces; second, the call for computer-assisted methods to support text understanding; and third, the theorization of data hermeneutics, or the "understanding of and with data" [@hiltmann2024, p. 208]. + +Even though these discourse strands cannot be sharply separated, the focus here is primarily on data criticism and hermeneutics. The former can fundamentally orient itself towards classical source criticism. Since digital data is not given but constructed, it is crucial to discuss by whom, for what purpose, and how data was generated. This is no easy task, especially when datasets are poorly documented. Therefore, the call for data and model criticism is closely linked to the plea for more transparency in data and model publication. + +In the move towards data hermeneutics, a thorough rethinking of the factoid principle can be fruitful. If, as suggested above, the second or even third most likely predictions of a model are included as factoids in the publication of research data, this opens up additional perspectives on the sources underlying the data. From these new standpoints, the data -- and thus the sources -- can be analyzed and understood more thoroughly. Additionally, this allows for a more informed critique of the data, and extensive transparency also mitigates the "black box" problem of interpretation described by Silke Schwandt [@schwandtOpeningBlackBox2022]. 
If we more precisely describe and reflect on how we generate digital data from sources as historians, we will find that our methods are algorithmic [@schwandtOpeningBlackBox2022, pp. 81-82]. This insight can also support the understanding of how machine learning applications work. Data hermeneutics thus requires both a critical reflection of our methods and a more transparent approach to data and metadata. \ No newline at end of file diff --git a/submissions/465/references.bib b/submissions/465/references.bib new file mode 100644 index 0000000..3ebb286 --- /dev/null +++ b/submissions/465/references.bib @@ -0,0 +1,166 @@ +@article{berettaDonneesOuvertesLiees2023, + title = {{Donn{\'e}es ouvertes li{\'e}es et recherche historique : un changement de paradigme}}, + shorttitle = {{Donn{\'e}es ouvertes li{\'e}es et recherche historique}}, + author = {Beretta, Francesco}, + year = {2023}, + month = jul, + journal = {Humanit{\'e}s num{\'e}riques}, + number = {7}, + publisher = {Humanistica}, + issn = {2736-2337}, + doi = {10.4000/revuehn.3349}, + urldate = {2023-08-18}, + abstract = {Dans le contexte de la transition num{\'e}rique, le Web s{\'e}mantique et les donn{\'e}es ouvertes li{\'e}es (linked open data [LOD], en anglais) jouent un r{\^o}le de plus en plus central, car ils permettent de construire des <<~graphes d'information~>> (knowledge graphs, en anglais) reliant l'ensemble des ressources du Web. Ce ph{\'e}nom{\`e}ne interroge les sciences historiques et soul{\`e}ve la question d'un changement de paradigme. Apr{\`e}s avoir pr{\'e}cis{\'e} ce qu'il faut entendre par <<~donn{\'e}es~>>, l'article analyse la place qu'elles occupent dans le processus de production du savoir. Il pr{\'e}sente les principales composantes du changement de paradigme, en particulier le potentiel des LOD et d'une s{\'e}mantique robuste en tant que v{\'e}hicules d'une information factuelle de qualit{\'e}, intelligible et r{\'e}utilisable. S'ensuit une pr{\'e}sentation des projets d'infrastructure r{\'e}alis{\'e}s au sein du Laboratoire de recherche historique Rh{\^o}ne-Alpes (Larhra)~: symogih.org, ontome.net, geovistory.org. Leur but est de faciliter la transition num{\'e}rique gr{\^a}ce {\`a} un outillage construit en coh{\'e}rence avec l'{\'e}pist{\'e}mologie des sciences historiques et de contribuer {\`a} la r{\'e}alisation d'un <<~graphe d'information~>> disciplinaire.}, + copyright = {https://creativecommons.org/licenses/by/4.0/}, + langid = {french}, + keywords = {epistemologie,histoire,interoperabilite,modelisation des connaissances,Web semantique}, + file = {C:\Users\Dominic\Zotero\storage\9LSN6CAV\Beretta - 2023 - Données ouvertes liées et recherche historique u.pdf} +} + +@article{fickersUpdateFuerHermeneutik2020, + title = {{Update f{\"u}r die Hermeneutik. 
Geschichtswissenschaft auf dem Weg zur digitalen Forensik?}}, + author = {Fickers, Andreas}, + year = {2020}, + journal = {Zeithistorische Forschungen}, + volume = {1}, + pages = {157--168}, + publisher = {ZZF -- Centre for Contemporary History: Zeithistorische Forschungen}, + doi = {10.14765/ZZF.DOK-1765}, + urldate = {2020-07-10}, + abstract = {>>[{\dots}] wenn {\guilsinglright}die Quelle{\guilsinglleft} die Reliquie historischen Arbeitens ist -- nicht nur {\"U}berbleibsel, sondern auch Objekt wissenschaftlicher Verehrung --, dann w{\"a}re analog {\guilsinglright}das Archiv{\guilsinglleft} die Kirche der Geschichtswissenschaft, in der die heiligen Handlungen des Suchens, Findens, Entdeckens und Erforschens vollzogen werden.<< Achim Landwehr wirft in seinem geschichtstheoretischen Essay den Historikern ihren >>Quellenglauben<< vor -- diese Kritik lie{\ss}e sich im digitalen Zeitalter leicht auf die Heilsversprechen der Apostel der >>Big Data Revolution<< {\"u}bertragen. Zwar regen sich mittlerweile vermehrt Stimmen, die den >>Wahnwitz<< der digitalen Utopie in Frage stellen, doch wird der {\"o}ffentliche Diskurs weiterhin von jener Revolutionsrhetorik dominiert, die standardm{\"a}{\ss}ig als Begleitmusik neuer Technologien ert{\"o}nt. Statt in der intellektuell wenig fruchtbaren Dichotomie von Gegnern und Bef{\"u}rwortern, >>First Movers<< und Ignoranten zu verharren, welche die Landschaft der >>Digital Humanities<< ein wenig {\"u}berspitzt auch heute noch kennzeichnet, ist das Ziel dieses Beitrages eine praxeologische Reflexion, die den Einfluss von digitalen Infrastrukturen, digitalen Werkzeugen und digitalen >>Quellen<< auf die Praxis historischen Arbeitens zeigen m{\"o}chte. Ausgehend von der These, dass ebenjene digitalen Infrastrukturen, Werkzeuge und >>Quellen<< heute einen zentralen Einfluss darauf haben, wie wir Geschichte denken, erforschen und erz{\"a}hlen, pl{\"a}diert der Beitrag f{\"u}r ein >>Update<< der klassischen Hermeneutik in der Geschichtswissenschaft. Die kritische Reflexion {\"u}ber die konstitutive Rolle des Digitalen in der Konstruktion und Vermittlung historischen Wissens ist nicht nur eine Frage epistemologischer Dringlichkeit, sondern zentraler Bestandteil der Selbstverst{\"a}ndigung eines Faches, dessen Anspruch als Wissenschaft sich auf die Methoden der Quellenkritik gr{\"u}ndet.}, + copyright = {ZZF - Clio Lizenz}, + langid = {ngerman}, + file = {C:\Users\Dominic\Zotero\storage\ST3KN5IK\Fickers - 2020 - Update für die Hermeneutik. 
Geschichtswissenschaft.pdf} +} + +@incollection{fickersWhatDoesHistory2022, + title = {{What the D does to history: Das digitale Zeitalter als neues historisches Zeitregime?}}, + shorttitle = {{What the D does to history}}, + booktitle = {{Digital History: Konzepte, Methoden und Kritiken Digitaler Geschichtswissenschaft}}, + author = {Fickers, Andreas}, + editor = {D{\"o}ring, Karoline Dominika and Haas, Stefan and K{\"o}nig, Mareike and Wettlaufer, J{\"o}rg}, + year = {2022}, + month = sep, + pages = {45--64}, + publisher = {De Gruyter Oldenbourg}, + doi = {10.1515/9783110757101-003}, + urldate = {2024-06-29}, + abstract = {What the D does to history was published in Digital History on page 45.}, + copyright = {De Gruyter expressly reserves the right to use all content for commercial text and data mining within the meaning of Section 44b of the German Copyright Act.}, + isbn = {978-3-11-075710-1}, + langid = {ngerman}, + file = {C:\Users\Dominic\Zotero\storage\MXHFWKAP\Fickers - 2022 - What the D does to history Das digitale Zeitalter.pdf} +} + +@misc{fuInterpretableMultidatasetEvaluation2020, + title = {Interpretable {{Multi-dataset Evaluation}} for {{Named Entity Recognition}}}, + author = {Fu, Jinlan and Liu, Pengfei and Neubig, Graham}, + year = {2020}, + month = dec, + number = {arXiv:2011.06854}, + eprint = {2011.06854}, + primaryclass = {cs}, + publisher = {arXiv}, + doi = {10.48550/arXiv.2011.06854}, + urldate = {2024-06-04}, + abstract = {With the proliferation of models for natural language processing tasks, it is even harder to understand the differences between models and their relative merits. Simply looking at differences between holistic metrics such as accuracy, BLEU, or F1 does not tell us why or how particular methods perform differently and how diverse datasets influence the model design choices. In this paper, we present a general methodology for interpretable evaluation for the named entity recognition (NER) task. The proposed evaluation method enables us to interpret the differences in models and datasets, as well as the interplay between them, identifying the strengths and weaknesses of current systems. By making our analysis tool available, we make it easy for future researchers to run similar analyses and drive progress in this area: https://github.com/neulab/InterpretEval.}, + archiveprefix = {arXiv}, + keywords = {Computer Science - Computation and Language}, + file = {C\:\\Users\\Dominic\\Zotero\\storage\\F8KPHWFH\\Fu et al. 
- 2020 - Interpretable Multi-dataset Evaluation for Named E.pdf;C\:\\Users\\Dominic\\Zotero\\storage\\6GH29GF2\\2011.html} +} + +@misc{haverals2023cerberus, + title = {{{CERberus}}: Guardian against Character Errors}, + author = {Haverals, Wouter}, + year = {2023}, + version = {1.0}, + url = {https://github.com/WHaverals/CERberus} +} + +@incollection{hiltmann2024, + title = {{Hermeneutik in Zeiten der KI: Large Language Models als hermeneutische Instrumente in den Geschichtswissenschaften}}, + shorttitle = {{Hermeneutik in Zeiten der KI}}, + booktitle = {{KI:Text}}, + author = {Hiltmann, Torsten}, + year = {2024}, + month = jan, + pages = {201--232}, + publisher = {De Gruyter}, + doi = {10.1515/9783111351490-014}, + urldate = {2024-03-18}, + abstract = {Das Kapitel Hermeneutik in Zeiten der KI erschien in KI:Text auf Seite 201.}, + copyright = {De Gruyter expressly reserves the right to use all content for commercial text and data mining within the meaning of Section 44b of the German Copyright Act.}, + isbn = {978-3-11-135149-0}, + langid = {ngerman}, + keywords = {Hermeneutik,Large Language Models,Transformer}, + file = {C:\Users\Dominic\Zotero\storage\RYFG5LMY\Hiltmann_2024_Hermeneutik in Zeiten der KI.pdf} +} + +@article{hodelGeneralModelsHandwritten2021, + title = {General {{Models}} for {{Handwritten Text Recognition}}: {{Feasibility}} and {{State-of-the Art}}. {{German Kurrent}} as an {{Example}}}, + shorttitle = {General {{Models}} for {{Handwritten Text Recognition}}}, + author = {Hodel, Tobias and Schoch, David and Schneider, Christa and Purcell, Jake}, + year = {2021}, + journal = {Journal of Open Humanities Data}, + volume = {7}, + doi = {10.5334/johd.46}, + urldate = {2021-08-01}, + file = {C:\Users\Dominic\Zotero\storage\QWQP2TWK\Hodel et al. - 2021 - General Models for Handwritten Text Recognition F.pdf} +} + +@book{landowHypertextCriticalTheory2006, + title = {Hypertext 3.0: {{Critical Theory}} and {{New Media}} in an {{Era}} of {{Globalization}}}, + shorttitle = {Hypertext 3.0}, + author = {Landow, George P.}, + year = {2006}, + edition = {3. Auflage}, + address = {Baltimore} +} + +@article{pasinFactoidbasedProsopographyComputer2015, + title = {Factoid-Based Prosopography and Computer Ontologies: Towards an Integrated Approach}, + shorttitle = {Factoid-Based Prosopography and Computer Ontologies}, + author = {Pasin, Michele and Bradley, John}, + year = {2015}, + month = apr, + journal = {Digital Scholarship in the Humanities}, + volume = {30}, + number = {1}, + pages = {86--97}, + issn = {2055-7671}, + doi = {10.1093/llc/fqt037}, + urldate = {2023-11-13}, + abstract = {Structured Prosopography provides a formal model for representing prosopography: a branch of historical research that traditionally has focused on the identification of people that appear in historical sources. Since the 1990s, KCL's Department of Digital Humanities has been involved in the development of structured prosopographical databases using a general `factoid-oriented' model of structure that links people to the information about them via spots in primary sources that assert that information. Recent developments, particularly the World Wide Web, and its related technologies around the Semantic Web, have promoted the possibility to both interconnecting dispersed data, and allowing it to be queried semantically. 
To the purpose of making available our prosopographical databases on the Semantic Web, in this article we review the principles behind our established factoid-based model and reformulate it using a more interoperable approach, based on knowledge representation principles and formal ontologies. In particular, we are going to focus primarily on a high-level semantic analysis of the factoid notion, on its relation to other cultural heritage standards such as CIDOC-CRM, and on the modularity and extensibility of the proposed solutions.}, + file = {C\:\\Users\\Dominic\\Zotero\\storage\\9U2DEIKJ\\Pasin and Bradley - 2015 - Factoid-based prosopography and computer ontologie.pdf;C\:\\Users\\Dominic\\Zotero\\storage\\963NJHG4\\352888.html} +} + +@article{schwandtOpeningBlackBox2022, + title = {Opening the {{Black Box}} of {{Interpretation}}: {{Digital History Practices}} as {{Models}} of {{Knowledge}}}, + shorttitle = {Opening the {{Black Box}} of {{Interpretation}}}, + author = {Schwandt, Silke}, + year = {2022}, + journal = {History and Theory}, + volume = {61}, + number = {4}, + pages = {77--85}, + issn = {1468-2303}, + doi = {10.1111/hith.12281}, + urldate = {2024-06-26}, + abstract = {Digital history is more than just the implementation of algorithmic and other data practices in the practice of history writing. It places our discipline under a microscope and enables us to focus in on what history writing is in the first place: writing about the past under specific social and societal conditions. This article argues for a closer look at the traditions of history writing in order to understand its principles and to determine what the digital condition contributes to historiography. Does the work of historians actually change in principle, or does digital history instead reflect the digital condition under which we operate? The article begins with a reflection on the works of Wilhelm Dilthey and Michel de Certeau to discuss how the society in which the historian writes influences the practices of interpretation. The article then presents what can be understood as the digital condition of our present societies and shows how algorithms function as ``black boxes'' that influence our social interactions, communication, and understanding of the world. The article's third part brings together the earlier discussions of practices of history writing and the digital condition in order to examine the role of modeling for knowledge production in the sciences and the humanities. The closing argument then focuses on the use of visualizations in digital history as an example of the operational use of models of knowledge in opening the ``black box'' of interpretation.}, + copyright = {{\copyright} 2022 The Authors. History and Theory published by Wiley Periodicals LLC on behalf of Wesleyan University.}, + langid = {english}, + keywords = {algorithms,digital condition,digital history,historiography,history of science,interpretation,modeling,research practices}, + file = {C\:\\Users\\Dominic\\Zotero\\storage\\V9EAKFYB\\Schwandt - 2022 - Opening the Black Box of Interpretation Digital H.pdf;C\:\\Users\\Dominic\\Zotero\\storage\\ZXB5HPDI\\hith.html} +} + +@mastersthesis{weberKlassifizierenVerknupfenAbbilden2021, + title = {Klassifizieren -- {{Verkn{\"u}pfen}} -- {{Abbilden}}. 
{{Herausforderungen}} Der Digitalen {{Repr{\"a}sentation}} Hypertextueller {{Systeme}} Am {{Beispiel}} Des {{Klingentaler Jahrzeitenbuchs H}}}, + shorttitle = {Klassifizieren}, + author = {Weber, Dominic}, + year = {2021}, + address = {Basel}, + url = {https://github.com/DominicWeber/jahrzeitenbuch-h}, + urldate = {2023-02-25}, + copyright = {All rights reserved}, + school = {University of Basel} +} diff --git a/submissions/468/_quarto.yml b/submissions/468/_quarto.yml new file mode 100644 index 0000000..ae6ece2 --- /dev/null +++ b/submissions/468/_quarto.yml @@ -0,0 +1,8 @@ +project: + type: manuscript + +manuscript: + article: index.qmd + +format: + html: default diff --git a/submissions/468/images/Figure1.jpg b/submissions/468/images/Figure1.jpg new file mode 100644 index 0000000..47bcf4d Binary files /dev/null and b/submissions/468/images/Figure1.jpg differ diff --git a/submissions/468/images/Figure2.jpg b/submissions/468/images/Figure2.jpg new file mode 100644 index 0000000..815fff1 Binary files /dev/null and b/submissions/468/images/Figure2.jpg differ diff --git a/submissions/468/images/Figure3.png b/submissions/468/images/Figure3.png new file mode 100644 index 0000000..53f146e Binary files /dev/null and b/submissions/468/images/Figure3.png differ diff --git a/submissions/468/images/Figure4.jpg b/submissions/468/images/Figure4.jpg new file mode 100644 index 0000000..9106544 Binary files /dev/null and b/submissions/468/images/Figure4.jpg differ diff --git a/submissions/468/index.qmd b/submissions/468/index.qmd new file mode 100644 index 0000000..9b0432b --- /dev/null +++ b/submissions/468/index.qmd @@ -0,0 +1,205 @@ +--- +submission_id: 468 +categories: 'Session 4A' +title: Films as sources and as means of communication for knowledge gained from historical research +author: + - name: Peter Moser + orcid: 0000-0001-9132-6871 + email: peter.moser@agrararchiv.ch + affiliations: + - Archives of Rural History + - name: Andreas Wigger + email: andreas.wigger@agrararchiv.ch + affiliations: + - Archives of Rural History +keywords: + - Rural History + - Agricultural Films + - Audiovisual Media + - Film History +date: 08-16-2024 +--- + +## Introduction + +Digital tools like the online portal and the *Video Essays in Rural History* series of the Archives of +Rural History (ARH) and the European Rural History Film Association (ERHFA) have greatly +facilitated the use of films as sources and the publication of audiovisual media as means of +communication. This significantly enhances the source base of historical studies of the 20^th^ century +and therefore enables scholars to include new perspectives in their research. It furthermore enables +researchers to reach new audiences by communicating the results of their studies in audiovisual +formats. + +This presentation will first introduce the relevance of films in rural history and the role that the +agricultural sector played in film history. It will then present the research infrastructure of the +Archives of Rural History and the European Rural History Film Association. The presentation +then concludes with reflections on the use of films as sources and means of communication in historical studies. + +## Agriculture in Films – Films in Agriculture + +The agricultural sector was one of the pioneers when it came to producing moving pictures. Film +production outside the United States really started after World War I. 
The films made about rural Europe were used by organisations for educational purposes as well as for advertising products and for teaching the rural population new values and techniques. While in France the government funded a rural cinema campaign in the interwar period, in Switzerland it was mainly the agricultural organisations (often in cooperation with state institutions) that promoted film as a medium of communication. Women farmers, too, used the new medium to present their work on the farms from their own perspective. A crucial period in the development of rural film production was the 1960s, when significant changes took place both in the structures and in the actors involved. Up to the 1960s, agricultural films were almost exclusively so-called commercial or, more precisely, commissioned films. These films were commissioned by state departments, agricultural organisations or scientific institutions for specific purposes – but they were then often used in a variety of contexts. The producers were normally film production companies that also made feature films for the cinema. Indeed, most of them could not have survived on the risky feature-film business alone if they had not had a halfway steady income from their commercial activities, that is: producing commissioned films. Quite often these commissioned films – whether agricultural or otherwise – were shown as supporting films (Vorfilme) immediately before a feature film was shown in the cinema. The practice of screening a commissioned film with industrial, tourist or agricultural content as a supporting film for a feature film furthermore contributed to a better acceptance of the feature film as a form of art in the feuilleton of “respectable” papers, where feature films were long judged as “low culture” in the 20^th^ century.
+
+Rural films up to the 1960s can, broadly speaking, be divided into two categories: feature films under the cultural heading and commissioned films produced for industrial, tourist and agricultural clients. Precisely because agricultural films were regarded as part of the economic, not the cultural world, they were not judged sophisticated and culturally valuable enough to be preserved for the future by the existing film archives. This attitude only changed significantly in the 1960s/70s, when the so-called author-director films began their remarkable career. Intellectuals influenced by the student movement of the late 1960s began to look at agriculture, especially the peasantry in remote or mountain areas, from new perspectives. They literally produced new pictures, pictures their audience often did not associate with the rural world at all. The author-directors called themselves “documentary” film makers, convinced that they were showing “nothing but the reality”.
+
+A second element that was crucial for the development and broadening of independent filmmaking was the rise and breakthrough of television. TV provided a new outlet for the author-director film. Alongside the state, it became an important source of financial support for the filmmakers. And it opened up for them a new, predominantly urban audience that began to be interested in the peasant-mountain world for a variety of reasons.
+
+![Fig. 1: Milk transport with a handcart and a horse-drawn cart, shown in a remarkable split screen.
Film still from the last of the three Swiss milk films (1923–1929), entitled *Wir und die Milch* (1929).[^1]](images/Figure1.jpg)
+
+## The ARH/ERHFA research infrastructure
+
+Knowledge about the history of rural films in Europe is collected in the European film database of the [Archives of Rural History (ARH)](https://www.histoirerurale.ch/) and the [European Rural History Film Association (ERHFA)](https://www.ruralfilms.eu/). The ERHFA was founded in 2017. It is an association of film archives and research institutions interested in films from and about rural areas. The aim of the organisation is to promote the documentation, study and publication of (historical) films related to agricultural history and the history of rural areas. To achieve this goal, the ARH and the ERHFA operate a film database and an associated online portal, publish the *Video Essays in Rural History* series and organise workshops and panels at academic conferences.
+
+The ARH/ERHFA film database currently contains metadata on around 4,300 films, including commissioned, amateur, author-director and feature films as well as television programmes. The status of the metadata collection differs from film to film. For many films, a copy has been preserved, which, if digitised, is embedded directly in the database. For a number of other films, reference is made to institutions where the film can be viewed. Still other entries contain extensive metadata without information about the film's location, because it is not yet known whether a copy has survived or not. Finally, there are also fragmentary entries on films for which very little information is known to date, as well as on films that were planned but never produced. The database is a working tool that, like the online portal, is being continuously expanded as existing entries are complemented and new entries are added.
+
+The database is structured according to works, i.e. versions or multiple copies of a film are summarised in the entry for the corresponding work. Technical information on the individual copies can be obtained from the linked institutions that archive the films. However, the database not only contains links to digital copies or locations of film reels, but also details of written archival material or literature on the film. The database is thus a signpost pointing to institutions where more information is available.
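+
+As a schematic illustration of this works-based structure (with invented field names, not the database's actual schema), a work entry aggregating its copies and pointers might look as follows:
+
+```python
+from dataclasses import dataclass, field
+from typing import Optional
+
+@dataclass
+class Copy:
+    institution: str                   # where this copy is archived
+    digitised: bool
+    online_url: Optional[str] = None   # set if the copy is embedded in the portal
+
+@dataclass
+class Work:
+    title: str
+    year: Optional[int]                # may be unknown for fragmentary entries
+    copies: list[Copy] = field(default_factory=list)          # empty: no copy known to survive
+    related_records: list[str] = field(default_factory=list)  # archival material, literature
+
+# Hypothetical entry (holding institution and URL are placeholders):
+milk_film = Work(
+    title="Wir und die Milch",
+    year=1929,
+    copies=[Copy(institution="<holding institution>", digitised=True,
+                 online_url="https://ruralfilms.eu/...")],
+)
+```
+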
+Around a quarter of the films listed in the database can be viewed in the [online portal](https://ruralfilms.eu/filmdatabaseOnline). The 27 institutions that contribute to the film database and the online portal come from Austria, Belgium, England, Finland, France, Germany, Ireland, the Netherlands, Portugal and Switzerland.
+
+![Fig. 2: The entries in the online portal can be searched using the quick and advanced search functions by search term, period of production, length, commissioner and production company.](images/Figure2.jpg)
+
+The films are grouped according to the contributing institutions as well as by thematic and chronological collections. Each collection consists of a short introductory text and a selection of the corresponding films. The chronological collections on the decades from the 1920s to the 1980s provide an overview of the development of film technology in the relevant period. The thematic collections illustrate the diversity of the films.
+
+![Fig. 3: Some of the chronological and thematic collections in the ARH/ERHFA online portal.](images/Figure3.png)
+
+## Films as Sources
+
+The accessibility of films via the ARH/ERHFA online portal facilitates the use of film sources in historical studies. As sources, films can be interpreted in at least two ways: firstly, as images of a bygone era that reveal much about the history of agriculture and, secondly, as media that intervened in this history and shaped it. As images, films visualise aspects of agricultural history that are hardly ever recorded in written and statistical sources. This may be because they were either not noticed or concealed, or because they cannot be recorded in writing. What sets films apart from still images is that they also capture movements and sounds, which make additional contexts of agricultural work tangible, such as the verbal and non-verbal communication between humans and animals at work. Films thus bear witness, often unintentionally, to the fact that farming in practice often was not as it was portrayed or demanded in textbooks and magazines.
+
+However, films are more than mere images; they intervene in the context of their creation and use, create a reality of their own and exert an influence on the viewer.[^2] This was often used deliberately, for example if there was a need for media control when innovations of a technical, economic, political, social or medical nature had an impact on society or the environment. Changes of all kinds, including the controversies that accompanied them, were therefore an important reason to produce commissioned films. The films had the function of adapting their audiences to new requirements, creating acceptance for the innovation and laying the foundation for further changes. In this respect, commissioned films contributed to the creation of a willingness to cooperate and to consensus-building in modernisation processes.[^3] In the agricultural context, this function of films was used, for example, by the Eidgenössische Alkoholverwaltung EAV (Swiss Alcohol Board)[^4] and the plant protection company Dr Rudolf Maag AG, which commissioned and produced numerous films illustrating their activities and the use of their products.[^5]
+
+The dual function of audiovisual sources as images and as influencing media often cannot be adequately captured by written texts alone. This is why we also use moving images to analyse historical developments and to communicate insights gained from historical research.
+
+## Films as Means of Communication
+
+Anyone attempting to transfer knowledge gained from audiovisual sources into written formats will come up against limitations, because much of what characterises moving images is lost when written down: the dynamics and (in the case of sound films) the interplay of image and sound in particular. It is, furthermore, often impossible to translate the content of the image into words, for example when it comes to the behaviour of (speechless) animals, human-animal interactions or disappeared (agricultural) practices, for which there is no vocabulary in industrialised societies.[^6]
+
+To counter these difficulties, the format of the historical video essay lends itself as a supplement to written texts. A video essay in our series is understood as a montage of historical film and image material that is supplemented by an analytical commentary.
The audiovisual sources are both the source material and the visual carriers of the knowledge transfer, and they are contextualised and analysed by a commentary. In addition to this communication function, video essays can also be used as an analytical tool.
+
+![Fig. 4: The first video essay in the series *Video Essays in Rural History* focuses on the importance of working horses, cattle, dogs, mules and donkeys in agriculture and in the cities of the 19^th^ and 20^th^ centuries.[^7]](images/Figure4.jpg)
+
+The ARH and ERHFA have launched the *[Video Essays in Rural History](https://www.ruralfilms.eu/all_video_essays.html)* series, in which five video essays from Switzerland, Belgium and Canada have been published to date. They address the importance of working animals, Swiss agronomists and farmers travelling to America in the early 20^th^ century, neighbourly cooperation in rural Canada, the motorisation of Belgian agriculture and Mina Hofstetter, an ecofeminist pioneer of organic agriculture.
+
+The video essay is to be understood as a supplement to, not a replacement for, written formats. The video essays published in the *Video Essays in Rural History* series are therefore published together with an accompanying text. The five to thirty-minute video essays fulfil academic criteria and at the same time appeal to a wider audience. So far, they have met with great interest both within and outside the academic community. They are presented at conferences, used in academic teaching, linked to in media reports and achieve a relatively high number of hits on YouTube (the video essay on working animals, for example, was viewed 3,100 times in the first week after publication).
+
+[^1]: The film is available online in the ARH/ERHFA online portal: [ruralfilms.eu (16.08.2024)](https://ruralfilms.eu/filmdatabaseOnline/index.php?tablename=films&function=details&where_field=ID_films&where_value=203).
+
+[^2]: Bernhardt Markus, Visual History: Einführung in den Themenschwerpunkt, in: Zeitschrift für Geschichtsdidaktik, 12/1 (2013), p. 5–8, here: p. 5.
+
+[^3]: Zimmermann Yvonne, Dokumentarischer Film: Auftragsfilm und Gebrauchsfilm, in: Zimmermann Yvonne (Hg.), Schaufenster Schweiz: Dokumentarische Gebrauchsfilme 1896-1964, Zürich 2011, p. 34–83, here: p. 64 & 69f.
+
+[^4]: Auderset Juri/Moser Peter, Rausch & Ordnung. Eine illustrierte Geschichte der Alkoholfrage, der schweizerischen Alkoholpolitik und der Eidgenössischen Alkoholverwaltung (1887-2015), Bern 2016; Wigger Andreas, Saft statt Schnaps. Das Filmschaffen der Eidgenössischen Alkoholverwaltung (EAV) von 1930 bis 1985, in: Geschichte im Puls, Dossier 3: Ekstase (2022), [www.geschichteimpuls.ch (02.07.2024)](https://www.geschichteimpuls.ch/artikel/eav).
+
+[^5]: Playlist Eidgenössische Alkoholverwaltung (EAV), in: Archiv für Agrargeschichte, [YouTube Playlist (02.07.2024)](https://youtube.com/playlist?list=PLSdpgcFyXTnbny77UvXG2neenufUdK7gH); Playlist Dr. Rudolf Maag AG, in: Archiv für Agrargeschichte, [YouTube Playlist (02.07.2024)](https://youtube.com/playlist?list=PLSdpgcFyXTnbQFfNleFhCKqqNhGcP3M4_).
+
+[^6]: Wigger Andreas, Bewegende Tiere auf bewegten Bildern. Filme als Quellen und Vermittlungsformat zur Geschichte der arbeitenden Tiere in der Zeit der Massenmotorisierung (1950-1980), Videoessay zur Masterarbeit, Fribourg 2023, [YouTube (25.06.2024)](https://youtu.be/_XVWdHNQxv8).
+
+[^7]: Moser Peter/Wigger Andreas, Working Animals.
Hidden modernisers made visible, in: Video Essays in Rural History, 1 (2022), [https://www.ruralfilms.eu/essays/videoessay_1_EN.html](https://www.ruralfilms.eu/essays/videoessay_1_EN.html) [16.08.2024].
\ No newline at end of file
diff --git a/submissions/480/_quarto.yml b/submissions/480/_quarto.yml
new file mode 100644
index 0000000..ae6ece2
--- /dev/null
+++ b/submissions/480/_quarto.yml
@@ -0,0 +1,8 @@
+project:
+  type: manuscript
+
+manuscript:
+  article: index.qmd
+
+format:
+  html: default
diff --git a/submissions/480/index.qmd b/submissions/480/index.qmd
new file mode 100644
index 0000000..0a5f075
--- /dev/null
+++ b/submissions/480/index.qmd
@@ -0,0 +1,65 @@
+---
+submission_id: 480
+categories: 'Session 3B'
+title: Connecting floras and herbaria before 1850 – challenges and lessons learned in digital history of biodiversity
+author:
+  - name: Christian Forney
+    orcid: 0000-0002-1318-4983
+    email: christian.forney@unibe.ch
+    affiliations:
+      - University of Berne
+  - name: Martin Stuber
+    orcid: 0000-0001-9853-9570
+    email: martin.stuber@unibe.ch
+    affiliations:
+      - University of Berne
+keywords:
+  - Biodiversity
+  - Botanical networks
+  - Digital edition
+  - Digital herbaria
+  - Digital humanities
+  - FAIR data
+  - History of botany
+  - Knowledge history
+abstract: |
+  Floras and herbaria are particularly valuable sources both for historical analyses of the collaborative knowledge culture of botany and for research into historical biodiversity. Therefore, the digital representation of these complementary sources should fulfil the requirements of both the humanities and the natural sciences. In this paper, we describe challenges, solutions and lessons learned in this regard as a summary of experiences from multiple projects on the data and edition platform _hallerNet_ around the Bernese polymath Albrecht von Haller (1708–1777).
+date: 08-08-2024
+bibliography: references.bib
+---
+
+## Introduction
+
+To start with some definitions, the term “flora” – in the sense of a document – denotes a catalogue in which the plant species of a specific area are systematically listed, often together with a description and additional information. “Herbarium” refers to a collection of preserved (usually dried and pressed) plants or fungi for scientific purposes; an individual botanical object in a herbarium collected at a specific place and time is called a “specimen” [@Wagenitz2003].
+
+## Floras and herbaria in the history of knowledge
+
+Both the flora and the herbarium are regarded as decisive innovations in the development of botany into an independent scientific discipline from the mid-16th century onwards. Together with the functionally related botanical garden, which was established in the same period [@Rieppel2016; @Findlen2006a], it was only the invention of the herbarium that made it possible to work out regional floras based on systematic empirical fieldwork [@Flannery2023; @Müller-Wille2019]. Paula Findlen summarised the motivation behind this crucial invention: “The more naturalists observed nature in situ, the more they realized that limited contact with specimens did not yet yield enough knowledge to describe and compare medicinal herbs. They needed to take nature home” [@Findlen2006b, p. 447; see @Sunderland2016].
Findlen here represents the history of knowledge, or renewed history of science, which firstly emphasises the shift from finalised knowledge to the act of its production, secondly shows an increased interest in the everyday intellectual life of small groups, circles or networks, and thirdly focuses on practices and material cultures of knowledge [@Müller-WilleEtAl2017; @FörschlerMariss2017; @HolensteinSteinkeStuber2013]. In this perspective, and under the catchy phrase “collecting as knowledge”, the creation of a natural history collection such as a herbarium is itself seen as knowledge production [@HeesenSpary2001]. The activity of collecting means not only that dispersed natural objects are brought together in a single location, but also that the forms of representation associated with them, such as illustrations, descriptions, lists and publications, are included in the repositories, where they are available for comparison, retracing and synoptic synthesis [@Klemun2017, p. 235].

The precise structures and functions of floras and herbaria, with their spatial relations between the local and the global, can only be understood in the context of the “collaborative knowledge culture of botany”. The interplay of the three central resources on which botany depended in early modern times therefore has to be reconstructed: living and dried plants, relevant specialised literature (e.g. floras) and correspondence [@Dietz2017a; @Dietz2017b]. The nexus of correspondence, plant transfer and collection policy was first reconstructed by Emma Spary using the example of the network of André Thouin (1747–1824), director of the _Jardin du Roi_ in Paris [@Spary2000, pp. 49-98]. The analysis of such networks draws attention to the extensive transfer of dried plants and seeds as the basis of knowledge production [@DauserEtAl2008]. A wide variety of methods was used to correspond efficiently, to save time and to avoid loss of information. First and foremost is the use of reference catalogues: lists of transmitted or desired plant species could simply refer to the numbers that had been assigned to the species in a published flora [@Dietz2017a, pp. 96-99]. Secondly, network analyses show that natural history owes its existence not only to its outstanding figures, but also developed through the participation of thousands of amateurs working locally [@Klemun2017, p. 239]. Correspondence networks not only serve to improve our understanding of herbaria and floras; it is also possible to go in the opposite direction: herbaria themselves can serve as a source for social network analyses by systematically evaluating the collectors of the individual specimens [@SiracusaEtAl2020; @GroomOReillyHumphrey2014].

## Digitisation of herbaria from a botanical perspective

The value of herbaria has long been recognized in the fields of taxonomy, systematics and biogeography. Moreover, in recent decades they have proven to be fundamental for dealing with the biology of climate change, biodiversity, phenology, conservation and biological invasions. Given the high scientific and cultural value of herbarium collections, many efforts have been made in the last 20 years to make them more accessible. Digitization is an essential first step in the process of transforming the vast amount of data associated with physical specimens into flexible digital data formats that allow information to be re-categorized according to variable criteria [@Roma-Marzio2023, p. 108; see generally @AndraschkeWagner2020].
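One such re-categorization is the kind of social network analysis of collectors cited above [@SiracusaEtAl2020; @GroomOReillyHumphrey2014]. The following minimal sketch (Python with the networkx library) shows how digitized specimen records could be projected into a weighted collector network; the records are invented toy data and do not reflect any actual collection or the _hallerNet_ data model.

```python
# Minimal sketch: from specimen records to a collector co-collection network.
# Toy data; all identifiers and field names are invented for illustration.
from itertools import combinations

import networkx as nx

# Each record lists the collectors named on one (hypothetical) specimen label.
specimens = [
    {"id": "SPEC-0001", "collectors": ["Chaillet", "Haller"]},
    {"id": "SPEC-0002", "collectors": ["Chaillet"]},
    {"id": "SPEC-0003", "collectors": ["Chaillet", "Gaudin"]},
    {"id": "SPEC-0004", "collectors": ["Gaudin", "Haller"]},
]

G = nx.Graph()
for record in specimens:
    for a, b in combinations(sorted(record["collectors"]), 2):
        # The edge weight counts how often two collectors co-occur on a label.
        if G.has_edge(a, b):
            G[a][b]["weight"] += 1
        else:
            G.add_edge(a, b, weight=1)

# Degree centrality gives a first, crude indicator of 'key collectors',
# and hence of possible collector bias in the assembled collection.
print(nx.degree_centrality(G))
```

Applied to fully digitized collections, even such simple network measures offer a first way of characterising the collector biases discussed below.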
Building on centuries of research based on herbarium specimens collected over time and around the globe, which are freely accessible and aggregable, a “new era” of discovery, synthesis and prediction using digitized collection data has been postulated [@JamesEtAl2018; @NelsonEllis2018]. Digitization and online availability of specimens facilitate the rapid exploration and dissemination of accurate biodiversity data on an unprecedented scale: “The emerging ‘herbarium of the future’ (or the ‘global metaherbarium’) will be the central element guiding the exploration, illumination, and prediction of plant biodiversity change in the Anthropocene” [@Davis2023, p. 412].

It should be borne in mind that collections are usually associated with various distortions that need to be characterised and mitigated to make the data usable. Most common are taxonomic and collector biases, which can be understood as the effects of the particular recording preferences of key collectors on the overall taxonomic composition of the biological collections to which they contribute [@SiracusaEtAl2020; @Davis2023, p. 421; @JaroszynskaEtAl2023]. In order to capture such phenomena so that they can be taken into account in data analysis, precise knowledge of the entire context in which a herbarium was created is required. This is exactly the aim of the history of knowledge approach described above. Obviously, there is a bridge here between the research interests of the natural sciences and the humanities.

An overview published in 2024 shows that the topic of accessibility and digitization of herbaria as “archives of biodiversity” has also gained new relevance in Switzerland in recent years. Apart from two major exceptions, the [_Platter-Herbarium_](https://www.burgerbib.ch/de/bestaende/privatarchive/einzelstuecke/platter-herbarium) and [_Les Herbiers de Rousseau_](https://lesherbiersderousseau.org/), there have been no attempts to do this in an interdisciplinary context [@Stämpfli2024]. Moreover, the interaction with the functionally linked correspondence networks and contemporary floras has so far not been included. For this reason, the experience with historical plants gained on _hallerNet_, where we pursued an interdisciplinary approach to the interaction between different types of entities (letters, species, specimens, reviews), may be of general interest.

## Historical plants on the data and edition platform _hallerNet_

The data and edition platform [_hallerNet_](https://hallernet.org/) opens up historical networks of knowledge in Switzerland in their European interconnectedness. The basis of the currently around 128,000 data objects is formed by extensive prosopographical and bibliographical data that has been compiled in a relational database (FAUST) since the early 1990s as part of three SNSF projects at the University of Bern. A transformation project (2016–2019) transferred this extensively interlinked data into an XML data structure compliant with the [_Text Encoding Initiative (TEI)_](https://tei-c.org/) and thus turned it into “reusable research data” according to the [_FAIR_](https://www.go-fair.org/fair-principles/) data criteria [@DängeliStuber2020]. The platform nowadays contains, among others, around 46,000 publications, 31,000 persons and 1,200 institutions, which are systematically linked to 9,000 edited reviews and 5,000 edited letters. In our context, the 4,955 plant entities currently on _hallerNet_ are at the centre.
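To illustrate what such an interlinked, FAIR-oriented entity record involves, here is a deliberately simplified sketch; all field names and values are invented for this example, and _hallerNet_'s actual model is TEI-XML, not Python. The structural point is that a stable identifier makes an entity findable and citable, authority links make it interoperable, and historical name variants keep the source terminology traceable; the next paragraph describes how this model was actually derived.

```python
# Illustrative sketch only: a FAIR-oriented, interlinked entity record.
# All field names and values are invented; they do not reproduce the
# real hallerNet TEI-XML data model.
from dataclasses import dataclass, field


@dataclass
class PlantEntity:
    entity_id: str                 # stable, citable identifier (findable)
    accepted_name: str             # current nomenclature (interoperable)
    authority_links: dict[str, str] = field(default_factory=dict)
    name_variants: list[dict] = field(default_factory=list)  # historical names
    related: list[str] = field(default_factory=list)  # letters, specimens, ...


example = PlantEntity(
    entity_id="plant_00042",
    accepted_name="Gentiana lutea L.",
    authority_links={"GBIF": "<taxon key>", "InfoFlora": "<taxon id>"},
    name_variants=[
        {"name": "<pre-Linnaean polynomial>", "source": "Haller, Enumeratio (1742)"},
    ],
    related=["letter_01234", "specimen_00001"],
)
```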
The starting point was the 1,737 species of flowering plants mentioned in Haller's Swiss flora _Historia Stirpium_ [@Haller1768], which were systematically referenced in the aforementioned relational database to Haller's first edition of the Swiss flora, the _Enumeratio_ [@Haller1742], to Linné's _Species plantarum_ [@Linné1753] and to the current nomenclature. This concordance between Haller's and Linné's nomenclature, compiled by Luc Lienhard with reference to Johann Rudolf Suter's _Flora Helvetica_ [@Suter1802], not only makes Haller's Swiss flora accessible, but also provides access to pre-Linnaean botany in general. On this basis, we transformed the botanical data, which was originally divided into four different data types, into a generic data model for the new XML structure, based on today's plant entities ([_InfoFlora_](https://www.infoflora.ch/de/), [_Global Biodiversity Information Facility GBIF_](https://www.gbif.org/)), and treat their (historical) names as name variants. In this way, the entities become flexibly adaptable to other historical floras that lie partly or completely outside Haller's and Linné's nomenclature. At the same time, the data model structured according to today's nomenclature facilitates reference to current issues in historical ecology. The following summary of realized, initiated or planned expansions illustrates this double advantage.

* **Ecological data**: The diverse ecological information in Haller's _Historia_ on habitat, frequency, typical altitudinal range and specific localities is of far above-average quality for the 18th century [@Lienhard2005]. _hallerNet_ systematically records a total of 7,545 locality details; the 1,920 different localities have for the most part been georeferenced as kilometre squares with their corner points and additionally linked to the neighbouring municipalities (‘populated places’) in order to appear in the _hallerNet_ place register. Haller's extraordinary data is thus available in a flexible structure whose exploration is only just beginning [@Lienhard2008; @Lienhard2000]. Historical biodiversity research, which is high on the agenda of environmental history [@GoethemZandem2019], has a wealth of source material at its disposal; with appropriate methods of analysis, this will massively extend its temporal scope [@JaroszynskaEtAl2023; @WangEtAl2023; @StöckliEtAl2012; @Lachat2010].

* **Collectors and correspondents**: Haller's _Historia_ also often names the collector to whom Haller owed the information. These 109 people are all curated on _hallerNet_ and systematically referenced for each species (1,342 times in total). The network is to be further expanded by systematically labelling the plants mentioned in Haller's botanical correspondence, some of which has already been edited on _hallerNet_ and most of which is made accessible via the [_International Image Interoperability Framework (IIIF)_](https://iiif.io). How great the potential of the correspondence is, both for the reconstruction of the ‘knowledge culture’ and for supplementing and specifying the location data, is demonstrated by several analyses already available [@Favre2021; @Hächler2008; @Lienhard2005].

* **Book references and reviews**: For his extensive information on the plant species of his Swiss flora, Haller also used a vast amount of historical data from his predecessors, for example synonyms and place references, which have already been added to _hallerNet_.
Together with the links to Haller's other botanical publications [@SteinkeProfos2004, pp. 186-195], to the numerous botanical publications in Haller's personal library [@Monti1983-1994, integrated in _hallerNet_] and to Haller's countless botanical reviews in the _Göttingische Gelehrte Anzeigen_, all of which are available in edited form on _hallerNet_, the integral process of botanical knowledge production can thus be precisely reconstructed [see @DietzSubm; @Lienhard2005].

* **Useful plants**: 656 species or varieties are listed in a total of eleven systematic catalogues compiled in the context of the _Bernese Economic Society_, over which Haller presided. In these catalogues, the Latin universal plant names were consistently linked to the dialectal regional plant names. On _hallerNet_, 755 actions are linked to them, most of which were obtained from the meetings of the _Economic Society_ [@StuberLienhard2007; @Stuber2008]. This reveals a wide range of interferences between botany and agricultural botany [@DauserStuberSubm; @Gerber-VisserStuber2019; @Stuber2018; @BoscaniStuber2017].

* **Herbaria**: The digitisation of Haller's herbaria is one of the most pressing unfulfilled desiderata in the study of Haller's botany and beyond. Haller's main herbarium, which after being sold by his heirs to Emperor Joseph II was first sent to Pavia and later taken to Paris by Napoleon, is now in the _Muséum National d'Histoire Naturelle_ and comprises more than 10,000 specimens in a total of 60 volumes (including 8 volumes of cryptogams) [@MargézAupicLamy2006; @Zoller1958a]; there is also a smaller Haller herbarium in Göttingen [@Zoller1958b]. Because Haller's herbaria are thus not located in Switzerland, their digitization cannot be supported by the ongoing [_SwissCollNet_](https://swisscollnet.scnat.ch) project, the national initiative for the digitization of natural history collections [@FrickStiegerScheidegger2019]. As part of _SwissCollNet_, however, the lichen herbarium of Jean-Frédéric Chaillet (1747–1839), which is kept in the _Neuchâtel herbarium_ (NEU), has now been edited on _hallerNet_ in the sub-project _Lichens of the Enlightenment_, led by Jason Grant. This is a logical choice in terms of content, as Chaillet operated as a direct Swiss successor to Haller and referred to him wherever possible. At the same time, it represents a milestone for _hallerNet_, as the platform's data structures for herbaria could be developed in the process. The centrepiece are the 943 lichen specimens, which firstly contain the transcribed original information on the label. Secondly, they are linked to the original scan via [_IIIF_](https://iiif.io/api/image), with positional accuracy, since several specimens are mounted on one herbarium page. Thirdly, they are assigned to the species entities, which point to authority data ([_GBIF_](https://www.gbif.org/), [_Index Fungorum_](https://www.indexfungorum.org), [_SwissLichens_](https://swisslichens.wsl.ch)). These species entities also contain the data from historical floras, in this case all from the manuscript flora by Chaillet and, where already listed, from Haller's _Historia_. The information from historical floras is often the decisive key to relating the objects in a herbarium to present-day taxonomic databases. The assignment of source terms to standardised data is thus presented transparently on the platform, which is particularly essential for a period in which botanical nomenclature was still very unstable.
Additionally, the structure of the data follows the [Darwin Core](https://dwc.tdwg.org/) standard, which facilitates the connection to other systems such as the Neuchâtel Herbarium, the emerging _SwissCollNet_ database or the global [_Index Herbariorum_](https://sweetgum.nybg.org/science/ih/) [@VustEtAl].

* **Plant lists**: The connectivity of _hallerNet_ is also demonstrated by Meike Knittel's ongoing guest edition of plant lists from the circle of the Zurich botanist Johannes Gessner (1709–1790), the _Naturforschende Gesellschaft_ and the botanical garden, which document the actual exchange of seeds and list a total of 1,829 individual actions [see @Knittel].

## Conclusion

In reconstructing the whole context in which floras and herbaria interacted, difficulties arise in integrating digital approaches to historical correspondence networks [e.g. @EdmondsonEdelstein2019] with digitization methods for floras and herbaria, since these are located in different scientific disciplines. The challenge for the data and edition platform [_hallerNet_](https://hallernet.org/) is therefore to find interdisciplinary solutions. With tools, methods and workflows of the digital humanities, traceable relations between text, scans and structured data are established in an innovative way. This makes it possible to relate today's botanical authority data systematically to historical information such as changing plant names, specimens, locality information and plant collectors. For the interoperability of the data, orientation towards the [Darwin Core](https://dwc.tdwg.org/) standard is mandatory; for sustainable editorial quality, the [_TEI_](https://tei-c.org/) guidelines are. Originally developed in the natural sciences, the [_FAIR data principles_](https://www.go-fair.org/fair-principles/) have become a standard in the humanities as well (especially for _GLAM_ institutions) and thus serve as an overarching guideline; in particular, _FAIR_ guarantees the sustainable handling of data, which remains 'reusable' for future generations of users because the normalization and flexibilization processes can be traced in detail. With this integration of different disciplinary standards and different types of sources, _hallerNet_ could become a dynamic and cross-collection instrument for the interdisciplinary research of historical plants and biodiversity in Switzerland in the period before 1850. The current transformation of _hallerNet_ into the national collaborative platform _République des Lettres_ will further strengthen this potential.

diff --git a/submissions/480/references.bib b/submissions/480/references.bib
new file mode 100644
index 0000000..b076588
--- /dev/null
+++ b/submissions/480/references.bib
@@ -0,0 +1,552 @@
@book{AndraschkeWagner2020,
  title = {Objekte im Netz. Wissenschaftliche Sammlungen im digitalen Wandel},
  editor = {Andraschke, Udo and Wagner, Sarah},
  date = {2020},
  location = {Bielefeld},
  langid = {ngerman},
}

@book{BoscaniStuber2017,
  title = {Wer das Gras wachsen hört. Wissensgeschichte(n) der pflanzlichen Ressourcen vom Mittelalter bis ins 20. Jahrhundert},
  editor = {Boscani Leoni, Simona and Stuber, Martin},
  date = {2017},
  series = {Jahrbuch für Geschichte des ländlichen Raumes 14},
  langid = {ngerman},
}

@article{DängeliStuber2020,
  author = {Dängeli, Peter and Stuber, Martin},
  title = {Nachhaltigkeit in langjährigen Erschliessungsprojekten. FAIR-Data Kriterien bei Editions- und Forschungsplattformen zum 18. Jahrhundert},
  journal = {xviii.ch, Schweizerische Zeitschrift für die Erforschung des 18. Jahrhunderts / Revue suisse d’études sur le XVIIIe siècle},
  date = {2020},
  volume = {11},
  pages = {34--51},
  doi = {10.24894/2673-4419.00004},
  langid = {ngerman},
}

@book{DauserEtAl2008,
  title = {Wissen im Netz. Botanik und Pflanzentransfer in europäischen Korrespondenznetzen des 18. Jahrhunderts},
  editor = {Dauser, Regina and Hächler, Stefan and Kempe, Michael and Mauelshagen, Franz and Stuber, Martin},
  date = {2008},
  location = {Berlin},
  langid = {ngerman},
}

@inbook{DauserStuberSubm,
  author = {Dauser, Regina and Stuber, Martin},
  title = {Pflanzliche Ressourcen – Interferenzen zwischen Botanik und Ökonomischer Aufklärung},
  editor = {Stuber, Martin and Metz, Bernhard and Steinke, Hubert},
  booktitle = {Korrespondenz und Kritik. Albrecht von Haller als paradigmatische Figur der entstehenden Scientific Community},
  year = {submitted},
  location = {Göttingen},
  langid = {ngerman},
}

@article{Davis2023,
  author = {Davis, Charles C.},
  title = {The herbarium of the future},
  journal = {Trends in Ecology & Evolution},
  date = {2023},
  volume = {38},
  number = {5},
  pages = {412--423},
  doi = {10.1016/j.tree.2022.11.015},
  langid = {english},
}

@book{Dietz2017a,
  author = {Dietz, Bettina},
  title = {Das System der Natur. Die kollaborative Wissenskultur der Botanik im 18. Jahrhundert},
  date = {2017},
  location = {Köln u.a.},
  langid = {ngerman},
}

@inbook{Dietz2017b,
  author = {Dietz, Bettina},
  title = {Kollaboration in der Botanik des 18. Jahrhunderts. Die partizipative Architektur von Linnés System der Natur},
  editor = {Förschler, Silke and Mariss, Anne},
  booktitle = {Verfahrensweisen der Naturgeschichte in der Frühen Neuzeit. Akteure Tiere Dinge},
  date = {2017},
  location = {Köln},
  pages = {93--108},
  langid = {ngerman},
}

@inbook{DietzSubm,
  author = {Dietz, Bettina},
  title = {Orte der Kritik in der Botanik des 18. Jahrhunderts},
  editor = {Stuber, Martin and Metz, Bernhard and Steinke, Hubert},
  booktitle = {Korrespondenz und Kritik. Albrecht von Haller als paradigmatische Figur der entstehenden Scientific Community},
  year = {submitted},
  location = {Göttingen},
  langid = {ngerman},
}

@inbook{DrouinLienhard2008,
  author = {Drouin, Jean-Marc and Lienhard, Luc},
  title = {Botanik},
  editor = {Steinke, Hubert and Boschung, Urs and Pross, Wolfgang},
  booktitle = {Albrecht von Haller. Leben, Werk, Epoche},
  date = {2008},
  location = {Göttingen},
  pages = {292--314},
  langid = {ngerman},
}

@book{EdmondsonEdelstein2019,
  title = {Networks of Enlightenment. Digital approaches to the Republic of Letters},
  editor = {Edmondson, Chloe and Edelstein, Dan},
  date = {2019},
  location = {Oxford},
  langid = {english},
}

@inbook{Favre2021,
  author = {Favre, Madline},
  title = {Réseaux, pratiques et motivations des acteurs locaux de la recherche botanique en milieu alpin. Le cas du Valais entre 1750 et 1810},
  editor = {Boscani Leoni, Simona and Head, Anne-Lise and Lorenzetti, Luigi},
  booktitle = {Histoire naturelle et montagnes – Storia naturale e montagne – Naturgeschichte und Berge},
  date = {2021},
  series = {Histoire des Alpes / Storia delle Alpi / Geschichte der Alpen 26},
  pages = {32--49},
  langid = {french},
}

@inbook{Findlen2006a,
  author = {Findlen, Paula},
  title = {Anatomy Theaters, Botanical Gardens, and Natural History Collections},
  editor = {Park, Katharine and Daston, Lorraine},
  booktitle = {Early Modern Science},
  date = {2006},
  series = {The Cambridge History of Science 3},
  location = {Cambridge},
  pages = {272--289},
  langid = {english},
}

@inbook{Findlen2006b,
  author = {Findlen, Paula},
  title = {Natural History},
  editor = {Park, Katharine and Daston, Lorraine},
  booktitle = {Early Modern Science},
  date = {2006},
  series = {The Cambridge History of Science 3},
  location = {Cambridge},
  pages = {435--468},
  langid = {english},
}

@book{Flannery2023,
  title = {In the Herbarium: the Hidden World of Collecting and Preserving Plants},
  author = {Flannery, Maura C.},
  date = {2023},
  location = {New Haven},
  langid = {english},
}

@book{FörschlerMariss2017,
  title = {Akteure, Tiere, Dinge: Verfahrensweisen der Naturgeschichte in der Frühen Neuzeit},
  editor = {Förschler, Silke and Mariss, Anne},
  date = {2017},
  location = {Köln},
  langid = {ngerman},
}

@article{FrickStiegerScheidegger2019,
  author = {Frick, Holger and Stieger, Pia and Scheidegger, Christoph},
  title = {SwissCollNet – A National Initiative for Natural History Collections in Switzerland},
  journal = {Biodiversity Information Science and Standards},
  date = {2019},
  volume = {3},
  pages = {e37188},
  doi = {10.3897/biss.3.37188},
  langid = {english},
}

@inbook{Gerber-VisserStuber2019,
  author = {Gerber-Visser, Gerrendina and Stuber, Martin},
  title = {Naturgeschichte Helvetiens als Projekt der Ökonomischen Aufklärung. Jakob Samuel Wyttenbachs Forschungsbericht (1788) und Albrecht Höpfners Magazin (1787-1789)},
  editor = {Holenstein, André and Jaquier, Claire and Léchot, Timothée and Schläppi, Daniel},
  booktitle = {Politische, gelehrte und imaginierte Schweiz. Kohäsion und Disparität im Corpus helveticum des 18. Jahrhunderts},
  date = {2019},
  location = {Genève},
  pages = {225--252},
  langid = {ngerman},
}

@article{GoethemZandem2019,
  author = {Goethem, Thomas van and Zanden, Jan Luiten van},
  title = {Who is Afraid of Biodiversity? Proposal for a Research Agenda for Environmental History},
  journal = {Environment and History},
  date = {2019},
  volume = {25},
  pages = {613--647},
  doi = {10.3197/096734018X15254461646440},
  langid = {english},
}

@article{GroomOReillyHumphrey2014,
  author = {Groom, Quentin and O’Reilly, C. and Humphrey, T.},
  title = {Herbarium specimens reveal the exchange network of British and Irish botanists, 1856–1932},
  journal = {New Journal of Botany},
  date = {2014},
  volume = {4},
  number = {2},
  pages = {95--103},
  doi = {10.1179/2042349714Y.0000000041},
  langid = {english},
}

@inbook{Hächler2008,
  author = {Hächler, Stefan},
  title = {Avec une grosse boete de plantes vertes. Pflanzentransfer in der Korrespondenz Albrecht von Hallers (1708-1777)},
  booktitle = {Wissen im Netz. Botanik und Pflanzentransfer in europäischen Korrespondenznetzen des 18. Jahrhunderts},
  editor = {Dauser, Regina and Hächler, Stefan and Kempe, Michael and Mauelshagen, Franz and Stuber, Martin},
  date = {2008},
  location = {Berlin},
  pages = {201--218},
  langid = {ngerman},
}

@book{Haller1742,
  title = {Enumeratio methodica stirpium Helvetiae indigenarum},
  author = {Haller, Albrecht von},
  date = {1742},
  location = {Gottingae},
  publisher = {ex officina academica Abrami Vandenhoek},
  langid = {latin},
}

@book{Haller1768,
  title = {Historia stirpium indigenarum Helvetiae inchoata},
  author = {Haller, Albrecht von},
  date = {1768},
  location = {Bernae},
  publisher = {sumptibus Societatis typographicae},
  langid = {latin},
}

@book{HeesenSpary2001,
  title = {Sammeln als Wissen},
  editor = {Heesen, Anke te and Spary, Emma C.},
  date = {2001},
  location = {Göttingen},
  langid = {ngerman},
}

@book{HolensteinSteinkeStuber2013,
  title = {Scholars in Action. The Practice of Knowledge and the Figure of the Savant in the 18th Century},
  editor = {Holenstein, André and Steinke, Hubert and Stuber, Martin},
  date = {2013},
  location = {Leiden and Boston},
  edition = {2 vols.},
  langid = {english},
}

@article{JamesEtAl2018,
  author = {James, Shelley A. and Soltis, P. S. and Belbin, L. and Chapman, A. D. and Nelson, G. and Paul, D. L. and Collins, M.},
  title = {Herbarium data: Global biodiversity and societal botanical needs for novel research},
  journal = {Applications in Plant Sciences},
  date = {2018},
  volume = {6},
  number = {2},
  pages = {e1024},
  doi = {10.1002/aps3.1024},
  langid = {english},
}

@article{JaroszynskaEtAl2023,
  author = {Jaroszynska, Francesca and Rixen, Christian and Woodin, Sarah and Lenoir, Jonathan and Wipf, Sonja},
  title = {Resampling alpine herbarium records reveals changes in plant traits over space and time},
  journal = {Journal of Ecology},
  date = {2023},
  volume = {111},
  number = {2},
  pages = {338--355},
  doi = {10.1111/1365-2745.14062},
  langid = {english},
}

@inbook{Klemun2017,
  author = {Klemun, Marianne},
  title = {Gärten und Sammlungen},
  editor = {Sommer, Marianne and Müller-Wille, Staffan and Reinhardt, Carsten},
  booktitle = {Handbuch Wissenschaftsgeschichte},
  date = {2017},
  location = {Stuttgart},
  pages = {235--244},
  langid = {ngerman},
}

@book{Knittel,
  title = {Blühende Beziehungen. Botanische Praktiken im Zürich des 18. Jahrhunderts},
  author = {Knittel, Meike},
  year = {in press},
  langid = {ngerman},
}

@book{Lachat2010,
  title = {Wandel der Biodiversität in der Schweiz seit 1900: ist die Talsohle erreicht?},
  editor = {Lachat, Thibault},
  date = {2010},
  location = {Bern u.a.},
  langid = {ngerman},
}

@inbook{Lienhard2000,
  author = {Lienhard, Luc},
  title = {Haller et la découverte botanique des Alpes},
  editor = {Pont, Jean-Claude and Lacki, Jan},
  booktitle = {Une cordée originale. Histoire des relations entre science et montagne},
  date = {2000},
  location = {Chêne-Bourg},
  pages = {96--119},
  langid = {french},
}

@inbook{Lienhard2005,
  author = {Lienhard, Luc},
  title = {La machine botanique. Zur Entstehung von Hallers Flora der Schweiz},
  editor = {Stuber, Martin and Hächler, Stefan and Lienhard, Luc},
  booktitle = {Hallers Netz. Ein europäischer Gelehrtenbriefwechsel zur Zeit der Aufklärung},
  date = {2005},
  location = {Basel},
  pages = {371--410},
  langid = {ngerman},
}

@inbook{Lienhard2008,
  author = {Lienhard, Luc},
  title = {Wegränder, Wiesen, Sümpfe – Flora und Lebensräume},
  editor = {Holenstein, André and Schläppi, Daniel and Schnell, Dieter and Steinke, Hubert and Stuber, Martin and Würgler, Andreas},
  booktitle = {Berns goldene Zeit. Das 18. Jahrhundert neu entdeckt},
  date = {2008},
  location = {Bern},
  pages = {56--59},
  langid = {ngerman},
}

@book{Linné1753,
  title = {Species plantarum},
  author = {Linné, Carl von},
  date = {1753},
  location = {Stockholm},
  publisher = {Laurenz Salvi},
  langid = {latin},
}

@article{MargézAupicLamy2006,
  author = {Margéz, Marlène and Aupic, Cécile and Lamy, Denis},
  title = {La restauration de l’herbier Haller du Muséum national d’Histoire naturelle},
  journal = {Support tracé},
  date = {2006},
  volume = {5},
  pages = {354--360},
  langid = {french},
}

@book{Monti1983-1994,
  title = {Catalogo del Fondo Haller della Biblioteca Nazionale Braidense di Milano},
  editor = {Monti, Maria Teresa},
  date = {1983/1994},
  location = {Milano},
  edition = {13 vols.},
  langid = {italian},
}

@inreference{Müller-Wille2019,
  author = {Müller-Wille, Staffan},
  title = {Botanik},
  booktitle = {Enzyklopädie der Neuzeit Online},
  date = {2019},
  doi = {10.1163/2352-0248_edn_COM_248430},
  langid = {ngerman},
}

@inbook{Müller-WilleEtAl2017,
  author = {Müller-Wille, Staffan and Reinhardt, Carsten and Sommer, Marianne},
  title = {Wissenschaftsgeschichte und Wissensgeschichte},
  editor = {Sommer, Marianne and Müller-Wille, Staffan and Reinhardt, Carsten},
  booktitle = {Handbuch Wissenschaftsgeschichte},
  date = {2017},
  location = {Stuttgart},
  pages = {2--18},
  langid = {ngerman},
}

@article{NelsonEllis2018,
  author = {Nelson, Gil and Ellis, Shari},
  title = {The history and impact of digitization and digital data mobilization on biodiversity research},
  journal = {Phil. Trans. R. Soc. B},
  date = {2018},
  volume = {374},
  pages = {20170391},
  doi = {10.1098/rstb.2017.0391},
  langid = {english},
}

@inbook{Rieppel2016,
  author = {Rieppel, Lukas},
  title = {Museums and Botanical Gardens},
  editor = {Lightman, Bernard},
  booktitle = {A Companion to the History of Science},
  date = {2016},
  location = {Oxford u.a.},
  pages = {238--251},
  langid = {english},
}

@article{Roma-Marzio2023,
  author = {Roma-Marzio, Francesco and Maccioni, S. and Dolci, D. and Astuti, G. and Magrini, N. and Pierotti, F. and Vangelisti, R. and Amadei, L. and Peruzzi, L.},
  title = {Digitization of the historical Herbarium of Michele Guadagno at Pisa (PI-GUAD)},
  journal = {PhytoKeys},
  date = {2023},
  volume = {234},
  pages = {107--125},
  doi = {10.3897/phytokeys.234.109464},
  langid = {english},
}

@article{SiracusaEtAl2020,
  author = {Siracusa, Pedro C. de and Gadelha Jr., Luiz M. R. and Ziviani, Artur},
  title = {New perspectives on analysing data from biological collections based on social network analytics},
  journal = {Sci Rep},
  date = {2020},
  volume = {10},
  pages = {3358},
  doi = {10.1038/s41598-020-60134-y},
  langid = {english},
}

@book{Spary2000,
  title = {Utopia's Garden: French Natural History from the Old Regime to Revolution},
  author = {Spary, Emma},
  date = {2000},
  location = {London},
  langid = {english},
}

@article{Stämpfli2024,
  author = {Stämpfli, Remo},
  title = {Pflanzen im Netz: Die Möglichkeiten des Digitalen bei der Erschliessung und Vermittlung von Herbarien und Herbarbelegen},
  journal = {Informationswissenschaft: Theorie, Methode und Praxis},
  date = {2024},
  volume = {8},
  number = {1},
  pages = {123--144},
  doi = {10.18755/iw.2024.8},
  langid = {ngerman},
}

@book{SteinkeProfos2004,
  title = {Bibliographia Halleriana. Verzeichnis der Schriften von und über Albrecht von Haller},
  editor = {Steinke, Hubert and Profos, Claudia},
  date = {2004},
  location = {Basel},
  langid = {ngerman},
}

@article{StöckliEtAl2012,
  author = {Stöckli, Veronika and Wipf, S. and Nilsson, C. and Rixen, C.},
  title = {Using historical plant surveys to track biodiversity on mountain summits},
  journal = {Plant Ecology and Diversity},
  date = {2012},
  volume = {4},
  pages = {415--425},
  langid = {english},
}

@inbook{Stuber2008,
  author = {Stuber, Martin},
  title = {Kulturpflanzentransfer im Netz der Oekonomischen Gesellschaft Bern},
  booktitle = {Wissen im Netz. Botanik und Pflanzentransfer in europäischen Korrespondenznetzen des 18. Jahrhunderts},
  editor = {Dauser, Regina and Hächler, Stefan and Kempe, Michael and Mauelshagen, Franz and Stuber, Martin},
  date = {2008},
  location = {Berlin},
  pages = {229--269},
  langid = {ngerman},
}

@inbook{Stuber2018,
  author = {Stuber, Martin},
  title = {Vom Simmental bis Spitzbergen. Albrecht von Haller als europäischer Vermittler regionaler Kultur und Ökonomie},
  editor = {Eibach, Joachim and Opitz-Belakhal, Claudia},
  booktitle = {Zwischen den Kulturen. Mittler und Grenzgänger vom 17. bis 19. Jahrhundert},
  date = {2018},
  location = {Hannover},
  pages = {165--189},
  langid = {ngerman},
}

@inbook{StuberLienhard2007,
  author = {Stuber, Martin and Lienhard, Luc},
  title = {Nützliche Pflanzen. Systematische Pflanzenverzeichnisse von Wild- und Kulturpflanzen im Umfeld der Oekonomischen Gesellschaft},
  editor = {Holenstein, André and Stuber, Martin and Gerber-Visser, Gerrendina},
  booktitle = {Nützliche Wissenschaft und Ökonomie im Ancien Régime. Akteure, Themen, Kommunikationsformen},
  date = {2007},
  pages = {65--106},
  series = {Cardanus. Jahrbuch für Wissenschaftsgeschichte 7},
  langid = {ngerman},
}

@inbook{Sunderland2016,
  author = {Sunderland, Mary E.},
  title = {Specimens and Collections},
  editor = {Lightman, Bernard},
  booktitle = {A Companion to the History of Science},
  date = {2016},
  location = {Oxford u.a.},
  pages = {488--499},
  langid = {english},
}

@book{Suter1802,
  title = {Helvetiens Flora, worinn alle im Hallerischen Werke enthaltenen und seither neu entdeckten Schweizer Pflanzen nach Linné's Methode aufgestellt sind},
  author = {Suter, Johann Rudolf},
  date = {1802},
  location = {s.l.},
  edition = {2 vols.},
  langid = {ngerman},
}

@inbook{VustEtAl,
  title = {Historical herbarium as a challenge for the Digital humanities – Chaillet's lichens herbarium as pilot project},
  author = {Vust, Mathias and Di Maio, Edouard and Forney, Christian and Stuber, Martin},
  booktitle = {Usages, pratiques et fonctions des herbiers historiques / Uses, practices and functions of historical herbaria},
  series = {Actes Colloque Ascona},
  year = {in prep.},
  langid = {english},
}

@book{Wagenitz2003,
  title = {Wörterbuch der Botanik},
  author = {Wagenitz, Gerhard},
  date = {2003},
  location = {Heidelberg and Berlin},
  edition = {2. Aufl.},
  langid = {ngerman},
}

@article{WangEtAl2023,
  author = {Wang, Jessica and Fischer, Markus and Eggenberg, Stefan and Rembold, Katja},
  title = {The impact of climate change on plant distribution and niche dynamics over the past 250 years in Switzerland},
  journal = {Bauhinia},
  date = {2023},
  volume = {29},
  pages = {101--112},
  langid = {english},
}

@article{Zoller1958a,
  author = {Zoller, Heinrich},
  title = {A l’occasion du 250e anniversaire d’Albrecht von Haller: quelques remarques sur son oeuvre botanique et ses collections},
  journal = {Bulletin du Muséum national d’histoire naturelle},
  date = {1958},
  volume = {série 2, t. 30, no 3},
  pages = {305--312},
  langid = {french},
}

@article{Zoller1958b,
  author = {Zoller, Heinrich},
  title = {Albrecht von Hallers Pflanzensammlungen in Göttingen, sein botanisches Werk und sein Verhältnis zu Carl von Linné},
  journal = {Nachrichten der Akademie der Wissenschaften in Göttingen},
  date = {1958},
  volume = {2, Mathematisch-physikalische Klasse, Nr. 10},
  pages = {217--251},
  langid = {ngerman},
}