From 468d738e4d7e516501f9ea49761c175b5d18f480 Mon Sep 17 00:00:00 2001 From: Moritz Twente <127845092+mtwente@users.noreply.github.com> Date: Fri, 30 Aug 2024 08:25:48 +0200 Subject: [PATCH 1/3] feat: add poster 466 (#53) Co-authored-by: Lucas Burkart Co-authored-by: Tobias Hodel Co-authored-by: Benjamin Hitz Co-authored-by: Aline Vonwiller Co-authored-by: Ismail Prada Ziegler Co-authored-by: Jonas Aeby Co-authored-by: Katrin Fuchs --- submissions/poster/466/_quarto.yml | 8 +++++ submissions/poster/466/index.qmd | 50 ++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+) create mode 100644 submissions/poster/466/_quarto.yml create mode 100644 submissions/poster/466/index.qmd diff --git a/submissions/poster/466/_quarto.yml b/submissions/poster/466/_quarto.yml new file mode 100644 index 0000000..ae6ece2 --- /dev/null +++ b/submissions/poster/466/_quarto.yml @@ -0,0 +1,8 @@ +project: + type: manuscript + +manuscript: + article: index.qmd + +format: + html: default diff --git a/submissions/poster/466/index.qmd b/submissions/poster/466/index.qmd new file mode 100644 index 0000000..39b665c --- /dev/null +++ b/submissions/poster/466/index.qmd @@ -0,0 +1,50 @@ +--- +submission_id: 466 +categories: 'Poster Session' +title: 'Economies of Space: Opening up Historical Finding Aids' +author: + - name: Lucas Burkart + orcid: 0000-0002-9011-5113 + email: lucas.burkart@unibas.ch + affiliations: + - University of Basel + - name: Tobias Hodel + orcid: 0000-0002-2071-6407 + email: tobias.hodel@unibe.ch + affiliations: + - University of Bern + - name: Benjamin Hitz + orcid: 0000-0002-3208-4881 + email: benjamin.hitz@unibas.ch + affiliations: + - University of Basel + - name: Aline Vonwiller + orcid: 0009-0001-2098-9237 + email: a.vonwiller@unibas.ch + affiliations: + - University of Basel + - name: Ismail Prada Ziegler + orcid: 0000-0003-4229-8688 + email: ismail.prada@unibe.ch + affiliations: + - University of Bern + - name: Jonas Aeby + email: jonas.aeby@unibas.ch + affiliations: + - University of Basel + - name: Katrin Fuchs + email: katrin.fuchs@unibas.ch + affiliations: + - University of Basel + +date: 08-28-2024 +--- + +In the realm of historical data processing, machine learning has emerged as a game-changer, enabling the analysis of vast archives and complex finding aids on an unprecedented scale. One intriguing case study exemplifying the potential of these techniques is the digitization of the Historical Land Registry of the City of Basel (=Historisches Grundbuch Basel, HGB). +The HGB, compiled around the turn of the 20th century, contains a wealth of historical data meticulously collected on index cards. Each card represents a transaction or entry from source documents, and the structured data reflects the conventions and interests of its creators. This inherent complexity has set the stage for a multifaceted exploration, encompassing text recognition, specifically for handwritten materials, and information extraction, particularly event extraction. + +One of the key accomplishments of this endeavor is the successful application of machine learning algorithms to decipher handwritten content, resulting in a remarkably low character error rate of just 4%. This breakthrough paves the way for extracting valuable information, such as named entities (persons, places, organizations), their relationships, and mentioned events, through specialized language models. 
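+
+As a point of reference for the figure above, the character error rate (CER) is the Levenshtein edit distance between the recognized text and a ground-truth transcription, divided by the number of characters in the ground truth; a 4% CER therefore corresponds to roughly one wrong character in every twenty-five. A minimal illustration in Python (not the project's evaluation code; the functions and the sample strings are ours):
+
+```python
+# Illustrative only: CER = edit distance / number of reference characters.
+def levenshtein(a: str, b: str) -> int:
+    prev = list(range(len(b) + 1))
+    for i, ca in enumerate(a, 1):
+        cur = [i]
+        for j, cb in enumerate(b, 1):
+            cur.append(min(prev[j] + 1, cur[j - 1] + 1, prev[j - 1] + (ca != cb)))
+        prev = cur
+    return prev[-1]
+
+def cer(reference: str, hypothesis: str) -> float:
+    return levenshtein(reference, hypothesis) / len(reference)
+
+# One substituted character in a nine-character reference gives a CER of about 0.11.
+print(cer("Grundbuch", "Grundbueh"))
+```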
+
+When combined with property information, the extracted data offers a unique opportunity to visualize historical events and transactions in Geographical Information Systems (GIS). This makes it possible to analyze normative and semantic shifts in the real estate market over time, shedding light on historical changes in language and practice.
+
+Ultimately, this project signifies a milestone in historical data analysis. Machine learning techniques have matured to the point that even extensive datasets and intricate finding aids can be processed effectively. As a result, innovative approaches to large-scale historical data analysis are now within reach, offering new perspectives on dynamic urban economies in pre-modern times. This venture showcases how technological approaches and humanistic deliberation go hand in hand in understanding complex patterns in economic history.
\ No newline at end of file

From 9b819f9df8652b509c373b0495505ad58d0773cf Mon Sep 17 00:00:00 2001
From: Moritz Twente <127845092+mtwente@users.noreply.github.com>
Date: Fri, 30 Aug 2024 08:28:50 +0200
Subject: [PATCH 2/3] feat: add poster 472 (#54)

Co-authored-by: Torsten Kahlert
Co-authored-by: Daniel Kurzawe
---
 submissions/poster/472/_quarto.yml |  8 ++++++++
 submissions/poster/472/index.qmd   | 29 +++++++++++++++++++++++++++++
 2 files changed, 37 insertions(+)
 create mode 100644 submissions/poster/472/_quarto.yml
 create mode 100644 submissions/poster/472/index.qmd

diff --git a/submissions/poster/472/_quarto.yml b/submissions/poster/472/_quarto.yml
new file mode 100644
index 0000000..ae6ece2
--- /dev/null
+++ b/submissions/poster/472/_quarto.yml
@@ -0,0 +1,8 @@
+project:
+  type: manuscript
+
+manuscript:
+  article: index.qmd
+
+format:
+  html: default
diff --git a/submissions/poster/472/index.qmd b/submissions/poster/472/index.qmd
new file mode 100644
index 0000000..8af36fd
--- /dev/null
+++ b/submissions/poster/472/index.qmd
@@ -0,0 +1,29 @@
+---
+submission_id: 472
+categories: 'Poster Session'
+title: Discuss Data -- an Open Repository for Research and Data Communities
+author:
+  - name: Torsten Kahlert
+    orcid: 0009-0003-3264-5006
+    email: kahlert@hab.de
+    affiliations:
+      - Herzog-August-Bibliothek Wolfenbüttel
+  - name: Daniel Kurzawe
+    orcid: 0000-0001-5027-7313
+    email: kurzawe@sub.uni-goettingen.de
+    affiliations:
+      - SUB Göttingen
+date: 08-28-2024
+---
+
+In this poster, we show how the Discuss Data research data platform is being expanded to include a "community space" for the digital humanities (DH). Discuss Data enables and promotes contextualized discussion of the quality and sustainability of research data directly on the data objects themselves.
+
+Current standards and the digitization of existing processes require structures that enable sustainable development models. This applies in particular to the quality of research data, which is becoming increasingly important in academic debate.
+
+Discuss Data offers a platform for this. Beyond the technical management, archiving and provision of data, Discuss Data also contextualizes data through curated discussion. The platform addresses individual communities and offers them a subject-specific discussion space and, in the long term, community-specific tools. Communities are not to be equated with disciplines; rather, they are interest groups formed around specific issues or data materials.
+
+Since the introduction of the first community space for the research community on Eastern Europe, the South Caucasus and Central Asia in 2020, 121 datasets have been published and 141 users have registered (as of 28 November 2023). However, the discussion function provided by Discuss Data has so far seen comparatively little use. A culture of discussing data, which is quite common at conferences and in peer review and is extremely important from a scholarly perspective, has not yet become established on the platform, despite the positive attitude towards it.
+
+Digital method and source criticism has become one of the central challenges of the digital humanities. Until now, research data has generally been published in institutional repositories or on platforms such as Zenodo, but without the kind of quality control that is customary for journal articles. As a result, datasets often remain unused for further research because it is unclear what quality the data has and what it might be suitable for.
+
+The experience of the first funding phase of Discuss Data has made clear that more energy must be put into attracting data curators in order to ensure that the community spaces are sustained by their communities in the long term. Positive examples are needed for this: the integration of discussions as micropublications, for example, could help to demonstrate the added value for individual contributors.
\ No newline at end of file

From 62b77db195a2e001a0f1eb92abe49b2bf6ce238a Mon Sep 17 00:00:00 2001
From: Moritz Twente <127845092+mtwente@users.noreply.github.com>
Date: Fri, 30 Aug 2024 08:31:44 +0200
Subject: [PATCH 3/3] feat: add poster 476 (#56)

Co-authored-by: Axel Matthey <44695679+amatthey@users.noreply.github.com>
---
 submissions/poster/476/_quarto.yml    |  8 ++++
 submissions/poster/476/index.qmd      | 55 +++++++++++++++++++++++++++
 submissions/poster/476/references.bib | 47 +++++++++++++++++++++++
 3 files changed, 110 insertions(+)
 create mode 100644 submissions/poster/476/_quarto.yml
 create mode 100644 submissions/poster/476/index.qmd
 create mode 100644 submissions/poster/476/references.bib

diff --git a/submissions/poster/476/_quarto.yml b/submissions/poster/476/_quarto.yml
new file mode 100644
index 0000000..ae6ece2
--- /dev/null
+++ b/submissions/poster/476/_quarto.yml
@@ -0,0 +1,8 @@
+project:
+  type: manuscript
+
+manuscript:
+  article: index.qmd
+
+format:
+  html: default
diff --git a/submissions/poster/476/index.qmd b/submissions/poster/476/index.qmd
new file mode 100644
index 0000000..d9d2fac
--- /dev/null
+++ b/submissions/poster/476/index.qmd
@@ -0,0 +1,55 @@
+---
+submission_id: 476
+categories: 'Poster Session'
+title: "Modeling in history: using LLMs to automatically produce diagrammatic models synthesizing Piketty's historiographical thesis on economic inequalities"
+author:
+  - name: Axel Matthey
+    orcid: 0000-0001-7454-1131
+    email: axel.matthey@unil.ch
+    affiliations:
+      - Université de Lausanne
+keywords:
+  - LLM
+  - diagrams
+  - economic inequality
+  - pedagogy
+  - Artificial Intelligence
+abstract: |
+  This study seeks to merge two realms: theoretical digital history, specifically modeling in history, and economic history, with a focus on the history of income and wealth inequalities. The central objective is to apply theoretical research outcomes concerning models and their application in history to scrutinize a historical explanation of the evolution of economic inequalities between 1914 and 1950.
+  Traditionally, predictive models with reproducible results were paramount for validating explanations through observed data. However, the role of models has expanded, moving beyond mere predictive functions. This paradigm shift, acknowledged by the philosophy of science in recent decades, emphasizes that models now serve broader purposes, guiding exploration and research rather than just prediction. These models are not merely tools for validating predictions; they serve to bring clarity to our thinking processes, establishing the conditions under which our intuitions prove valid. Beyond merely representing systematic relationships between predetermined facets of reality, models aspire to elucidate causal connections. When a historical model aims to provide causal explanations, the process involves identifying the "explanandum" (the aspect of reality being explained) as the dependent variable and working backward to pinpoint its hypothetical causes as independent variables.
+  Using a diagrammatic approach, we formalized a qualitative model aligned with Thomas Piketty's historiographical explanation of the evolution of economic inequalities between 1914 and 1950. The intent was to employ causal diagrams, translating the narrative embedded in Piketty's historiography of inequalities into a formal model. This endeavor sought to make explicit the implicit causal relationships within the historian's narrative: the resulting causal model serves as a symbolic synthesis of our comprehension of a specific subject, enabling the construction of a highly refined narrative synthesis from a complex topic.
+
+date: 03-12-2023
+bibliography: references.bib
+---
+
+## Introduction
+
+This research explores the potential of Large Language Models (LLMs) to automatically generate diagrammatic models that synthesize complex historical narratives. Specifically, we focus on Thomas Piketty's analysis of economic inequalities in the first half of the 20th century, as presented in his seminal work *Capital in the Twenty-First Century* [@piketty_capital_2017]. Our project aims to bridge the gap between theoretical digital history and economic history by employing LLMs to translate Piketty's causal explanations into visual representations, thus enhancing understanding and facilitating further analysis.
+
+## Models in history: from prediction to exploration
+
+Traditionally, models in the natural sciences served primarily as predictive tools, used to validate explanations through observed data. However, their role has expanded to encompass exploration, research, and the clarification of thought processes. This shift, acknowledged by the philosophy of science, emphasizes the ability of models to provide new perspectives and challenge existing assumptions; the main function of models thus becomes to clarify our thinking.
+
+Alongside this evolution of modeling, the digital humanities also include a more "introspective" branch of research on models, sometimes described as the "meta-discipline" of the digital humanities. It evaluates the epistemological effects of models on humanities research and “calls for a shift from models as static objects (e.g., what functionalities they enable) to the dynamic process of modeling”.
+This distinction, between the simple use of models (model-based quantitative operationalization) and epistemological research into the implications of formal modeling, can be mapped onto the division between applied and theoretical digital humanities proposed by Michael Piotrowski [@piotrowski_digital_2018].
+
+## “Manual” causal modeling and Piketty's historical narrative
+
+Our research delves into the realm of causal modeling, aiming to elucidate the cause-and-effect relationships within historical narratives. In this context, the "explanandum" (the phenomenon to be explained) is treated as the dependent variable, and the model seeks to identify its potential causes.
+
+We began by manually creating a semi-formal qualitative model based on Piketty's explanation of economic inequalities from 1914 to 1950. Utilizing causal diagrams, as described by Judea Pearl (Pearl 2018), we formalized Piketty's narrative, making explicit the implicit causal relationships within his analysis. The resulting model serves as a symbolic synthesis of our understanding of Piketty's core argument: that outside periods of significant economic interventionism, wealth tends to grow faster than economic output, leading to increased inequality (r > g, where r is the rate of return on capital and g is the rate of economic growth). While this trend was mitigated in the first half of the 20th century by major sociopolitical shocks such as the World Wars and the Great Depression, Piketty argues that it has resurfaced since the 1970s and 1980s, a phenomenon he terms the "return of capital."
+
+## LLMs and the automatic generation of historiographical diagrams: starting with a small article
+
+Our initial exploration will involve using Google's LLM (Gemini 1.5 Pro) to convert a concise historical article by Piketty into a simplified causal diagram; a schematic sketch of such a pipeline is given below. The article will be "A Historical Approach to Property, Inequality and Debt: Reflections on Capital in the 21st Century" [@piketty_historical_2015].
+
+Our previous experience with manually constructing a causal model based on Piketty's work highlighted the potential for automation using LLMs. LLMs have demonstrated remarkable capabilities in various domains, including understanding and generating code, translating languages, and producing different kinds of creative text. We believe that LLMs can be trained to analyze historical texts, identify causal relationships, and automatically generate corresponding diagrammatic models. This could significantly enhance our ability to visualize and comprehend complex historical narratives, making implicit connections explicit and facilitating further exploration and analysis.
+
+Historiographical theories explore the nature of historical inquiry, focusing on how historians represent and interpret the past. The use of diagrams has been considered a means of enhancing the communication and understanding of these complex theories. Diagrams have been utilized to represent causal narratives in historiography, providing a visual means to support historical understanding and to communicate research findings effectively; in particular, they have been employed to illustrate causal narratives and to enhance the clarity of historical explanations.
+
+On the other hand, Large Language Models (LLMs) have been increasingly integrated into various aspects of coding, from understanding and generating code to assisting in software development and customization. These models leverage vast amounts of data to provide support for a range of programming-related tasks.
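+
+To make the envisaged workflow concrete, the following is a minimal sketch, not the project's actual code: it assumes the `google-generativeai` Python SDK, and the file name, prompt wording and output format (Graphviz DOT) are illustrative choices of ours.
+
+```python
+# Illustrative sketch: ask Gemini 1.5 Pro to turn a short historical text into a
+# causal diagram expressed as Graphviz DOT. Prompt, file name and output handling
+# are assumptions for demonstration, not part of the poster's pipeline.
+from pathlib import Path
+import google.generativeai as genai
+
+genai.configure(api_key="YOUR_API_KEY")  # placeholder credential
+model = genai.GenerativeModel("gemini-1.5-pro")
+
+excerpt = Path("piketty_excerpt.txt").read_text(encoding="utf-8")  # hypothetical input file
+prompt = (
+    "Read the following historical explanation and return only a Graphviz DOT "
+    "digraph whose edges encode its causal claims (cause -> effect), with the "
+    "explanandum (the evolution of economic inequalities, 1914-1950) as the "
+    "terminal node:\n\n" + excerpt
+)
+
+response = model.generate_content(prompt)
+print(response.text)  # DOT source that the historian can inspect, correct and render
+```
+
+Keeping the output in a plain-text graph format makes it easy to check and revise by hand before rendering, which matches the need for human oversight discussed below.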
LLMs are proving to be versatile tools in the realm of coding, capable of understanding, generating, and customizing code across various programming languages and applications. They offer improvements in code-related tasks, user-friendly interactions, and support for low-resource languages. However, challenges such as bias in code generation and the need for human oversight in code review remain. Overall, LLMs are becoming an integral part of the software development process, offering both opportunities and areas for further research and development. + + +## Benefits and implications of that research + +The ability to automatically generate historiographical diagrams using LLMs offers several potential benefits: +- Enhanced understanding of complex historical narratives: Visual representations can clarify intricate causal relationships and make historical analysis more accessible to a wider audience. +- Identification of uncertainties and biases: LLMs can be trained to recognize subtle markers of uncertainty and bias within historical texts, encouraging critical engagement with historical interpretations. +- Efficiency and scalability: Automating the process of diagram generation would save time and resources, allowing researchers and teachers to explore a wider range of historical topics and narratives. + + diff --git a/submissions/poster/476/references.bib b/submissions/poster/476/references.bib new file mode 100644 index 0000000..bbc6f9c --- /dev/null +++ b/submissions/poster/476/references.bib @@ -0,0 +1,47 @@ + +@article{piketty_historical_2015, + title = {A Historical Approach to Property, Inequality and Debt: Reflections on Capital in the 21st Century}, + pages = {11}, + author = {Piketty, Thomas}, + date = {2015}, + langid = {english}, + file = {Institute and Piketty - A Historical Approach to Property, Inequality and .pdf:/Users/amatthe3/Zotero/storage/9ZXZ3XDH/Institute and Piketty - A Historical Approach to Property, Inequality and .pdf:application/pdf}, +} + +@book{piketty_capital_2017, + title = {Capital in the Twenty-First Century}, + isbn = {978-0-674-97985-7}, + abstract = {A New York Times \#1 Bestseller An Amazon \#1 Bestseller A Wall Street Journal \#1 Bestseller A {USA} Today Bestseller A Sunday Times {BestsellerWinner} of the Financial Times and {McKinsey} Business Book of the Year {AwardWinner} of the British Academy {MedalFinalist}, National Book Critics Circle Award "It seems safe to say that Capital in the Twenty-First Century, the magnum opus of the French economist Thomas Piketty, will be the most important economics book of the year--and maybe of the decade."--Paul Krugman, New York Times "The book aims to revolutionize the way people think about the economic history of the past two centuries. It may well manage the feat."--The Economist"Piketty's Capital in the Twenty-First Century is an intellectual tour de force, a triumph of economic history over the theoretical, mathematical modeling that has come to dominate the economics profession in recent years."--Steven Pearlstein, Washington Post"Piketty has written an extraordinarily important book...In its scale and sweep it brings us back to the founders of political economy."--Martin Wolf, Financial Times"A sweeping account of rising inequality...Piketty has written a book that nobody interested in a defining issue of our era can afford to ignore."--John Cassidy, New Yorker"Stands a fair chance of becoming the most influential work of economics yet published in our young century. 
It is the most important study of inequality in over fifty years."--Timothy Shenk, The Nation}, + pagetotal = {817}, + publisher = {Harvard University Press}, + author = {Piketty, Thomas}, + date = {2017-08-14}, + langid = {english}, + note = {Google-Books-{ID}: {dqEuDwAAQBAJ}}, + keywords = {Business \& Economics / Development / Economic Development, Business \& Economics / Economic History, Business \& Economics / Economics / Comparative, Business \& Economics / Economics / Theory, Political Science / Public Policy / Economic Policy}, +} + +@inproceedings{piotrowski_digital_2018, + location = {Berlin}, + title = {Digital Humanities: An Explication}, + doi = {10.18420/infdh2018-07}, + abstract = {The relationship between computer science and digital humanities and the potential contributions of computer science to digital humanities cannot be reasonably discussed without defining what we mean by ” digital humanities.” With a view to enabling this important discussion, we propose a concise definition of digital humanities centered around the construction of formal models.}, + booktitle = {Proceedings of {INF}-{DH} 2018}, + publisher = {Gesellschaft für Informatik}, + author = {Piotrowski, Michael}, + editor = {Burghardt, Manuel and Müller-Birn, Claudia}, + date = {2018}, + file = {Piotrowski_2018_Digital Humanities.pdf:/Users/amatthe3/Zotero/storage/B3D4HMI5/Piotrowski_2018_Digital Humanities.pdf:application/pdf}, +} + +@book{page_model_2018, + location = {New York}, + title = {The model thinker: What you need to know to make data work for you}, + isbn = {978-0-465-09463-9}, + pagetotal = {448}, + publisher = {Basic Books}, + author = {Page, Scott}, + date = {2018}, + keywords = {Business \& Economics / Economics / Theory, Computers / Data Modeling \& Design, Computers / Data Processing, Mathematics / Game Theory, Mathematics / Probability \& Statistics / General, Social Science / Statistics}, + file = {Emma - Prologue The Card Catalogue.pdf:/Users/amatthe3/Zotero/storage/B8KFJ7PS/Emma - Prologue The Card Catalogue.pdf:application/pdf;Snapshot:/Users/amatthe3/Zotero/storage/65VIYQPJ/9780465094639.html:text/html;Snapshot:/Users/amatthe3/Zotero/storage/5JTMVWQQ/39088592.html:text/html}, +}