From 61792f0e327373fb4490ec799f3b78f5b57d070a Mon Sep 17 00:00:00 2001 From: Tobias Gerstenberg Date: Wed, 6 Nov 2024 11:21:18 -0600 Subject: [PATCH] added wise AI paper --- content/publication/johnson2024wise.md | 33 ++ docs/404.html | 8 +- docs/bibtex/cic_papers.bib | 11 +- docs/home/index.xml | 4 +- docs/index.html | 2 +- docs/index.xml | 20 +- docs/member/tobias_gerstenberg/index.html | 39 ++ docs/publication/index.html | 64 +++ docs/publication/index.xml | 9 + docs/publication/johnson2024wise/index.html | 488 ++++++++++++++++++++ docs/publication_types/1/index.html | 11 +- docs/publication_types/1/index.xml | 11 +- docs/publication_types/index.html | 8 +- docs/publication_types/index.xml | 20 +- docs/sitemap.xml | 36 +- static/bibtex/cic_papers.bib | 11 +- 16 files changed, 726 insertions(+), 49 deletions(-) create mode 100644 content/publication/johnson2024wise.md create mode 100644 docs/publication/johnson2024wise/index.html diff --git a/content/publication/johnson2024wise.md b/content/publication/johnson2024wise.md new file mode 100644 index 0000000..8cc1875 --- /dev/null +++ b/content/publication/johnson2024wise.md @@ -0,0 +1,33 @@ ++++ +# 0 -> 'Forthcoming', +# 1 -> 'Preprint', +# 2 -> 'Journal', +# 3 -> 'Conference Proceedings', +# 4 -> 'Book chapter', +# 5 -> 'Thesis' + +title = "Imagining and building wise machines: The centrality of AI metacognition" +date = "2024-11-06" +authors = ["S. G. B. Johnson","A. Karimi","Y. Bengio","N. Chater","T. Gerstenberg","K. Larson","S. Levine","M. Mitchell","B. Schölkopf","I. Grossmann"] +publication_types = ["1"] +publication_short = "_arXiv_" +publication = "Johnson, S. G. B., Karimi, A., Bengio, Y., Chater, N., Gerstenberg, T., Larson, K., Levine, S., Mitchell, M., Schölkopf, B., Grossmann, I. (2024). Imagining and building wise machines: The centrality of AI metacognition. _arXiv_." 
+abstract = "Recent advances in artificial intelligence (AI) have produced systems capable of increasingly sophisticated performance on cognitive tasks. However, AI systems still struggle in critical ways: unpredictable and novel environments (robustness), lack transparency in their reasoning (explainability), face challenges in communication and commitment (cooperation), and pose risks due to potential harmful actions (safety). We argue that these shortcomings stem from one overarching failure: AI systems lack wisdom. Drawing from cognitive and social sciences, we define wisdom as the ability to navigate intractable problems---those that are ambiguous, radically uncertain, novel, chaotic, or computationally explosive---through effective task-level and metacognitive strategies. While AI research has focused on task-level strategies, metacognition---the ability to reflect on and regulate one's thought processes---is underdeveloped in AI systems. In humans, metacognitive strategies such as recognizing the limits of one's knowledge, considering diverse perspectives, and adapting to context are essential for wise decision-making. We propose that integrating metacognitive capabilities into AI systems is crucial for enhancing their robustness, explainability, cooperation, and safety. By focusing on developing wise AI, we suggest an alternative to aligning AI with specific human values---a task fraught with conceptual and practical difficulties. Instead, wise AI systems can thoughtfully navigate complex situations, account for diverse human values, and avoid harmful actions. We discuss potential approaches to building wise AI, including benchmarking metacognitive abilities and training AI systems to employ wise reasoning. Prioritizing metacognition in AI research will lead to systems that act not only intelligently but also wisely in complex, real-world situations." 
+image_preview = "" +selected = false +projects = [] +#url_pdf = "papers/johnson2024wise.pdf" +url_preprint = "https://arxiv.org/abs/2411.02478" +url_code = "" +url_dataset = "" +url_slides = "" +url_video = "" +url_poster = "" +url_source = "" +#url_custom = [{name = "Github", url = ""}] +math = true +highlight = true +[header] +# image = "publications/johnson2024wise.png" +caption = "" ++++ \ No newline at end of file diff --git a/docs/404.html b/docs/404.html index 76ef725..162ca1e 100644 --- a/docs/404.html +++ b/docs/404.html @@ -237,6 +237,10 @@

Page not found

Publications

+ + @@ -253,10 +257,6 @@

Publications

  • Causation, Meaning, and Communication
  • - - diff --git a/docs/bibtex/cic_papers.bib b/docs/bibtex/cic_papers.bib index a8c30af..4cbfcef 100644 --- a/docs/bibtex/cic_papers.bib +++ b/docs/bibtex/cic_papers.bib @@ -1,13 +1,22 @@ %% This BibTeX bibliography file was created using BibDesk. %% https://bibdesk.sourceforge.io/ -%% Created for Tobias Gerstenberg at 2024-10-26 12:42:26 -0700 +%% Created for Tobias Gerstenberg at 2024-11-06 11:16:38 -0600 %% Saved with string encoding Unicode (UTF-8) +@article{johnson2024wise, + abstract = {Recent advances in artificial intelligence (AI) have produced systems capable of increasingly sophisticated performance on cognitive tasks. However, AI systems still struggle in critical ways: unpredictable and novel environments (robustness), lack transparency in their reasoning (explainability), face challenges in communication and commitment (cooperation), and pose risks due to potential harmful actions (safety). We argue that these shortcomings stem from one overarching failure: AI systems lack wisdom. Drawing from cognitive and social sciences, we define wisdom as the ability to navigate intractable problems---those that are ambiguous, radically uncertain, novel, chaotic, or computationally explosive---through effective task-level and metacognitive strategies. While AI research has focused on task-level strategies, metacognition---the ability to reflect on and regulate one's thought processes---is underdeveloped in AI systems. In humans, metacognitive strategies such as recognizing the limits of one's knowledge, considering diverse perspectives, and adapting to context are essential for wise decision-making. We propose that integrating metacognitive capabilities into AI systems is crucial for enhancing their robustness, explainability, cooperation, and safety. By focusing on developing wise AI, we suggest an alternative to aligning AI with specific human values---a task fraught with conceptual and practical difficulties. 
Instead, wise AI systems can thoughtfully navigate complex situations, account for diverse human values, and avoid harmful actions. We discuss potential approaches to building wise AI, including benchmarking metacognitive abilities and training AI systems to employ wise reasoning. Prioritizing metacognition in AI research will lead to systems that act not only intelligently but also wisely in complex, real-world situations.}, + author = {Johnson, Samuel G. B. and Karimi, Amir-Hossein and Bengio, Yoshua and Chater, Nick and Gerstenberg, Tobias and Larson, Kate and Levine, Sydney and Mitchell, Melanie and Sch{\"o}lkopf, Bernhard and Grossmann, Igor}, + date-added = {2024-11-06 11:16:21 -0600}, + date-modified = {2024-11-06 11:16:21 -0600}, + journal = {arXiv}, + title = {Imagining and building wise machines: The centrality of {AI} metacognition}, + year = {2024}} + @article{jin2024marple, abstract = {Reconstructing past events requires reasoning across long time horizons. To figure out what happened, we need to use our prior knowledge about the world and human behavior and draw inferences from various sources of evidence including visual, language, and auditory cues. We introduce MARPLE, a benchmark for evaluating long-horizon inference capabilities using multi-modal evidence. Our benchmark features agents interacting with simulated households, supporting vision, language, and auditory stimuli, as well as procedurally generated environments and agent behaviors. Inspired by classic ``whodunit'' stories, we ask AI models and human participants to infer which agent caused a change in the environment based on a step-by-step replay of what actually happened. The goal is to correctly identify the culprit as early as possible. Our findings show that human participants outperform both traditional Monte Carlo simulation methods and an LLM baseline (GPT-4) on this task. 
Compared to humans, traditional inference models are less robust and performant, while GPT-4 has difficulty comprehending environmental changes. We analyze what factors influence inference performance and ablate different modes of evidence, finding that all modes are valuable for performance. Overall, our experiments demonstrate that the long-horizon, multimodal inference tasks in our benchmark present a challenge to current models. Project website: https: //marple-benchmark.github.io/.}, annote = {Comment: NeurIPS 2024. First two authors contributed equally. Project page: https://marple-benchmark.github.io/}, diff --git a/docs/home/index.xml b/docs/home/index.xml index da6ac1e..ebb37b5 100644 --- a/docs/home/index.xml +++ b/docs/home/index.xml @@ -15,7 +15,7 @@ Causality in Cognition Lab https://cicl.stanford.edu/home/home/ - Sun, 15 Oct 2017 00:00:00 -0700 + Sun, 15 Oct 2017 00:00:00 -0500 https://cicl.stanford.edu/home/home/ The Causality in Cognition Lab at Stanford University studies the role of causality in our understanding of the world and of each other. @@ -53,7 +53,7 @@ We are looking to hire a lab manager to start in the fall of 2018. 
For informati Selected<br>publications https://cicl.stanford.edu/home/publications_selected/ - Wed, 20 Apr 2016 00:00:00 -0700 + Wed, 20 Apr 2016 00:00:00 -0500 https://cicl.stanford.edu/home/publications_selected/ diff --git a/docs/index.html b/docs/index.html index ae1c58a..1ec9882 100644 --- a/docs/index.html +++ b/docs/index.html @@ -110,7 +110,7 @@ - + diff --git a/docs/index.xml b/docs/index.xml index 268e61b..b99a895 100644 --- a/docs/index.xml +++ b/docs/index.xml @@ -6,9 +6,18 @@ Hugo -- gohugo.io en-us &copy; 2024 Tobias Gerstenberg - Sat, 26 Oct 2024 00:00:00 +0000 + Wed, 06 Nov 2024 00:00:00 +0000 + + Imagining and building wise machines: The centrality of AI metacognition + https://cicl.stanford.edu/publication/johnson2024wise/ + Wed, 06 Nov 2024 00:00:00 +0000 + + https://cicl.stanford.edu/publication/johnson2024wise/ + + + From Artifacts to Human Lives: Investigating the Domain-Generality of Judgments about Purposes https://cicl.stanford.edu/publication/prinzing2024purpose/ @@ -135,14 +144,5 @@ - - Resource-rational moral judgment - https://cicl.stanford.edu/publication/wu2024resource/ - Wed, 01 May 2024 00:00:00 +0000 - - https://cicl.stanford.edu/publication/wu2024resource/ - - - diff --git a/docs/member/tobias_gerstenberg/index.html b/docs/member/tobias_gerstenberg/index.html index 8a1f7a4..2dcdd24 100644 --- a/docs/member/tobias_gerstenberg/index.html +++ b/docs/member/tobias_gerstenberg/index.html @@ -356,6 +356,45 @@

    Publications

    + + + (2024). + + Imagining and building wise machines: The centrality of AI metacognition. + arXiv. + + + + +

    + + + + + + Preprint + + + + + + + + + + + + + + + + + +

    + +
    +
    diff --git a/docs/publication/index.html b/docs/publication/index.html index 534511d..1100122 100644 --- a/docs/publication/index.html +++ b/docs/publication/index.html @@ -1610,6 +1610,19 @@

    Publications

    + + + + + + + + + + + + + @@ -1746,6 +1759,57 @@

    Publications

    +
    + +
    + + + (2024). + + Imagining and building wise machines: The centrality of AI metacognition. + arXiv. + + + + +

    + + + + + + Preprint + + + + + + + + + + + + + + + + + +

    + +
    + + +
    + + + + + + +
    diff --git a/docs/publication/index.xml b/docs/publication/index.xml index 781e6ab..6828827 100644 --- a/docs/publication/index.xml +++ b/docs/publication/index.xml @@ -12,6 +12,15 @@ + + Imagining and building wise machines: The centrality of AI metacognition + https://cicl.stanford.edu/publication/johnson2024wise/ + Wed, 06 Nov 2024 00:00:00 +0000 + + https://cicl.stanford.edu/publication/johnson2024wise/ + + + From Artifacts to Human Lives: Investigating the Domain-Generality of Judgments about Purposes https://cicl.stanford.edu/publication/prinzing2024purpose/ diff --git a/docs/publication/johnson2024wise/index.html b/docs/publication/johnson2024wise/index.html new file mode 100644 index 0000000..a2dbab1 --- /dev/null +++ b/docs/publication/johnson2024wise/index.html @@ -0,0 +1,488 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Imagining and building wise machines: The centrality of AI metacognition | Causality in Cognition Lab + + + + + + +
    + + + + +
    +
    +

    Imagining and building wise machines: The centrality of AI metacognition

    + + + + + +
    + + + + +

    Abstract

    +

    Recent advances in artificial intelligence (AI) have produced systems capable of increasingly sophisticated performance on cognitive tasks. However, AI systems still struggle in critical ways: unpredictable and novel environments (robustness), lack transparency in their reasoning (explainability), face challenges in communication and commitment (cooperation), and pose risks due to potential harmful actions (safety). We argue that these shortcomings stem from one overarching failure: AI systems lack wisdom. Drawing from cognitive and social sciences, we define wisdom as the ability to navigate intractable problems—those that are ambiguous, radically uncertain, novel, chaotic, or computationally explosive—through effective task-level and metacognitive strategies. While AI research has focused on task-level strategies, metacognition—the ability to reflect on and regulate one’s thought processes—is underdeveloped in AI systems. In humans, metacognitive strategies such as recognizing the limits of one’s knowledge, considering diverse perspectives, and adapting to context are essential for wise decision-making. We propose that integrating metacognitive capabilities into AI systems is crucial for enhancing their robustness, explainability, cooperation, and safety. By focusing on developing wise AI, we suggest an alternative to aligning AI with specific human values—a task fraught with conceptual and practical difficulties. Instead, wise AI systems can thoughtfully navigate complex situations, account for diverse human values, and avoid harmful actions. We discuss potential approaches to building wise AI, including benchmarking metacognitive abilities and training AI systems to employ wise reasoning. Prioritizing metacognition in AI research will lead to systems that act not only intelligently but also wisely in complex, real-world situations.

    + + + + + + +
    +
    +
    +
    +
    Type
    + +
    +
    +
    +
    +
    + + +
    +
    +
    +
    +
    Publication
    +
    Johnson, S. G. B., Karimi, A., Bengio, Y., Chater, N., Gerstenberg, T., Larson, K., Levine, S., Mitchell, M., Schölkopf, B., Grossmann, I. (2024). Imagining and building wise machines: The centrality of AI metacognition. arXiv.
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    Date
    + +
    +
    +
    +
    +
    + + +
    +
    +
    +
    +
    Links
    +
    + + + + + + Preprint + + + + + + + + + + + + + + + + + +
    +
    +
    +
    +
    +
    + + +
    + +
    + +

    << Back to list of publications

    +
    + +
    + + + + + +
    + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/publication_types/1/index.html b/docs/publication_types/1/index.html index 844d5ff..61a3b44 100644 --- a/docs/publication_types/1/index.html +++ b/docs/publication_types/1/index.html @@ -111,7 +111,7 @@ - + @@ -238,6 +238,15 @@

    1

    +
    +

    Imagining and building wise machines: The centrality of AI metacognition

    +
    + + Recent advances in artificial intelligence (AI) have produced systems capable of increasingly sophisticated performance on cognitive tasks. However, AI systems still struggle in critical ways: unpredictable and novel environments (robustness), lack … + +
    +
    +

    Causation, Meaning, and Communication

    diff --git a/docs/publication_types/1/index.xml b/docs/publication_types/1/index.xml index d3bb7a4..58c9750 100644 --- a/docs/publication_types/1/index.xml +++ b/docs/publication_types/1/index.xml @@ -7,11 +7,20 @@ Hugo -- gohugo.io en-us &copy; 2024 Tobias Gerstenberg - Fri, 20 Sep 2024 00:00:00 +0000 + Wed, 06 Nov 2024 00:00:00 +0000 + + Imagining and building wise machines: The centrality of AI metacognition + https://cicl.stanford.edu/publication/johnson2024wise/ + Wed, 06 Nov 2024 00:00:00 +0000 + + https://cicl.stanford.edu/publication/johnson2024wise/ + + + Causation, Meaning, and Communication https://cicl.stanford.edu/publication/beller2024causation/ diff --git a/docs/publication_types/index.html b/docs/publication_types/index.html index 701846d..20d923b 100644 --- a/docs/publication_types/index.html +++ b/docs/publication_types/index.html @@ -111,7 +111,7 @@ - + @@ -239,21 +239,21 @@

    Publication_types

    -

    2

    +

    1

    -

    3

    +

    2

    -

    1

    +

    3

    diff --git a/docs/publication_types/index.xml b/docs/publication_types/index.xml index ead7c54..9b9e80b 100644 --- a/docs/publication_types/index.xml +++ b/docs/publication_types/index.xml @@ -7,11 +7,20 @@ Hugo -- gohugo.io en-us &copy; 2024 Tobias Gerstenberg - Sat, 26 Oct 2024 00:00:00 +0000 + Wed, 06 Nov 2024 00:00:00 +0000 + + 1 + https://cicl.stanford.edu/publication_types/1/ + Wed, 06 Nov 2024 00:00:00 +0000 + + https://cicl.stanford.edu/publication_types/1/ + + + 2 https://cicl.stanford.edu/publication_types/2/ @@ -30,15 +39,6 @@ - - 1 - https://cicl.stanford.edu/publication_types/1/ - Fri, 20 Sep 2024 00:00:00 +0000 - - https://cicl.stanford.edu/publication_types/1/ - - - 0 https://cicl.stanford.edu/publication_types/0/ diff --git a/docs/sitemap.xml b/docs/sitemap.xml index 519ffe6..02fb4e1 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -3,8 +3,8 @@ - https://cicl.stanford.edu/publication_types/2/ - 2024-10-26T00:00:00+00:00 + https://cicl.stanford.edu/publication_types/1/ + 2024-11-06T00:00:00+00:00 0 @@ -13,7 +13,7 @@ https://cicl.stanford.edu/ - 2024-10-26T00:00:00+00:00 + 2024-11-06T00:00:00+00:00 0 @@ -21,8 +21,8 @@ - https://cicl.stanford.edu/publication/prinzing2024purpose/ - 2024-10-26T00:00:00+00:00 + https://cicl.stanford.edu/publication/johnson2024wise/ + 2024-11-06T00:00:00+00:00 @@ -30,7 +30,7 @@ https://cicl.stanford.edu/publication_types/ - 2024-10-26T00:00:00+00:00 + 2024-11-06T00:00:00+00:00 0 @@ -38,8 +38,8 @@ - https://cicl.stanford.edu/publication_types/3/ - 2024-10-08T00:00:00+00:00 + https://cicl.stanford.edu/publication_types/2/ + 2024-10-26T00:00:00+00:00 0 @@ -47,25 +47,33 @@ - https://cicl.stanford.edu/publication/franken2024sami/ + https://cicl.stanford.edu/publication/prinzing2024purpose/ + 2024-10-26T00:00:00+00:00 + + + + + + + https://cicl.stanford.edu/publication_types/3/ 2024-10-08T00:00:00+00:00 + 0 - https://cicl.stanford.edu/publication/jin2024marple/ - 2024-10-04T00:00:00+00:00 + 
https://cicl.stanford.edu/publication/franken2024sami/ + 2024-10-08T00:00:00+00:00 - https://cicl.stanford.edu/publication_types/1/ - 2024-09-20T00:00:00+00:00 - 0 + https://cicl.stanford.edu/publication/jin2024marple/ + 2024-10-04T00:00:00+00:00 diff --git a/static/bibtex/cic_papers.bib b/static/bibtex/cic_papers.bib index a8c30af..4cbfcef 100644 --- a/static/bibtex/cic_papers.bib +++ b/static/bibtex/cic_papers.bib @@ -1,13 +1,22 @@ %% This BibTeX bibliography file was created using BibDesk. %% https://bibdesk.sourceforge.io/ -%% Created for Tobias Gerstenberg at 2024-10-26 12:42:26 -0700 +%% Created for Tobias Gerstenberg at 2024-11-06 11:16:38 -0600 %% Saved with string encoding Unicode (UTF-8) +@article{johnson2024wise, + abstract = {Recent advances in artificial intelligence (AI) have produced systems capable of increasingly sophisticated performance on cognitive tasks. However, AI systems still struggle in critical ways: unpredictable and novel environments (robustness), lack transparency in their reasoning (explainability), face challenges in communication and commitment (cooperation), and pose risks due to potential harmful actions (safety). We argue that these shortcomings stem from one overarching failure: AI systems lack wisdom. Drawing from cognitive and social sciences, we define wisdom as the ability to navigate intractable problems---those that are ambiguous, radically uncertain, novel, chaotic, or computationally explosive---through effective task-level and metacognitive strategies. While AI research has focused on task-level strategies, metacognition---the ability to reflect on and regulate one's thought processes---is underdeveloped in AI systems. In humans, metacognitive strategies such as recognizing the limits of one's knowledge, considering diverse perspectives, and adapting to context are essential for wise decision-making. 
We propose that integrating metacognitive capabilities into AI systems is crucial for enhancing their robustness, explainability, cooperation, and safety. By focusing on developing wise AI, we suggest an alternative to aligning AI with specific human values---a task fraught with conceptual and practical difficulties. Instead, wise AI systems can thoughtfully navigate complex situations, account for diverse human values, and avoid harmful actions. We discuss potential approaches to building wise AI, including benchmarking metacognitive abilities and training AI systems to employ wise reasoning. Prioritizing metacognition in AI research will lead to systems that act not only intelligently but also wisely in complex, real-world situations.}, + author = {Johnson, Samuel G. B. and Karimi, Amir-Hossein and Bengio, Yoshua and Chater, Nick and Gerstenberg, Tobias and Larson, Kate and Levine, Sydney and Mitchell, Melanie and Sch{\"o}lkopf, Bernhard and Grossmann, Igor}, + date-added = {2024-11-06 11:16:21 -0600}, + date-modified = {2024-11-06 11:16:21 -0600}, + journal = {arXiv}, + title = {Imagining and building wise machines: The centrality of {AI} metacognition}, + year = {2024}} + @article{jin2024marple, abstract = {Reconstructing past events requires reasoning across long time horizons. To figure out what happened, we need to use our prior knowledge about the world and human behavior and draw inferences from various sources of evidence including visual, language, and auditory cues. We introduce MARPLE, a benchmark for evaluating long-horizon inference capabilities using multi-modal evidence. Our benchmark features agents interacting with simulated households, supporting vision, language, and auditory stimuli, as well as procedurally generated environments and agent behaviors. Inspired by classic ``whodunit'' stories, we ask AI models and human participants to infer which agent caused a change in the environment based on a step-by-step replay of what actually happened. 
The goal is to correctly identify the culprit as early as possible. Our findings show that human participants outperform both traditional Monte Carlo simulation methods and an LLM baseline (GPT-4) on this task. Compared to humans, traditional inference models are less robust and performant, while GPT-4 has difficulty comprehending environmental changes. We analyze what factors influence inference performance and ablate different modes of evidence, finding that all modes are valuable for performance. Overall, our experiments demonstrate that the long-horizon, multimodal inference tasks in our benchmark present a challenge to current models. Project website: https: //marple-benchmark.github.io/.}, annote = {Comment: NeurIPS 2024. First two authors contributed equally. Project page: https://marple-benchmark.github.io/},