diff --git a/content/publication/franken2023rails.md b/content/publication/franken2023rails.md new file mode 100644 index 0000000..a2f8185 --- /dev/null +++ b/content/publication/franken2023rails.md @@ -0,0 +1,33 @@ ++++ +# 0 -> 'Forthcoming', +# 1 -> 'Preprint', +# 2 -> 'Journal', +# 3 -> 'Conference Proceedings', +# 4 -> 'Book chapter', +# 5 -> 'Thesis' + +title = "Off The Rails: Procedural Dilemma Generation for Moral Reasoning" +date = "2023-10-30" +authors = ['J. Fränken',"A. Khawaja","K. Gandhi","J. Moore","N. D. Goodman","T. Gerstenberg"] +publication_types = ["3"] +publication_short = "_AI Meets Moral Philosophy and Moral Psychology Workshop (NeurIPS 2023)_" +publication = "Fränken J., Khawaja A., Gandhi K., Moore J., Goodman N. D., Gerstenberg T. (2023). Off The Rails: Procedural Dilemma Generation for Moral Reasoning. In _AI Meets Moral Philosophy and Moral Psychology Workshop (NeurIPS 2023)_." +abstract = "As AI systems like language models are increasingly integrated into making decisions that affect people, it's critical to ensure that these systems have sound moral reasoning. To test whether they do, we need to develop systematic evaluations. Recent work has introduced a method for procedurally generating LLM evaluations from abstract causal templates, and tested this method in the context of social reasoning (i.e., theory-of-mind). In this paper, we extend this method to the domain of moral dilemmas. We develop a framework that translates causal graphs into a prompt template which can then be used to procedurally generate a large and diverse set of moral dilemmas using a language model. Using this framework, we created the OffTheRails dataset which consists of 50 scenarios and 500 unique test items. We evaluated the quality of our model-written test items using two independent human experts and found that 90% of the test-items met the desired structure. We collect moral permissibility and intention judgments from 100 human crowdworkers and compared these judgments with those from GPT-4 and Claude-2 across eight control conditions. Both humans and GPT-4 assigned higher intentionality to agents when a harmful outcome was evitable and a necessary means. However, our findings did not match previous findings on permissibility judgments. This difference may be a result of not controlling the severity of harmful outcomes during scenario generation. We conclude by discussing future extensions of our benchmark to address this limitation." +image_preview = "" +selected = false +projects = [] +#url_pdf = "papers/franken2023rails.pdf" +url_preprint = "" +url_code = "" +url_dataset = "" +url_slides = "" +url_video = "" +url_poster = "" +url_source = "" +#url_custom = [{name = "Github", url = ""}] +math = true +highlight = true +[header] +# image = "publications/franken2023rails.png" +caption = "" ++++ \ No newline at end of file diff --git a/content/publication/franken2023social.md b/content/publication/franken2023social.md new file mode 100644 index 0000000..f9834d5 --- /dev/null +++ b/content/publication/franken2023social.md @@ -0,0 +1,33 @@ ++++ +# 0 -> 'Forthcoming', +# 1 -> 'Preprint', +# 2 -> 'Journal', +# 3 -> 'Conference Proceedings', +# 4 -> 'Book chapter', +# 5 -> 'Thesis' + +title = "Social Contract AI: Aligning AI Assistants with Implicit Group Norms" +date = "2023-10-30" +authors = ['J. Fränken',"S. Kwok","P. Ye","K. Gandhi","D. Arumugam","J. Moore","A. Tamkin","T. Gerstenberg","N. D. 
Goodman"] +publication_types = ["3"] +publication_short = "_Socially Responsible Language Modelling Research Workshop (NeurIPS 2023)_" +publication = "Fränken J., Kwok S., Ye P., Gandhi K., Arumugam D., Moore J., Tamkin A., Gerstenberg T., Goodman N. D. (2023). Social Contract AI: Aligning AI Assistants with Implicit Group Norms. In _Socially Responsible Language Modelling Research Workshop (NeurIPS 2023)_." +abstract = "We explore the idea of aligning an AI assistant by inverting a model of users' (unknown) preferences from observed interactions. To validate our proposal, we run proof-of-concept simulations in the economic ultimatum game, formalizing user preferences as policies that guide the actions of simulated players. We find that the AI assistant accurately aligns its behavior to match standard policies from the economic literature (e.g., selfish, altruistic). However, the assistant's learned policies lack robustness and exhibit limited generalization in an out-of-distribution setting when confronted with a currency (e.g., grams of medicine) that was not included in the assistant's training distribution. Additionally, we find that when there is inconsistency in the relationship between language use and an unknown policy (e.g., an altruistic policy combined with rude language), the assistant's learning of the policy is slowed. Overall, our preliminary results suggest that developing simulation frameworks in which AI assistants need to infer preferences from diverse users can provide a valuable approach for studying practical alignment questions." +image_preview = "" +selected = false +projects = [] +url_pdf = "papers/franken2023social.pdf" +url_preprint = "https://arxiv.org/abs/2310.17769" +url_code = "" +url_dataset = "" +url_slides = "" +url_video = "" +url_poster = "" +url_source = "" +url_custom = [{name = "Github", url = "https://github.com/janphilippfranken/scai/tree/release"}] +math = true +highlight = true +[header] +# image = "publications/franken2023social.png" +caption = "" ++++ \ No newline at end of file diff --git a/content/publication/gandhi2023understanding.md b/content/publication/gandhi2023understanding.md index eaa0eac..f26adb0 100644 --- a/content/publication/gandhi2023understanding.md +++ b/content/publication/gandhi2023understanding.md @@ -9,9 +9,9 @@ title = "Understanding Social Reasoning in Language Models with Language Models" date = "2023-06-27" authors = ["K. Gandhi",'J. Fränken',"T. Gerstenberg","N. D. Goodman"] -publication_types = ["1"] -publication_short = "_arXiv_" -publication = "Gandhi K., Fränken J., Gerstenberg T., Goodman N. D. (2023). Understanding Social Reasoning in Language Models with Language Models. In _arXiv_." +publication_types = ["3"] +publication_short = "_Advances in Neural Information Processing Systems_" +publication = "Gandhi K., Fränken J., Gerstenberg T., Goodman N. D. (2023). Understanding Social Reasoning in Language Models with Language Models. _Advances in Neural Information Processing Systems_." abstract = "As Large Language Models (LLMs) become increasingly integrated into our everyday lives, understanding their ability to comprehend human mental states becomes critical for ensuring effective interactions. However, despite the recent attempts to assess the Theory-of-Mind (ToM) reasoning capabilities of LLMs, the degree to which these models can align with human ToM remains a nuanced topic of exploration. 
This is primarily due to two distinct challenges: (1) the presence of inconsistent results from previous evaluations, and (2) concerns surrounding the validity of existing evaluation methodologies. To address these challenges, we present a novel framework for procedurally generating evaluations with LLMs by populating causal templates. Using our framework, we create a new social reasoning benchmark (BigToM) for LLMs which consists of 25 controls and 5,000 model-written evaluations. We find that human participants rate the quality of our benchmark higher than previous crowd-sourced evaluations and comparable to expert-written evaluations. Using BigToM, we evaluate the social reasoning capabilities of a variety of LLMs and compare model performances with human performance. Our results suggest that GPT4 has ToM capabilities that mirror human inference patterns, though less reliable, while other LLMs struggle." tags = ["Spotlight at NeurIPS 2023"] image_preview = "" diff --git a/content/publication/goodman2023probabilistic.md b/content/publication/goodman2023probabilistic.md index 11ad464..b41caae 100644 --- a/content/publication/goodman2023probabilistic.md +++ b/content/publication/goodman2023probabilistic.md @@ -7,7 +7,7 @@ # 5 -> 'Thesis' title = "Probabilistic programs as a unifying language of thought" -date = "2023-01-01" +date = "2023-10-20" year = "{in press}" authors = ["N. D. Goodman","T. Gerstenberg","J. B. Tenenbaum"] publication_types = ["4", "0"] diff --git a/content/publication/kirfel2023anticipating.md b/content/publication/kirfel2023anticipating.md new file mode 100644 index 0000000..1989749 --- /dev/null +++ b/content/publication/kirfel2023anticipating.md @@ -0,0 +1,33 @@ ++++ +# 0 -> 'Forthcoming', +# 1 -> 'Preprint', +# 2 -> 'Journal', +# 3 -> 'Conference Proceedings', +# 4 -> 'Book chapter', +# 5 -> 'Thesis' + +title = "Anticipating the risks and benefits of counterfactual world simulation models" +date = "2023-10-30" +authors = ["L. Kirfel","R. J. MacCoun","T. Icard","T. Gerstenberg"] +publication_types = ["3"] +publication_short = "_AI Meets Moral Philosophy and Moral Psychology Workshop (NeurIPS 2023)_" +publication = "Kirfel L., MacCoun R. J., Icard T., Gerstenberg T. (2023). Anticipating the risks and benefits of counterfactual world simulation models. In _AI Meets Moral Philosophy and Moral Psychology Workshop (NeurIPS 2023)_." +abstract = "This paper examines the transformative potential of Counterfactual World Simulation Models (CWSMs). A CWSM uses multi-modal evidence, such as the CCTV footage of a road accident, to build a high-fidelity 3D reconstruction of what happened. It can answer causal questions, such as whether the accident happened because the driver was speeding, by simulating what would have happened in relevant counterfactual situations. We argue for a normative and ethical framework that guides and constrains the simulation of counterfactuals. We address the challenge of ensuring fidelity in reconstructions while simultaneously preventing stereotype perpetuation during counterfactual simulations. We anticipate different modes of how users will interact with CWSMs and discuss how their outputs may be presented. Finally, we address the prospective applications of CWSMs in the legal domain, recognizing both their potential to revolutionize legal proceedings as well as the ethical concerns they engender. Sketching a new genre of AI, this paper seeks to illuminate the path forward for responsible and effective use of CWSMs." 
+image_preview = "" +selected = false +projects = [] +#url_pdf = "papers/kirfel2023anticipating.pdf" +url_preprint = "" +url_code = "" +url_dataset = "" +url_slides = "" +url_video = "" +url_poster = "" +url_source = "" +#url_custom = [{name = "Github", url = ""}] +math = true +highlight = true +[header] +# image = "publications/kirfel2023anticipating.png" +caption = "" ++++ \ No newline at end of file diff --git a/docs/404.html b/docs/404.html index 92cb07e..663093b 100644 --- a/docs/404.html +++ b/docs/404.html @@ -238,23 +238,23 @@

Page not found

Publications

diff --git a/docs/bibtex/cic_papers.bib b/docs/bibtex/cic_papers.bib index ee6ce25..c5deb15 100644 --- a/docs/bibtex/cic_papers.bib +++ b/docs/bibtex/cic_papers.bib @@ -1,13 +1,40 @@ %% This BibTeX bibliography file was created using BibDesk. %% https://bibdesk.sourceforge.io/ -%% Created for Tobias Gerstenberg at 2023-10-15 13:46:48 -0700 +%% Created for Tobias Gerstenberg at 2023-10-30 18:25:01 -0700 %% Saved with string encoding Unicode (UTF-8) +@inproceedings{kirfel2023anticipating, + abstract = {This paper examines the transformative potential of Counterfactual World Simulation Models (CWSMs). A CWSM uses multi-modal evidence, such as the CCTV footage of a road accident, to build a high-fidelity 3D reconstruction of what happened. It can answer causal questions, such as whether the accident happened because the driver was speeding, by simulating what would have happened in relevant counterfactual situations. We argue for a normative and ethical framework that guides and constrains the simulation of counterfactuals. We address the challenge of ensuring fidelity in reconstructions while simultaneously preventing stereotype perpetuation during counterfactual simulations. We anticipate different modes of how users will interact with CWSMs and discuss how their outputs may be presented. Finally, we address the prospective applications of CWSMs in the legal domain, recognizing both their potential to revolutionize legal proceedings as well as the ethical concerns they engender. Sketching a new genre of AI, this paper seeks to illuminate the path forward for responsible and effective use of CWSMs.}, + author = {Lara Kirfel and Robert J. MacCoun and Thomas Icard and Tobias Gerstenberg}, + booktitle = {{AI Meets Moral Philosophy and Moral Psychology Workshop (NeurIPS 2023)}}, + date-added = {2023-10-30 18:20:41 -0700}, + date-modified = {2023-10-30 18:20:50 -0700}, + title = {Anticipating the risks and benefits of counterfactual world simulation models}, + year = {2023}} + +@inproceedings{franken2023rails, + abstract = {As AI systems like language models are increasingly integrated into making decisions that affect people, it's critical to ensure that these systems have sound moral reasoning. To test whether they do, we need to develop systematic evaluations. Recent work has introduced a method for procedurally generating LLM evaluations from abstract causal templates, and tested this method in the context of social reasoning (i.e., theory-of-mind). In this paper, we extend this method to the domain of moral dilemmas. We develop a framework that translates causal graphs into a prompt template which can then be used to procedurally generate a large and diverse set of moral dilemmas using a language model. Using this framework, we created the OffTheRails dataset which consists of 50 scenarios and 500 unique test items. We evaluated the quality of our model-written test items using two independent human experts and found that 90% of the test-items met the desired structure. We collect moral permissibility and intention judgments from 100 human crowdworkers and compared these judgments with those from GPT-4 and Claude-2 across eight control conditions. Both humans and GPT-4 assigned higher intentionality to agents when a harmful outcome was evitable and a necessary means. However, our findings did not match previous findings on permissibility judgments. This difference may be a result of not controlling the severity of harmful outcomes during scenario generation. 
We conclude by discussing future extensions of our benchmark to address this limitation.}, + author = {Jan-Philipp Fr{\"a}nken and Ayesha Khawaja and Kanishk Gandhi and Jared Moore and Noah D. Goodman and Tobias Gerstenberg}, + booktitle = {{AI Meets Moral Philosophy and Moral Psychology Workshop (NeurIPS 2023)}}, + date-added = {2023-10-30 18:18:28 -0700}, + date-modified = {2023-10-30 18:18:35 -0700}, + title = {Off The Rails: Procedural Dilemma Generation for Moral Reasoning}, + year = {2023}} + +@inproceedings{franken2023social, + abstract = {We explore the idea of aligning an AI assistant by inverting a model of users' (unknown) preferences from observed interactions. To validate our proposal, we run proof-of-concept simulations in the economic ultimatum game, formalizing user preferences as policies that guide the actions of simulated players. We find that the AI assistant accurately aligns its behavior to match standard policies from the economic literature (e.g., selfish, altruistic). However, the assistant's learned policies lack robustness and exhibit limited generalization in an out-of-distribution setting when confronted with a currency (e.g., grams of medicine) that was not included in the assistant's training distribution. Additionally, we find that when there is inconsistency in the relationship between language use and an unknown policy (e.g., an altruistic policy combined with rude language), the assistant's learning of the policy is slowed. Overall, our preliminary results suggest that developing simulation frameworks in which AI assistants need to infer preferences from diverse users can provide a valuable approach for studying practical alignment questions.}, + author = {Jan-Philipp Fr{\"a}nken and Sam Kwok and Peixuan Ye and Kanishk Gandhi and Dilip Arumugam and Jared Moore and Alex Tamkin and Tobias Gerstenberg and Noah D. Goodman}, + booktitle = {Socially Responsible Language Modelling Research Workshop (NeurIPS 2023)}, + date-added = {2023-10-30 18:11:09 -0700}, + date-modified = {2023-10-30 18:11:15 -0700}, + title = {Social Contract AI: Aligning AI Assistants with Implicit Group Norms}, + year = {2023}} + @incollection{goodman2023probabilistic, author = {Noah D. Goodman and Tobias Gerstenberg and Joshua B. Tenenbaum}, booktitle = {Reverse-engineering the mind: The Bayesian approach to cognitive science}, @@ -49,16 +76,14 @@ @inproceedings{chase2023realism title = {Realism of Visual, Auditory, and Haptic Cues in Phenomenal Causality}, year = {2023}} -@article{gandhi2023understanding, +@inproceedings{gandhi2023understanding, abstract = {As Large Language Models (LLMs) become increasingly integrated into our everyday lives, understanding their ability to comprehend human mental states becomes critical for ensuring effective interactions. However, despite the recent attempts to assess the Theory-of-Mind (ToM) reasoning capabilities of LLMs, the degree to which these models can align with human ToM remains a nuanced topic of exploration. This is primarily due to two distinct challenges: (1) the presence of inconsistent results from previous evaluations, and (2) concerns surrounding the validity of existing evaluation methodologies. To address these challenges, we present a novel framework for procedurally generating evaluations with LLMs by populating causal templates. Using our framework, we create a new social reasoning benchmark (BigToM) for LLMs which consists of 25 controls and 5,000 model-written evaluations. 
We find that human participants rate the quality of our benchmark higher than previous crowd-sourced evaluations and comparable to expert-written evaluations. Using BigToM, we evaluate the social reasoning capabilities of a variety of LLMs and compare model performances with human performance. Our results suggest that GPT4 has ToM capabilities that mirror human inference patterns, though less reliable, while other LLMs struggle.}, author = {Kanishk Gandhi and Jan-Philipp Fr{\"a}nken and Tobias Gerstenberg and Noah D. Goodman}, + booktitle = {{Advances in Neural Information Processing Systems}}, date-added = {2023-06-27 19:45:01 -0700}, - date-modified = {2023-06-27 19:45:09 -0700}, - journal = {arXiv}, + date-modified = {2023-10-30 18:11:40 -0700}, title = {Understanding Social Reasoning in Language Models with Language Models}, - url = {https://arxiv.org/abs/2306.15448}, - year = {2023}, - bdsk-url-1 = {https://arxiv.org/abs/2306.15448}} + year = {2023}} @article{vasconcelos2023explanations, author = {Vasconcelos, Helena and J{\"o}rke, Matthew and Grunde-McLaughlin, Madeleine and Gerstenberg, Tobias and Bernstein, Michael S and Krishna, Ranjay}, diff --git a/docs/index.html b/docs/index.html index d2d2786..c817bb0 100644 --- a/docs/index.html +++ b/docs/index.html @@ -110,7 +110,7 @@ - + diff --git a/docs/index.xml b/docs/index.xml index f1726ee..e59f42f 100644 --- a/docs/index.xml +++ b/docs/index.xml @@ -6,9 +6,36 @@ Hugo -- gohugo.io en-us © 2023 Tobias Gerstenberg - Fri, 27 Oct 2023 00:00:00 +0000 + Mon, 30 Oct 2023 00:00:00 +0000 + + Anticipating the risks and benefits of counterfactual world simulation models + https://cicl.stanford.edu/publication/kirfel2023anticipating/ + Mon, 30 Oct 2023 00:00:00 +0000 + + https://cicl.stanford.edu/publication/kirfel2023anticipating/ + + + + + Off The Rails: Procedural Dilemma Generation for Moral Reasoning + https://cicl.stanford.edu/publication/franken2023rails/ + Mon, 30 Oct 2023 00:00:00 +0000 + + https://cicl.stanford.edu/publication/franken2023rails/ + + + + + Social Contract AI: Aligning AI Assistants with Implicit Group Norms + https://cicl.stanford.edu/publication/franken2023social/ + Mon, 30 Oct 2023 00:00:00 +0000 + + https://cicl.stanford.edu/publication/franken2023social/ + + + If not me, then who? 
Responsibility and replacement https://cicl.stanford.edu/publication/wu2023replacement/ @@ -18,6 +45,15 @@ + + Probabilistic programs as a unifying language of thought + https://cicl.stanford.edu/publication/goodman2023probabilistic/ + Fri, 20 Oct 2023 00:00:00 +0000 + + https://cicl.stanford.edu/publication/goodman2023probabilistic/ + + + Children use disagreement to infer what happened https://cicl.stanford.edu/publication/amemiya2023disagreement/ @@ -108,41 +144,5 @@ - - Causal Reasoning Across Agents and Objects - https://cicl.stanford.edu/publication/gonzalez2023agents/ - Wed, 10 May 2023 00:00:00 +0000 - - https://cicl.stanford.edu/publication/gonzalez2023agents/ - - - - - Learning what matters: Causal abstraction in human inference - https://cicl.stanford.edu/publication/shin2023abstraction/ - Tue, 09 May 2023 00:00:00 +0000 - - https://cicl.stanford.edu/publication/shin2023abstraction/ - - - - - Teleology and generics - https://cicl.stanford.edu/publication/rose2023teleology/ - Mon, 08 May 2023 00:00:00 +0000 - - https://cicl.stanford.edu/publication/rose2023teleology/ - - - - - Mental Jenga: A counterfactual simulation model of causal judgments about physical support - https://cicl.stanford.edu/publication/zhou2023jenga/ - Thu, 26 Jan 2023 00:00:00 +0000 - - https://cicl.stanford.edu/publication/zhou2023jenga/ - - - diff --git a/docs/member/tobias_gerstenberg/index.html b/docs/member/tobias_gerstenberg/index.html index 91feec8..8ba68f6 100644 --- a/docs/member/tobias_gerstenberg/index.html +++ b/docs/member/tobias_gerstenberg/index.html @@ -354,6 +354,123 @@

Publications

+ + + (2023). + + Anticipating the risks and benefits of counterfactual world simulation models. + AI Meets Moral Philosophy and Moral Psychology Workshop (NeurIPS 2023). + + + + +

+ + + + + + + + + + + + + + + + + + + +

+ +
+
+ + + (2023). + + Off The Rails: Procedural Dilemma Generation for Moral Reasoning. + AI Meets Moral Philosophy and Moral Psychology Workshop (NeurIPS 2023). + + + + +

+ + + + + + + + + + + + + + + + + + + +

+ +
+
+ + + (2023). + + Social Contract AI: Aligning AI Assistants with Implicit Group Norms. + Socially Responsible Language Modelling Research Workshop (NeurIPS 2023). + + + + +

+ + + + + + Preprint + + + + + PDF + + + + + + + + + + + + + + + + + Github + + + +

+ +
+
@@ -397,6 +514,45 @@

Publications

+

+ +
+
+ + + (2023). + + Probabilistic programs as a unifying language of thought. + Reverse-engineering the mind: The Bayesian approach to cognitive science. + + + + +

+ + + + + + + PDF + + + + + + + + + + + + + + + +

@@ -542,7 +698,7 @@

Publications

(2023). Understanding Social Reasoning in Language Models with Language Models. - arXiv. + Advances in Neural Information Processing Systems. @@ -1218,45 +1374,6 @@

Publications

-

- - -
- - - (2023). - - Probabilistic programs as a unifying language of thought. - Reverse-engineering the mind: The Bayesian approach to cognitive science. - - - - -

- - - - - - - PDF - - - - - - - - - - - - - - - -

diff --git a/docs/papers/franken2023social.pdf b/docs/papers/franken2023social.pdf new file mode 100644 index 0000000..47edf02 Binary files /dev/null and b/docs/papers/franken2023social.pdf differ diff --git a/docs/publication/franken2023rails/index.html b/docs/publication/franken2023rails/index.html new file mode 100644 index 0000000..547d013 --- /dev/null +++ b/docs/publication/franken2023rails/index.html @@ -0,0 +1,484 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Off The Rails: Procedural Dilemma Generation for Moral Reasoning | Causality in Cognition Lab + + + + + + +
+ + + + +
+
+

Off The Rails: Procedural Dilemma Generation for Moral Reasoning

+ + + + + +
+ + + + +

Abstract

+

As AI systems like language models are increasingly integrated into making decisions that affect people, it’s critical to ensure that these systems have sound moral reasoning. To test whether they do, we need to develop systematic evaluations. Recent work has introduced a method for procedurally generating LLM evaluations from abstract causal templates, and tested this method in the context of social reasoning (i.e., theory-of-mind). In this paper, we extend this method to the domain of moral dilemmas. We develop a framework that translates causal graphs into a prompt template which can then be used to procedurally generate a large and diverse set of moral dilemmas using a language model. Using this framework, we created the OffTheRails dataset which consists of 50 scenarios and 500 unique test items. We evaluated the quality of our model-written test items using two independent human experts and found that 90% of the test-items met the desired structure. We collect moral permissibility and intention judgments from 100 human crowdworkers and compared these judgments with those from GPT-4 and Claude-2 across eight control conditions. Both humans and GPT-4 assigned higher intentionality to agents when a harmful outcome was evitable and a necessary means. However, our findings did not match previous findings on permissibility judgments. This difference may be a result of not controlling the severity of harmful outcomes during scenario generation. We conclude by discussing future extensions of our benchmark to address this limitation.

+ + + + + + +
+
+
+
+
Type
+ +
+
+
+
+
+ + +
+
+
+
+
Publication
+
Fränken J., Khawaja A., Gandhi K., Moore J., Goodman N. D., Gerstenberg T. (2023). Off The Rails: Procedural Dilemma Generation for Moral Reasoning. In AI Meets Moral Philosophy and Moral Psychology Workshop (NeurIPS 2023).
+
+
+
+
+
+ +
+
+
+
+
Date
+ +
+
+
+
+
+ + +
+
+
+
+
Links
+
+ + + + + + + + + + + + + + + + + + + +
+
+
+
+
+
+ + +
+ +
+ +

<< Back to list of publications

+
+ +
+ + + + + +
+ + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/publication/franken2023social/index.html b/docs/publication/franken2023social/index.html new file mode 100644 index 0000000..fde6a7d --- /dev/null +++ b/docs/publication/franken2023social/index.html @@ -0,0 +1,496 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Social Contract AI: Aligning AI Assistants with Implicit Group Norms | Causality in Cognition Lab + + + + + + +
+ + + + +
+
+

Social Contract AI: Aligning AI Assistants with Implicit Group Norms

+ + + + + +
+ + + + +

Abstract

+

We explore the idea of aligning an AI assistant by inverting a model of users’ (unknown) preferences from observed interactions. To validate our proposal, we run proof-of-concept simulations in the economic ultimatum game, formalizing user preferences as policies that guide the actions of simulated players. We find that the AI assistant accurately aligns its behavior to match standard policies from the economic literature (e.g., selfish, altruistic). However, the assistant’s learned policies lack robustness and exhibit limited generalization in an out-of-distribution setting when confronted with a currency (e.g., grams of medicine) that was not included in the assistant’s training distribution. Additionally, we find that when there is inconsistency in the relationship between language use and an unknown policy (e.g., an altruistic policy combined with rude language), the assistant’s learning of the policy is slowed. Overall, our preliminary results suggest that developing simulation frameworks in which AI assistants need to infer preferences from diverse users can provide a valuable approach for studying practical alignment questions.

+ + + + + + +
+
+
+
+
Type
+ +
+
+
+
+
+ + +
+
+
+
+
Publication
+
Fränken J., Kwok S., Ye P., Gandhi K., Arumugam D., Moore J., Tamkin A., Gerstenberg T., Goodman N. D. (2023). Social Contract AI: Aligning AI Assistants with Implicit Group Norms. In Socially Responsible Language Modelling Research Workshop (NeurIPS 2023).
+
+
+
+
+
+ +
+
+
+
+
Date
+ +
+
+
+
+
+ + +
+
+
+
+
Links
+
+ + + + + + Preprint + + + + + PDF + + + + + + + + + + + + + + + + + Github + + + +
+
+
+
+
+
+ + +
+ +
+ +

<< Back to list of publications

+
+ +
+ + + + + +
+ + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/publication/gandhi2023understanding/index.html b/docs/publication/gandhi2023understanding/index.html index 3538e89..f213b09 100644 --- a/docs/publication/gandhi2023understanding/index.html +++ b/docs/publication/gandhi2023understanding/index.html @@ -277,8 +277,8 @@

Abstract

Type
- - Preprint + + Conference Proceedings
@@ -294,7 +294,7 @@

Abstract

Publication
-
Gandhi K., Fränken J., Gerstenberg T., Goodman N. D. (2023). Understanding Social Reasoning in Language Models with Language Models. In arXiv.
+
Gandhi K., Fränken J., Gerstenberg T., Goodman N. D. (2023). Understanding Social Reasoning in Language Models with Language Models. Advances in Neural Information Processing Systems.
diff --git a/docs/publication/goodman2023probabilistic/index.html b/docs/publication/goodman2023probabilistic/index.html index 2e119a2..3878e3e 100644 --- a/docs/publication/goodman2023probabilistic/index.html +++ b/docs/publication/goodman2023probabilistic/index.html @@ -111,9 +111,9 @@ - + - + diff --git a/docs/publication/index.html b/docs/publication/index.html index 6001837..26cbf4f 100644 --- a/docs/publication/index.html +++ b/docs/publication/index.html @@ -1359,6 +1359,45 @@

Publications

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -1495,6 +1534,159 @@

Publications

+
+ +
+ + + (2023). + + Anticipating the risks and benefits of counterfactual world simulation models. + AI Meets Moral Philosophy and Moral Psychology Workshop (NeurIPS 2023). + + + + +

+ + + + + + + + + + + + + + + + + + + +

+ +
+ + +
+ + + + + + + +
+ +
+ + + (2023). + + Off The Rails: Procedural Dilemma Generation for Moral Reasoning. + AI Meets Moral Philosophy and Moral Psychology Workshop (NeurIPS 2023). + + + + +

+ + + + + + + + + + + + + + + + + + + +

+ +
+ + +
+ + + + + + + +
+ +
+ + + (2023). + + Social Contract AI: Aligning AI Assistants with Implicit Group Norms. + Socially Responsible Language Modelling Research Workshop (NeurIPS 2023). + + + + +

+ + + + + + Preprint + + + + + PDF + + + + + + + + + + + + + + + + + Github + + + +

+ +
+ + +
+ + + + + + +
@@ -1541,6 +1733,57 @@

Publications

+

+ +
+ + +
+ + + + + + + +
+ +
+ + + (2023). + + Probabilistic programs as a unifying language of thought. + Reverse-engineering the mind: The Bayesian approach to cognitive science. + + + + +

+ + + + + + + PDF + + + + + + + + + + + + + + + +

@@ -1725,7 +1968,7 @@

Publications

-
+
- - -
- - - - - - - -
- -
- - - (2023). - - Probabilistic programs as a unifying language of thought. - Reverse-engineering the mind: The Bayesian approach to cognitive science. - - - - -

- - - - - - - PDF - - - - - - - - - - - - - - - -

diff --git a/docs/publication/index.xml b/docs/publication/index.xml index 7165e47..16503fa 100644 --- a/docs/publication/index.xml +++ b/docs/publication/index.xml @@ -12,6 +12,33 @@ + + Anticipating the risks and benefits of counterfactual world simulation models + https://cicl.stanford.edu/publication/kirfel2023anticipating/ + Mon, 30 Oct 2023 00:00:00 +0000 + + https://cicl.stanford.edu/publication/kirfel2023anticipating/ + + + + + Off The Rails: Procedural Dilemma Generation for Moral Reasoning + https://cicl.stanford.edu/publication/franken2023rails/ + Mon, 30 Oct 2023 00:00:00 +0000 + + https://cicl.stanford.edu/publication/franken2023rails/ + + + + + Social Contract AI: Aligning AI Assistants with Implicit Group Norms + https://cicl.stanford.edu/publication/franken2023social/ + Mon, 30 Oct 2023 00:00:00 +0000 + + https://cicl.stanford.edu/publication/franken2023social/ + + + If not me, then who? Responsibility and replacement https://cicl.stanford.edu/publication/wu2023replacement/ @@ -21,6 +48,15 @@ + + Probabilistic programs as a unifying language of thought + https://cicl.stanford.edu/publication/goodman2023probabilistic/ + Fri, 20 Oct 2023 00:00:00 +0000 + + https://cicl.stanford.edu/publication/goodman2023probabilistic/ + + + Children use disagreement to infer what happened https://cicl.stanford.edu/publication/amemiya2023disagreement/ @@ -174,15 +210,6 @@ - - Probabilistic programs as a unifying language of thought - https://cicl.stanford.edu/publication/goodman2023probabilistic/ - Sun, 01 Jan 2023 00:00:00 +0000 - - https://cicl.stanford.edu/publication/goodman2023probabilistic/ - - - What would have happened? Counterfactuals, hypotheticals, and causal judgments https://cicl.stanford.edu/publication/gerstenberg2022hypothetical/ diff --git a/docs/publication/kirfel2023anticipating/index.html b/docs/publication/kirfel2023anticipating/index.html new file mode 100644 index 0000000..8f555f6 --- /dev/null +++ b/docs/publication/kirfel2023anticipating/index.html @@ -0,0 +1,484 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Anticipating the risks and benefits of counterfactual world simulation models | Causality in Cognition Lab + + + + + + +
+ + + + +
+
+

Anticipating the risks and benefits of counterfactual world simulation models

+ + + + + +
+ + + + +

Abstract

+

This paper examines the transformative potential of Counterfactual World Simulation Models (CWSMs). A CWSM uses multi-modal evidence, such as the CCTV footage of a road accident, to build a high-fidelity 3D reconstruction of what happened. It can answer causal questions, such as whether the accident happened because the driver was speeding, by simulating what would have happened in relevant counterfactual situations. We argue for a normative and ethical framework that guides and constrains the simulation of counterfactuals. We address the challenge of ensuring fidelity in reconstructions while simultaneously preventing stereotype perpetuation during counterfactual simulations. We anticipate different modes of how users will interact with CWSMs and discuss how their outputs may be presented. Finally, we address the prospective applications of CWSMs in the legal domain, recognizing both their potential to revolutionize legal proceedings as well as the ethical concerns they engender. Sketching a new genre of AI, this paper seeks to illuminate the path forward for responsible and effective use of CWSMs.

+ + + + + + +
+
+
+
+
Type
+ +
+
+
+
+
+ + +
+
+
+
+
Publication
+
Kirfel L., MacCoun R. J., Icard T., Gerstenberg T. (2023). Anticipating the risks and benefits of counterfactual world simulation models. In AI Meets Moral Philosophy and Moral Psychology Workshop (NeurIPS 2023).
+
+
+
+
+
+ +
+
+
+
+
Date
+ +
+
+
+
+
+ + +
+
+
+
+
Links
+
+ + + + + + + + + + + + + + + + + + + +
+
+
+
+
+
+ + +
+ +
+ +

<< Back to list of publications

+
+ +
+ + + + + +
+ + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/publication_types/0/index.html b/docs/publication_types/0/index.html index e1638e2..18c23c7 100644 --- a/docs/publication_types/0/index.html +++ b/docs/publication_types/0/index.html @@ -111,7 +111,7 @@ - + @@ -239,14 +239,14 @@

0

-

Probabilistic programs as a unifying language of thought

+

Probabilistic models of physical reasoning

diff --git a/docs/publication_types/0/index.xml b/docs/publication_types/0/index.xml index 8d09cba..7aecbaf 100644 --- a/docs/publication_types/0/index.xml +++ b/docs/publication_types/0/index.xml @@ -7,26 +7,26 @@ Hugo -- gohugo.io en-us &copy; 2023 Tobias Gerstenberg - Sun, 01 Jan 2023 00:00:00 +0000 + Fri, 20 Oct 2023 00:00:00 +0000 - Probabilistic models of physical reasoning - https://cicl.stanford.edu/publication/smith2023probabilistic/ - Sun, 01 Jan 2023 00:00:00 +0000 + Probabilistic programs as a unifying language of thought + https://cicl.stanford.edu/publication/goodman2023probabilistic/ + Fri, 20 Oct 2023 00:00:00 +0000 - https://cicl.stanford.edu/publication/smith2023probabilistic/ + https://cicl.stanford.edu/publication/goodman2023probabilistic/ - Probabilistic programs as a unifying language of thought - https://cicl.stanford.edu/publication/goodman2023probabilistic/ + Probabilistic models of physical reasoning + https://cicl.stanford.edu/publication/smith2023probabilistic/ Sun, 01 Jan 2023 00:00:00 +0000 - https://cicl.stanford.edu/publication/goodman2023probabilistic/ + https://cicl.stanford.edu/publication/smith2023probabilistic/ diff --git a/docs/publication_types/1/index.html b/docs/publication_types/1/index.html index a79fe49..1b33a31 100644 --- a/docs/publication_types/1/index.html +++ b/docs/publication_types/1/index.html @@ -256,15 +256,6 @@

A counte

-
-

Understanding Social Reasoning in Language Models with Language Models

-
- - As Large Language Models (LLMs) become increasingly integrated into our everyday lives, understanding their ability to comprehend human mental states becomes critical for ensuring effective interactions. However, despite the recent attempts to assess … - -
-
-

Beyond the Imitation Game: Quantifying and extrapolating the capabilities of language models

diff --git a/docs/publication_types/1/index.xml b/docs/publication_types/1/index.xml index 685fe3a..2c014c6 100644 --- a/docs/publication_types/1/index.xml +++ b/docs/publication_types/1/index.xml @@ -30,15 +30,6 @@ - - Understanding Social Reasoning in Language Models with Language Models - https://cicl.stanford.edu/publication/gandhi2023understanding/ - Tue, 27 Jun 2023 00:00:00 +0000 - - https://cicl.stanford.edu/publication/gandhi2023understanding/ - - - Beyond the Imitation Game: Quantifying and extrapolating the capabilities of language models https://cicl.stanford.edu/publication/srivastava2022imitation/ diff --git a/docs/publication_types/3/index.html b/docs/publication_types/3/index.html index b8b12df..b4e310c 100644 --- a/docs/publication_types/3/index.html +++ b/docs/publication_types/3/index.html @@ -111,7 +111,7 @@ - + @@ -239,91 +239,91 @@

3

-

Realism of Visual, Auditory, and Haptic Cues in Phenomenal Causality

+

Anticipating the risks and benefits of counterfactual world simulation models

- Interacting in real environments, such as manipulating objects, involves multisensory information. However, little is known about how multisensory cue characteristics help us determine what has occurred in a scene, including whether two events were … + This paper examines the transformative potential of Counterfactual World Simulation Models (CWSMs). A CWSM uses multi-modal evidence, such as the CCTV footage of a road accident, to build a high-fidelity 3D reconstruction of what happened. It can …
-

A computational model of responsibility judgments from counterfactual simulations and intention inferences

+

Off The Rails: Procedural Dilemma Generation for Moral Reasoning

- How responsible someone is for an outcome depends on both the causal role of their actions, and what those actions reveal about their moral character. Prior work has successfully modeled people's causal attributions and mental state inferences using … + As AI systems like language models are increasingly integrated into making decisions that affect people, it's critical to ensure that these systems have sound moral reasoning. To test whether they do, we need to develop systematic evaluations. Recent …
-

Father, don't forgive them, for they could have known what they're doing

+

Social Contract AI: Aligning AI Assistants with Implicit Group Norms

- What someone knew matters for how we hold them responsible. In three studies, we explore people's responsibility judgments for negative outcomes to knowledgeable versus ignorant agents. We manipulate whether agents arrived at their knowledge state … + We explore the idea of aligning an AI assistant by inverting a model of users' (unknown) preferences from observed interactions. To validate our proposal, we run proof-of-concept simulations in the economic ultimatum game, formalizing user …
-

Show and tell: Learning causal structures from observations and explanations

+

Realism of Visual, Auditory, and Haptic Cues in Phenomenal Causality

- There are at least three ways of learning how the world works: learning from observations, from interventions, and from explanations. Prior work on causal inference focused on how people learn causal structures through observation and intervention. … + Interacting in real environments, such as manipulating objects, involves multisensory information. However, little is known about how multisensory cue characteristics help us determine what has occurred in a scene, including whether two events were …
-

You are what you're for: Essentialist categorization in large language models

+

Understanding Social Reasoning in Language Models with Language Models

- How do essentialist beliefs about categories arise? We hypothesize that such beliefs are transmitted via language. We subject large language models (LLMs) to vignettes from the literature on essentialist categorization and find that they align well … + As Large Language Models (LLMs) become increasingly integrated into our everyday lives, understanding their ability to comprehend human mental states becomes critical for ensuring effective interactions. However, despite the recent attempts to assess …
-

A Semantics for Causing, Enabling, and Preventing Verbs Using Structural Causal Models

+

A computational model of responsibility judgments from counterfactual simulations and intention inferences

- When choosing how to describe what happened, we have a number of causal verbs at our disposal. In this paper, we develop a model-theoretic formal semantics for nine causal verbs that span the categories of CAUSE, ENABLE, and PREVENT. We use … + How responsible someone is for an outcome depends on both the causal role of their actions, and what those actions reveal about their moral character. Prior work has successfully modeled people's causal attributions and mental state inferences using …
-

Causal Reasoning Across Agents and Objects

+

Father, don't forgive them, for they could have known what they're doing

- This work attempts to bridge the divide between accounts of causal reasoning with respect to agents and objects. We begin by examining the influence of animacy. In a collision-based context, we vary the animacy status of an object using 3D … + What someone knew matters for how we hold them responsible. In three studies, we explore people's responsibility judgments for negative outcomes to knowledgeable versus ignorant agents. We manipulate whether agents arrived at their knowledge state …
-

Learning what matters: Causal abstraction in human inference

+

Show and tell: Learning causal structures from observations and explanations

- What shape do people's mental models take? We hypothesize that people build causal models that are suited to the task at hand. These models abstract away information to represent what matters. To test this idea empirically, we presented participants … + There are at least three ways of learning how the world works: learning from observations, from interventions, and from explanations. Prior work on causal inference focused on how people learn causal structures through observation and intervention. …
-

Teleology and generics

+

You are what you're for: Essentialist categorization in large language models

- Generic statements, such as "Bees are striped" are thought to be a central vehicle by which essentialist beliefs are transmitted. But work on generics and essentialism almost never focuses on the type of properties mentioned in generic statements. We … + How do essentialist beliefs about categories arise? We hypothesize that such beliefs are transmitted via language. We subject large language models (LLMs) to vignettes from the literature on essentialist categorization and find that they align well …
-

Explanations can reduce overreliance on AI systems during decision-making

+

A Semantics for Causing, Enabling, and Preventing Verbs Using Structural Causal Models

- Prior work has identified a resilient phenomenon that threatens the performance of human-AI decision-making teams: overreliance, when people agree with an AI, even when it is incorrect. Surprisingly, overreliance does not reduce when the AI produces … + When choosing how to describe what happened, we have a number of causal verbs at our disposal. In this paper, we develop a model-theoretic formal semantics for nine causal verbs that span the categories of CAUSE, ENABLE, and PREVENT. We use …
diff --git a/docs/publication_types/3/index.xml b/docs/publication_types/3/index.xml index 64542b4..7f69b42 100644 --- a/docs/publication_types/3/index.xml +++ b/docs/publication_types/3/index.xml @@ -7,11 +7,38 @@ Hugo -- gohugo.io en-us &copy; 2023 Tobias Gerstenberg - Thu, 29 Jun 2023 00:00:00 +0000 + Mon, 30 Oct 2023 00:00:00 +0000 + + Anticipating the risks and benefits of counterfactual world simulation models + https://cicl.stanford.edu/publication/kirfel2023anticipating/ + Mon, 30 Oct 2023 00:00:00 +0000 + + https://cicl.stanford.edu/publication/kirfel2023anticipating/ + + + + + Off The Rails: Procedural Dilemma Generation for Moral Reasoning + https://cicl.stanford.edu/publication/franken2023rails/ + Mon, 30 Oct 2023 00:00:00 +0000 + + https://cicl.stanford.edu/publication/franken2023rails/ + + + + + Social Contract AI: Aligning AI Assistants with Implicit Group Norms + https://cicl.stanford.edu/publication/franken2023social/ + Mon, 30 Oct 2023 00:00:00 +0000 + + https://cicl.stanford.edu/publication/franken2023social/ + + + Realism of Visual, Auditory, and Haptic Cues in Phenomenal Causality https://cicl.stanford.edu/publication/chase2023realism/ @@ -21,6 +48,15 @@ + + Understanding Social Reasoning in Language Models with Language Models + https://cicl.stanford.edu/publication/gandhi2023understanding/ + Tue, 27 Jun 2023 00:00:00 +0000 + + https://cicl.stanford.edu/publication/gandhi2023understanding/ + + + A computational model of responsibility judgments from counterfactual simulations and intention inferences https://cicl.stanford.edu/publication/wu2023computational/ diff --git a/docs/publication_types/3/page/2/index.html b/docs/publication_types/3/page/2/index.html index 74868fa..1b2f078 100644 --- a/docs/publication_types/3/page/2/index.html +++ b/docs/publication_types/3/page/2/index.html @@ -111,7 +111,7 @@ - + @@ -239,91 +239,91 @@

3

-

Stop, children what's that sound? Multi-modal inference through mental simulation

+

Causal Reasoning Across Agents and Objects

- Human adults can figure out what happened by combining evidence from different sensory modalities, such as vision and sound. How does the ability to integrate multi-modal information develop in early childhood? Inspired by prior computational work … + This work attempts to bridge the divide between accounts of causal reasoning with respect to agents and objects. We begin by examining the influence of animacy. In a collision-based context, we vary the animacy status of an object using 3D …
-

That was close! A counterfactual simulation model of causal judgments about decisions

+

Learning what matters: Causal abstraction in human inference

- How do people make causal judgments about other's decisions? Prior work has argued that judging causation requires going beyond what actually happened and simulating what would have happened in a relevant counterfactual situation. Here, we extend the … + What shape do people's mental models take? We hypothesize that people build causal models that are suited to the task at hand. These models abstract away information to represent what matters. To test this idea empirically, we presented participants …
-

Looking into the past: Eye-tracking mental simulation in physical inference

+

Teleology and generics

- Mental simulation is a powerful cognitive capacity that underlies people's ability to draw inferences about what happened in the past from the present. Recent work suggests that eye-tracking can be used as a window through which one can study the … + Generic statements, such as "Bees are striped" are thought to be a central vehicle by which essentialist beliefs are transmitted. But work on generics and essentialism almost never focuses on the type of properties mentioned in generic statements. We …
-

Uncalibrated Models Can Improve Human-AI Collaboration

+

Explanations can reduce overreliance on AI systems during decision-making

- In many practical applications of AI, an AI model is used as a decision aid for human users. The AI provides advice that a human (sometimes) incorporates into their decision-making process. The AI advice is often presented with some measure of … + Prior work has identified a resilient phenomenon that threatens the performance of human-AI decision-making teams: overreliance, when people agree with an AI, even when it is incorrect. Surprisingly, overreliance does not reduce when the AI produces …
-

Who went fishing? Inferences from social evaluations

+

Stop, children what's that sound? Multi-modal inference through mental simulation

- Humans have a remarkable ability to go beyond the observable. From seeing the current state of our shared kitchen, we can infer what happened and who did it. Prior work has shown how the physical state of the world licenses inferences about the … + Human adults can figure out what happened by combining evidence from different sensory modalities, such as vision and sound. How does the ability to integrate multi-modal information develop in early childhood? Inspired by prior computational work …
-

The language of causation

+

That was close! A counterfactual simulation model of causal judgments about decisions

- People use varied language to express their causal understanding of the world. But how does that language map onto people’s underlying representations, and how do people choose between competing ways to best describe what happened? In this paper we … + How do people make causal judgments about other's decisions? Prior work has argued that judging causation requires going beyond what actually happened and simulating what would have happened in a relevant counterfactual situation. Here, we extend the …
-

Whom will Granny thank? Thinking about what could have been informs children's inferences about relative helpfulness

+

Looking into the past: Eye-tracking mental simulation in physical inference

- To evaluate others' actions, we consider action outcomes (e.g., positive or negative) and the actors' underlying intentions (e.g., intentional or accidental). However, we often encounter situ- ations where neither actual outcomes nor intentions … + Mental simulation is a powerful cognitive capacity that underlies people's ability to draw inferences about what happened in the past from the present. Recent work suggests that eye-tracking can be used as a window through which one can study the …
-

Explaining intuitive difficulty judgments by modeling physical effort and risk

+

Uncalibrated Models Can Improve Human-AI Collaboration

- How do we estimate the difficulty of performing a new task, a task we've never tried before such as making a sculpture, a birthday cake, or building a tower with LEGO blocks? Estimating difficulty helps us appreciate others' accomplishments, and … + In many practical applications of AI, an AI model is used as a decision aid for human users. The AI provides advice that a human (sometimes) incorporates into their decision-making process. The AI advice is often presented with some measure of …
-

Tiptoeing around it: Inference from absence in potentially offensive speech

+

Who went fishing? Inferences from social evaluations

- Language that describes people in a concise manner may conflict with social norms (e.g., referring to people by their race), presenting a conflict between transferring information efficiently and avoiding offensive language. When a speaker is … + Humans have a remarkable ability to go beyond the observable. From seeing the current state of our shared kitchen, we can infer what happened and who did it. Prior work has shown how the physical state of the world licenses inferences about the …
-

What happened? Reconstructing the past from vision and sound

+

The language of causation

- We introduce a novel experimental paradigm for studying multi-modal integration in causal inference. Our experiments feature a physically realistic Plinko machine in which a ball is dropped through one of three holes and comes to rest at the bottom … + People use varied language to express their causal understanding of the world. But how does that language map onto people’s underlying representations, and how do people choose between competing ways to best describe what happened? In this paper we …
diff --git a/docs/publication_types/3/page/3/index.html b/docs/publication_types/3/page/3/index.html index b9911c2..b753ba8 100644 --- a/docs/publication_types/3/page/3/index.html +++ b/docs/publication_types/3/page/3/index.html @@ -111,7 +111,7 @@ - + @@ -239,91 +239,91 @@

3

-

Causal learning from interventions and dynamics in continuous time

+

Whom will Granny thank? Thinking about what could have been informs children's inferences about relative helpfulness

- Event timing and interventions are important and intertwined cues to causal structure, yet they have typically been studied separately. We bring them together for the first time in an experiment where participants learn causal structure by performing … + To evaluate others' actions, we consider action outcomes (e.g., positive or negative) and the actors' underlying intentions (e.g., intentional or accidental). However, we often encounter situ- ations where neither actual outcomes nor intentions …
-

Faulty towers: A hypothetical simulation model of physical support

+

Explaining intuitive difficulty judgments by modeling physical effort and risk

- In this paper we introduce the hypothetical simulation model (HSM) of physical support. The HSM predicts that people judge physical support by mentally simulating what would happen if the object of interest were removed. Two experiments test the … + How do we estimate the difficulty of performing a new task, a task we've never tried before such as making a sculpture, a birthday cake, or building a tower with LEGO blocks? Estimating difficulty helps us appreciate others' accomplishments, and …
-

Marbles in inaction: Counterfactual simulation and causation by omission

+

Tiptoeing around it: Inference from absence in potentially offensive speech

- Consider the following causal explanation: The ball went through the goal because the defender didn’t block it. There are at least two problems with citing omissions as causal explanations. First, how do we choose the relevant candidate omission … + Language that describes people in a concise manner may conflict with social norms (e.g., referring to people by their race), presenting a conflict between transferring information efficiently and avoiding offensive language. When a speaker is …
-

Physical problem solving: Joint planning with symbolic, geometric, and dynamic constraints

+

What happened? Reconstructing the past from vision and sound

- In this paper, we present a new task that investigates how people interact with and make judgments about towers of blocks. In Experiment 1, participants in the lab solved a series of problems in which they had to re-configure three blocks from an … + We introduce a novel experimental paradigm for studying multi-modal integration in causal inference. Our experiments feature a physically realistic Plinko machine in which a ball is dropped through one of three holes and comes to rest at the bottom …
-

Implicit measurement of motivated causal attribution

+

Causal learning from interventions and dynamics in continuous time

- Moral judgment often involves pinning causation for harm to a particular person. Since it reveals “who one sides with”, expression of moral judgment can be a costly social act that people may be motivated to conceal. Here, we demonstrate that a … + Event timing and interventions are important and intertwined cues to causal structure, yet they have typically been studied separately. We bring them together for the first time in an experiment where participants learn causal structure by performing …
-

Natural science: Active learning in dynamic physical microworlds

+

Faulty towers: A hypothetical simulation model of physical support

- In this paper, we bring together research on active learning and intuitive physics to explore how people learn about “microworlds” with continuous spatiotemporal dynamics. Participants interacted with objects in simple two-dimensional worlds governed … + In this paper we introduce the hypothetical simulation model (HSM) of physical support. The HSM predicts that people judge physical support by mentally simulating what would happen if the object of interest were removed. Two experiments test the …
-

Understanding ``almost'': Empirical and computational studies of near misses

+

Marbles in inaction: Counterfactual simulation and causation by omission

- When did something almost happen? In this paper, we investigate what brings counterfactual worlds close. In Experiments 1 and 2, we find that participants’ judgments about whether something almost happened are determined by the causal proximity of … + Consider the following causal explanation: The ball went through the goal because the defender didn’t block it. There are at least two problems with citing omissions as causal explanations. First, how do we choose the relevant candidate omission …
-

Go fishing! Responsibility judgments when cooperation breaks down

+

Physical problem solving: Joint planning with symbolic, geometric, and dynamic constraints

- Many social judgments hinge on assigning responsibility to individuals for their role in a group’s success or failure. Often the group’s success depends on every team member acting in a rational way. When someone does not conform to what others … + In this paper, we present a new task that investigates how people interact with and make judgments about towers of blocks. In Experiment 1, participants in the lab solved a series of problems in which they had to re-configure three blocks from an …
-

How, whether, why: Causal judgments as counterfactual contrasts

+

Implicit measurement of motivated causal attribution

- How do people make causal judgments? Here, we propose a counterfactual simulation model (CSM) of causal judgment that unifies different views on causation. The CSM predicts that people’s causal judgments are influenced by whether a candidate cause … + Moral judgment often involves pinning causation for harm to a particular person. Since it reveals “who one sides with”, expression of moral judgment can be a costly social act that people may be motivated to conceal. Here, we demonstrate that a …
-

Inference of intention and permissibility in moral decision making

+

Natural science: Active learning in dynamic physical microworlds

- The actions of a rational agent reveal information about its mental states. These inferred mental states, particularly the agent’s intentions, play an important role in the evaluation of moral permissibility. While previous computational models have … + In this paper, we bring together research on active learning and intuitive physics to explore how people learn about “microworlds” with continuous spatiotemporal dynamics. Participants interacted with objects in simple two-dimensional worlds governed …
diff --git a/docs/publication_types/3/page/4/index.html b/docs/publication_types/3/page/4/index.html index 7d4fac5..d055290 100644 --- a/docs/publication_types/3/page/4/index.html +++ b/docs/publication_types/3/page/4/index.html @@ -111,7 +111,7 @@ - + @@ -239,91 +239,91 @@

3

-

Responsibility judgments in voting scenarios

+

Understanding ``almost'': Empirical and computational studies of near misses

- How do people assign responsibility for the outcome of an election? In previous work, we have shown that responsibility judgments in achievement contexts are affected by the probability that a person’s contribution is necessary, and by how close it … + When did something almost happen? In this paper, we investigate what brings counterfactual worlds close. In Experiments 1 and 2, we find that participants’ judgments about whether something almost happened are determined by the causal proximity of …
-

Causal supersession

+

Go fishing! Responsibility judgments when cooperation breaks down

- When agents violate norms, they are typically judged to be more of a cause of resulting outcomes. In this study, we suggest that norm violations also reduce the causality of other agents, a novel phenomenon we refer to as “causal supersession.” We … + Many social judgments hinge on assigning responsibility to individuals for their role in a group’s success or failure. Often the group’s success depends on every team member acting in a rational way. When someone does not conform to what others …
-

From counterfactual simulation to causal judgment

+

How, whether, why: Causal judgments as counterfactual contrasts

- In this paper, we demonstrate that people’s causal judgments are inextricably linked to counterfactuals. In our experiments, participants judge whether one billiard ball A caused another ball B to go through a gate. Our counterfactual simulation … + How do people make causal judgments? Here, we propose a counterfactual simulation model (CSM) of causal judgment that unifies different views on causation. The CSM predicts that people’s causal judgments are influenced by whether a candidate cause …
-

The order of things: Inferring causal structure from temporal patterns

+

Inference of intention and permissibility in moral decision making

- The timing and order in which a set of events occur strongly influences whether people judge them to be causally related. But what do people think particular temporal patterns of events tell them about causal structure? And how do they integrate … + The actions of a rational agent reveal information about its mental states. These inferred mental states, particularly the agent’s intentions, play an important role in the evaluation of moral permissibility. While previous computational models have …
-

Wins above replacement: Responsibility attributions as counterfactual replacements

+

Responsibility judgments in voting scenarios

- In order to be held responsible, a person’s action has to have made some sort of difference to the outcome. In this paper, we propose a counterfactual replacement model according to which people attribute responsibility by comparing their prior … + How do people assign responsibility for the outcome of an election? In previous work, we have shown that responsibility judgments in achievement contexts are affected by the probability that a person’s contribution is necessary, and by how close it …
-

Back on track: Backtracking in counterfactual reasoning

+

Causal supersession

- Would Dan have died if Bob hadn’t shot? In this paper, we show that people’s answer depends on whether or not they are asked about what would have caused Bob not to shoot. Something needs to change in order to turn an actual world into a … + When agents violate norms, they are typically judged to be more of a cause of resulting outcomes. In this study, we suggest that norm violations also reduce the causality of other agents, a novel phenomenon we refer to as “causal supersession.” We …
-

Noisy Newtons: Unifying process and dependency accounts of causal attribution

+

From counterfactual simulation to causal judgment

- There is a long tradition in both philosophy and psychology to separate process accounts from dependency accounts of causation. In this paper, we motivate a unifying account that explains people’s causal attributions in terms of counterfactuals … + In this paper, we demonstrate that people’s causal judgments are inextricably linked to counterfactuals. In our experiments, participants judge whether one billiard ball A caused another ball B to go through a gate. Our counterfactual simulation …
-

Ping Pong in Church: Productive use of concepts in human probabilistic inference

+

The order of things: Inferring causal structure from temporal patterns

- How do people make inferences from complex patterns of evidence across diverse situations? What does a computational model need in order to capture the abstract knowledge people use for everyday reasoning? In this paper, we explore a novel modeling … + The timing and order in which a set of events occur strongly influences whether people judge them to be causally related. But what do people think particular temporal patterns of events tell them about causal structure? And how do they integrate …
-

Why blame Bob? Probabilistic generative models, counterfactual reasoning, and blame attribution

+

Wins above replacement: Responsibility attributions as counterfactual replacements

- We consider an approach to blame attribution based on counterfactual reasoning in probabilistic generative models. In this view, people intervene on each variable within their model and assign blame in proportion to how much a change to a variable … + In order to be held responsible, a person’s action has to have made some sort of difference to the outcome. In this paper, we propose a counterfactual replacement model according to which people attribute responsibility by comparing their prior …
-

Beyond outcomes: The influence of intentions and deception

+

Back on track: Backtracking in counterfactual reasoning

- To what extent do people care about the intentions behind an action? What if the intentions can be deceptive? We conducted two experiments to complement previous evidence about the roles of outcomes and intentions in economic games. The results of … + Would Dan have died if Bob hadn’t shot? In this paper, we show that people’s answer depends on whether or not they are asked about what would have caused Bob not to shoot. Something needs to change in order to turn an actual world into a …
diff --git a/docs/publication_types/3/page/5/index.html b/docs/publication_types/3/page/5/index.html index e0bc62f..f979f1f 100644 --- a/docs/publication_types/3/page/5/index.html +++ b/docs/publication_types/3/page/5/index.html @@ -111,7 +111,7 @@ - + @@ -238,6 +238,42 @@

3

+
+

Noisy Newtons: Unifying process and dependency accounts of causal attribution

+
+ + There is a long tradition in both philosophy and psychology to separate process accounts from dependency accounts of causation. In this paper, we motivate a unifying account that explains people’s causal attributions in terms of counterfactuals … + +
+
+ +
+

Ping Pong in Church: Productive use of concepts in human probabilistic inference

+
+ + How do people make inferences from complex patterns of evidence across diverse situations? What does a computational model need in order to capture the abstract knowledge people use for everyday reasoning? In this paper, we explore a novel modeling … + +
+
+ +
+

Why blame Bob? Probabilistic generative models, counterfactual reasoning, and blame attribution

+
+ + We consider an approach to blame attribution based on counterfactual reasoning in probabilistic generative models. In this view, people intervene on each variable within their model and assign blame in proportion to how much a change to a variable … + +
+
+ +
+

Beyond outcomes: The influence of intentions and deception

+
+ + To what extent do people care about the intentions behind an action? What if the intentions can be deceptive? We conducted two experiments to complement previous evidence about the roles of outcomes and intentions in economic games. The results of … + +
+
+

Blame the skilled

diff --git a/docs/publication_types/4/index.html b/docs/publication_types/4/index.html index c7698cd..bbd218a 100644 --- a/docs/publication_types/4/index.html +++ b/docs/publication_types/4/index.html @@ -111,7 +111,7 @@ - + @@ -239,14 +239,14 @@

4

-

Probabilistic programs as a unifying language of thought

+

Probabilistic models of physical reasoning

diff --git a/docs/publication_types/4/index.xml b/docs/publication_types/4/index.xml index b6b7e1c..d117dbc 100644 --- a/docs/publication_types/4/index.xml +++ b/docs/publication_types/4/index.xml @@ -7,26 +7,26 @@ Hugo -- gohugo.io en-us &copy; 2023 Tobias Gerstenberg - Sun, 01 Jan 2023 00:00:00 +0000 + Fri, 20 Oct 2023 00:00:00 +0000 - Probabilistic models of physical reasoning - https://cicl.stanford.edu/publication/smith2023probabilistic/ - Sun, 01 Jan 2023 00:00:00 +0000 + Probabilistic programs as a unifying language of thought + https://cicl.stanford.edu/publication/goodman2023probabilistic/ + Fri, 20 Oct 2023 00:00:00 +0000 - https://cicl.stanford.edu/publication/smith2023probabilistic/ + https://cicl.stanford.edu/publication/goodman2023probabilistic/ - Probabilistic programs as a unifying language of thought - https://cicl.stanford.edu/publication/goodman2023probabilistic/ + Probabilistic models of physical reasoning + https://cicl.stanford.edu/publication/smith2023probabilistic/ Sun, 01 Jan 2023 00:00:00 +0000 - https://cicl.stanford.edu/publication/goodman2023probabilistic/ + https://cicl.stanford.edu/publication/smith2023probabilistic/ diff --git a/docs/publication_types/index.html b/docs/publication_types/index.html index 9176a9e..63befcc 100644 --- a/docs/publication_types/index.html +++ b/docs/publication_types/index.html @@ -111,7 +111,7 @@ - + @@ -239,35 +239,35 @@

Publication_types

-

2

+

3

-

1

+

2

-

3

+

0

-

0

+

4

-

4

+

1

diff --git a/docs/publication_types/index.xml b/docs/publication_types/index.xml index 2b2f5b4..cd3b4a5 100644 --- a/docs/publication_types/index.xml +++ b/docs/publication_types/index.xml @@ -7,42 +7,33 @@ Hugo -- gohugo.io en-us &copy; 2023 Tobias Gerstenberg - Fri, 27 Oct 2023 00:00:00 +0000 + Mon, 30 Oct 2023 00:00:00 +0000 - 2 - https://cicl.stanford.edu/publication_types/2/ - Fri, 27 Oct 2023 00:00:00 +0000 - - https://cicl.stanford.edu/publication_types/2/ - - - - - 1 - https://cicl.stanford.edu/publication_types/1/ - Sat, 09 Sep 2023 00:00:00 +0000 + 3 + https://cicl.stanford.edu/publication_types/3/ + Mon, 30 Oct 2023 00:00:00 +0000 - https://cicl.stanford.edu/publication_types/1/ + https://cicl.stanford.edu/publication_types/3/ - 3 - https://cicl.stanford.edu/publication_types/3/ - Thu, 29 Jun 2023 00:00:00 +0000 + 2 + https://cicl.stanford.edu/publication_types/2/ + Fri, 27 Oct 2023 00:00:00 +0000 - https://cicl.stanford.edu/publication_types/3/ + https://cicl.stanford.edu/publication_types/2/ 0 https://cicl.stanford.edu/publication_types/0/ - Sun, 01 Jan 2023 00:00:00 +0000 + Fri, 20 Oct 2023 00:00:00 +0000 https://cicl.stanford.edu/publication_types/0/ @@ -51,12 +42,21 @@ 4 https://cicl.stanford.edu/publication_types/4/ - Sun, 01 Jan 2023 00:00:00 +0000 + Fri, 20 Oct 2023 00:00:00 +0000 https://cicl.stanford.edu/publication_types/4/ + + 1 + https://cicl.stanford.edu/publication_types/1/ + Sat, 09 Sep 2023 00:00:00 +0000 + + https://cicl.stanford.edu/publication_types/1/ + + + 5 https://cicl.stanford.edu/publication_types/5/ diff --git a/docs/sitemap.xml b/docs/sitemap.xml index 33619f3..b2c06b4 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -3,16 +3,58 @@ - https://cicl.stanford.edu/publication_types/2/ - 2023-10-27T00:00:00+00:00 + https://cicl.stanford.edu/publication_types/3/ + 2023-10-30T00:00:00+00:00 0 + + + https://cicl.stanford.edu/publication/kirfel2023anticipating/ + 2023-10-30T00:00:00+00:00 + + + + https://cicl.stanford.edu/ + 2023-10-30T00:00:00+00:00 + 0 + + + + + + + https://cicl.stanford.edu/publication/franken2023rails/ + 2023-10-30T00:00:00+00:00 + + + + + + + https://cicl.stanford.edu/publication_types/ + 2023-10-30T00:00:00+00:00 + 0 + + + + + + + https://cicl.stanford.edu/publication/franken2023social/ + 2023-10-30T00:00:00+00:00 + + + + + + + https://cicl.stanford.edu/publication_types/2/ 2023-10-27T00:00:00+00:00 0 @@ -29,8 +71,8 @@ - https://cicl.stanford.edu/publication_types/ - 2023-10-27T00:00:00+00:00 + https://cicl.stanford.edu/publication_types/0/ + 2023-10-20T00:00:00+00:00 0 @@ -38,8 +80,8 @@ - https://cicl.stanford.edu/publication_types/1/ - 2023-09-09T00:00:00+00:00 + https://cicl.stanford.edu/publication_types/4/ + 2023-10-20T00:00:00+00:00 0 @@ -47,25 +89,33 @@ - https://cicl.stanford.edu/publication/amemiya2023disagreement/ + https://cicl.stanford.edu/publication/goodman2023probabilistic/ + 2023-10-20T00:00:00+00:00 + + + + + + + https://cicl.stanford.edu/publication_types/1/ 2023-09-09T00:00:00+00:00 + 0 - https://cicl.stanford.edu/publication/beller2023language/ - 2023-07-04T00:00:00+00:00 + https://cicl.stanford.edu/publication/amemiya2023disagreement/ + 2023-09-09T00:00:00+00:00 - https://cicl.stanford.edu/publication_types/3/ - 2023-06-29T00:00:00+00:00 - 0 + https://cicl.stanford.edu/publication/beller2023language/ + 2023-07-04T00:00:00+00:00 @@ -193,24 +243,6 @@ - - - https://cicl.stanford.edu/publication_types/0/ - 2023-01-01T00:00:00+00:00 - 0 - - - - - - - https://cicl.stanford.edu/publication_types/4/ - 
2023-01-01T00:00:00+00:00 - 0 - - - - https://cicl.stanford.edu/publication/gong2023active/ @@ -244,14 +276,6 @@ - - - https://cicl.stanford.edu/publication/goodman2023probabilistic/ - 2023-01-01T00:00:00+00:00 - - - - https://cicl.stanford.edu/publication/gerstenberg2022hypothetical/ diff --git a/static/bibtex/cic_papers.bib b/static/bibtex/cic_papers.bib index ee6ce25..c5deb15 100644 --- a/static/bibtex/cic_papers.bib +++ b/static/bibtex/cic_papers.bib @@ -1,13 +1,40 @@ %% This BibTeX bibliography file was created using BibDesk. %% https://bibdesk.sourceforge.io/ -%% Created for Tobias Gerstenberg at 2023-10-15 13:46:48 -0700 +%% Created for Tobias Gerstenberg at 2023-10-30 18:25:01 -0700 %% Saved with string encoding Unicode (UTF-8) +@inproceedings{kirfel2023anticipating, + abstract = {This paper examines the transformative potential of Counterfactual World Simulation Models (CWSMs). A CWSM uses multi-modal evidence, such as the CCTV footage of a road accident, to build a high-fidelity 3D reconstruction of what happened. It can answer causal questions, such as whether the accident happened because the driver was speeding, by simulating what would have happened in relevant counterfactual situations. We argue for a normative and ethical framework that guides and constrains the simulation of counterfactuals. We address the challenge of ensuring fidelity in reconstructions while simultaneously preventing stereotype perpetuation during counterfactual simulations. We anticipate different modes of how users will interact with CWSMs and discuss how their outputs may be presented. Finally, we address the prospective applications of CWSMs in the legal domain, recognizing both their potential to revolutionize legal proceedings as well as the ethical concerns they engender. Sketching a new genre of AI, this paper seeks to illuminate the path forward for responsible and effective use of CWSMs.}, + author = {Lara Kirfel and Robert J. MacCoun and Thomas Icard and Tobias Gerstenberg}, + booktitle = {{AI Meets Moral Philosophy and Moral Psychology Workshop (NeurIPS 2023)}}, + date-added = {2023-10-30 18:20:41 -0700}, + date-modified = {2023-10-30 18:20:50 -0700}, + title = {Anticipating the risks and benefits of counterfactual world simulation models}, + year = {2023}} + +@inproceedings{franken2023rails, + abstract = {As AI systems like language models are increasingly integrated into making decisions that affect people, it's critical to ensure that these systems have sound moral reasoning. To test whether they do, we need to develop systematic evaluations. Recent work has introduced a method for procedurally generating LLM evaluations from abstract causal templates, and tested this method in the context of social reasoning (i.e., theory-of-mind). In this paper, we extend this method to the domain of moral dilemmas. We develop a framework that translates causal graphs into a prompt template which can then be used to procedurally generate a large and diverse set of moral dilemmas using a language model. Using this framework, we created the OffTheRails dataset which consists of 50 scenarios and 500 unique test items. We evaluated the quality of our model-written test items using two independent human experts and found that 90% of the test-items met the desired structure. We collect moral permissibility and intention judgments from 100 human crowdworkers and compared these judgments with those from GPT-4 and Claude-2 across eight control conditions. 
Both humans and GPT-4 assigned higher intentionality to agents when a harmful outcome was evitable and a necessary means. However, our findings did not match previous findings on permissibility judgments. This difference may be a result of not controlling the severity of harmful outcomes during scenario generation. We conclude by discussing future extensions of our benchmark to address this limitation.}, + author = {Jan-Philipp Fr{\"a}nken and Ayesha Khawaja and Kanishk Gandhi and Jared Moore and Noah D. Goodman and Tobias Gerstenberg}, + booktitle = {{AI Meets Moral Philosophy and Moral Psychology Workshop (NeurIPS 2023)}}, + date-added = {2023-10-30 18:18:28 -0700}, + date-modified = {2023-10-30 18:18:35 -0700}, + title = {Off The Rails: Procedural Dilemma Generation for Moral Reasoning}, + year = {2023}} + +@inproceedings{franken2023social, + abstract = {We explore the idea of aligning an AI assistant by inverting a model of users' (unknown) preferences from observed interactions. To validate our proposal, we run proof-of-concept simulations in the economic ultimatum game, formalizing user preferences as policies that guide the actions of simulated players. We find that the AI assistant accurately aligns its behavior to match standard policies from the economic literature (e.g., selfish, altruistic). However, the assistant's learned policies lack robustness and exhibit limited generalization in an out-of-distribution setting when confronted with a currency (e.g., grams of medicine) that was not included in the assistant's training distribution. Additionally, we find that when there is inconsistency in the relationship between language use and an unknown policy (e.g., an altruistic policy combined with rude language), the assistant's learning of the policy is slowed. Overall, our preliminary results suggest that developing simulation frameworks in which AI assistants need to infer preferences from diverse users can provide a valuable approach for studying practical alignment questions.}, + author = {Jan-Philipp Fr{\"a}nken and Sam Kwok and Peixuan Ye and Kanishk Gandhi and Dilip Arumugam and Jared Moore and Alex Tamkin and Tobias Gerstenberg and Noah D. Goodman}, + booktitle = {Socially Responsible Language Modelling Research Workshop (NeurIPS 2023)}, + date-added = {2023-10-30 18:11:09 -0700}, + date-modified = {2023-10-30 18:11:15 -0700}, + title = {Social Contract AI: Aligning AI Assistants with Implicit Group Norms}, + year = {2023}} + @incollection{goodman2023probabilistic, author = {Noah D. Goodman and Tobias Gerstenberg and Joshua B. Tenenbaum}, booktitle = {Reverse-engineering the mind: The Bayesian approach to cognitive science}, @@ -49,16 +76,14 @@ @inproceedings{chase2023realism title = {Realism of Visual, Auditory, and Haptic Cues in Phenomenal Causality}, year = {2023}} -@article{gandhi2023understanding, +@inproceedings{gandhi2023understanding, abstract = {As Large Language Models (LLMs) become increasingly integrated into our everyday lives, understanding their ability to comprehend human mental states becomes critical for ensuring effective interactions. However, despite the recent attempts to assess the Theory-of-Mind (ToM) reasoning capabilities of LLMs, the degree to which these models can align with human ToM remains a nuanced topic of exploration. This is primarily due to two distinct challenges: (1) the presence of inconsistent results from previous evaluations, and (2) concerns surrounding the validity of existing evaluation methodologies. 
To address these challenges, we present a novel framework for procedurally generating evaluations with LLMs by populating causal templates. Using our framework, we create a new social reasoning benchmark (BigToM) for LLMs which consists of 25 controls and 5,000 model-written evaluations. We find that human participants rate the quality of our benchmark higher than previous crowd-sourced evaluations and comparable to expert-written evaluations. Using BigToM, we evaluate the social reasoning capabilities of a variety of LLMs and compare model performances with human performance. Our results suggest that GPT4 has ToM capabilities that mirror human inference patterns, though less reliable, while other LLMs struggle.}, author = {Kanishk Gandhi and Jan-Philipp Fr{\"a}nken and Tobias Gerstenberg and Noah D. Goodman}, + booktitle = {{Advances in Neural Information Processing Systems}}, date-added = {2023-06-27 19:45:01 -0700}, - date-modified = {2023-06-27 19:45:09 -0700}, - journal = {arXiv}, + date-modified = {2023-10-30 18:11:40 -0700}, title = {Understanding Social Reasoning in Language Models with Language Models}, - url = {https://arxiv.org/abs/2306.15448}, - year = {2023}, - bdsk-url-1 = {https://arxiv.org/abs/2306.15448}} + year = {2023}} @article{vasconcelos2023explanations, author = {Vasconcelos, Helena and J{\"o}rke, Matthew and Grunde-McLaughlin, Madeleine and Gerstenberg, Tobias and Bernstein, Michael S and Krishna, Ranjay}, diff --git a/static/papers/franken2023social.pdf b/static/papers/franken2023social.pdf new file mode 100644 index 0000000..47edf02 Binary files /dev/null and b/static/papers/franken2023social.pdf differ
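For reference, the three BibTeX entries added to static/bibtex/cic_papers.bib in this diff (kirfel2023anticipating, franken2023rails, franken2023social) can be cited from LaTeX in the usual way. The snippet below is only an illustrative sketch assuming a standard BibTeX toolchain; the document name and bibliography style are hypothetical and are not part of this change.

% minimal_example.tex -- hypothetical document, not part of this commit
\documentclass{article}
\begin{document}
% Cite the newly added workshop/proceedings papers by their BibTeX keys.
Recent work covers counterfactual world simulation models \cite{kirfel2023anticipating},
procedural dilemma generation \cite{franken2023rails},
and aligning assistants with implicit group norms \cite{franken2023social}.
\bibliographystyle{plain}               % any standard style would work here
\bibliography{static/bibtex/cic_papers} % the .bib file updated in this diff
\end{document}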