From 998d650f8063befb00b9c4e256cf763eeda2024b Mon Sep 17 00:00:00 2001 From: Tobias Gerstenberg Date: Sun, 12 Nov 2023 20:02:01 -0800 Subject: [PATCH] added tag to publication --- content/publication/franken2023social.md | 1 + .../publication/gandhi2023understanding.md | 2 +- docs/member/tobias_gerstenberg/index.html | 4 +- docs/publication/franken2023social/index.html | 11 + .../gandhi2023understanding/index.html | 2 +- docs/publication/index.html | 4 +- docs/sitemap.xml | 29 +- docs/tags/index.html | 11 +- docs/tags/index.xml | 17 +- .../index.html | 375 ++++++++++++++++++ .../index.xml | 25 ++ .../page/1/index.html | 1 + .../spotlight-at-neurips-workshop/index.html | 375 ++++++++++++++++++ .../spotlight-at-neurips-workshop/index.xml | 25 ++ .../page/1/index.html | 1 + 15 files changed, 863 insertions(+), 20 deletions(-) create mode 100644 docs/tags/spotlight-at-neurips-2023-datasets-and-benchmarks-track/index.html create mode 100644 docs/tags/spotlight-at-neurips-2023-datasets-and-benchmarks-track/index.xml create mode 100644 docs/tags/spotlight-at-neurips-2023-datasets-and-benchmarks-track/page/1/index.html create mode 100644 docs/tags/spotlight-at-neurips-workshop/index.html create mode 100644 docs/tags/spotlight-at-neurips-workshop/index.xml create mode 100644 docs/tags/spotlight-at-neurips-workshop/page/1/index.html diff --git a/content/publication/franken2023social.md b/content/publication/franken2023social.md index f9834d5..70c43e5 100644 --- a/content/publication/franken2023social.md +++ b/content/publication/franken2023social.md @@ -13,6 +13,7 @@ publication_types = ["3"] publication_short = "_Socially Responsible Language Modelling Research Workshop (NeurIPS 2023)_" publication = "Fränken J., Kwok S., Ye P., Gandhi K., Arumugam D., Moore J., Tamkin A., Gerstenberg T., Goodman N. D. (2023). Social Contract AI: Aligning AI Assistants with Implicit Group Norms. In _Socially Responsible Language Modelling Research Workshop (NeurIPS 2023)." abstract = "We explore the idea of aligning an AI assistant by inverting a model of users' (unknown) preferences from observed interactions. To validate our proposal, we run proof-of-concept simulations in the economic ultimatum game, formalizing user preferences as policies that guide the actions of simulated players. We find that the AI assistant accurately aligns its behavior to match standard policies from the economic literature (e.g., selfish, altruistic). However, the assistant's learned policies lack robustness and exhibit limited generalization in an out-of-distribution setting when confronted with a currency (e.g., grams of medicine) that was not included in the assistant's training distribution. Additionally, we find that when there is inconsistency in the relationship between language use and an unknown policy (e.g., an altruistic policy combined with rude language), the assistant's learning of the policy is slowed. Overall, our preliminary results suggest that developing simulation frameworks in which AI assistants need to infer preferences from diverse users can provide a valuable approach for studying practical alignment questions." +tags = ["Spotlight at NeurIPS Workshop"] image_preview = "" selected = false projects = [] diff --git a/content/publication/gandhi2023understanding.md b/content/publication/gandhi2023understanding.md index f26adb0..7046f9a 100644 --- a/content/publication/gandhi2023understanding.md +++ b/content/publication/gandhi2023understanding.md @@ -13,7 +13,7 @@ publication_types = ["3"] publication_short = "_Advances in Neural Information Processing Systems_" publication = "Gandhi K., Fränken J., Gerstenberg T., Goodman N. D. (2023). Understanding Social Reasoning in Language Models with Language Models. _Advances in Neural Information Processing Systems_." abstract = "As Large Language Models (LLMs) become increasingly integrated into our everyday lives, understanding their ability to comprehend human mental states becomes critical for ensuring effective interactions. However, despite the recent attempts to assess the Theory-of-Mind (ToM) reasoning capabilities of LLMs, the degree to which these models can align with human ToM remains a nuanced topic of exploration. This is primarily due to two distinct challenges: (1) the presence of inconsistent results from previous evaluations, and (2) concerns surrounding the validity of existing evaluation methodologies. To address these challenges, we present a novel framework for procedurally generating evaluations with LLMs by populating causal templates. Using our framework, we create a new social reasoning benchmark (BigToM) for LLMs which consists of 25 controls and 5,000 model-written evaluations. We find that human participants rate the quality of our benchmark higher than previous crowd-sourced evaluations and comparable to expert-written evaluations. Using BigToM, we evaluate the social reasoning capabilities of a variety of LLMs and compare model performances with human performance. Our results suggest that GPT4 has ToM capabilities that mirror human inference patterns, though less reliable, while other LLMs struggle." -tags = ["Spotlight at NeurIPS 2023"] +tags = ["Spotlight at NeurIPS 2023 Datasets and Benchmarks Track"] image_preview = "" selected = false projects = [] diff --git a/docs/member/tobias_gerstenberg/index.html b/docs/member/tobias_gerstenberg/index.html index 85a5e39..f8c0db4 100644 --- a/docs/member/tobias_gerstenberg/index.html +++ b/docs/member/tobias_gerstenberg/index.html @@ -434,6 +434,8 @@

Publications

+
Spotlight at NeurIPS Workshop +

@@ -737,7 +739,7 @@

Publications

-
Spotlight at NeurIPS 2023 +
Spotlight at NeurIPS 2023 Datasets and Benchmarks Track

diff --git a/docs/publication/franken2023social/index.html b/docs/publication/franken2023social/index.html index fde6a7d..701aac0 100644 --- a/docs/publication/franken2023social/index.html +++ b/docs/publication/franken2023social/index.html @@ -258,6 +258,17 @@

Abstract

+
+

+ + Spotlight at NeurIPS Workshop
+ +

+
+ + + +
diff --git a/docs/publication/gandhi2023understanding/index.html b/docs/publication/gandhi2023understanding/index.html index f213b09..07d1fb4 100644 --- a/docs/publication/gandhi2023understanding/index.html +++ b/docs/publication/gandhi2023understanding/index.html @@ -261,7 +261,7 @@

Abstract

diff --git a/docs/publication/index.html b/docs/publication/index.html index 0a67e75..26661d0 100644 --- a/docs/publication/index.html +++ b/docs/publication/index.html @@ -1654,6 +1654,8 @@

Publications

+
Spotlight at NeurIPS Workshop +

@@ -2041,7 +2043,7 @@

Publications

-
Spotlight at NeurIPS 2023 +
Spotlight at NeurIPS 2023 Datasets and Benchmarks Track

diff --git a/docs/sitemap.xml b/docs/sitemap.xml index 4d9183b..b98fe9d 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -52,6 +52,24 @@ + + + https://cicl.stanford.edu/tags/spotlight-at-neurips-workshop/ + 2023-10-30T00:00:00+00:00 + 0 + + + + + + + https://cicl.stanford.edu/tags/ + 2023-10-30T00:00:00+00:00 + 0 + + + + https://cicl.stanford.edu/publication_types/2/ @@ -147,16 +165,7 @@ - https://cicl.stanford.edu/tags/ - 2023-06-29T00:00:00+00:00 - 0 - - - - - - - https://cicl.stanford.edu/tags/spotlight-at-neurips-2023/ + https://cicl.stanford.edu/tags/spotlight-at-neurips-2023-datasets-and-benchmarks-track/ 2023-06-27T00:00:00+00:00 0 diff --git a/docs/tags/index.html b/docs/tags/index.html index bb210de..24f0759 100644 --- a/docs/tags/index.html +++ b/docs/tags/index.html @@ -111,7 +111,7 @@ - + @@ -238,6 +238,13 @@

Tags

+
+

Spotlight at NeurIPS Workshop

+
+ +
+
+

Best Work-in-Progress Award

@@ -246,7 +253,7 @@

Best W

-

Spotlight at NeurIPS 2023

+

Spotlight at NeurIPS 2023 Datasets and Benchmarks Track

diff --git a/docs/tags/index.xml b/docs/tags/index.xml index 7e7cfb1..e31c732 100644 --- a/docs/tags/index.xml +++ b/docs/tags/index.xml @@ -7,11 +7,20 @@ Hugo -- gohugo.io en-us © 2023 Tobias Gerstenberg - Thu, 29 Jun 2023 00:00:00 +0000 + Mon, 30 Oct 2023 00:00:00 +0000 + + Spotlight at NeurIPS Workshop + https://cicl.stanford.edu/tags/spotlight-at-neurips-workshop/ + Mon, 30 Oct 2023 00:00:00 +0000 + + https://cicl.stanford.edu/tags/spotlight-at-neurips-workshop/ + + + Best Work-in-Progress Award https://cicl.stanford.edu/tags/best-work-in-progress-award/ @@ -22,11 +31,11 @@ - Spotlight at NeurIPS 2023 - https://cicl.stanford.edu/tags/spotlight-at-neurips-2023/ + Spotlight at NeurIPS 2023 Datasets and Benchmarks Track + https://cicl.stanford.edu/tags/spotlight-at-neurips-2023-datasets-and-benchmarks-track/ Tue, 27 Jun 2023 00:00:00 +0000 - https://cicl.stanford.edu/tags/spotlight-at-neurips-2023/ + https://cicl.stanford.edu/tags/spotlight-at-neurips-2023-datasets-and-benchmarks-track/ diff --git a/docs/tags/spotlight-at-neurips-2023-datasets-and-benchmarks-track/index.html b/docs/tags/spotlight-at-neurips-2023-datasets-and-benchmarks-track/index.html new file mode 100644 index 0000000..be03408 --- /dev/null +++ b/docs/tags/spotlight-at-neurips-2023-datasets-and-benchmarks-track/index.html @@ -0,0 +1,375 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Spotlight at NeurIPS 2023 Datasets and Benchmarks Track | Causality in Cognition Lab + + + + + + + + + + +
+

Spotlight at NeurIPS 2023 Datasets and Benchmarks Track

+ + + + + +
+

Understanding Social Reasoning in Language Models with Language Models

+
+ + As Large Language Models (LLMs) become increasingly integrated into our everyday lives, understanding their ability to comprehend human mental states becomes critical for ensuring effective interactions. However, despite the recent attempts to assess … + +
+
+ + + + +
+
+ + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/tags/spotlight-at-neurips-2023-datasets-and-benchmarks-track/index.xml b/docs/tags/spotlight-at-neurips-2023-datasets-and-benchmarks-track/index.xml new file mode 100644 index 0000000..0e23403 --- /dev/null +++ b/docs/tags/spotlight-at-neurips-2023-datasets-and-benchmarks-track/index.xml @@ -0,0 +1,25 @@ + + + + Spotlight at NeurIPS 2023 Datasets and Benchmarks Track on Causality in Cognition Lab + https://cicl.stanford.edu/tags/spotlight-at-neurips-2023-datasets-and-benchmarks-track/ + Recent content in Spotlight at NeurIPS 2023 Datasets and Benchmarks Track on Causality in Cognition Lab + Hugo -- gohugo.io + en-us + © 2023 Tobias Gerstenberg + Tue, 27 Jun 2023 00:00:00 +0000 + + + + + + Understanding Social Reasoning in Language Models with Language Models + https://cicl.stanford.edu/publication/gandhi2023understanding/ + Tue, 27 Jun 2023 00:00:00 +0000 + + https://cicl.stanford.edu/publication/gandhi2023understanding/ + + + + + \ No newline at end of file diff --git a/docs/tags/spotlight-at-neurips-2023-datasets-and-benchmarks-track/page/1/index.html b/docs/tags/spotlight-at-neurips-2023-datasets-and-benchmarks-track/page/1/index.html new file mode 100644 index 0000000..10d92d2 --- /dev/null +++ b/docs/tags/spotlight-at-neurips-2023-datasets-and-benchmarks-track/page/1/index.html @@ -0,0 +1 @@ +https://cicl.stanford.edu/tags/spotlight-at-neurips-2023-datasets-and-benchmarks-track/ \ No newline at end of file diff --git a/docs/tags/spotlight-at-neurips-workshop/index.html b/docs/tags/spotlight-at-neurips-workshop/index.html new file mode 100644 index 0000000..62840a6 --- /dev/null +++ b/docs/tags/spotlight-at-neurips-workshop/index.html @@ -0,0 +1,375 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Spotlight at NeurIPS Workshop | Causality in Cognition Lab + + + + + + + + + + +
+

Spotlight at NeurIPS Workshop

+ + + + + +
+

Social Contract AI: Aligning AI Assistants with Implicit Group Norms

+
+ + We explore the idea of aligning an AI assistant by inverting a model of users' (unknown) preferences from observed interactions. To validate our proposal, we run proof-of-concept simulations in the economic ultimatum game, formalizing user … + +
+
+ + + + +
+
+ + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/tags/spotlight-at-neurips-workshop/index.xml b/docs/tags/spotlight-at-neurips-workshop/index.xml new file mode 100644 index 0000000..a189e12 --- /dev/null +++ b/docs/tags/spotlight-at-neurips-workshop/index.xml @@ -0,0 +1,25 @@ + + + + Spotlight at NeurIPS Workshop on Causality in Cognition Lab + https://cicl.stanford.edu/tags/spotlight-at-neurips-workshop/ + Recent content in Spotlight at NeurIPS Workshop on Causality in Cognition Lab + Hugo -- gohugo.io + en-us + © 2023 Tobias Gerstenberg + Mon, 30 Oct 2023 00:00:00 +0000 + + + + + + Social Contract AI: Aligning AI Assistants with Implicit Group Norms + https://cicl.stanford.edu/publication/franken2023social/ + Mon, 30 Oct 2023 00:00:00 +0000 + + https://cicl.stanford.edu/publication/franken2023social/ + + + + + \ No newline at end of file diff --git a/docs/tags/spotlight-at-neurips-workshop/page/1/index.html b/docs/tags/spotlight-at-neurips-workshop/page/1/index.html new file mode 100644 index 0000000..eb5ae08 --- /dev/null +++ b/docs/tags/spotlight-at-neurips-workshop/page/1/index.html @@ -0,0 +1 @@ +https://cicl.stanford.edu/tags/spotlight-at-neurips-workshop/ \ No newline at end of file