From 12e1207dce67bd2c3a5c37c7d3cc724b3ade5adb Mon Sep 17 00:00:00 2001 From: Tobias Gerstenberg Date: Mon, 27 Nov 2023 16:50:01 -0800 Subject: [PATCH] updated tag on franken paper --- content/publication/franken2023social.md | 2 +- docs/member/tobias_gerstenberg/index.html | 2 +- docs/publication/franken2023social/index.html | 2 +- docs/publication/index.html | 2 +- docs/sitemap.xml | 8 +- docs/tags/index.html | 2 +- docs/tags/index.xml | 6 +- docs/tags/oral-at-neurips-workshop/index.html | 375 ++++++++++++++++++ docs/tags/oral-at-neurips-workshop/index.xml | 25 ++ .../page/1/index.html | 1 + 10 files changed, 413 insertions(+), 12 deletions(-) create mode 100644 docs/tags/oral-at-neurips-workshop/index.html create mode 100644 docs/tags/oral-at-neurips-workshop/index.xml create mode 100644 docs/tags/oral-at-neurips-workshop/page/1/index.html diff --git a/content/publication/franken2023social.md b/content/publication/franken2023social.md index 70c43e5..b2fab7a 100644 --- a/content/publication/franken2023social.md +++ b/content/publication/franken2023social.md @@ -13,7 +13,7 @@ publication_types = ["3"] publication_short = "_Socially Responsible Language Modelling Research Workshop (NeurIPS 2023)_" publication = "Fränken J., Kwok S., Ye P., Gandhi K., Arumugam D., Moore J., Tamkin A., Gerstenberg T., Goodman N. D. (2023). Social Contract AI: Aligning AI Assistants with Implicit Group Norms. In _Socially Responsible Language Modelling Research Workshop (NeurIPS 2023)." abstract = "We explore the idea of aligning an AI assistant by inverting a model of users' (unknown) preferences from observed interactions. To validate our proposal, we run proof-of-concept simulations in the economic ultimatum game, formalizing user preferences as policies that guide the actions of simulated players. We find that the AI assistant accurately aligns its behavior to match standard policies from the economic literature (e.g., selfish, altruistic). However, the assistant's learned policies lack robustness and exhibit limited generalization in an out-of-distribution setting when confronted with a currency (e.g., grams of medicine) that was not included in the assistant's training distribution. Additionally, we find that when there is inconsistency in the relationship between language use and an unknown policy (e.g., an altruistic policy combined with rude language), the assistant's learning of the policy is slowed. Overall, our preliminary results suggest that developing simulation frameworks in which AI assistants need to infer preferences from diverse users can provide a valuable approach for studying practical alignment questions." -tags = ["Spotlight at NeurIPS Workshop"] +tags = ["Oral at NeurIPS Workshop"] image_preview = "" selected = false projects = [] diff --git a/docs/member/tobias_gerstenberg/index.html b/docs/member/tobias_gerstenberg/index.html index 4f2eca5..7622f01 100644 --- a/docs/member/tobias_gerstenberg/index.html +++ b/docs/member/tobias_gerstenberg/index.html @@ -454,7 +454,7 @@

Publications

-
Spotlight at NeurIPS Workshop +
Oral at NeurIPS Workshop

diff --git a/docs/publication/franken2023social/index.html b/docs/publication/franken2023social/index.html index 701aac0..c108315 100644 --- a/docs/publication/franken2023social/index.html +++ b/docs/publication/franken2023social/index.html @@ -261,7 +261,7 @@

Abstract

- Spotlight at NeurIPS Workshop
+ Oral at NeurIPS Workshop

diff --git a/docs/publication/index.html b/docs/publication/index.html index 2eb2848..1df90a8 100644 --- a/docs/publication/index.html +++ b/docs/publication/index.html @@ -1674,7 +1674,7 @@

Publications

-
Spotlight at NeurIPS Workshop +
Oral at NeurIPS Workshop

diff --git a/docs/sitemap.xml b/docs/sitemap.xml index b98fe9d..bca7c85 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -37,7 +37,7 @@ - https://cicl.stanford.edu/publication_types/ + https://cicl.stanford.edu/tags/oral-at-neurips-workshop/ 2023-10-30T00:00:00+00:00 0 @@ -46,17 +46,17 @@ - https://cicl.stanford.edu/publication/franken2023social/ + https://cicl.stanford.edu/publication_types/ 2023-10-30T00:00:00+00:00 + 0 - https://cicl.stanford.edu/tags/spotlight-at-neurips-workshop/ + https://cicl.stanford.edu/publication/franken2023social/ 2023-10-30T00:00:00+00:00 - 0 diff --git a/docs/tags/index.html b/docs/tags/index.html index 24f0759..f66436a 100644 --- a/docs/tags/index.html +++ b/docs/tags/index.html @@ -239,7 +239,7 @@

Tags

-

Spotlight at NeurIPS Workshop

+

Oral at NeurIPS Workshop

diff --git a/docs/tags/index.xml b/docs/tags/index.xml index e31c732..4318b68 100644 --- a/docs/tags/index.xml +++ b/docs/tags/index.xml @@ -13,11 +13,11 @@ - Spotlight at NeurIPS Workshop - https://cicl.stanford.edu/tags/spotlight-at-neurips-workshop/ + Oral at NeurIPS Workshop + https://cicl.stanford.edu/tags/oral-at-neurips-workshop/ Mon, 30 Oct 2023 00:00:00 +0000 - https://cicl.stanford.edu/tags/spotlight-at-neurips-workshop/ + https://cicl.stanford.edu/tags/oral-at-neurips-workshop/ diff --git a/docs/tags/oral-at-neurips-workshop/index.html b/docs/tags/oral-at-neurips-workshop/index.html new file mode 100644 index 0000000..d1e4116 --- /dev/null +++ b/docs/tags/oral-at-neurips-workshop/index.html @@ -0,0 +1,375 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Oral at NeurIPS Workshop | Causality in Cognition Lab + + + + + + + + + + +
+

Oral at NeurIPS Workshop

+ + + + + +
+

Social Contract AI: Aligning AI Assistants with Implicit Group Norms

+
+ + We explore the idea of aligning an AI assistant by inverting a model of users' (unknown) preferences from observed interactions. To validate our proposal, we run proof-of-concept simulations in the economic ultimatum game, formalizing user … + +
+
+ + + + +
+
+ + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/tags/oral-at-neurips-workshop/index.xml b/docs/tags/oral-at-neurips-workshop/index.xml new file mode 100644 index 0000000..7efd7ce --- /dev/null +++ b/docs/tags/oral-at-neurips-workshop/index.xml @@ -0,0 +1,25 @@ + + + + Oral at NeurIPS Workshop on Causality in Cognition Lab + https://cicl.stanford.edu/tags/oral-at-neurips-workshop/ + Recent content in Oral at NeurIPS Workshop on Causality in Cognition Lab + Hugo -- gohugo.io + en-us + © 2023 Tobias Gerstenberg + Mon, 30 Oct 2023 00:00:00 +0000 + + + + + + Social Contract AI: Aligning AI Assistants with Implicit Group Norms + https://cicl.stanford.edu/publication/franken2023social/ + Mon, 30 Oct 2023 00:00:00 +0000 + + https://cicl.stanford.edu/publication/franken2023social/ + + + + + \ No newline at end of file diff --git a/docs/tags/oral-at-neurips-workshop/page/1/index.html b/docs/tags/oral-at-neurips-workshop/page/1/index.html new file mode 100644 index 0000000..8eacf65 --- /dev/null +++ b/docs/tags/oral-at-neurips-workshop/page/1/index.html @@ -0,0 +1 @@ +https://cicl.stanford.edu/tags/oral-at-neurips-workshop/ \ No newline at end of file