Skip to content

Commit

Permalink
Built site for gh-pages
Browse files Browse the repository at this point in the history
  • Loading branch information
Quarto GHA Workflow Runner committed Oct 15, 2024
1 parent 6cf525d commit 79312f8
Show file tree
Hide file tree
Showing 14 changed files with 62 additions and 62 deletions.
2 changes: 1 addition & 1 deletion .nojekyll
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1a0e5151
a89614ad
16 changes: 8 additions & 8 deletions index.html
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ <h2 class="anchored" data-anchor-id="articles">Articles</h2>

<div class="quarto-listing quarto-listing-container-grid" id="listing-listing">
<div class="list grid quarto-listing-cols-3">
<div class="g-col-1" data-index="0" data-listing-date-sort="1728345600000" data-listing-file-modified-sort="1728966007195" data-listing-date-modified-sort="NaN" data-listing-reading-time-sort="3" data-listing-word-count-sort="577">
<div class="g-col-1" data-index="0" data-listing-date-sort="1728864000000" data-listing-file-modified-sort="1728966041433" data-listing-date-modified-sort="NaN" data-listing-reading-time-sort="3" data-listing-word-count-sort="577">
<a href="./posts/sae_dream.html" class="quarto-grid-link">
<div class="quarto-grid-item card h-100 card-left">
<p class="card-img-top"><img src="posts/sae_dream_files/figure-html/cell-9-output-1.png" style="height: 150px;" class="thumbnail-image card-img"/></p>
Expand All @@ -223,14 +223,14 @@ <h5 class="no-anchor card-title listing-title">
T. Ben Thompson
</div>
<div class="listing-date">
Oct 8, 2024
Oct 14, 2024
</div>
</div>
</div>
</div>
</a>
</div>
<div class="g-col-1" data-index="1" data-listing-date-sort="1721692800000" data-listing-file-modified-sort="1728966007191" data-listing-date-modified-sort="NaN" data-listing-reading-time-sort="5" data-listing-word-count-sort="806">
<div class="g-col-1" data-index="1" data-listing-date-sort="1721692800000" data-listing-file-modified-sort="1728966041425" data-listing-date-modified-sort="NaN" data-listing-reading-time-sort="5" data-listing-word-count-sort="806">
<a href="./posts/flrt.html" class="quarto-grid-link">
<div class="quarto-grid-item card h-100 card-left">
<p class="card-img-top"><img src="posts/bomb.png" style="height: 150px;" class="thumbnail-image card-img"/></p>
Expand All @@ -253,7 +253,7 @@ <h5 class="no-anchor card-title listing-title">
</div>
</a>
</div>
<div class="g-col-1" data-index="2" data-listing-date-sort="1720742400000" data-listing-file-modified-sort="1728966007179" data-listing-date-modified-sort="NaN" data-listing-reading-time-sort="9" data-listing-word-count-sort="1747">
<div class="g-col-1" data-index="2" data-listing-date-sort="1720742400000" data-listing-file-modified-sort="1728966041413" data-listing-date-modified-sort="NaN" data-listing-reading-time-sort="9" data-listing-word-count-sort="1747">
<a href="./posts/circuit_breaking.html" class="quarto-grid-link">
<div class="quarto-grid-item card h-100 card-left">
<p class="card-img-top"><img src="posts/circuit_breaking_files/figure-html/cell-4-output-1.png" style="height: 150px;" class="thumbnail-image card-img"/></p>
Expand All @@ -276,7 +276,7 @@ <h5 class="no-anchor card-title listing-title">
</div>
</a>
</div>
<div class="g-col-1" data-index="3" data-listing-date-sort="1705968000000" data-listing-file-modified-sort="1728966007191" data-listing-date-modified-sort="NaN" data-listing-reading-time-sort="4" data-listing-word-count-sort="771">
<div class="g-col-1" data-index="3" data-listing-date-sort="1705968000000" data-listing-file-modified-sort="1728966041425" data-listing-date-modified-sort="NaN" data-listing-reading-time-sort="4" data-listing-word-count-sort="771">
<a href="./posts/dreamy.html" class="quarto-grid-link">
<div class="quarto-grid-item card h-100 card-left">
<p class="card-img-top"><img src="posts/dream_wow.png" style="height: 150px;" class="thumbnail-image card-img"/></p>
Expand All @@ -299,7 +299,7 @@ <h5 class="no-anchor card-title listing-title">
</div>
</a>
</div>
<div class="g-col-1" data-index="4" data-listing-date-sort="1705104000000" data-listing-file-modified-sort="1728966007155" data-listing-date-modified-sort="NaN" data-listing-reading-time-sort="25" data-listing-word-count-sort="4985">
<div class="g-col-1" data-index="4" data-listing-date-sort="1705104000000" data-listing-file-modified-sort="1728966041389" data-listing-date-modified-sort="NaN" data-listing-reading-time-sort="25" data-listing-word-count-sort="4985">
<a href="./posts/TDC2023.html" class="quarto-grid-link">
<div class="quarto-grid-item card h-100 card-left">
<p class="card-img-top"><img src="posts/TDC2023-sample-instances.png" alt="The Z-scores of activation vector similarity for the provided sample instances" style="height: 150px;" class="thumbnail-image card-img"/></p>
Expand All @@ -322,7 +322,7 @@ <h5 class="no-anchor card-title listing-title">
</div>
</a>
</div>
<div class="g-col-1" data-index="5" data-listing-date-sort="1701302400000" data-listing-file-modified-sort="1728966007191" data-listing-date-modified-sort="NaN" data-listing-reading-time-sort="7" data-listing-word-count-sort="1303">
<div class="g-col-1" data-index="5" data-listing-date-sort="1701302400000" data-listing-file-modified-sort="1728966041425" data-listing-date-modified-sort="NaN" data-listing-reading-time-sort="7" data-listing-word-count-sort="1303">
<a href="./posts/fight_the_illusion.html" class="quarto-grid-link">
<div class="quarto-grid-item card h-100 card-left">
<div class="listing-item-img-placeholder card-img-top" style="height: 150px;">&nbsp;</div>
Expand All @@ -345,7 +345,7 @@ <h5 class="no-anchor card-title listing-title">
</div>
</a>
</div>
<div class="g-col-1" data-index="6" data-listing-date-sort="1687651200000" data-listing-file-modified-sort="1728966007179" data-listing-date-modified-sort="NaN" data-listing-reading-time-sort="7" data-listing-word-count-sort="1247">
<div class="g-col-1" data-index="6" data-listing-date-sort="1687651200000" data-listing-file-modified-sort="1728966041413" data-listing-date-modified-sort="NaN" data-listing-reading-time-sort="7" data-listing-word-count-sort="1247">
<a href="./posts/catalog.html" class="quarto-grid-link">
<div class="quarto-grid-item card h-100 card-left">
<p class="card-img-top"><img src="posts/catalog_files/figure-html/cell-9-output-1.png" style="height: 150px;" class="thumbnail-image card-img"/></p>
Expand Down
2 changes: 1 addition & 1 deletion posts/TDC2023.html
Original file line number Diff line number Diff line change
Expand Up @@ -959,7 +959,7 @@ <h4 class="anchored" data-anchor-id="trojan-recovery">Trojan recovery:</h4>
});
</script>
</div> <!-- /content -->
<script>var lightboxQuarto = GLightbox({"closeEffect":"zoom","selector":".lightbox","descPosition":"bottom","loop":false,"openEffect":"zoom"});
<script>var lightboxQuarto = GLightbox({"descPosition":"bottom","closeEffect":"zoom","openEffect":"zoom","loop":false,"selector":".lightbox"});
(function() {
let previousOnload = window.onload;
window.onload = () => {
Expand Down
2 changes: 1 addition & 1 deletion posts/catalog.html
Original file line number Diff line number Diff line change
Expand Up @@ -1058,7 +1058,7 @@ <h2 class="anchored" data-anchor-id="github">GitHub</h2>
});
</script>
</div> <!-- /content -->
<script>var lightboxQuarto = GLightbox({"selector":".lightbox","closeEffect":"zoom","loop":false,"descPosition":"bottom","openEffect":"zoom"});
<script>var lightboxQuarto = GLightbox({"selector":".lightbox","closeEffect":"zoom","descPosition":"bottom","openEffect":"zoom","loop":false});
(function() {
let previousOnload = window.onload;
window.onload = () => {
Expand Down
6 changes: 3 additions & 3 deletions posts/catalog.out.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,7 @@
"Pythia-12B is miscalibrated on 20% of the bigrams and 45% of the\n",
"trigrams when we ask for prediction of $p \\geq 0.45$."
],
"id": "513a9812-5147-4017-a9f5-be0a3154696d"
"id": "9f8bbc92-cd35-4bbe-b51a-dc733216f243"
},
{
"cell_type": "code",
Expand Down Expand Up @@ -377,7 +377,7 @@
"The dataset is available on Huggingface:\n",
"[pile_scan_4](https://huggingface.co/datasets/Confirm-Labs/pile_scan_4)"
],
"id": "ff751c60-69aa-4c7a-875e-dc110b74317f"
"id": "ab6c1240-7e9c-40f0-bfc9-132ffdfa1232"
},
{
"cell_type": "code",
Expand Down Expand Up @@ -417,7 +417,7 @@
"Computational Linguistics, May 2022, pp. 95–136. doi:\n",
"[10.18653/v1/2022.bigscience-1.9](https://doi.org/10.18653/v1/2022.bigscience-1.9).</span>"
],
"id": "971b97d7-7f5a-4d12-8f2a-7f894a0c831e"
"id": "2c698549-a89e-4abf-ac5d-2988217bf06b"
}
],
"nbformat": 4,
Expand Down
2 changes: 1 addition & 1 deletion posts/circuit_breaking.html
Original file line number Diff line number Diff line change
Expand Up @@ -1739,7 +1739,7 @@ <h2 class="anchored" data-anchor-id="attack-success-fluency">Attack success: flu
});
</script>
</div> <!-- /content -->
<script>var lightboxQuarto = GLightbox({"openEffect":"zoom","loop":false,"selector":".lightbox","closeEffect":"zoom","descPosition":"bottom"});
<script>var lightboxQuarto = GLightbox({"loop":false,"descPosition":"bottom","selector":".lightbox","openEffect":"zoom","closeEffect":"zoom"});
(function() {
let previousOnload = window.onload;
window.onload = () => {
Expand Down
4 changes: 2 additions & 2 deletions posts/circuit_breaking.out.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion posts/dreamy.html
Original file line number Diff line number Diff line change
Expand Up @@ -5420,7 +5420,7 @@ <h2 class="anchored" data-anchor-id="causal-token-attribution">Causal token attr
});
</script>
</div> <!-- /content -->
<script>var lightboxQuarto = GLightbox({"descPosition":"bottom","openEffect":"zoom","selector":".lightbox","closeEffect":"zoom","loop":false});
<script>var lightboxQuarto = GLightbox({"loop":false,"selector":".lightbox","closeEffect":"zoom","descPosition":"bottom","openEffect":"zoom"});
(function() {
let previousOnload = window.onload;
window.onload = () => {
Expand Down
14 changes: 7 additions & 7 deletions posts/dreamy.out.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion posts/flrt.html
Original file line number Diff line number Diff line change
Expand Up @@ -1362,7 +1362,7 @@ <h2 class="anchored" data-anchor-id="jailbreak">Jailbreak</h2>
});
</script>
</div> <!-- /content -->
<script>var lightboxQuarto = GLightbox({"closeEffect":"zoom","selector":".lightbox","descPosition":"bottom","openEffect":"zoom","loop":false});
<script>var lightboxQuarto = GLightbox({"closeEffect":"zoom","openEffect":"zoom","selector":".lightbox","loop":false,"descPosition":"bottom"});
(function() {
let previousOnload = window.onload;
window.onload = () => {
Expand Down
20 changes: 10 additions & 10 deletions posts/flrt.out.ipynb

Large diffs are not rendered by default.

16 changes: 8 additions & 8 deletions posts/sae_dream.html
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">

<meta name="author" content="T. Ben Thompson">
<meta name="dcterms.date" content="2024-10-08">
<meta name="dcterms.date" content="2024-10-14">
<meta name="description" content="Better token optimization to maximize SAE features.">

<title>Dreaming with sparse autoencoder features – Confirm</title>
Expand Down Expand Up @@ -144,10 +144,10 @@
<link rel="stylesheet" href="../styles.css">
<meta name="citation_title" content="Dreaming with sparse autoencoder features">
<meta name="citation_author" content="T. Ben Thompson">
<meta name="citation_publication_date" content="2024-10-08">
<meta name="citation_cover_date" content="2024-10-08">
<meta name="citation_publication_date" content="2024-10-14">
<meta name="citation_cover_date" content="2024-10-14">
<meta name="citation_year" content="2024">
<meta name="citation_online_date" content="2024-10-08">
<meta name="citation_online_date" content="2024-10-14">
<meta name="citation_fulltext_html_url" content="https://confirmlabs.org/posts/sae_dream.html">
<meta name="citation_language" content="en">
<meta name="citation_reference" content="citation_title=GPT-NeoX-20B: An open-source autoregressive language model;,citation_abstract=We introduce GPT-NeoX-20B, a 20 billion parameter autoregressive language model trained on the Pile, whose weights will be made freely and openly available to the public through a permissive license. It is, to the best of our knowledge, the largest dense autoregressive model that has publicly available weights at the time of submission. In this work, we describe GPT-NeoX-20B’s architecture and training, and evaluate its performance. We open-source the training and evaluation code, as well as the model weights, at https://github.com/EleutherAI/gpt-neox.;,citation_author=Sidney Black;,citation_author=Stella Biderman;,citation_author=Eric Hallahan;,citation_author=Quentin Anthony;,citation_author=Leo Gao;,citation_author=Laurence Golding;,citation_author=Horace He;,citation_author=Connor Leahy;,citation_author=Kyle McDonell;,citation_author=Jason Phang;,citation_author=Michael Pieler;,citation_author=Usvsn Sai Prashanth;,citation_author=Shivanshu Purohit;,citation_author=Laria Reynolds;,citation_author=Jonathan Tow;,citation_author=Ben Wang;,citation_author=Samuel Weinbach;,citation_publication_date=2022-05;,citation_cover_date=2022-05;,citation_year=2022;,citation_fulltext_html_url=https://aclanthology.org/2022.bigscience-1.9;,citation_doi=10.18653/v1/2022.bigscience-1.9;,citation_conference_title=Proceedings of BigScience episode #5 – workshop on challenges &amp;amp;amp; perspectives in creating large language models;,citation_conference=Association for Computational Linguistics;">
Expand Down Expand Up @@ -254,7 +254,7 @@ <h1 class="title">Dreaming with sparse autoencoder features</h1>
<div>
<div class="quarto-title-meta-heading">Published</div>
<div class="quarto-title-meta-contents">
<p class="date">October 8, 2024</p>
<p class="date">October 14, 2024</p>
</div>
</div>

Expand Down Expand Up @@ -858,14 +858,14 @@ <h2 class="anchored" data-anchor-id="dreaming-decoder-directions">Dreaming decod
</section><section class="quarto-appendix-contents" id="quarto-citation"><h2 class="anchored quarto-appendix-heading">Citation</h2><div><div class="quarto-appendix-secondary-label">BibTeX citation:</div><pre class="sourceCode code-with-copy quarto-appendix-bibtex"><code class="sourceCode bibtex">@online{thompson2024,
author = {Thompson, T. Ben},
title = {Dreaming with Sparse Autoencoder Features},
date = {2024-10-08},
date = {2024-10-14},
url = {https://confirmlabs.org/posts/sae_dream.html},
langid = {en}
}
</code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre><div class="quarto-appendix-secondary-label">For attribution, please cite this work as:</div><div id="ref-thompson2024" class="csl-entry quarto-appendix-citeas" role="listitem">
<div class="">T.
B. Thompson, <span>“Dreaming with sparse autoencoder features,”</span>
Oct. 08, 2024. <a href="https://confirmlabs.org/posts/sae_dream.html">https://confirmlabs.org/posts/sae_dream.html</a></div>
Oct. 14, 2024. <a href="https://confirmlabs.org/posts/sae_dream.html">https://confirmlabs.org/posts/sae_dream.html</a></div>
</div></div></section></div></main> <!-- /main -->
<script id="quarto-html-after-body" type="application/javascript">
window.document.addEventListener("DOMContentLoaded", function (event) {
Expand Down Expand Up @@ -1288,7 +1288,7 @@ <h2 class="anchored" data-anchor-id="dreaming-decoder-directions">Dreaming decod
});
</script>
</div> <!-- /content -->
<script>var lightboxQuarto = GLightbox({"descPosition":"bottom","selector":".lightbox","loop":false,"closeEffect":"zoom","openEffect":"zoom"});
<script>var lightboxQuarto = GLightbox({"loop":false,"openEffect":"zoom","descPosition":"bottom","selector":".lightbox","closeEffect":"zoom"});
(function() {
let previousOnload = window.onload;
window.onload = () => {
Expand Down
20 changes: 10 additions & 10 deletions posts/sae_dream.out.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
"# Dreaming with sparse autoencoder features\n",
"\n",
"T. Ben Thompson \n",
"2024-10-08\n",
"2024-10-14\n",
"\n",
"I get a lot of questions about dreaming/feature visualization applied to\n",
"sparse autoencoders (SAEs) [1]. When we were writing [Fluent dreaming\n",
Expand Down Expand Up @@ -54,7 +54,7 @@
"written just for this post. I expect inefficiences and possibly some\n",
"bugs."
],
"id": "dfcbb8be-8a40-46a3-b0c9-a4019f5484a0"
"id": "0bb6f36e-91b2-48ea-a4a5-ba9eb037e332"
},
{
"cell_type": "code",
Expand Down Expand Up @@ -389,7 +389,7 @@
"\n",
"First, we’ll load up Gemma 2 2B."
],
"id": "7663a80e-cd34-45c1-a6db-11a5d1bb8996"
"id": "9fc1dba4-9a3c-4895-a7e5-a7d65de8cb1c"
},
{
"cell_type": "code",
Expand Down Expand Up @@ -434,7 +434,7 @@
"function will be passed as the `get_feature_and_logits` argument to the\n",
"`dream` function."
],
"id": "b8dd48f6-8309-44b0-a864-ddb1cd7150ba"
"id": "3fa85ad5-44c8-4a7c-94cc-c803c5b49d97"
},
{
"cell_type": "code",
Expand Down Expand Up @@ -484,7 +484,7 @@
"seems to respond strongly to variants of the word “label”. We run a\n",
"bunch of examples through the SAE and see how they score:"
],
"id": "d6597a44-bae8-4df2-974d-007c8ca7b395"
"id": "4e4c59b4-fcc9-45f2-b27d-987b9b1abb19"
},
{
"cell_type": "code",
Expand Down Expand Up @@ -545,7 +545,7 @@
"original notebook to see\n",
"it.](https://github.com/Confirm-Solutions/confirmlabs/blob/main/posts/sae_dream.ipynb)"
],
"id": "1d912b1b-9a65-48d4-826b-b1fb0e289afb"
"id": "a435c183-9bd3-42cb-b4ba-ed57bc14bc10"
},
{
"cell_type": "code",
Expand Down Expand Up @@ -614,7 +614,7 @@
"Out of curiosity, do we get similar results if we apply feature\n",
"visualization to the corresponding decoder direction?"
],
"id": "d2795cca-f26b-4369-886e-98cd881ae795"
"id": "76315e59-1833-401d-bfef-5e3335c9d3e5"
},
{
"cell_type": "code",
Expand Down Expand Up @@ -662,7 +662,7 @@
"yes for this particular feature. However, it might be different for\n",
"other features."
],
"id": "06cda7b0-ceb2-410e-a51b-be71e5be97a1"
"id": "ca7cf061-03b2-4a6b-a8d0-ec11f21a6da3"
},
{
"cell_type": "code",
Expand Down Expand Up @@ -746,7 +746,7 @@
"Feature visualization on the decoder direction also acquires the “label”\n",
"phrase:"
],
"id": "986abf0c-319a-4906-b220-c96d98509438"
"id": "c65fabe5-f3ae-4306-8702-e05264f710e6"
},
{
"cell_type": "code",
Expand Down Expand Up @@ -785,7 +785,7 @@
"“FLRT: Fluent student-teacher redteaming.” 2024. Available:\n",
"<https://arxiv.org/abs/2407.17447></span>"
],
"id": "44a51ce2-0f90-446e-af76-a37a5f347150"
"id": "f28edba4-c39c-4d18-8eb2-06de8b30a3fd"
}
],
"nbformat": 4,
Expand Down
16 changes: 8 additions & 8 deletions sitemap.xml
Original file line number Diff line number Diff line change
Expand Up @@ -2,34 +2,34 @@
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<url>
<loc>https://confirmlabs.org/posts/dreamy.html</loc>
<lastmod>2024-10-15T04:20:07.191Z</lastmod>
<lastmod>2024-10-15T04:20:41.425Z</lastmod>
</url>
<url>
<loc>https://confirmlabs.org/posts/circuit_breaking.html</loc>
<lastmod>2024-10-15T04:20:07.179Z</lastmod>
<lastmod>2024-10-15T04:20:41.413Z</lastmod>
</url>
<url>
<loc>https://confirmlabs.org/posts/TDC2023.html</loc>
<lastmod>2024-10-15T04:20:07.155Z</lastmod>
<lastmod>2024-10-15T04:20:41.389Z</lastmod>
</url>
<url>
<loc>https://confirmlabs.org/posts/fight_the_illusion.html</loc>
<lastmod>2024-10-15T04:20:07.191Z</lastmod>
<lastmod>2024-10-15T04:20:41.425Z</lastmod>
</url>
<url>
<loc>https://confirmlabs.org/index.html</loc>
<lastmod>2024-10-15T04:20:07.143Z</lastmod>
<lastmod>2024-10-15T04:20:41.381Z</lastmod>
</url>
<url>
<loc>https://confirmlabs.org/posts/catalog.html</loc>
<lastmod>2024-10-15T04:20:07.179Z</lastmod>
<lastmod>2024-10-15T04:20:41.413Z</lastmod>
</url>
<url>
<loc>https://confirmlabs.org/posts/flrt.html</loc>
<lastmod>2024-10-15T04:20:07.191Z</lastmod>
<lastmod>2024-10-15T04:20:41.425Z</lastmod>
</url>
<url>
<loc>https://confirmlabs.org/posts/sae_dream.html</loc>
<lastmod>2024-10-15T04:20:07.195Z</lastmod>
<lastmod>2024-10-15T04:20:41.433Z</lastmod>
</url>
</urlset>

0 comments on commit 79312f8

Please sign in to comment.