From c0abed67156eb45884dab2e40578b11e15f7f944 Mon Sep 17 00:00:00 2001
From: JadenFiottoKaufman
Date: Wed, 20 Dec 2023 13:24:10 -0500
Subject: [PATCH] Updated public server ndif.dev. Gradients feature guide

---
 .../notebooks/features/gradients.ipynb.txt | 172 +++++++++++++++++-
 .../_sources/notebooks/walkthrough.ipynb.txt | 2 +-
 public/_static/css/custom.css | 39 ++--
 public/about/index.html | 6 +-
 public/documentation/contexts/index.html | 6 +-
 public/documentation/index.html | 6 +-
 public/documentation/intervention/index.html | 6 +-
 public/documentation/models/index.html | 6 +-
 public/documentation/module/index.html | 20 +-
 public/documentation/patching/index.html | 24 +--
 public/documentation/tracing/index.html | 6 +-
 public/documentation/util/index.html | 6 +-
 public/features/index.html | 6 +-
 public/genindex/index.html | 16 +-
 public/index.html | 6 +-
 .../features/cross_prompt/index.html | 6 +-
 public/notebooks/features/getting/index.html | 6 +-
 public/notebooks/features/gradients.ipynb | 172 +++++++++++++++++-
 .../notebooks/features/gradients/index.html | 145 ++++++++++++++-
 public/notebooks/features/modules/index.html | 6 +-
 .../features/multiple_token/index.html | 6 +-
 .../notebooks/features/operations/index.html | 6 +-
 .../features/remote_execution/index.html | 6 +-
 public/notebooks/features/setting/index.html | 6 +-
 .../features/token_indexing/index.html | 6 +-
 .../tutorials/attribution_patching/index.html | 6 +-
 .../tutorials/dictionary_learning/index.html | 6 +-
 .../tutorials/function_vectors/index.html | 6 +-
 .../tutorials/future_lens/index.html | 6 +-
 .../tutorials/ioi_patching/index.html | 6 +-
 .../notebooks/tutorials/logit_lens/index.html | 6 +-
 public/notebooks/tutorials/sae/index.html | 6 +-
 public/notebooks/walkthrough.ipynb | 2 +-
 public/notebooks/walkthrough/index.html | 8 +-
 public/objects.inv | Bin 4225 -> 4228 bytes
 public/py-modindex/index.html | 6 +-
 public/search/index.html | 6 +-
 public/searchindex.js | 2 +-
 public/start/index.html | 6 +-
 public/tutorials/index.html | 6 +-
 40 files changed, 619 insertions(+), 151 deletions(-)

diff --git a/public/_sources/notebooks/features/gradients.ipynb.txt b/public/_sources/notebooks/features/gradients.ipynb.txt
index 93adb64..ab32d00 100644
--- a/public/_sources/notebooks/features/gradients.ipynb.txt
+++ b/public/_sources/notebooks/features/gradients.ipynb.txt
@@ -11,13 +11,181 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Coming Soon!"
+    "There are a couple of ways we can interact with the gradients during and after a backward pass."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "In the following example, we save the hidden states of the last layer and do a backward pass on the sum of the logits.\n",
+    "\n",
+    "Note two things:\n",
+    "\n",
+    "1. We use `inference=False` in the `.forward` call to turn off inference mode. This allows gradients to be calculated.\n",
+    "2. We can call `.backward()` on a value within the tracing context just like you normally would."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "tensor([[[ 0.5216, -1.1755, -0.4617, ..., -1.1919, 0.0204, -2.0075],\n",
+      "         [ 0.9841, 2.2175, 3.5851, ..., 0.5212, -2.2286, 5.7334]]],\n",
+      "       device='cuda:0', grad_fn=)\n"
+     ]
+    }
+   ],
+   "source": [
+    "from nnsight import LanguageModel\n",
+    "\n",
+    "model = LanguageModel('gpt2', device_map='cuda')\n",
+    "\n",
+    "with model.forward(inference=False) as runner:\n",
+    "    with runner.invoke('Hello World') as invoker:\n",
+    "\n",
+    "        hidden_states = model.transformer.h[-1].output[0].save()\n",
+    "\n",
+    "        logits = model.lm_head.output\n",
+    "\n",
+    "        logits.sum().backward()\n",
+    "\n",
+    "print(hidden_states.value)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "If we want to see the gradients for the hidden_states, we can call `.retain_grad()` on it and access the `.grad` attribute after execution."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "tensor([[[ 0.5216, -1.1755, -0.4617, ..., -1.1919, 0.0204, -2.0075],\n",
+      "         [ 0.9841, 2.2175, 3.5851, ..., 0.5212, -2.2286, 5.7334]]],\n",
+      "       device='cuda:0', grad_fn=)\n",
+      "tensor([[[ 28.7976, -282.5977, 868.7343, ..., 120.1742, 52.2264,\n",
+      "          168.6447],\n",
+      "         [ 79.4183, -253.6227, 1322.1290, ..., 208.3981, -19.5544,\n",
+      "          509.9856]]], device='cuda:0')\n"
+     ]
+    }
+   ],
+   "source": [
+    "from nnsight import LanguageModel\n",
+    "\n",
+    "model = LanguageModel('gpt2', device_map='cuda')\n",
+    "\n",
+    "with model.forward(inference=False) as runner:\n",
+    "    with runner.invoke('Hello World') as invoker:\n",
+    "\n",
+    "        hidden_states = model.transformer.h[-1].output[0].save()\n",
+    "        hidden_states.retain_grad()\n",
+    "\n",
+    "        logits = model.lm_head.output\n",
+    "\n",
+    "        logits.sum().backward()\n",
+    "\n",
+    "print(hidden_states.value)\n",
+    "print(hidden_states.value.grad)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Torch also provides hooks into the backward process via module inputs and outputs. NNsight exposes these in a similar way to `.input` and `.output` by also providing `.backward_input` and `.backward_output`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "tensor([[[ 0.5216, -1.1755, -0.4617, ..., -1.1919, 0.0204, -2.0075],\n",
+      "         [ 0.9841, 2.2175, 3.5851, ..., 0.5212, -2.2286, 5.7334]]],\n",
+      "       device='cuda:0', grad_fn=)\n",
+      "tensor([[[ 28.7976, -282.5977, 868.7343, ..., 120.1742, 52.2264,\n",
+      "          168.6447],\n",
+      "         [ 79.4183, -253.6227, 1322.1290, ..., 208.3981, -19.5544,\n",
+      "          509.9856]]], device='cuda:0')\n"
+     ]
+    }
+   ],
+   "source": [
+    "from nnsight import LanguageModel\n",
+    "\n",
+    "model = LanguageModel('gpt2', device_map='cuda')\n",
+    "\n",
+    "with model.forward(inference=False) as runner:\n",
+    "    with runner.invoke('Hello World') as invoker:\n",
+    "\n",
+    "        hidden_states = model.transformer.h[-1].output[0].save()\n",
+    "        hidden_states_grad = model.transformer.h[-1].backward_output[0].save()\n",
+    "        logits = model.lm_head.output\n",
+    "\n",
+    "        logits.sum().backward()\n",
+    "\n",
+    "print(hidden_states.value)\n",
+    "print(hidden_states_grad.value)"
+   ]
+  }
  ],
  "metadata": {
+  "kernelspec": {
+   "display_name": "ndif",
+   "language": "python",
+   "name": "python3"
+  },
   "language_info": {
-   "name": "python"
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.13"
   }
  },
  "nbformat": 4,
diff --git a/public/_sources/notebooks/walkthrough.ipynb.txt b/public/_sources/notebooks/walkthrough.ipynb.txt
index 61122bd..bae480d 100644
--- a/public/_sources/notebooks/walkthrough.ipynb.txt
+++ b/public/_sources/notebooks/walkthrough.ipynb.txt
@@ -2623,7 +2623,7 @@
    "source": [
     "# Next steps\n",
     "\n",
-    "Check out the [Features](/tutorials/features/) and [Documentation](/documentation/) pages and the [README](https://github.com/JadenFiotto-Kaufman/nnsight/blob/main/README.md) for more guides."
+    "Check out the [Features](/features/) and [Documentation](/documentation/) pages and the [README](https://github.com/JadenFiotto-Kaufman/nnsight/blob/main/README.md) for more guides."
    ]
   }
  ],
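
The two gradient-access patterns introduced in the gradients guide above (calling `.retain_grad()` on a saved value, and reading a module's `.backward_output`) can be combined into a simple gradient-times-activation score. The sketch below is illustrative only: it assumes the `LanguageModel`, `.forward(inference=False)`, `.save()`, and `backward_output` calls behave exactly as shown in the notebook diff, and the attribution step at the end is an added example rather than part of the guide.

# Illustrative sketch -- reuses the nnsight API exactly as shown in the gradients
# guide above; the gradient-times-activation score is an added example.
from nnsight import LanguageModel

model = LanguageModel('gpt2', device_map='cuda')

with model.forward(inference=False) as runner:
    with runner.invoke('Hello World') as invoker:

        # Save the last layer's hidden states and the gradient flowing back
        # through that layer's output.
        hidden_states = model.transformer.h[-1].output[0].save()
        hidden_states_grad = model.transformer.h[-1].backward_output[0].save()

        logits = model.lm_head.output
        logits.sum().backward()

# After execution both proxies hold concrete tensors; the elementwise product,
# summed over the hidden dimension, gives one attribution score per token.
attribution = (hidden_states.value * hidden_states_grad.value).sum(dim=-1)
print(attribution)
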
diff --git a/public/_static/css/custom.css b/public/_static/css/custom.css
index 6f5acd8..510f679 100644
--- a/public/_static/css/custom.css
+++ b/public/_static/css/custom.css
@@ -1,10 +1,10 @@
 .button-group {
-  display: flex;
-  flex-direction: row;
-  flex-wrap: nowrap;
-  justify-content: flex-start;
-  align-items: center;
-  align-content: stretch;
+    display: flex;
+    flex-direction: row;
+    flex-wrap: nowrap;
+    justify-content: flex-start;
+    align-items: center;
+    align-content: stretch;
     gap: 10px;
 }
 
@@ -34,7 +34,7 @@ html[data-theme="light"] {
 }
 
 
-.features {
+.features {
     height: 60vh;
     overflow: hidden;
 }
@@ -44,7 +44,9 @@
     margin-top: 50px;
 }
 
-
+img {
+    pointer-events: none;
+}
 
 .title-bot {
     margin-bottom: -10px !important;
@@ -62,21 +64,28 @@
     gap: 20px;
 }
 
-@media only screen and (max-width: 768px) { /* Adjust this value based on your breakpoint for mobile */
-    .front-container, .hero {
-        height: auto; /* Change from fixed height to auto */
-        min-height: 50vh; /* Adjust this as needed */
+@media only screen and (max-width: 768px) {
+
+    /* Adjust this value based on your breakpoint for mobile */
+    .front-container,
+    .hero {
+        height: auto;
+        /* Change from fixed height to auto */
+        min-height: 50vh;
+        /* Adjust this as needed */
     }
 
     .features-container {
-        margin-bottom: 20px; /* Increase bottom margin */
+        margin-bottom: 20px;
+        /* Increase bottom margin */
     }
 
     .hero {
-        margin-bottom: 30px; /* Adjust the bottom margin of the main container */
+        margin-bottom: 30px;
+        /* Adjust the bottom margin of the main container */
     }
 
     .features {
        height: 110vh;
    }
-}
+}
\ No newline at end of file
diff --git a/public/about/index.html b/public/about/index.html
index e93f8e1..738c651 100644
--- a/public/about/index.html
+++ b/public/about/index.html
@@ -31,7 +31,7 @@
-
+
@@ -247,7 +247,7 @@
 NDIF Status: