diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..d0c3cbf --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000..dc1312a --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=build + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. 
+ echo.If you don't have Sphinx installed, grab it from + echo.https://www.sphinx-doc.org/ + exit /b 1 +) + +if "%1" == "" goto help + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 0000000..517d8ac --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,12 @@ +sphinx_autodoc_typehints +toml +sphinx-panels +sphinx-autobuild +sphinx_book_theme +sphinx-toolbox +sphinx-copybutton +sphinx_design +jinja2 +sphinx +setuptools +textgrad \ No newline at end of file diff --git a/docs/source/assets/analogy.png b/docs/source/assets/analogy.png new file mode 100644 index 0000000..a94b3bd Binary files /dev/null and b/docs/source/assets/analogy.png differ diff --git a/docs/source/assets/logo_full.png b/docs/source/assets/logo_full.png new file mode 100644 index 0000000..d24f129 Binary files /dev/null and b/docs/source/assets/logo_full.png differ diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 0000000..997da48 --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,72 @@ +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. 
+# +# import os +# import sys +# sys.path.insert(0, os.path.abspath('.')) + + +# -- Project information ----------------------------------------------------- + +project = 'TextGrad' +copyright = '2024, TextGrad authors' +author = 'TextGrad authors' + +# The full version, including alpha/beta/rc tags +release = '0.1.3' + + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = ['sphinx.ext.duration', + 'sphinx.ext.doctest', + 'sphinx.ext.autodoc', + 'sphinx.ext.autosummary', + 'sphinx.ext.napoleon', + 'sphinx_book_theme' + ] + +autodoc_default_options = { + 'members': True, + 'special-members': '__call__', # Add other special methods as needed + 'undoc-members': True, + 'show-inheritance': True +} + + +autodoc_typehints = 'description' +autosummary_generate = True # Enable autosummary +autoclass_content = 'both' + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = [] + + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = 'sphinx_book_theme' + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". 
+html_static_path = ['_static'] \ No newline at end of file diff --git a/docs/source/index.rst b/docs/source/index.rst new file mode 100644 index 0000000..1d7613f --- /dev/null +++ b/docs/source/index.rst @@ -0,0 +1,306 @@ +.. TextGrad documentation master file, created by + sphinx-quickstart on Sat May 4 17:35:54 2024. + You can adapt this file completely to your liking, but it should at least + contain the root toctree directive. +Welcome to TextGrad's documentation! +==================================== + +TextGrad is a Python package that provides a simple interface to implement LLM-"gradients" pipelines for text optimization! + +Check out the :doc:`usage` section for further information, including how to +:doc:`install ` the project. Want to directly jump to the optimization process? +Check out the :doc:`quickstart` guide! + + +.. image:: assets/logo_full.png + :align: center + +An autograd engine -- for textual gradients! + +TextGrad is a powerful framework building automatic ``differentiation'' via text. +TextGrad implements backpropagation through text feedback provided by LLMs, strongly building on the gradient metaphor. + +We provide a simple and intuitive API that allows you to define your own loss functions and optimize them using text feedback. +This API is similar to the Pytorch API, making it simple to adapt to your use cases. + +.. image:: assets/analogy.png + :align: center + +QuickStart +========== + +If you know PyTorch, you know 80% of TextGrad. +Let's walk through the key components with a simple example. Say we want to use GPT-4o to generate a punchline for TextGrad. + +.. 
code-block:: python + + import textgrad as tg + # Step 1: Get an initial response from an LLM + model = tg.BlackboxLLM("gpt-4o") + punchline = model(tg.Variable("write a punchline for my github package about optimizing compound AI systems", role_description="prompt", requires_grad=False)) + punchline.set_role_description("a concise punchline that must hook everyone") + +Initial `punchline` from the model: +> Supercharge your AI synergy with our optimization toolkit – where compound intelligence meets peak performance! + +Not bad, but we (gpt-4o, I guess) can do better! Let's optimize the punchline using TextGrad. + +.. code-block:: python + + # Step 2: Define the loss function and the optimizer, just like in PyTorch! + loss_fn = tg.TextLoss("We want to have a super smart and funny punchline. Is the current one concise and addictive? Is the punch fun, makes sense, and subtle enough?") + optimizer = tg.TGD(parameters=[punchline]) + +.. code-block:: python + + # Step 3: Do the loss computation, backward pass, and update the punchline + loss = loss_fn(punchline) + loss.backward() + optimizer.step() + +Optimized punchline: +> Boost your AI with our toolkit – because even robots need a tune-up! + +Okay this model isn’t really ready for a comedy show yet (and maybe a bit cringy) but it is clearly trying. But who gets to maxima in one step? + +We have many more examples around how TextGrad can optimize all kinds of variables -- code, solutions to problems, molecules, prompts, and all that! + +Tutorials +--------- + +We have prepared a couple of tutorials to get you started with TextGrad. +You can run them directly in Google Colab by clicking on the links below. + +.. |primiti| image:: https://colab.research.google.com/assets/colab-badge.svg + :target: https://colab.research.google.com/github/zou-group/TextGrad/blob/main/examples/notebooks/Tutorial-Primitives.ipynb + :alt: Open In Colab + +.. 
|code| image:: https://colab.research.google.com/assets/colab-badge.svg + :target: https://colab.research.google.com/github/zou-group/textgrad/blob/main/examples/notebooks/Tutorial-Test-Time-Loss-for-Code.ipynb + :alt: Open In Colab + +.. |promptopt| image:: https://colab.research.google.com/assets/colab-badge.svg + :target: https://colab.research.google.com/github/zou-group/TextGrad/blob/main/examples/notebooks/Tutorial-Prompt-Optimization.ipynb + :alt: Open In Colab + +.. |solut| image:: https://colab.research.google.com/assets/colab-badge.svg + :target: https://colab.research.google.com/github/zou-group/TextGrad/blob/main/examples/notebooks/Tutorial-Solution-Optimization.ipynb + :alt: Open In Colab + + + ++--------------------------------------------------------------------------------+------------------+ +| Name | Link | ++================================================================================+==================+ +| Introduction to TextGrad primitives | |primiti| | ++--------------------------------------------------------------------------------+------------------+ +| Code Optimization and New Loss Implementation | |code| | ++--------------------------------------------------------------------------------+------------------+ +| Prompt Optimization | |promptopt| | ++--------------------------------------------------------------------------------+------------------+ +| Solution Optimization | |solut| | ++--------------------------------------------------------------------------------+------------------+ + +Installation +============ + +You can install TextGrad via pip: + +.. code-block:: bash + + pip install textgrad + +Examples +======== + +Minimal Instance Optimization Example +------------------------------------- + +TextGrad can optimize unstructured variables, such as text. Let us have an initial solution to a math problem that we want to improve. Here's how to do it with TextGrad, using GPT-4o: + +.. 
code-block:: python + + import textgrad as tg + tg.set_backward_engine(tg.get_engine("gpt-4o")) + + initial_solution = """To solve the equation 3x^2 - 7x + 2 = 0, we use the quadratic formula: + x = (-b ± √(b^2 - 4ac)) / 2a + a = 3, b = -7, c = 2 + x = (7 ± √((-7)^2 - 4 * 3(2))) / 6 + x = (7 ± √(7^3) / 6 + The solutions are: + x1 = (7 + √73) + x2 = (7 - √73)""" + + # Define the variable to optimize, let requires_grad=True to enable gradient computation + solution = tg.Variable(initial_solution, + requires_grad=True, + role_description="solution to the math question") + + # Define the loss function, via a system prompt to an LLM + loss_system_prompt = tg.Variable("""You will evaluate a solution to a math question. Do not attempt to solve it yourself, do not give a solution, only identify errors. Be super concise.""", + requires_grad=False, + role_description="system prompt") + + loss_fn = tg.TextLoss(loss_system_prompt) + + # Define the optimizer, let the optimizer know which variables to optimize + optimizer = tg.TGD(parameters=[solution]) + + loss = loss_fn(solution) + +Output: + + Variable(value=Errors: + 1. Incorrect sign in the discriminant calculation: it should be b^2 - 4ac, not b^2 + 4ac. + 2. Incorrect simplification of the quadratic formula: the denominator should be 2a, not 6. + 3. Final solutions are missing the division by 2a., role=response from the language model, grads=) + +.. 
code-block:: python + + loss.backward() + optimizer.step() + print(solution.value) + +Output: + + To solve the equation 3x^2 - 7x + 2 = 0, we use the quadratic formula: + x = (-b ± √(b^2 - 4ac)) / 2a + + Given: + a = 3, b = -7, c = 2 + + Substitute the values into the formula: + x = (7 ± √((-7)^2 - 4(3)(2))) / 6 + x = (7 ± √(49 - 24)) / 6 + x = (7 ± √25) / 6 + x = (7 ± 5) / 6 + + The solutions are: + x1 = (7 + 5) / 6 = 12 / 6 = 2 + x2 = (7 - 5) / 6 = 2 / 6 = 1/3 + +Minimal Prompt Optimization Example +----------------------------------- + +TextGrad can also optimize prompts in PyTorch style! Here's how to do it with TextGrad, using GPT-4o for feedback, and optimizing a prompt for gpt-3.5-turbo: + +.. code-block:: python + + import textgrad as tg + llm_engine = tg.get_engine("gpt-3.5-turbo") + tg.set_backward_engine(tg.get_engine("gpt-4o")) + + _, val_set, _, eval_fn = load_task("BBH_object_counting", llm_engine) + question_str, answer_str = val_set[0] + question = tg.Variable(question_str, role_description="question to the LLM", requires_grad=False) + answer = tg.Variable(answer_str, role_description="answer to the question", requires_grad=False) + +Question: + + I have two stalks of celery, two garlics, a potato, three heads of broccoli, a carrot, and a yam. How many vegetables do I have? + +Ground Truth Answer: + + 10 + +.. code-block:: python + + system_prompt = tg.Variable("You are a concise LLM. Think step by step.", + requires_grad=True, + role_description="system prompt to guide the LLM's reasoning strategy for accurate responses") + + model = tg.BlackboxLLM(llm_engine, system_prompt=system_prompt) + optimizer = tg.TGD(parameters=list(model.parameters())) + + prediction = model(question) + +Prediction: + + You have a total of seven vegetables: two stalks of celery, two garlics, one potato, three heads of broccoli, one carrot, and one yam. + +.. 
code-block:: python + + loss = eval_fn(inputs=dict(prediction=prediction, ground_truth_answer=answer)) + +Loss denoting accuracy: + + Variable(value=0, grads=) + +.. code-block:: python + + loss.backward() + +System prompt gradients: + + ... 2. **Encourage Explicit Summation**: - The prompt should encourage the model to explicitly state the summation process. This can help in verifying the accuracy of the count. For example, "Explain your calculations clearly and verify the total.".... + +.. code-block:: python + + optimizer.step() + +New system prompt value: + + You are a concise LLM. Think step by step. Prioritize accuracy in your calculations. Identify and count each item individually. Explain your calculations clearly and verify the total. After calculating, review your steps to ensure the total is correct. If you notice a discrepancy in your count, re-evaluate the list and correct any mistakes. + +.. code-block:: python + + prediction = model(question) + +New prediction: + + Let's count the number of each vegetable: + 1. Celery stalks: 2 + 2. Garlics: 2 + 3. Potato: 1 + 4. Broccoli heads: 3 + 5. Carrot: 1 + 6. Yam: 1 + + Now, let's add up the total number of vegetables: + 2 + 2 + 1 + 3 + 1 + 1 = 10 + + You have a total of 10 vegetables. + +Resources +========= + +Inspiration +----------- + +Many existing works greatly inspired this project! Here is a non-exhaustive list: + +- 📚 `PyTorch `_ The one and only. We owe a ton to PyTorch, hard to do justice here. +- 📚 `DSPy `_ is a pioneer in writing LM-based programs in many different ways! Has been a huge inspiration for us. +- 📚 `Micrograd `_: A tiny autograd engine greatly inspired our simple design! +- 📚 `ProTeGi `_: We owe the term "Textual Gradients" to ProTeGi! +- 📚 `Reflexion `_: A self-reflection that showed us the power of text-based reflection! + +Citation +======== + +.. 
code-block:: bibtex + + @article{yuksekgonul2024textgrad, + title={{TextGrad: Automatic ``Differentiation'' with Text}}, + author={Mert Yuksekgonul and Federico Bianchi and Joseph Boen and Sheng Liu and Zhi Huang and Carlos Guestrin and James Zou}, + year={2024}, + } + +Contents +-------- + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + usage + textgrad + quickstart + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` \ No newline at end of file diff --git a/docs/source/quickstart.rst b/docs/source/quickstart.rst new file mode 100644 index 0000000..84544f2 --- /dev/null +++ b/docs/source/quickstart.rst @@ -0,0 +1,46 @@ +QuickStart +========== + +What can TextGrad do? TextGrad can optimize your prompts from a language model in an automatic way. + + +.. code-block:: python + + import textgrad + + # Set the backward engine as an External LLM API object. + # See textgrad.config for more details. + textgrad.set_backward_engine(llm_api) + basic_system_prompt = "You are a language model that summarizes \ + a given document" + + system_prompt = textgrad.Variable(basic_system_prompt, requires_grad=True) + + api_model = textgrad.model.BlackboxLLM(llm_api) + + # this tells the model to use the following system prompt + api_model = api_model + system_prompt + + big_document = "This is a big document that we want to summarize." + + # Since we will not need the criticisms for the document, + # we will explicitly set requires_grad=False + doc = textgrad.Variable(big_document, requires_grad=False) + # Get the summary + summary = api_model(doc) + + # Compute a loss + evaluation_prompt = "Evaluate if this is a good summary \ + based on completeness and fluency." 
+ + loss_fn = textgrad.ResponseEvaluation(engine=llm_api, + evaluation_instruction=textgrad.Variable(evaluation_prompt, + requires_grad=False)) + + loss = loss_fn(summary) + loss.backward() # This populates gradients + + optimizer = textgrad.TextualGradientDescent(engine=llm_api, + parameters=[system_prompt]) + optimizer.step() + print(system_prompt) \ No newline at end of file diff --git a/docs/source/textgrad.rst b/docs/source/textgrad.rst new file mode 100644 index 0000000..c4d578e --- /dev/null +++ b/docs/source/textgrad.rst @@ -0,0 +1,14 @@ +Documentation +============= + +.. autosummary:: + :toctree: modules + :recursive: + + textgrad.variable + textgrad.loss + textgrad.optimizer + textgrad.engine + textgrad.model + textgrad.autograd + \ No newline at end of file diff --git a/docs/source/usage.rst b/docs/source/usage.rst new file mode 100644 index 0000000..63bfee4 --- /dev/null +++ b/docs/source/usage.rst @@ -0,0 +1,13 @@ +Usage +===== + +.. _installation: + +Installation +------------ + +To use TextGrad, first install it using pip: + +.. 
code-block:: console + + $ pip install textgrad diff --git a/readthedocs.yaml b/readthedocs.yaml new file mode 100644 index 0000000..ac3d7ab --- /dev/null +++ b/readthedocs.yaml @@ -0,0 +1,35 @@ +# Read the Docs configuration file for Sphinx projects +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +# Set the OS, Python version and other tools you might need +build: + os: ubuntu-22.04 + tools: + python: "3.12" + # You can also specify other tool versions: + # nodejs: "20" + # rust: "1.70" + # golang: "1.20" + +# Build documentation in the "docs/" directory with Sphinx +sphinx: + configuration: docs/source/conf.py + # You can configure Sphinx to use a different builder, for instance use the dirhtml builder for simpler URLs + # builder: "dirhtml" + # Fail on all warnings to avoid broken references + # fail_on_warning: true + +# Optionally build your docs in additional formats such as PDF and ePub +# formats: +# - pdf +# - epub + +# Optional but recommended, declare the Python requirements required +# to build your documentation +# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html +python: + install: + - requirements: docs/requirements.txt \ No newline at end of file