From 10296434453da0df92b19683e61721ba3f6fe8b3 Mon Sep 17 00:00:00 2001
From: Laura Gutierrez Funderburk <lgutierrwr@gmail.com>
Date: Sun, 19 Nov 2023 21:42:13 -0800
Subject: [PATCH] add embedding document exercise

---
 ch3/jupyter-notebooks/components.ipynb | 59 ++++++++++++++++++--------
 1 file changed, 42 insertions(+), 17 deletions(-)

diff --git a/ch3/jupyter-notebooks/components.ipynb b/ch3/jupyter-notebooks/components.ipynb
index 52bf954..92a1122 100644
--- a/ch3/jupyter-notebooks/components.ipynb
+++ b/ch3/jupyter-notebooks/components.ipynb
@@ -162,11 +162,6 @@
     "In this snippet, `text_embedder` is created with an OpenAI API key and used to generate an embedding for the string \"I love pizza!\". The resulting embedding and associated metadata are then printed out."
    ]
   },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": []
-  },
   {
    "cell_type": "code",
    "execution_count": 4,
@@ -499,7 +494,16 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Writing embedded documents."
+    "Writing embedded documents to a document store involves three steps:\n",
+    "\n",
+    "* **Compute embeddings**: Use either the `OpenAIDocumentEmbedder` or the `SentenceTransformersDocumentEmbedder` to compute the embeddings for your documents.\n",
+    "\n",
+    "* **Store embeddings**: The computed embeddings are stored in the `embedding` field of the `Document` objects.\n",
+    "\n",
+    "* **Write to the document store**: Use the `DocumentWriter` component to write these `Document` objects, now with embeddings, into a `DocumentStore`.\n",
+    "\n",
+    "Here's an example that uses the `SentenceTransformersDocumentEmbedder` to compute the embeddings and the `DocumentWriter` to write the embedded documents into a document store:\n",
+    "\n"
    ]
   },
   {
@@ -557,25 +561,46 @@
     "document_writer.run(documents=embedded_docs)\n"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Display each stored document's ID and content, along with the first five values of its embedding."
+   ]
+  },
   {
    "cell_type": "code",
-   "execution_count": 35,
+   "execution_count": 47,
    "metadata": {},
    "outputs": [
     {
-     "data": {
-      "text/plain": [
-       "[Document(id='2e3218009b01cfc57f865bbf81fa70de81b5ebae02c4cc7092e46ffde03f3c49', content='The quick brown fox jumps over the lazy dog.', dataframe=None, blob=None, meta={}, score=None),\n",
-       " Document(id='8baba41960a8807c42da6783a39dbbf50873f9700ff861844ec8ccce65d4f50e', content='When it comes to natural language processing, context is key.', dataframe=None, blob=None, meta={}, score=None)]"
-      ]
-     },
-     "execution_count": 35,
-     "metadata": {},
-     "output_type": "execute_result"
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Document ID: 2e3218009b01cfc57f865bbf81fa70de81b5ebae02c4cc7092e46ffde03f3c49\n",
+      "Content: The quick brown fox jumps over the lazy dog.\n",
+      "Embedding: [-0.03429264575242996, -0.0013394346460700035, 0.004336129408329725, -0.0018683503149077296, 0.025440821424126625]...\n",
+      "\n",
+      "\n",
+      "Document ID: 8baba41960a8807c42da6783a39dbbf50873f9700ff861844ec8ccce65d4f50e\n",
+      "Content: When it comes to natural language processing, context is key.\n",
+      "Embedding: [0.049897201359272, -0.023004200309515, -0.03653186932206154, 0.05246769264340401, -0.01983010210096836]...\n",
+      "\n",
+      "\n"
+     ]
     }
    ],
    "source": [
-    "doc_store.filter_documents()"
+    "# Retrieve all documents\n",
+    "all_documents = doc_store.filter_documents()\n",
+    "\n",
+    "# Print details of each document, including the embedding if it exists\n",
+    "for doc in all_documents:\n",
+    "    print(f\"Document ID: {doc.id}\")\n",
+    "    print(f\"Content: {doc.content}\")\n",
+    "    if doc.embedding:\n",
+    "        print(f\"Embedding: {doc.embedding[:5]}...\")  # Displaying first 5 values of the embedding for brevity\n",
+    "    print(\"\\n\")\n"
    ]
   },
   {