From 10296434453da0df92b19683e61721ba3f6fe8b3 Mon Sep 17 00:00:00 2001 From: Laura Gutierrez Funderburk Date: Sun, 19 Nov 2023 21:42:13 -0800 Subject: [PATCH] add embedding document exercise --- ch3/jupyter-notebooks/components.ipynb | 59 ++++++++++++++++++-------- 1 file changed, 42 insertions(+), 17 deletions(-) diff --git a/ch3/jupyter-notebooks/components.ipynb b/ch3/jupyter-notebooks/components.ipynb index 52bf954..92a1122 100644 --- a/ch3/jupyter-notebooks/components.ipynb +++ b/ch3/jupyter-notebooks/components.ipynb @@ -162,11 +162,6 @@ "In this snippet, `text_embedder` is created with an OpenAI API key and used to generate an embedding for the string \"I love pizza!\". The resulting embedding and associated metadata are then printed out." ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] - }, { "cell_type": "code", "execution_count": 4, @@ -499,7 +494,16 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Writing embedded documents." + "Writing embedded documents.\n", + "\n", + "* Compute Embeddings: Use either the `OpenAIDocumentEmbedder` or `SentenceTransformersDocumentEmbedder` to compute the embeddings for your documents.\n", + "\n", + "* Store Embeddings: The computed embeddings are stored in the `embedding` field of the `Document` objects.\n", + "\n", + "* Write to DocumentStore: Use the `DocumentWriter` component to write these `Document` objects, now with embeddings, into a `DocumentStore`.\n", + "\n", + "Here's an example code snippet that demonstrates how to use the `SentenceTransformersDocumentEmbedder` to write embeddings into a document store:\n", + "\n" ] }, { @@ -557,25 +561,46 @@ "document_writer.run(documents=embedded_docs)\n" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Showing each document's content and its embedding" + ] + }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 47, "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ -
"[Document(id='2e3218009b01cfc57f865bbf81fa70de81b5ebae02c4cc7092e46ffde03f3c49', content='The quick brown fox jumps over the lazy dog.', dataframe=None, blob=None, meta={}, score=None),\n", - " Document(id='8baba41960a8807c42da6783a39dbbf50873f9700ff861844ec8ccce65d4f50e', content='When it comes to natural language processing, context is key.', dataframe=None, blob=None, meta={}, score=None)]" - ] - }, - "execution_count": 35, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "Document ID: 2e3218009b01cfc57f865bbf81fa70de81b5ebae02c4cc7092e46ffde03f3c49\n", + "Content: The quick brown fox jumps over the lazy dog.\n", + "Embedding: [-0.03429264575242996, -0.0013394346460700035, 0.004336129408329725, -0.0018683503149077296, 0.025440821424126625]...\n", + "\n", + "\n", + "Document ID: 8baba41960a8807c42da6783a39dbbf50873f9700ff861844ec8ccce65d4f50e\n", + "Content: When it comes to natural language processing, context is key.\n", + "Embedding: [0.049897201359272, -0.023004200309515, -0.03653186932206154, 0.05246769264340401, -0.01983010210096836]...\n", + "\n", + "\n" + ] } ], "source": [ - "doc_store.filter_documents()" + "# Retrieve all documents\n", + "all_documents = doc_store.filter_documents()\n", + "\n", + "# Print details of each document, including the embedding if it exists\n", + "for doc in all_documents:\n", + " print(f\"Document ID: {doc.id}\")\n", + " print(f\"Content: {doc.content}\")\n", + " if doc.embedding:\n", + " print(f\"Embedding: {doc.embedding[:5]}...\") # Displaying first 5 values of the embedding for brevity\n", + " print(\"\\n\")\n" ] }, {