diff --git a/ch3/jupyter-notebooks/data_classes.ipynb b/ch3/jupyter-notebooks/data_classes.ipynb index 293b471..5fb82d8 100644 --- a/ch3/jupyter-notebooks/data_classes.ipynb +++ b/ch3/jupyter-notebooks/data_classes.ipynb @@ -38,7 +38,7 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -56,7 +56,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -65,7 +65,7 @@ "Document(id='ca53157e450d009adb4c2217111faadc9e7c02aefb22717c4901e1c1c1ba314a', content='This is a simple document', dataframe=None, blob=None, meta={'name': 'test_doc'}, score=None)" ] }, - "execution_count": 2, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -77,7 +77,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -86,7 +86,7 @@ "'ca53157e450d009adb4c2217111faadc9e7c02aefb22717c4901e1c1c1ba314a'" ] }, - "execution_count": 3, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -104,7 +104,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -113,7 +113,7 @@ "'This is a simple document'" ] }, - "execution_count": 4, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -124,7 +124,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -133,7 +133,7 @@ "{'name': 'test_doc'}" ] }, - "execution_count": 5, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -151,7 +151,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -175,20 +175,19 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - 
"Document(id='22cf9396b67c1929c273ed65a6fcea5b8ba8b384ae45d5164be9ca7b6827c66c', content=None, dataframe= sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) \\\n", - "0 5.1 3.5 1.4 0.2 \n", + "Document(id='22cf9396b67c1929c273ed65a6fcea5b8ba8b384ae45d5164be9ca7b6827c66c', content=None, dataframe= sepal length (cm) sepal width (cm) ... petal width (cm) target\n", + "0 5.1 3.5 ... 0.2 0.0\n", "\n", - " target \n", - "0 0.0 , blob=None, meta={}, score=None)" + "[1 rows x 5 columns], blob=None, meta={}, score=None)" ] }, - "execution_count": 7, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -199,7 +198,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -208,7 +207,7 @@ "'22cf9396b67c1929c273ed65a6fcea5b8ba8b384ae45d5164be9ca7b6827c66c'" ] }, - "execution_count": 8, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -219,7 +218,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -264,14 +263,13 @@ "" ], "text/plain": [ - " sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) \\\n", - "0 5.1 3.5 1.4 0.2 \n", + " sepal length (cm) sepal width (cm) ... petal width (cm) target\n", + "0 5.1 3.5 ... 
0.2 0.0\n", "\n", - " target \n", - "0 0.0 " + "[1 rows x 5 columns]" ] }, - "execution_count": 9, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -291,7 +289,7 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -300,7 +298,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -314,7 +312,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -323,7 +321,7 @@ "ByteStream(data=b'Your binary data here', metadata={}, mime_type='application/pdf')" ] }, - "execution_count": 12, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -334,7 +332,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -343,7 +341,7 @@ "'93c323201fc3b8509e51056dff8baee6ca9dec1c22cf2ce2f6cfc0bb04397c14'" ] }, - "execution_count": 13, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -377,7 +375,7 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -386,7 +384,7 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 18, "metadata": {}, "outputs": [ { @@ -406,7 +404,7 @@ }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -426,7 +424,7 @@ }, { "cell_type": "code", - "execution_count": 52, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -446,7 +444,7 @@ }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 21, "metadata": {}, "outputs": [ { @@ -473,7 +471,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 22, "metadata": {}, "outputs": [ { @@ -482,7 +480,7 @@ "'740dcdab24b6c171e89af1f1158056d6f09c6cd238a39866dfe7160a47eeba9a'" ] }, - "execution_count": 
16, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -495,7 +493,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 23, "metadata": {}, "outputs": [ { @@ -504,7 +502,7 @@ "ChatMessage(content='Can you show me the weather forecast?', role=, name=None, metadata={})" ] }, - "execution_count": 17, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -524,7 +522,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 24, "metadata": {}, "outputs": [], "source": [ @@ -540,7 +538,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 25, "metadata": {}, "outputs": [ { @@ -577,7 +575,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 26, "metadata": {}, "outputs": [ { @@ -586,7 +584,7 @@ "'55b3a7072cc30c752c726922b929f073bf377fb72dbe89431c323031cf5360cd'" ] }, - "execution_count": 29, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } @@ -599,7 +597,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 27, "metadata": {}, "outputs": [ { @@ -608,7 +606,7 @@ "'This is the first segment of the live stream.'" ] }, - "execution_count": 32, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } @@ -630,7 +628,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 28, "metadata": {}, "outputs": [], "source": [ @@ -646,40 +644,35 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[Document(id='22cf9396b67c1929c273ed65a6fcea5b8ba8b384ae45d5164be9ca7b6827c66c', content=None, dataframe= sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) \\\n", - " 0 5.1 3.5 1.4 0.2 \n", + "[Document(id='22cf9396b67c1929c273ed65a6fcea5b8ba8b384ae45d5164be9ca7b6827c66c', content=None, dataframe= sepal length (cm) sepal width (cm) ... 
petal width (cm) target\n", + " 0 5.1 3.5 ... 0.2 0.0\n", " \n", - " target \n", - " 0 0.0 , blob=None, meta={}, score=None),\n", - " Document(id='c4852f58c6c65daaa7b11d7c009d8cbf7198c52c55f63fe27bf888beec64b673', content=None, dataframe= sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) \\\n", - " 1 4.9 3.0 1.4 0.2 \n", + " [1 rows x 5 columns], blob=None, meta={}, score=None),\n", + " Document(id='c4852f58c6c65daaa7b11d7c009d8cbf7198c52c55f63fe27bf888beec64b673', content=None, dataframe= sepal length (cm) sepal width (cm) ... petal width (cm) target\n", + " 1 4.9 3.0 ... 0.2 0.0\n", " \n", - " target \n", - " 1 0.0 , blob=None, meta={}, score=None),\n", - " Document(id='109c0409cdbcf2343ee97efd3ec334e74e73b5eeed3ecc362cbcff8adda10603', content=None, dataframe= sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) \\\n", - " 2 4.7 3.2 1.3 0.2 \n", + " [1 rows x 5 columns], blob=None, meta={}, score=None),\n", + " Document(id='109c0409cdbcf2343ee97efd3ec334e74e73b5eeed3ecc362cbcff8adda10603', content=None, dataframe= sepal length (cm) sepal width (cm) ... petal width (cm) target\n", + " 2 4.7 3.2 ... 0.2 0.0\n", " \n", - " target \n", - " 2 0.0 , blob=None, meta={}, score=None),\n", - " Document(id='3eef63e56ef7174a490478bd4147b70c113521fee93a13f430e14641a330fff3', content=None, dataframe= sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) \\\n", - " 3 4.6 3.1 1.5 0.2 \n", + " [1 rows x 5 columns], blob=None, meta={}, score=None),\n", + " Document(id='3eef63e56ef7174a490478bd4147b70c113521fee93a13f430e14641a330fff3', content=None, dataframe= sepal length (cm) sepal width (cm) ... petal width (cm) target\n", + " 3 4.6 3.1 ... 
0.2 0.0\n", " \n", - " target \n", - " 3 0.0 , blob=None, meta={}, score=None),\n", - " Document(id='eb5cd52bfc94cfc8fc3f750558e8f13cd9f0f69058994416850cba7c3ed8c895', content=None, dataframe= sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) \\\n", - " 4 5.0 3.6 1.4 0.2 \n", + " [1 rows x 5 columns], blob=None, meta={}, score=None),\n", + " Document(id='eb5cd52bfc94cfc8fc3f750558e8f13cd9f0f69058994416850cba7c3ed8c895', content=None, dataframe= sepal length (cm) sepal width (cm) ... petal width (cm) target\n", + " 4 5.0 3.6 ... 0.2 0.0\n", " \n", - " target \n", - " 4 0.0 , blob=None, meta={}, score=None)]" + " [1 rows x 5 columns], blob=None, meta={}, score=None)]" ] }, - "execution_count": 33, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } @@ -701,7 +694,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 30, "metadata": {}, "outputs": [], "source": [ @@ -719,7 +712,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 31, "metadata": {}, "outputs": [ { @@ -728,7 +721,7 @@ "150" ] }, - "execution_count": 36, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } @@ -746,7 +739,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 32, "metadata": {}, "outputs": [ { @@ -759,7 +752,7 @@ " 'embedding_similarity_function': 'dot_product'}}" ] }, - "execution_count": 42, + "execution_count": 32, "metadata": {}, "output_type": "execute_result" } @@ -768,6 +761,101 @@ "iris_docstore.to_dict()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Answer, ExtractedAnswer and GeneratedAnswer\n", + "\n", + "The Answer, ExtractedAnswer, and GeneratedAnswer classes are data structures commonly used in natural language processing (NLP) pipelines, particularly in the context of question answering systems.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [], + "source": [ + 
"from haystack.preview.dataclasses import Answer, GeneratedAnswer, ExtractedAnswer" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### `Answer` is a base data class that represents a generic answer structure. It contains the fields: \n", + "\n", + "* data: The content of the answer. \n", + "\n", + "* query: The original question or query that prompted the answer. \n", + "\n", + "* metadata: A dictionary containing any additional information about the answer. \n", + "\n", + "#### `ExtractedAnswer` inherits from Answer and is more specific to scenarios where the answer is extracted from a text. Besides the inherited query and metadata, it has the following fields: \n", + "\n", + "* data: The text of the answer extracted from a document. \n", + "\n", + "* document: The Document object from which the answer was extracted. \n", + "\n", + "* probability: A float representing the confidence that the extracted answer is correct. \n", + "\n", + "* start: The start index of the answer in the content of the Document. \n", + "\n", + "* end: The end index of the answer in the content of the Document. \n", + "\n", + "#### `GeneratedAnswer` also inherits from Answer and is used when the answer is generated (for example, by a language model) rather than extracted. Besides the inherited query and metadata, its fields are: \n", + "\n", + "* data: The generated text of the answer. \n", + "\n", + "* documents: A list of Document objects that were used as context or reference to generate the answer. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Extracted Answer: Berlin with probability 0.95\n", + "Generated Answer: Berlin is the capital of Germany.\n" + ] + } + ], + "source": [ + "# Assume we have a document that contains the answer to a question\n", + "doc = Document(content=\"Berlin is the capital of Germany.\", id=\"123\")\n", + "\n", + "answer = Answer(data='Berlin',\n", + " query='What is the capital of Germany?',\n", + " metadata={})\n", + "\n", + "# After processing a query, we find the answer and create an ExtractedAnswer object\n", + "extracted_answer = ExtractedAnswer(\n", + " data=\"Berlin\",\n", + " query=\"What is the capital of Germany?\",\n", + " metadata={},\n", + " document=doc,\n", + " probability=0.95,\n", + " start=0,\n", + " end=6\n", + ")\n", + "\n", + "# In another scenario, we might have a generated answer, not directly extracted from a specific location in a document\n", + "generated_answer = GeneratedAnswer(\n", + " data=\"Berlin is the capital of Germany.\",\n", + " documents=[doc],\n", + " query=\"What is the capital of Germany?\",\n", + " metadata={},\n", + ")\n", + "\n", + "# These objects can then be used to present answers, log results, or further processing\n", + "print(f\"Extracted Answer: {extracted_answer.data} with probability {extracted_answer.probability}\")\n", + "print(f\"Generated Answer: {generated_answer.data}\")" + ] + }, { "cell_type": "markdown", "metadata": {},