diff --git a/ch8/ner-with-haystack.ipynb b/ch8/ner-with-haystack.ipynb
index bf29672..14f57e3 100644
--- a/ch8/ner-with-haystack.ipynb
+++ b/ch8/ner-with-haystack.ipynb
@@ -105,13 +105,12 @@
},
{
"cell_type": "code",
- "execution_count": 37,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
- "# Function to extract entity annotations into a DataFrame\n",
"# Function to extract uniquely identified named entities into a DataFrame with URL\n",
"def extract_named_entities_with_ids_and_url(documents):\n",
" extracted_data = []\n",
@@ -139,6 +138,196 @@
"df_entities.drop_duplicates(subset=['word', 'entity_type','score'], inplace=True)"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 45,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " document_id | \n",
+ " word | \n",
+ " entity_type | \n",
+ " score | \n",
+ " url | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " eed2cf1f3e92c540b2842f908e54ae572448a45ff5b200... | \n",
+ " Chinese New Year | \n",
+ " MISC | \n",
+ " 0.870644 | \n",
+ " https://www.britannica.com/topic/Chinese-New-Year | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " eed2cf1f3e92c540b2842f908e54ae572448a45ff5b200... | \n",
+ " Lunar New Year | \n",
+ " MISC | \n",
+ " 0.915542 | \n",
+ " https://www.britannica.com/topic/Chinese-New-Year | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " eed2cf1f3e92c540b2842f908e54ae572448a45ff5b200... | \n",
+ " China | \n",
+ " LOC | \n",
+ " 0.918623 | \n",
+ " https://www.britannica.com/topic/Chinese-New-Year | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " eed2cf1f3e92c540b2842f908e54ae572448a45ff5b200... | \n",
+ " Chinese | \n",
+ " MISC | \n",
+ " 0.664121 | \n",
+ " https://www.britannica.com/topic/Chinese-New-Year | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " eed2cf1f3e92c540b2842f908e54ae572448a45ff5b200... | \n",
+ " Lunar New Year | \n",
+ " MISC | \n",
+ " 0.868886 | \n",
+ " https://www.britannica.com/topic/Chinese-New-Year | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " document_id word \\\n",
+ "0 eed2cf1f3e92c540b2842f908e54ae572448a45ff5b200... Chinese New Year \n",
+ "1 eed2cf1f3e92c540b2842f908e54ae572448a45ff5b200... Lunar New Year \n",
+ "2 eed2cf1f3e92c540b2842f908e54ae572448a45ff5b200... China \n",
+ "3 eed2cf1f3e92c540b2842f908e54ae572448a45ff5b200... Chinese \n",
+ "4 eed2cf1f3e92c540b2842f908e54ae572448a45ff5b200... Lunar New Year \n",
+ "\n",
+ " entity_type score url \n",
+ "0 MISC 0.870644 https://www.britannica.com/topic/Chinese-New-Year \n",
+ "1 MISC 0.915542 https://www.britannica.com/topic/Chinese-New-Year \n",
+ "2 LOC 0.918623 https://www.britannica.com/topic/Chinese-New-Year \n",
+ "3 MISC 0.664121 https://www.britannica.com/topic/Chinese-New-Year \n",
+ "4 MISC 0.868886 https://www.britannica.com/topic/Chinese-New-Year "
+ ]
+ },
+ "execution_count": 45,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_entities.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 46,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " score | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " count | \n",
+ " 98.000000 | \n",
+ "
\n",
+ " \n",
+ " mean | \n",
+ " 0.898904 | \n",
+ "
\n",
+ " \n",
+ " std | \n",
+ " 0.140579 | \n",
+ "
\n",
+ " \n",
+ " min | \n",
+ " 0.374084 | \n",
+ "
\n",
+ " \n",
+ " 25% | \n",
+ " 0.870194 | \n",
+ "
\n",
+ " \n",
+ " 50% | \n",
+ " 0.961658 | \n",
+ "
\n",
+ " \n",
+ " 75% | \n",
+ " 0.999651 | \n",
+ "
\n",
+ " \n",
+ " max | \n",
+ " 0.999793 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " score\n",
+ "count 98.000000\n",
+ "mean 0.898904\n",
+ "std 0.140579\n",
+ "min 0.374084\n",
+ "25% 0.870194\n",
+ "50% 0.961658\n",
+ "75% 0.999651\n",
+ "max 0.999793"
+ ]
+ },
+ "execution_count": 46,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_entities.describe()"
+ ]
+ },
{
"cell_type": "code",
"execution_count": 44,