From cf3f70a6dde75a875eee644f8d2eeff783fd481f Mon Sep 17 00:00:00 2001 From: Laura Gutierrez Funderburk Date: Tue, 3 Dec 2024 17:43:04 -0800 Subject: [PATCH] Update ner-with-haystack.ipynb --- ch8/ner-with-haystack.ipynb | 193 +++++++++++++++++++++++++++++++++++- 1 file changed, 191 insertions(+), 2 deletions(-) diff --git a/ch8/ner-with-haystack.ipynb b/ch8/ner-with-haystack.ipynb index bf29672..14f57e3 100644 --- a/ch8/ner-with-haystack.ipynb +++ b/ch8/ner-with-haystack.ipynb @@ -105,13 +105,12 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "\n", - "# Function to extract entity annotations into a DataFrame\n", "# Function to extract uniquely identified named entities into a DataFrame with URL\n", "def extract_named_entities_with_ids_and_url(documents):\n", " extracted_data = []\n", @@ -139,6 +138,196 @@ "df_entities.drop_duplicates(subset=['word', 'entity_type','score'], inplace=True)" ] }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
document_idwordentity_typescoreurl
0eed2cf1f3e92c540b2842f908e54ae572448a45ff5b200...Chinese New YearMISC0.870644https://www.britannica.com/topic/Chinese-New-Year
1eed2cf1f3e92c540b2842f908e54ae572448a45ff5b200...Lunar New YearMISC0.915542https://www.britannica.com/topic/Chinese-New-Year
2eed2cf1f3e92c540b2842f908e54ae572448a45ff5b200...ChinaLOC0.918623https://www.britannica.com/topic/Chinese-New-Year
3eed2cf1f3e92c540b2842f908e54ae572448a45ff5b200...ChineseMISC0.664121https://www.britannica.com/topic/Chinese-New-Year
4eed2cf1f3e92c540b2842f908e54ae572448a45ff5b200...Lunar New YearMISC0.868886https://www.britannica.com/topic/Chinese-New-Year
\n", + "
" + ], + "text/plain": [ + " document_id word \\\n", + "0 eed2cf1f3e92c540b2842f908e54ae572448a45ff5b200... Chinese New Year \n", + "1 eed2cf1f3e92c540b2842f908e54ae572448a45ff5b200... Lunar New Year \n", + "2 eed2cf1f3e92c540b2842f908e54ae572448a45ff5b200... China \n", + "3 eed2cf1f3e92c540b2842f908e54ae572448a45ff5b200... Chinese \n", + "4 eed2cf1f3e92c540b2842f908e54ae572448a45ff5b200... Lunar New Year \n", + "\n", + " entity_type score url \n", + "0 MISC 0.870644 https://www.britannica.com/topic/Chinese-New-Year \n", + "1 MISC 0.915542 https://www.britannica.com/topic/Chinese-New-Year \n", + "2 LOC 0.918623 https://www.britannica.com/topic/Chinese-New-Year \n", + "3 MISC 0.664121 https://www.britannica.com/topic/Chinese-New-Year \n", + "4 MISC 0.868886 https://www.britannica.com/topic/Chinese-New-Year " + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_entities.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
score
count98.000000
mean0.898904
std0.140579
min0.374084
25%0.870194
50%0.961658
75%0.999651
max0.999793
\n", + "
" + ], + "text/plain": [ + " score\n", + "count 98.000000\n", + "mean 0.898904\n", + "std 0.140579\n", + "min 0.374084\n", + "25% 0.870194\n", + "50% 0.961658\n", + "75% 0.999651\n", + "max 0.999793" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_entities.describe()" + ] + }, { "cell_type": "code", "execution_count": 44,