Skip to content

Commit

Permalink
updated ner/nel (#12)
Browse files Browse the repository at this point in the history
  • Loading branch information
theorm authored Oct 24, 2024
1 parent c61f137 commit de90640
Show file tree
Hide file tree
Showing 7 changed files with 377 additions and 51 deletions.
281 changes: 255 additions & 26 deletions examples/notebooks/tools.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,9 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"🎉 You are now connected to the Impresso API! 🎉\n"
]
}
],
"outputs": [],
"source": [
"from impresso import connect\n",
"\n",
Expand All @@ -34,9 +26,13 @@
{
"data": {
"text/html": [
"<div style=\"display: grid; \">\n",
"<div>\n",
"<h2>Ner result</h2>\n",
"<div>Contains <b>19</b> items of <b>19</b> total items.</div>\n",
"<div>Contains <b>9</b> items of <b>9</b> total items.</div>\n",
"<br/>\n",
"</div>\n",
"</div>\n",
"<h3>Data preview:</h3>\n",
"<div>\n",
"<style scoped>\n",
Expand All @@ -58,9 +54,11 @@
" <th></th>\n",
" <th>type</th>\n",
" <th>surfaceForm</th>\n",
" <th>function</th>\n",
" <th>name</th>\n",
" <th>confidence.ner</th>\n",
" <th>offset.start</th>\n",
" <th>offset.end</th>\n",
" <th>confidence.ner</th>\n",
" <th>wikidata.id</th>\n",
" <th>wikidata.wikipediaPageName</th>\n",
" </tr>\n",
Expand All @@ -73,36 +71,44 @@
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1:37:pers:ner-stacked-2-bert-medium-historic-multilingual|ner-mgenre-multilingual</th>\n",
" <td>pers</td>\n",
" <td>Jean-Baptiste Nicolas Robert Schuman</td>\n",
" <td>N/A</td>\n",
" <td>Baptiste Nicolas Robert Schuman</td>\n",
" <td>93.81</td>\n",
" <td>1</td>\n",
" <td>37</td>\n",
" <td>93.81</td>\n",
" <td>N/A</td>\n",
" <td>N/A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15:37:comp.name:ner-stacked-2-bert-medium-historic-multilingual|ner-mgenre-multilingual</th>\n",
" <td>comp.name</td>\n",
" <td>Nicolas Robert Schuman</td>\n",
" <td>15</td>\n",
" <td>37</td>\n",
" <td>50.58</td>\n",
" <th>41:53:time:ner-stacked-2-bert-medium-historic-multilingual|ner-mgenre-multilingual</th>\n",
" <td>time</td>\n",
" <td>29 June 1886</td>\n",
" <td>N/A</td>\n",
" <td>N/A</td>\n",
" <td>86.49</td>\n",
" <td>41</td>\n",
" <td>53</td>\n",
" <td>N/A</td>\n",
" <td>N/A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1:37:pers.ind:ner-stacked-2-bert-medium-historic-multilingual|ner-mgenre-multilingual</th>\n",
" <td>pers.ind</td>\n",
" <td>Jean-Baptiste Nicolas Robert Schuman</td>\n",
" <td>1</td>\n",
" <td>37</td>\n",
" <td>85.42</td>\n",
" <th>56:72:time:ner-stacked-2-bert-medium-historic-multilingual|ner-mgenre-multilingual</th>\n",
" <td>time</td>\n",
" <td>4 September 1963</td>\n",
" <td>N/A</td>\n",
" <td>N/A</td>\n",
" <td>74.53</td>\n",
" <td>56</td>\n",
" <td>72</td>\n",
" <td>N/A</td>\n",
" <td>N/A</td>\n",
" </tr>\n",
Expand All @@ -111,7 +117,7 @@
"</div>"
],
"text/plain": [
"<impresso.resources.tools.NerContainer at 0x11a0e8890>"
"<impresso.resources.tools.NerContainer at 0x11b12b1d0>"
]
},
"execution_count": 2,
Expand All @@ -131,6 +137,229 @@
")\n",
"result"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div style=\"display: grid; \">\n",
"<div>\n",
"<h2>Ner result</h2>\n",
"<div>Contains <b>9</b> items of <b>9</b> total items.</div>\n",
"<br/>\n",
"</div>\n",
"</div>\n",
"<h3>Data preview:</h3>\n",
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>type</th>\n",
" <th>surfaceForm</th>\n",
" <th>function</th>\n",
" <th>name</th>\n",
" <th>confidence.ner</th>\n",
" <th>confidence.nel</th>\n",
" <th>offset.start</th>\n",
" <th>offset.end</th>\n",
" <th>wikidata.id</th>\n",
" <th>wikidata.wikipediaPageName</th>\n",
" <th>wikidata.wikipediaPageUrl</th>\n",
" </tr>\n",
" <tr>\n",
" <th>id</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1:37:pers:ner-stacked-2-bert-medium-historic-multilingual|ner-mgenre-multilingual</th>\n",
" <td>pers</td>\n",
" <td>Jean-Baptiste Nicolas Robert Schuman</td>\n",
" <td>N/A</td>\n",
" <td>Baptiste Nicolas Robert Schuman</td>\n",
" <td>93.81</td>\n",
" <td>99.57</td>\n",
" <td>1</td>\n",
" <td>37</td>\n",
" <td>Q15981</td>\n",
" <td>Robert Schuman</td>\n",
" <td>https://en.wikipedia.org/wiki/Robert_Schuman</td>\n",
" </tr>\n",
" <tr>\n",
" <th>41:53:time:ner-stacked-2-bert-medium-historic-multilingual|ner-mgenre-multilingual</th>\n",
" <td>time</td>\n",
" <td>29 June 1886</td>\n",
" <td>N/A</td>\n",
" <td>N/A</td>\n",
" <td>86.49</td>\n",
" <td>93.57</td>\n",
" <td>41</td>\n",
" <td>53</td>\n",
" <td>Q15981</td>\n",
" <td>Robert Schuman</td>\n",
" <td>https://en.wikipedia.org/wiki/Robert_Schuman</td>\n",
" </tr>\n",
" <tr>\n",
" <th>56:72:time:ner-stacked-2-bert-medium-historic-multilingual|ner-mgenre-multilingual</th>\n",
" <td>time</td>\n",
" <td>4 September 1963</td>\n",
" <td>N/A</td>\n",
" <td>N/A</td>\n",
" <td>74.53</td>\n",
" <td>81.87</td>\n",
" <td>56</td>\n",
" <td>72</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"<impresso.resources.tools.NerContainer at 0x11b0bbe10>"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"text = \"\"\"\n",
"Jean-Baptiste Nicolas Robert Schuman ( \n",
"29 June 1886 – 4 September 1963) was a Luxembourg-born French \n",
"statesman. Schuman was a Christian democratic (Popular \n",
"Republican Movement) political thinker and activist. \n",
"\"\"\"\n",
"result = impresso.tools.ner_nel(\n",
" text=text,\n",
")\n",
"result"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div style=\"display: grid; \">\n",
"<div>\n",
"<h2>Ner result</h2>\n",
"<div>Contains <b>1</b> items of <b>1</b> total items.</div>\n",
"<br/>\n",
"</div>\n",
"</div>\n",
"<h3>Data preview:</h3>\n",
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>type</th>\n",
" <th>surfaceForm</th>\n",
" <th>confidence.nel</th>\n",
" <th>offset.start</th>\n",
" <th>offset.end</th>\n",
" <th>wikidata.id</th>\n",
" <th>wikidata.wikipediaPageName</th>\n",
" <th>wikidata.wikipediaPageUrl</th>\n",
" </tr>\n",
" <tr>\n",
" <th>id</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>8:44:UNK:nel-mgenre-multilingual</th>\n",
" <td>unk</td>\n",
" <td>Jean-Baptiste Nicolas Robert Schuman</td>\n",
" <td>99.94</td>\n",
" <td>8</td>\n",
" <td>44</td>\n",
" <td>Q15981</td>\n",
" <td>Robert Schuman</td>\n",
" <td>https://en.wikipedia.org/wiki/Robert_Schuman</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"<impresso.resources.tools.NerContainer at 0x11b2a2390>"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"text = \"\"\"\n",
"[START]Jean-Baptiste Nicolas Robert Schuman[END] ( \n",
"29 June 1886 – 4 September 1963) was a Luxembourg-born French \n",
"statesman. Schuman was a Christian democratic (Popular \n",
"Republican Movement) political thinker and activist. \n",
"\"\"\"\n",
"result = impresso.tools.nel(\n",
" text=text,\n",
")\n",
"result"
]
}
],
"metadata": {
Expand Down
Loading

0 comments on commit de90640

Please sign in to comment.