diff --git a/examples/notebooks/basic.ipynb b/examples/notebooks/basic.ipynb index bafe031..317d038 100644 --- a/examples/notebooks/basic.ipynb +++ b/examples/notebooks/basic.ipynb @@ -13,9 +13,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "🎉 You are now connected to the Impresso API! 🎉\n", + "🔗 Using API: https://dev.impresso-project.ch/public-api\n" + ] + } + ], "source": [ "from impresso import connect\n", "\n", @@ -70,64 +79,13 @@ "
2 rows × 60 columns
\n", "" ], "text/plain": [ - " type title size nbPages \\\n", - "uid \n", - "luxwort-1948-11-25-a-i0033 ar [REDACTED] 734 1 \n", - "FZG-1950-06-17-a-i0045 ar [REDACTED] 1353 1 \n", + " type title transcript \\\n", + "uid \n", + "luxwort-1948-11-25-a-i0033 ar [REDACTED] [REDACTED] \n", + "FZG-1950-06-17-a-i0045 ar [REDACTED] [REDACTED] \n", "\n", - " pages \\\n", + " locations \\\n", "uid \n", - "luxwort-1948-11-25-a-i0033 [{'uid': 'luxwort-1948-11-25-a-p0006', 'num': ... \n", - "FZG-1950-06-17-a-i0045 [{'uid': 'FZG-1950-06-17-a-p0001', 'num': 1, '... \n", + "luxwort-1948-11-25-a-i0033 [{'uid': 'aida-0001-54-Rome', 'relevance': 1},... \n", + "FZG-1950-06-17-a-i0045 [{'uid': 'aida-0001-54-London', 'relevance': 5... \n", "\n", - " isCC excerpt labels accessRight year \\\n", - "uid \n", - "luxwort-1948-11-25-a-i0033 True [REDACTED] [article] Closed 1948 \n", - "FZG-1950-06-17-a-i0045 False [REDACTED] [article] OpenPrivate 1950 \n", - "\n", - " ... newspaper.firstIssue.date \\\n", - "uid ... \n", - "luxwort-1948-11-25-a-i0033 ... 1848-03-23T00:00:00+00:00 \n", - "FZG-1950-06-17-a-i0045 ... 1864-12-17T00:00:00+00:00 \n", + " persons \\\n", + "uid \n", + "luxwort-1948-11-25-a-i0033 [] \n", + "FZG-1950-06-17-a-i0045 [{'uid': 'aida-0001-50-Karl_Arnold', 'relevanc... \n", "\n", - " newspaper.firstIssue.year newspaper.lastIssue.uid \\\n", + " topics transcriptLength isOnFrontPage mediaType \n", "uid \n", - "luxwort-1948-11-25-a-i0033 1848 luxwort-1950-12-30-a \n", - "FZG-1950-06-17-a-i0045 1864 FZG-2006-12-30-a \n", - "\n", - " newspaper.lastIssue.cover \\\n", - "uid \n", - "luxwort-1948-11-25-a-i0033 \n", - "FZG-1950-06-17-a-i0045 \n", - "\n", - " newspaper.lastIssue.labels \\\n", - "uid \n", - "luxwort-1948-11-25-a-i0033 [issue] \n", - "FZG-1950-06-17-a-i0045 [issue] \n", - "\n", - " newspaper.lastIssue.fresh \\\n", - "uid \n", - "luxwort-1948-11-25-a-i0033 False \n", - "FZG-1950-06-17-a-i0045 False \n", - "\n", - " newspaper.lastIssue.accessRights \\\n", - "uid \n", - "luxwort-1948-11-25-a-i0033 NotDefined \n", - "FZG-1950-06-17-a-i0045 NotDefined \n", - "\n", - " newspaper.lastIssue.date \\\n", - "uid \n", - "luxwort-1948-11-25-a-i0033 1950-12-30T00:00:00+00:00 \n", - "FZG-1950-06-17-a-i0045 2006-12-30T00:00:00+00:00 \n", - "\n", - " newspaper.lastIssue.year newspaper.fetched \n", - "uid \n", - "luxwort-1948-11-25-a-i0033 1950 True \n", - "FZG-1950-06-17-a-i0045 2006 True \n", - "\n", - "[2 rows x 60 columns]" + "luxwort-1948-11-25-a-i0033 [] 0 False newspaper \n", + "FZG-1950-06-17-a-i0045 [] 0 False newspaper " ] }, "execution_count": 6, @@ -743,66 +411,19 @@ "\n", - " | type | \n", - "title | \n", - "size | \n", - "nbPages | \n", - "pages | \n", - "isCC | \n", - "excerpt | \n", - "labels | \n", - "accessRight | \n", - "year | \n", - "locations | \n", - "persons | \n", - "language | \n", - "matches | \n", - "regions | \n", - "regionBreaks | \n", - "contentLineBreaks | \n", - "isFront | \n", - "date | \n", - "country | \n", - "tags | \n", - "collections | \n", - "dataProvider | \n", - "topics | \n", - "mentions | \n", - "issue.uid | \n", - "issue.cover | \n", - "issue.labels | \n", - "issue.fresh | \n", - "issue.accessRights | \n", - "issue.date | \n", - "issue.year | \n", - "newspaper.uid | \n", - "newspaper.acronym | \n", - "newspaper.labels | \n", - "newspaper.languages | \n", - "newspaper.included | \n", - "newspaper.name | \n", - "newspaper.endYear | \n", - "newspaper.startYear | \n", - "newspaper.countArticles | \n", - "newspaper.countIssues | \n", - "newspaper.countPages | \n", - "newspaper.deltaYear | \n", - "newspaper.properties | \n", - "newspaper.firstIssue.uid | \n", - "newspaper.firstIssue.cover | \n", - "newspaper.firstIssue.labels | \n", - "newspaper.firstIssue.fresh | \n", - "newspaper.firstIssue.accessRights | \n", - "newspaper.firstIssue.date | \n", - "newspaper.firstIssue.year | \n", - "newspaper.lastIssue.uid | \n", - "newspaper.lastIssue.cover | \n", - "newspaper.lastIssue.labels | \n", - "newspaper.lastIssue.fresh | \n", - "newspaper.lastIssue.accessRights | \n", - "newspaper.lastIssue.date | \n", - "newspaper.lastIssue.year | \n", - "newspaper.fetched | \n", - "|
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uid | \n", - "\n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | |
luxembourg1935-1936-09-12-a-i0036 | \n", - "ar | \n", - "[REDACTED] | \n", - "87 | \n", - "1 | \n", - "[{'uid': 'luxembourg1935-1936-09-12-a-p0003', ... | \n", - "True | \n", - "[REDACTED] | \n", - "[article] | \n", - "Closed | \n", - "1936 | \n", - "[{'uid': 'aida-0001-54-Luxembourg', 'relevance... | \n", - "[] | \n", - "fr | \n", - "[] | \n", "[] | \n", - "[] | \n", - "[] | \n", - "False | \n", - "1936-09-12T00:00:00+00:00 | \n", - "LU | \n", - "[] | \n", - "[] | \n", - "BNL | \n", - "[] | \n", - "[] | \n", - "luxembourg1935-1936-09-12-a | \n", - "\n", - " | [issue] | \n", - "False | \n", - "Closed | \n", - "1936-09-12T00:00:00+00:00 | \n", - "1936 | \n", - "luxembourg1935 | \n", - "luxembourg1935 | \n", - "[newspaper] | \n", - "[fr] | \n", - "True | \n", - "Luxembourg (1935) | \n", - "1940 | \n", - "1935 | \n", - "139719 | \n", - "1540 | \n", - "8892 | \n", - "5 | \n", - "[] | \n", - "luxembourg1935-1935-04-16-a | \n", - "\n", - " | [issue] | \n", - "False | \n", - "NotDefined | \n", - "1935-04-16T00:00:00+00:00 | \n", - "1935 | \n", - "luxembourg1935-1940-05-10-a | \n", - "\n", - " | [issue] | \n", + "0 | \n", "False | \n", - "NotDefined | \n", - "1940-05-10T00:00:00+00:00 | \n", - "1940 | \n", - "True | \n", - "
luxembourg1935-1938-11-12-a-i0038 | \n", - "ar | \n", - "[REDACTED] | \n", - "51 | \n", - "1 | \n", - "[{'uid': 'luxembourg1935-1938-11-12-a-p0003', ... | \n", - "True | \n", - "[REDACTED] | \n", - "[article] | \n", - "Closed | \n", - "1938 | \n", - "[] | \n", - "[] | \n", - "fr | \n", - "[] | \n", - "[] | \n", - "[] | \n", - "[] | \n", - "False | \n", - "1938-11-12T00:00:00+00:00 | \n", - "LU | \n", - "[] | \n", - "[] | \n", - "BNL | \n", - "[] | \n", - "[] | \n", - "luxembourg1935-1938-11-12-a | \n", - "\n", - " | [issue] | \n", - "False | \n", - "Closed | \n", - "1938-11-12T00:00:00+00:00 | \n", - "1938 | \n", - "luxembourg1935 | \n", - "luxembourg1935 | \n", - "[newspaper] | \n", - "[fr] | \n", - "True | \n", - "Luxembourg (1935) | \n", - "1940 | \n", - "1935 | \n", - "139719 | \n", - "1540 | \n", - "8892 | \n", - "5 | \n", - "[] | \n", - "luxembourg1935-1935-04-16-a | \n", - "\n", - " | [issue] | \n", - "False | \n", - "NotDefined | \n", - "1935-04-16T00:00:00+00:00 | \n", - "1935 | \n", - "luxembourg1935-1940-05-10-a | \n", - "\n", - " | [issue] | \n", - "False | \n", - "NotDefined | \n", - "1940-05-10T00:00:00+00:00 | \n", - "1940 | \n", - "True | \n", - "|
obermosel-1934-10-02-a-i0047 | \n", - "ob | \n", - "[REDACTED] | \n", - "161 | \n", - "1 | \n", - "[{'uid': 'obermosel-1934-10-02-a-p0003', 'num'... | \n", - "True | \n", - "[REDACTED] | \n", - "[article] | \n", - "Closed | \n", - "1934 | \n", - "[{'uid': 'aida-0001-54-Paris', 'relevance': 1}] | \n", - "[] | \n", - "fr | \n", - "[] | \n", - "[] | \n", - "[] | \n", - "[] | \n", - "False | \n", - "1934-10-02T00:00:00+00:00 | \n", - "LU | \n", - "[] | \n", - "[] | \n", - "BNL | \n", - "[] | \n", - "[] | \n", - "obermosel-1934-10-02-a | \n", - "\n", - " | [issue] | \n", - "False | \n", - "Closed | \n", - "1934-10-02T00:00:00+00:00 | \n", - "1934 | \n", - "obermosel | \n", - "obermosel | \n", - "[newspaper] | \n", - "[de] | \n", - "True | \n", - "Obermosel-Zeitung | \n", - "1948 | \n", - "1881 | \n", - "549523 | \n", - "12563 | \n", - "64212 | \n", - "67 | \n", - "[] | \n", - "obermosel-1881-06-18-a | \n", - "\n", - " | [issue] | \n", - "False | \n", - "NotDefined | \n", - "1881-06-18T00:00:00+00:00 | \n", - "1881 | \n", - "obermosel-1948-04-03-a | \n", - "\n", - " | [issue] | \n", - "False | \n", - "NotDefined | \n", - "1948-04-03T00:00:00+00:00 | \n", - "1948 | \n", - "True | \n", + "newspaper | \n", "
\n", - " | type | \n", - "surfaceForm | \n", - "function | \n", - "name | \n", - "confidence.ner | \n", - "offset.start | \n", - "offset.end | \n", - "wikidata.id | \n", - "wikidata.wikipediaPageName | \n", - "
---|---|---|---|---|---|---|---|---|---|
id | \n", - "\n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " |
1:37:pers:ner-stacked-2-bert-medium-historic-multilingual|ner-mgenre-multilingual | \n", - "pers | \n", - "Jean-Baptiste Nicolas Robert Schuman | \n", - "N/A | \n", - "Baptiste Nicolas Robert Schuman | \n", - "93.81 | \n", - "1 | \n", - "37 | \n", - "N/A | \n", - "N/A | \n", - "
41:53:time:ner-stacked-2-bert-medium-historic-multilingual|ner-mgenre-multilingual | \n", - "time | \n", - "29 June 1886 | \n", - "N/A | \n", - "N/A | \n", - "86.49 | \n", - "41 | \n", - "53 | \n", - "N/A | \n", - "N/A | \n", - "
56:72:time:ner-stacked-2-bert-medium-historic-multilingual|ner-mgenre-multilingual | \n", - "time | \n", - "4 September 1963 | \n", - "N/A | \n", - "N/A | \n", - "74.53 | \n", - "56 | \n", - "72 | \n", - "N/A | \n", - "N/A | \n", - "
\n", - " | type | \n", - "surfaceForm | \n", - "function | \n", - "name | \n", - "confidence.ner | \n", - "confidence.nel | \n", - "offset.start | \n", - "offset.end | \n", - "wikidata.id | \n", - "wikidata.wikipediaPageName | \n", - "wikidata.wikipediaPageUrl | \n", - "
---|---|---|---|---|---|---|---|---|---|---|---|
id | \n", - "\n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " |
1:37:pers:ner-stacked-2-bert-medium-historic-multilingual|ner-mgenre-multilingual | \n", - "pers | \n", - "Jean-Baptiste Nicolas Robert Schuman | \n", - "N/A | \n", - "Baptiste Nicolas Robert Schuman | \n", - "93.81 | \n", - "99.57 | \n", - "1 | \n", - "37 | \n", - "Q15981 | \n", - "Robert Schuman | \n", - "https://en.wikipedia.org/wiki/Robert_Schuman | \n", - "
41:53:time:ner-stacked-2-bert-medium-historic-multilingual|ner-mgenre-multilingual | \n", - "time | \n", - "29 June 1886 | \n", - "N/A | \n", - "N/A | \n", - "86.49 | \n", - "93.57 | \n", - "41 | \n", - "53 | \n", - "Q15981 | \n", - "Robert Schuman | \n", - "https://en.wikipedia.org/wiki/Robert_Schuman | \n", - "
56:72:time:ner-stacked-2-bert-medium-historic-multilingual|ner-mgenre-multilingual | \n", - "time | \n", - "4 September 1963 | \n", - "N/A | \n", - "N/A | \n", - "74.53 | \n", - "81.87 | \n", - "56 | \n", - "72 | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "
\n", - " | type | \n", - "surfaceForm | \n", - "confidence.nel | \n", - "offset.start | \n", - "offset.end | \n", - "wikidata.id | \n", - "wikidata.wikipediaPageName | \n", - "wikidata.wikipediaPageUrl | \n", - "
---|---|---|---|---|---|---|---|---|
id | \n", - "\n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " |
8:44:UNK:nel-mgenre-multilingual | \n", - "unk | \n", - "Jean-Baptiste Nicolas Robert Schuman | \n", - "99.94 | \n", - "8 | \n", - "44 | \n", - "Q15981 | \n", - "Robert Schuman | \n", - "https://en.wikipedia.org/wiki/Robert_Schuman | \n", - "