diff --git a/.flake8 b/.flake8 index d89f543..1eb6011 100644 --- a/.flake8 +++ b/.flake8 @@ -1,3 +1,3 @@ [flake8] max-line-length = 120 -exclude = impresso/api_client, impresso/protobuf +exclude = impresso/api_client, impresso/protobuf, impresso/api_models.py diff --git a/impresso/api_client/api/tools/perform_ner.py b/impresso/api_client/api/tools/perform_ner.py index 3783460..41e275c 100644 --- a/impresso/api_client/api/tools/perform_ner.py +++ b/impresso/api_client/api/tools/perform_ner.py @@ -80,7 +80,7 @@ def sync_detailed( client: AuthenticatedClient, body: ImpressoNamedEntityRecognitionRequest, ) -> Response[Union[Error, ImpressoNamedEntityRecognitionResponse]]: - """Perform Named Entity Recognition of a text + """Perform named entity recognition (and optional named entity linking) of a text Args: body (ImpressoNamedEntityRecognitionRequest): Request body for the Impresso NER endpoint @@ -109,7 +109,7 @@ def sync( client: AuthenticatedClient, body: ImpressoNamedEntityRecognitionRequest, ) -> Optional[Union[Error, ImpressoNamedEntityRecognitionResponse]]: - """Perform Named Entity Recognition of a text + """Perform named entity recognition (and optional named entity linking) of a text Args: body (ImpressoNamedEntityRecognitionRequest): Request body for the Impresso NER endpoint @@ -133,7 +133,7 @@ async def asyncio_detailed( client: AuthenticatedClient, body: ImpressoNamedEntityRecognitionRequest, ) -> Response[Union[Error, ImpressoNamedEntityRecognitionResponse]]: - """Perform Named Entity Recognition of a text + """Perform named entity recognition (and optional named entity linking) of a text Args: body (ImpressoNamedEntityRecognitionRequest): Request body for the Impresso NER endpoint @@ -160,7 +160,7 @@ async def asyncio( client: AuthenticatedClient, body: ImpressoNamedEntityRecognitionRequest, ) -> Optional[Union[Error, ImpressoNamedEntityRecognitionResponse]]: - """Perform Named Entity Recognition of a text + """Perform named entity recognition (and optional named entity linking) of a text Args: body (ImpressoNamedEntityRecognitionRequest): Request body for the Impresso NER endpoint diff --git a/impresso/api_client/models/__init__.py b/impresso/api_client/models/__init__.py index dc44d38..8f4eb48 100644 --- a/impresso/api_client/models/__init__.py +++ b/impresso/api_client/models/__init__.py @@ -93,6 +93,7 @@ from .impresso_named_entity_recognition_entity_type import ImpressoNamedEntityRecognitionEntityType from .impresso_named_entity_recognition_entity_wikidata import ImpressoNamedEntityRecognitionEntityWikidata from .impresso_named_entity_recognition_request import ImpressoNamedEntityRecognitionRequest +from .impresso_named_entity_recognition_request_method import ImpressoNamedEntityRecognitionRequestMethod from .impresso_named_entity_recognition_response import ImpressoNamedEntityRecognitionResponse from .location_specific_fields import LocationSpecificFields from .location_specific_fields_descriptions import LocationSpecificFieldsDescriptions @@ -230,6 +231,7 @@ "ImpressoNamedEntityRecognitionEntityType", "ImpressoNamedEntityRecognitionEntityWikidata", "ImpressoNamedEntityRecognitionRequest", + "ImpressoNamedEntityRecognitionRequestMethod", "ImpressoNamedEntityRecognitionResponse", "LocationSpecificFields", "LocationSpecificFieldsDescriptions", diff --git a/impresso/api_client/models/impresso_named_entity_recognition_request.py b/impresso/api_client/models/impresso_named_entity_recognition_request.py index 6a3b01c..45540b2 100644 --- a/impresso/api_client/models/impresso_named_entity_recognition_request.py +++ b/impresso/api_client/models/impresso_named_entity_recognition_request.py @@ -1,7 +1,10 @@ -from typing import Any, Dict, Type, TypeVar +from typing import Any, Dict, Type, TypeVar, Union from attrs import define as _attrs_define +from ..models.impresso_named_entity_recognition_request_method import ImpressoNamedEntityRecognitionRequestMethod +from ..types import UNSET, Unset + T = TypeVar("T", bound="ImpressoNamedEntityRecognitionRequest") @@ -11,19 +14,29 @@ class ImpressoNamedEntityRecognitionRequest: Attributes: text (str): Text to be processed for named entity recognition + method (Union[Unset, ImpressoNamedEntityRecognitionRequestMethod]): NER method to be used: `ner` (default), + `ner-nel` (named entity recognition with named entity linking) and `nel` (linking only - enclose entities in + [START] [END] tags). Default: ImpressoNamedEntityRecognitionRequestMethod.NER. """ text: str + method: Union[Unset, ImpressoNamedEntityRecognitionRequestMethod] = ImpressoNamedEntityRecognitionRequestMethod.NER def to_dict(self) -> Dict[str, Any]: text = self.text + method: Union[Unset, str] = UNSET + if not isinstance(self.method, Unset): + method = self.method.value + field_dict: Dict[str, Any] = {} field_dict.update( { "text": text, } ) + if method is not UNSET: + field_dict["method"] = method return field_dict @@ -32,8 +45,16 @@ def from_dict(cls: Type[T], src_dict: Dict[str, Any]) -> T: d = src_dict.copy() text = d.pop("text") + _method = d.pop("method", UNSET) + method: Union[Unset, ImpressoNamedEntityRecognitionRequestMethod] + if isinstance(_method, Unset): + method = UNSET + else: + method = ImpressoNamedEntityRecognitionRequestMethod(_method) + impresso_named_entity_recognition_request = cls( text=text, + method=method, ) return impresso_named_entity_recognition_request diff --git a/impresso/api_client/models/impresso_named_entity_recognition_request_method.py b/impresso/api_client/models/impresso_named_entity_recognition_request_method.py new file mode 100644 index 0000000..1765f7b --- /dev/null +++ b/impresso/api_client/models/impresso_named_entity_recognition_request_method.py @@ -0,0 +1,18 @@ +from enum import Enum +from typing import Literal + + +class ImpressoNamedEntityRecognitionRequestMethod(str, Enum): + NEL = "nel" + NER = "ner" + NER_NEL = "ner-nel" + + def __str__(self) -> str: + return str(self.value) + + +ImpressoNamedEntityRecognitionRequestMethodLiteral = Literal[ + "nel", + "ner", + "ner-nel", +] diff --git a/impresso/api_models.py b/impresso/api_models.py index f98bbd4..0dd2015 100644 --- a/impresso/api_models.py +++ b/impresso/api_models.py @@ -329,6 +329,13 @@ class ImpressoNerRequest(BaseModel): min_length=1, ), ] + method: Annotated[ + Optional[Literal['ner', 'ner-nel', 'nel']], + Field( + 'ner', + description='NER method to be used: `ner` (default), `ner-nel` (named entity recognition with named entity linking) and `nel` (linking only - enclose entities in [START] [END] tags).', + ), + ] class ImpressoNerResponse(BaseModel): diff --git a/impresso/data_container.py b/impresso/data_container.py index d53705b..6bcfbd0 100644 --- a/impresso/data_container.py +++ b/impresso/data_container.py @@ -53,7 +53,10 @@ def _repr_html_(self): ), "", ( - f'
' + ( + f'' + ) if preview_img else None ), diff --git a/impresso/resources/search.py b/impresso/resources/search.py index 3f57e31..d50f7a1 100644 --- a/impresso/resources/search.py +++ b/impresso/resources/search.py @@ -154,13 +154,15 @@ def find( entity_id: Return only content items that mention this entity or all/any of the entities. date_range: Return only content items that were published in this date range. language: Return only content items that are in this language or all/any of the languages. - mention: Return only content items that mention an entity with this term or all/any of entities with the terms. + mention: Return only content items that mention an entity with this term or all/any + of entities with the terms. topic_id: Return only content items that are about this topic or all/any of the topics. collection_id: Return only content items that are in this collection or all/any of the collections. country: Return only content items that are from this country or all/any of the countries. access_rights: Return only content items with this access right or all/any of the access rights. partner_id: Return only content items that are from this partner or all/any of the partners. - text_reuse_cluster_id: Return only content items that are in this text reuse cluster or all/any of the clusters. + text_reuse_cluster_id: Return only content items that are in this text reuse cluster + or all/any of the clusters. Returns: _type_: _description_ diff --git a/impresso/resources/tools.py b/impresso/resources/tools.py index 880c629..91e4bd7 100644 --- a/impresso/resources/tools.py +++ b/impresso/resources/tools.py @@ -3,6 +3,9 @@ from impresso.api_client.models.impresso_named_entity_recognition_request import ( ImpressoNamedEntityRecognitionRequest, ) +from impresso.api_client.models.impresso_named_entity_recognition_request_method import ( + ImpressoNamedEntityRecognitionRequestMethod, +) from impresso.api_models import ImpressoNerResponse from impresso.data_container import DataContainer from impresso.resources.base import Resource @@ -55,6 +58,58 @@ class ToolsResource(Resource): def ner(self, text: str) -> NerContainer: """Named Entity Recognition + This method is faster than `ner_nel` but does not provide any linking to external resources. + + Args: + text (str): Text to process + + Returns: + NerContainer: List of named entities + """ + result = perform_ner.sync( + client=self._api_client, + body=ImpressoNamedEntityRecognitionRequest( + text=text, method=ImpressoNamedEntityRecognitionRequestMethod.NER + ), + ) + raise_for_error(result) + + return NerContainer( + result, + ImpressoNerSchema, + web_app_search_result_url=None, + ) + + def ner_nel(self, text: str) -> NerContainer: + """Named Entity Recognition and Named Entity Linking + + This method is slower than `ner` but provides linking to external resources. + + Args: + text (str): Text to process + + Returns: + NerContainer: List of named entities + """ + result = perform_ner.sync( + client=self._api_client, + body=ImpressoNamedEntityRecognitionRequest( + text=text, method=ImpressoNamedEntityRecognitionRequestMethod.NER_NEL + ), + ) + raise_for_error(result) + + return NerContainer( + result, + ImpressoNerSchema, + web_app_search_result_url=None, + ) + + def nel(self, text: str) -> NerContainer: + """Named Entity Linking + + This method requires named entities to be enclosed in tags: [START]entity[END]. + Args: text (str): Text to process @@ -63,7 +118,9 @@ def ner(self, text: str) -> NerContainer: """ result = perform_ner.sync( client=self._api_client, - body=ImpressoNamedEntityRecognitionRequest(text=text), + body=ImpressoNamedEntityRecognitionRequest( + text=text, method=ImpressoNamedEntityRecognitionRequestMethod.NEL + ), ) raise_for_error(result) diff --git a/pyproject.toml b/pyproject.toml index f9c9851..a1b38ac 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,7 +22,7 @@ packages = [ ] readme = "README.md" repository = "https://github.com/impresso/impresso-py" -version = "0.9.7" +version = "0.9.8" [tool.poetry.urls] Endpoint = "https://impresso-project.ch/public-api"