Skip to content

Commit

Permalink
Updated tools (ner, nel) (#10)
Browse files Browse the repository at this point in the history
* added new ner method: ner-nel

* performance explanation

* added nel method

* version bump

* make linter happy

* regenerated models

* make mypy happy
  • Loading branch information
theorm authored Oct 23, 2024
1 parent 174887a commit c61f137
Show file tree
Hide file tree
Showing 10 changed files with 121 additions and 11 deletions.
2 changes: 1 addition & 1 deletion .flake8
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
[flake8]
max-line-length = 120
exclude = impresso/api_client, impresso/protobuf
exclude = impresso/api_client, impresso/protobuf, impresso/api_models.py
8 changes: 4 additions & 4 deletions impresso/api_client/api/tools/perform_ner.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def sync_detailed(
client: AuthenticatedClient,
body: ImpressoNamedEntityRecognitionRequest,
) -> Response[Union[Error, ImpressoNamedEntityRecognitionResponse]]:
"""Perform Named Entity Recognition of a text
"""Perform named entity recognition (and optional named entity linking) of a text
Args:
body (ImpressoNamedEntityRecognitionRequest): Request body for the Impresso NER endpoint
Expand Down Expand Up @@ -109,7 +109,7 @@ def sync(
client: AuthenticatedClient,
body: ImpressoNamedEntityRecognitionRequest,
) -> Optional[Union[Error, ImpressoNamedEntityRecognitionResponse]]:
"""Perform Named Entity Recognition of a text
"""Perform named entity recognition (and optional named entity linking) of a text
Args:
body (ImpressoNamedEntityRecognitionRequest): Request body for the Impresso NER endpoint
Expand All @@ -133,7 +133,7 @@ async def asyncio_detailed(
client: AuthenticatedClient,
body: ImpressoNamedEntityRecognitionRequest,
) -> Response[Union[Error, ImpressoNamedEntityRecognitionResponse]]:
"""Perform Named Entity Recognition of a text
"""Perform named entity recognition (and optional named entity linking) of a text
Args:
body (ImpressoNamedEntityRecognitionRequest): Request body for the Impresso NER endpoint
Expand All @@ -160,7 +160,7 @@ async def asyncio(
client: AuthenticatedClient,
body: ImpressoNamedEntityRecognitionRequest,
) -> Optional[Union[Error, ImpressoNamedEntityRecognitionResponse]]:
"""Perform Named Entity Recognition of a text
"""Perform named entity recognition (and optional named entity linking) of a text
Args:
body (ImpressoNamedEntityRecognitionRequest): Request body for the Impresso NER endpoint
Expand Down
2 changes: 2 additions & 0 deletions impresso/api_client/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@
from .impresso_named_entity_recognition_entity_type import ImpressoNamedEntityRecognitionEntityType
from .impresso_named_entity_recognition_entity_wikidata import ImpressoNamedEntityRecognitionEntityWikidata
from .impresso_named_entity_recognition_request import ImpressoNamedEntityRecognitionRequest
from .impresso_named_entity_recognition_request_method import ImpressoNamedEntityRecognitionRequestMethod
from .impresso_named_entity_recognition_response import ImpressoNamedEntityRecognitionResponse
from .location_specific_fields import LocationSpecificFields
from .location_specific_fields_descriptions import LocationSpecificFieldsDescriptions
Expand Down Expand Up @@ -230,6 +231,7 @@
"ImpressoNamedEntityRecognitionEntityType",
"ImpressoNamedEntityRecognitionEntityWikidata",
"ImpressoNamedEntityRecognitionRequest",
"ImpressoNamedEntityRecognitionRequestMethod",
"ImpressoNamedEntityRecognitionResponse",
"LocationSpecificFields",
"LocationSpecificFieldsDescriptions",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
from typing import Any, Dict, Type, TypeVar
from typing import Any, Dict, Type, TypeVar, Union

from attrs import define as _attrs_define

from ..models.impresso_named_entity_recognition_request_method import ImpressoNamedEntityRecognitionRequestMethod
from ..types import UNSET, Unset

T = TypeVar("T", bound="ImpressoNamedEntityRecognitionRequest")


Expand All @@ -11,19 +14,29 @@ class ImpressoNamedEntityRecognitionRequest:
Attributes:
text (str): Text to be processed for named entity recognition
method (Union[Unset, ImpressoNamedEntityRecognitionRequestMethod]): NER method to be used: `ner` (default),
`ner-nel` (named entity recognition with named entity linking) and `nel` (linking only - enclose entities in
[START] [END] tags). Default: ImpressoNamedEntityRecognitionRequestMethod.NER.
"""

text: str
method: Union[Unset, ImpressoNamedEntityRecognitionRequestMethod] = ImpressoNamedEntityRecognitionRequestMethod.NER

def to_dict(self) -> Dict[str, Any]:
    """Serialize this request into a plain dictionary.

    Returns:
        Dict[str, Any]: A dictionary that always holds ``"text"``. ``"method"``
        is added, as the enum member's string value, only when ``self.method``
        is not an ``Unset`` instance.
    """
    serialized: Dict[str, Any] = {"text": self.text}

    # An unset method is left out of the payload entirely.
    if not isinstance(self.method, Unset):
        serialized["method"] = self.method.value

    return serialized

Expand All @@ -32,8 +45,16 @@ def from_dict(cls: Type[T], src_dict: Dict[str, Any]) -> T:
d = src_dict.copy()
text = d.pop("text")

_method = d.pop("method", UNSET)
method: Union[Unset, ImpressoNamedEntityRecognitionRequestMethod]
if isinstance(_method, Unset):
method = UNSET
else:
method = ImpressoNamedEntityRecognitionRequestMethod(_method)

impresso_named_entity_recognition_request = cls(
text=text,
method=method,
)

return impresso_named_entity_recognition_request
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from enum import Enum
from typing import Literal


class ImpressoNamedEntityRecognitionRequestMethod(str, Enum):
    """Processing methods accepted by the named entity recognition request.

    Members are ``str`` subclasses, so each one compares equal to its raw
    string value (for example ``NER == "ner"``).
    """

    NEL = "nel"
    NER = "ner"
    NER_NEL = "ner-nel"

    def __str__(self) -> str:
        """Return the raw string value instead of the ``ClassName.MEMBER`` form."""
        return str(self.value)


# Literal type listing the same three string values as the members of
# ImpressoNamedEntityRecognitionRequestMethod ("nel", "ner", "ner-nel").
ImpressoNamedEntityRecognitionRequestMethodLiteral = Literal[
"nel",
"ner",
"ner-nel",
]
7 changes: 7 additions & 0 deletions impresso/api_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,13 @@ class ImpressoNerRequest(BaseModel):
min_length=1,
),
]
method: Annotated[
Optional[Literal['ner', 'ner-nel', 'nel']],
Field(
'ner',
description='NER method to be used: `ner` (default), `ner-nel` (named entity recognition with named entity linking) and `nel` (linking only - enclose entities in [START] [END] tags).',
),
]


class ImpressoNerResponse(BaseModel):
Expand Down
5 changes: 4 additions & 1 deletion impresso/data_container.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,10 @@ def _repr_html_(self):
),
"</div>",
(
f'<div style="align-content: center;"><img src="data:image/png;base64,{preview_img}" style="max-width: 800px; width: 100%;"></div>'
(
f'<div style="align-content: center;"><img src="data:image/png;base64,{preview_img}" '
+ 'style="max-width: 800px; width: 100%;"></div>'
)
if preview_img
else None
),
Expand Down
6 changes: 4 additions & 2 deletions impresso/resources/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,13 +154,15 @@ def find(
entity_id: Return only content items that mention this entity or all/any of the entities.
date_range: Return only content items that were published in this date range.
language: Return only content items that are in this language or all/any of the languages.
mention: Return only content items that mention an entity with this term or all/any of entities with the terms.
mention: Return only content items that mention an entity with this term or all/any
of entities with the terms.
topic_id: Return only content items that are about this topic or all/any of the topics.
collection_id: Return only content items that are in this collection or all/any of the collections.
country: Return only content items that are from this country or all/any of the countries.
access_rights: Return only content items with this access right or all/any of the access rights.
partner_id: Return only content items that are from this partner or all/any of the partners.
text_reuse_cluster_id: Return only content items that are in this text reuse cluster or all/any of the clusters.
text_reuse_cluster_id: Return only content items that are in this text reuse cluster
or all/any of the clusters.
Returns:
_type_: _description_
Expand Down
59 changes: 58 additions & 1 deletion impresso/resources/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
from impresso.api_client.models.impresso_named_entity_recognition_request import (
ImpressoNamedEntityRecognitionRequest,
)
from impresso.api_client.models.impresso_named_entity_recognition_request_method import (
ImpressoNamedEntityRecognitionRequestMethod,
)
from impresso.api_models import ImpressoNerResponse
from impresso.data_container import DataContainer
from impresso.resources.base import Resource
Expand Down Expand Up @@ -55,6 +58,58 @@ class ToolsResource(Resource):
def ner(self, text: str) -> NerContainer:
    """Run named entity recognition on a text.

    This method is faster than `ner_nel` but does not provide any linking
    to external resources.

    Args:
        text (str): Text to process

    Returns:
        NerContainer: List of named entities
    """
    request = ImpressoNamedEntityRecognitionRequest(
        text=text,
        method=ImpressoNamedEntityRecognitionRequestMethod.NER,
    )
    response = perform_ner.sync(client=self._api_client, body=request)
    # Check the response for an error before wrapping it in a container.
    raise_for_error(response)

    return NerContainer(response, ImpressoNerSchema, web_app_search_result_url=None)

def ner_nel(self, text: str) -> NerContainer:
    """Run named entity recognition together with named entity linking.

    This method is slower than `ner` but provides linking to external
    resources.

    Args:
        text (str): Text to process

    Returns:
        NerContainer: List of named entities
    """
    request = ImpressoNamedEntityRecognitionRequest(
        text=text,
        method=ImpressoNamedEntityRecognitionRequestMethod.NER_NEL,
    )
    response = perform_ner.sync(client=self._api_client, body=request)
    # Check the response for an error before wrapping it in a container.
    raise_for_error(response)

    return NerContainer(response, ImpressoNerSchema, web_app_search_result_url=None)

def nel(self, text: str) -> NerContainer:
"""Named Entity Linking
This method requires named entities to be enclosed in tags: [START]entity[END].
Args:
text (str): Text to process
Expand All @@ -63,7 +118,9 @@ def ner(self, text: str) -> NerContainer:
"""
result = perform_ner.sync(
client=self._api_client,
body=ImpressoNamedEntityRecognitionRequest(text=text),
body=ImpressoNamedEntityRecognitionRequest(
text=text, method=ImpressoNamedEntityRecognitionRequestMethod.NEL
),
)
raise_for_error(result)

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ packages = [
]
readme = "README.md"
repository = "https://github.com/impresso/impresso-py"
version = "0.9.7"
version = "0.9.8"

[tool.poetry.urls]
Endpoint = "https://impresso-project.ch/public-api"
Expand Down

0 comments on commit c61f137

Please sign in to comment.