From a359e999c135ee0ee613bef0f2f28620d8b21e9e Mon Sep 17 00:00:00 2001
From: Roman Kalyakin <roman@kalyakin.com>
Date: Thu, 21 Nov 2024 16:37:36 +0100
Subject: [PATCH] added all resources

---
 docs/index.md                             | 33 +++++++++++-
 docs/preparing_queries.md                 | 30 +++++++++++
 docs/resources.md                         | 65 ++++++++++++++++++++++-
 impresso/client.py                        |  6 +++
 impresso/data_container.py                | 21 +++++---
 impresso/resources/collections.py         | 42 +++++++++++++--
 impresso/resources/entities.py            | 19 ++++++-
 impresso/resources/newspapers.py          | 13 ++++-
 impresso/resources/search.py              | 51 +-----------------
 impresso/resources/text_reuse/clusters.py | 32 -----------
 mkdocs.yml                                |  3 +-
 pyproject.toml                            |  2 +-
 12 files changed, 216 insertions(+), 101 deletions(-)
 create mode 100644 docs/preparing_queries.md
diff --git a/docs/index.md b/docs/index.md
index 6f56091..5e11145 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -1,5 +1,36 @@
 # Impresso Python
 
+<p align="center">
+  <img src="https://github.com/impresso/impresso.github.io/blob/master/assets/images/3x1--Yellow-Impresso-Black-on-White--transparent.png?raw=true" width="350" alt="Impresso Project Logo"/>
+</p>
+
+Impresso is a library to interact with the [Impresso](https://impresso-project.ch/app) dataset. It provides a set of classes to interact with the API and a set of tools that make working with the data easier.
+
+## Installation and prerequisites
+
+The Impresso python library can be installed using `pip`:
+
+```shell
+pip install impresso
+```
+
+The library requires Python version `3.10` as a minimum. It has a number of dependencies, such as `matplotlib` and `pandas`, that are likely already present in a Jupyter environment.
+
+## Create a session
+
 ::: impresso.connect
 
-Hi there.
\ No newline at end of file
+
+## About Impresso
+
+### Impresso project
+
+[Impresso - Media Monitoring of the Past](https://impresso-project.ch) is an interdisciplinary research project that aims to develop and consolidate tools for processing and exploring large collections of media archives across modalities, time, languages and national borders. The first project (2017-2021) was funded by the Swiss National Science Foundation under grant No. [CRSII5_173719](http://p3.snf.ch/project-173719) and the second project (2023-2027) by the SNSF under grant No. [CRSII5_213585](https://data.snf.ch/grants/grant/213585) and the Luxembourg National Research Fund under grant No. 17498891.
+
+### Copyright
+
+Copyright (C) 2024 The Impresso team.
+
+### License
+
+This program is provided as open source under the [GNU Affero General Public License](https://github.com/impresso/impresso-pyindexation/blob/master/LICENSE) v3 or later.
diff --git a/docs/preparing_queries.md b/docs/preparing_queries.md
new file mode 100644
index 0000000..3e6721b
--- /dev/null
+++ b/docs/preparing_queries.md
@@ -0,0 +1,30 @@
+# Preparing queries
+
+Some filter parameters accept a combination of modifiers that can be used to create complex queries. For example, if we want to search for content items that mention both `Titanic` and `ship`, we can use the `AND` modifier to combine the two conditions:
+
+```python
+from impresso import AND, OR
+
+impresso.search.find(term=AND("Titanic", "ship"))
+```
+
+We can refine this condition and search for all content items that either mention `Titanic` and `ship` together **OR** mention `Titanic` and `iceberg` together, **AND** that do not mention `Di Caprio`:
+
+
+```python
+from impresso import AND, OR
+
+impresso.search.find(
+  term=(
+    AND("Titanic", "ship") | 
+    AND("Titanic", "iceberg")
+  ) & ~OR("Di Caprio")
+)
+```
+
+## Modifiers
+
+::: impresso.structures.OR
+::: impresso.structures.AND
+::: impresso.structures.DateRange
+::: impresso.structures.NumericRange
\ No newline at end of file
diff --git a/docs/resources.md b/docs/resources.md
index bffe386..df60048 100644
--- a/docs/resources.md
+++ b/docs/resources.md
@@ -8,4 +8,67 @@ Search content items in the Impresso corpus.
 impresso.search.find(term='Titanic', limit=10)
 ```
 
-::: impresso.resources.search.SearchResource
\ No newline at end of file
+::: impresso.resources.search.SearchResource
+
+::: impresso.api_client.models.search_order_by.SearchOrderByLiteral
+::: impresso.resources.search.SearchDataContainer
+
+## Entities
+
+Search entities in the Impresso corpus.
+
+```python
+impresso.entities.find(term="Douglas Adams")
+```
+
+::: impresso.resources.entities.EntitiesResource
+
+::: impresso.resources.entities.EntityType
+::: impresso.api_client.models.find_entities_order_by.FindEntitiesOrderByLiteral
+
+## Newspapers
+
+Search newspapers available in the Impresso corpus.
+
+```python
+impresso.newspapers.find(
+    term="wort",
+    order_by="lastIssue",
+)
+```
+
+::: impresso.resources.newspapers.NewspapersResource
+
+::: impresso.api_client.models.find_newspapers_order_by.FindNewspapersOrderByLiteral
+::: impresso.resources.newspapers.FindNewspapersContainer
+
+## Content Items
+
+Get a single content item by ID.
+
+```python
+impresso.content_items.get("NZZ-1794-08-09-a-i0002")
+```
+
+## Collections
+
+Work with collections.
+
+::: impresso.resources.collections.CollectionsResource
+
+::: impresso.api_client.models.find_collections_order_by.FindCollectionsOrderByLiteral
+::: impresso.resources.collections.FindCollectionsContainer
+
+## Named entity recognition
+
+The python library contains a set of named entity recognition methods that use the same NER model used to add entities to the Impresso database.
+
+::: impresso.resources.tools.ToolsResource
+::: impresso.resources.tools.NerContainer
+
+## Text reuse
+
+Two resources can be used to search text reuse clusters and passages.
+
+::: impresso.resources.text_reuse.clusters.TextReuseClustersResource
+::: impresso.resources.text_reuse.passages.TextReusePassagesResource
diff --git a/impresso/client.py b/impresso/client.py
index 9aac292..124e742 100644
--- a/impresso/client.py
+++ b/impresso/client.py
@@ -82,6 +82,12 @@ def connect(
     """
     Connect to the Impresso API and return a client object.
 
+    ```python
+    from impresso import connect
+
+    impresso = connect()
+    ```
+
     Args:
         public_api_url (str): The URL of the Impresso API to connect to. By default using the default URL set
                               in the config file (~/.impresso_py.yml) or the Impresso default URL ({DEFAULT_API_URL}).
diff --git a/impresso/data_container.py b/impresso/data_container.py
index 8085b55..b520d77 100644
--- a/impresso/data_container.py
+++ b/impresso/data_container.py
@@ -7,7 +7,11 @@
 
 
 class DataContainer(Generic[IT, T]):
-    """Response of a resource call"""
+    """
+    Generic container for responses from the Impresso API
+    returned by resource methods (`get`, `find`).
+    Generally represents a single page of the result.
+    """
 
     def __init__(
         self,
@@ -72,17 +76,17 @@ def _get_preview_image_(self) -> str | None:
 
     @property
     def raw(self) -> dict[str, Any]:
-        """Return the data as a python dictionary."""
+        """Returns the response data as a python dictionary."""
         return getattr(self._data, "to_dict")()
 
     @property
     def pydantic(self) -> T:
-        """Return the data as a pydantic model."""
+        """Returns the response data as a pydantic model."""
         return self._pydantic_model.model_validate(self.raw)
 
     @property
     def df(self) -> DataFrame:
-        """Return the data as a pandas dataframe."""
+        """Returns the response data as a pandas dataframe."""
         return DataFrame.from_dict(self._data)  # type: ignore
 
     @property
@@ -92,12 +96,12 @@ def total(self) -> int:
 
     @property
     def limit(self) -> int:
-        """Page size."""
+        """Current page size."""
         return self.raw.get("pagination", {}).get("limit", 0)
 
     @property
     def offset(self) -> int:
-        """Page offset."""
+        """Current page offset."""
         return self.raw.get("pagination", {}).get("offset", 0)
 
     @property
@@ -107,5 +111,8 @@ def size(self) -> int:
 
     @property
     def url(self) -> str | None:
-        """A URL of the result set in the Impresso web app."""
+        """
+        URL of an Impresso web application page
+        representing the result set from this container.
+        """
         return self._web_app_search_result_url
diff --git a/impresso/resources/collections.py b/impresso/resources/collections.py
index 9012df5..638a29e 100644
--- a/impresso/resources/collections.py
+++ b/impresso/resources/collections.py
@@ -64,7 +64,9 @@ def total(self) -> int:
 
 
 class CollectionsResource(Resource):
-    """Work with collections"""
+    """
+    Work with collections.
+    """
 
     name = "collections"
 
@@ -75,7 +77,18 @@ def find(
         limit: int | None = None,
         offset: int | None = None,
     ) -> FindCollectionsContainer:
-        """Find collections."""
+        """
+        Search collections in Impresso.
+
+        Args:
+            term: Search term.
+            order_by: Field to order results by.
+            limit: Number of results to return.
+            offset: Number of results to skip.
+
+        Returns:
+            FindCollectionsContainer: Data container with a page of results of the search.
+        """
 
         result = find_collections.sync(
             client=self._api_client,
@@ -122,7 +135,18 @@ def items(
         limit: int | None = None,
         offset: int | None = None,
     ) -> SearchDataContainer:
-        """Return all items in a collection."""
+        """
+        Return all content items from a collection.
+
+        Args:
+            collection_id: ID of the collection.
+            limit: Number of results to return.
+            offset: Number of results to skip.
+
+        Returns:
+            SearchDataContainer: Data container with a page of results of the search.
+        """
+
         search_resource = SearchResource(self._api_client)
         return search_resource.find(
             collection_id=collection_id, limit=limit, offset=offset
@@ -135,6 +159,10 @@ def add_items(self, collection_id: str, item_ids: list[str]) -> None:
         **NOTE**: Items are not added immediately.
         This operation may take up to a few minutes
         to complete and reflect in the collection.
+
+        Args:
+            collection_id: ID of the collection.
+            item_ids: IDs of the content items to add.
         """
         result = patch_collections_collection_id_items.sync(
             client=self._api_client,
@@ -148,11 +176,15 @@ def add_items(self, collection_id: str, item_ids: list[str]) -> None:
 
     def remove_items(self, collection_id: str, item_ids: list[str]) -> None:
         """
-        Remove items from a collection by their IDs.
+        Remove items from a collection by their IDs.
 
-        **NOTE**: Items are not added immediately.
+        **NOTE**: Items are not removed immediately.
         This operation may take up to a few minutes
         to complete and reflect in the collection.
+
+        Args:
+            collection_id: ID of the collection.
+            item_ids: IDs of the content items to remove.
         """
         result = patch_collections_collection_id_items.sync(
             client=self._api_client,
diff --git a/impresso/resources/entities.py b/impresso/resources/entities.py
index 6a9536b..3f66609 100644
--- a/impresso/resources/entities.py
+++ b/impresso/resources/entities.py
@@ -51,7 +51,7 @@ def df(self) -> DataFrame:
 
 
 class EntitiesResource(Resource):
-    """Work with entities"""
+    """Search entities in the Impresso database."""
 
     name = "entities"
 
@@ -66,7 +66,22 @@ def find(
         limit: int | None = None,
         offset: int | None = None,
     ) -> FindEntitiesContainer:
-        """Find entities."""
+        """
+        Search entities in Impresso.
+
+        Args:
+            term: Search term.
+            wikidata_id: Return only entities resolved to this Wikidata ID.
+            entity_id: Return only entity with this ID.
+            entity_type: Return only entities of this type.
+            order_by: Field to order results by.
+            resolve: Return Wikidata details of the entities, if the entity is linked to a Wikidata entry.
+            limit: Number of results to return.
+            offset: Number of results to skip.
+
+        Returns:
+            FindEntitiesContainer: Data container with a page of results of the search.
+        """
 
         filters: list[Filter] = []
         if entity_type is not None:
diff --git a/impresso/resources/newspapers.py b/impresso/resources/newspapers.py
index 0ba5831..1e1a123 100644
--- a/impresso/resources/newspapers.py
+++ b/impresso/resources/newspapers.py
@@ -32,7 +32,7 @@ def df(self) -> DataFrame:
 
 
 class NewspapersResource(Resource):
-    """Search newspapers"""
+    """Search newspapers in the Impresso database."""
 
     name = "newspapers"
 
@@ -43,7 +43,18 @@ def find(
         limit: int | None = None,
         offset: int | None = None,
     ) -> FindNewspapersContainer:
+        """
+        Search newspapers in Impresso.
 
+        Args:
+            term: Search term.
+            order_by: Field to order results by.
+            limit: Number of results to return.
+            offset: Number of results to skip.
+
+        Returns:
+            FindNewspapersContainer: Data container with a page of results of the search.
+        """
         result = find_newspapers.sync(
             client=self._api_client,
             term=term if term is not None else UNSET,
diff --git a/impresso/resources/search.py b/impresso/resources/search.py
index 18727b4..6b011e0 100644
--- a/impresso/resources/search.py
+++ b/impresso/resources/search.py
@@ -55,55 +55,6 @@ def df(self) -> DataFrame:
 class FacetDataContainer(DataContainer):
     """Response of a get facet call."""
 
-    # def __init__(
-    #     self,
-    #     data: IT,
-    #     pydantic_model: type[T],
-    #     limit: int | None,
-    #     offset: int | None,
-    #     web_app_search_result_url: str,
-    # ):
-    #     super().__init__(data, pydantic_model, web_app_search_result_url)
-    #     self._limit = limit
-    #     self._offset = offset
-
-    # @property
-    # def raw(self) -> dict[str, Any]:
-    #     """Return the data as a python dictionary."""
-    #     return self._data.to_dict()
-
-    # @property
-    # def pydantic(self) -> list[SearchFacetBucket]:
-    #     """Return the data as a pydantic model."""
-    #     return self._pydantic_model.model_validate(self.raw)
-
-    # @property
-    # def df(self) -> DataFrame:
-    #     """Return the data as a pandas dataframe."""
-    #     if len(self.raw["buckets"]) == 0:
-    #         return DataFrame()
-    #     return json_normalize(self.raw["buckets"]).set_index("val")
-
-    # @property
-    # def size(self) -> int:
-    #     """Current page size."""
-    #     return len(self.raw.get("buckets", []))
-
-    # @property
-    # def total(self) -> int:
-    #     """Total number of results."""
-    #     return self.raw.get("numBuckets", 0)
-
-    # @property
-    # def limit(self) -> int:
-    #     """Page size."""
-    #     return self._limit or len(self.raw["buckets"])
-
-    # @property
-    # def offset(self) -> int:
-    #     """Page offset."""
-    #     return self._offset or 0
-
     @property
     def df(self) -> DataFrame:
         """Return the data as a pandas dataframe."""
@@ -170,7 +121,7 @@ def find(
                                    or all/any of the clusters.
 
         Returns:
-            _type_: _description_
+            SearchDataContainer: Data container with a page of results of the search.
         """
 
         filters = self._build_filters(
diff --git a/impresso/resources/text_reuse/clusters.py b/impresso/resources/text_reuse/clusters.py
index 669f452..97453dd 100644
--- a/impresso/resources/text_reuse/clusters.py
+++ b/impresso/resources/text_reuse/clusters.py
@@ -46,38 +46,6 @@ def df(self) -> DataFrame:
             return json_normalize(data).set_index("uid")
         return DataFrame()
 
-    # @property
-    # def pydantic(self):
-    #     """Return the data as a pydantic model."""
-    #     remapped_raw = {
-    #         "data": self.raw.get("clusters", []),
-    #         "info": self.raw.get("info", {}),
-    #         "total": self.total,
-    #         "limit": self.limit,
-    #         "offset": self.offset,
-    #     }
-    #     return self._pydantic_model.model_validate(remapped_raw)
-
-    # @property
-    # def size(self) -> int:
-    #     """Current page size."""
-    #     return len(self.raw.get("clusters", []))
-
-    # @property
-    # def total(self) -> int:
-    #     """Total number of results."""
-    #     return self.raw.get("info", {}).get("total", 0)
-
-    # @property
-    # def limit(self) -> int:
-    #     """Page size."""
-    #     return self.raw.get("info", {}).get("limit", 0)
-
-    # @property
-    # def offset(self) -> int:
-    #     """Page offset."""
-    #     return self.raw.get("info", {}).get("offset", 0)
-
 
 Range = tuple[int, int]
 
diff --git a/mkdocs.yml b/mkdocs.yml
index 0efc563..6456e2c 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -28,4 +28,5 @@ markdown_extensions:
 nav:
   - Home: index.md
   - Result Object: result.md
-  - Resources: resources.md
\ No newline at end of file
+  - Resources: resources.md
+  - Preparing Queries: preparing_queries.md
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 68cea79..fcf1095 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -45,7 +45,7 @@ pandas = "^2.1.0"
 pandas-stubs = "^2.2.1.240316"
 protobuf = "^4.25.0"
 pydantic = "^2.6.4"
-python = "^3.10.0 || ^3.11.0"
+python = "^3.10.0 || ^3.11.0 || ^3.12.0"
 python-dateutil = "^2.8.0"
 types-PyYAML = "^6.0.12.20240311"
 types-protobuf = "^5.27.0.20240626"