From 7d057b0db8173249a3d974ec2a69adeeaadcca11 Mon Sep 17 00:00:00 2001 From: Stefano Lottini Date: Thu, 14 Mar 2024 18:13:30 +0100 Subject: [PATCH 01/13] timeout support throughout astrapy/core layer --- astrapy/core/api.py | 10 +- astrapy/core/db.py | 776 ++++++++++++++++++++++++++++---- astrapy/core/ops.py | 345 +++++++++++--- astrapy/core/utils.py | 40 +- tests/core/test_async_db_dml.py | 2 - tests/core/test_db_dml.py | 2 - 6 files changed, 998 insertions(+), 177 deletions(-) diff --git a/astrapy/core/api.py b/astrapy/core/api.py index cebeab84..e97d214e 100644 --- a/astrapy/core/api.py +++ b/astrapy/core/api.py @@ -1,6 +1,6 @@ import logging import httpx -from typing import Any, Dict, Optional, cast +from typing import Any, Dict, Optional, Union, cast from astrapy.core.core_types import API_RESPONSE from astrapy.core.utils import amake_request, make_request @@ -32,6 +32,7 @@ def raw_api_request( path: Optional[str], caller_name: Optional[str], caller_version: Optional[str], + timeout: Optional[Union[httpx.Timeout, float]], ) -> httpx.Response: return make_request( client=client, @@ -44,6 +45,7 @@ def raw_api_request( path=path, caller_name=caller_name, caller_version=caller_version, + timeout=timeout, ) @@ -82,6 +84,7 @@ def api_request( skip_error_check: bool, caller_name: Optional[str], caller_version: Optional[str], + timeout: Optional[Union[httpx.Timeout, float]], ) -> API_RESPONSE: raw_response = raw_api_request( client=client, @@ -94,6 +97,7 @@ def api_request( path=path, caller_name=caller_name, caller_version=caller_version, + timeout=timeout, ) raw_response.raise_for_status() return process_raw_api_response( @@ -113,6 +117,7 @@ async def async_raw_api_request( path: Optional[str], caller_name: Optional[str], caller_version: Optional[str], + timeout: Optional[Union[httpx.Timeout, float]], ) -> httpx.Response: return await amake_request( client=client, @@ -125,6 +130,7 @@ async def async_raw_api_request( path=path, caller_name=caller_name, 
caller_version=caller_version, + timeout=timeout, ) @@ -163,6 +169,7 @@ async def async_api_request( skip_error_check: bool, caller_name: Optional[str], caller_version: Optional[str], + timeout: Optional[Union[httpx.Timeout, float]], ) -> API_RESPONSE: raw_response = await async_raw_api_request( client=client, @@ -175,6 +182,7 @@ async def async_api_request( path=path, caller_name=caller_name, caller_version=caller_version, + timeout=timeout, ) raw_response.raise_for_status() return await async_process_raw_api_response( diff --git a/astrapy/core/db.py b/astrapy/core/db.py index e1a9d33b..daec4be9 100644 --- a/astrapy/core/db.py +++ b/astrapy/core/db.py @@ -53,9 +53,11 @@ from astrapy.core.utils import ( convert_vector_to_floats, make_payload, - http_methods, normalize_for_api, restore_from_api, + http_methods, + to_httpx_timeout, + TimeoutInfoWideType, ) from astrapy.core.core_types import ( API_DOC, @@ -84,6 +86,7 @@ def __init__( ) -> None: """ Initialize an AstraDBCollection instance. + Args: collection_name (str): The name of the collection. astra_db (AstraDB, optional): An instance of Astra DB. 
@@ -194,6 +197,7 @@ def _request( json_data: Optional[Dict[str, Any]] = None, url_params: Optional[Dict[str, Any]] = None, skip_error_check: bool = False, + timeout_info: TimeoutInfoWideType = None, ) -> API_RESPONSE: direct_response = api_request( client=self.client, @@ -207,43 +211,65 @@ def _request( skip_error_check=skip_error_check, caller_name=self.caller_name, caller_version=self.caller_version, + timeout=to_httpx_timeout(timeout_info), ) response = restore_from_api(direct_response) return response - def post_raw_request(self, body: Dict[str, Any]) -> API_RESPONSE: + def post_raw_request( + self, body: Dict[str, Any], timeout_info: TimeoutInfoWideType = None + ) -> API_RESPONSE: return self._request( method=http_methods.POST, path=self.base_path, json_data=body, + timeout_info=timeout_info, ) def _get( - self, path: Optional[str] = None, options: Optional[Dict[str, Any]] = None + self, + path: Optional[str] = None, + options: Optional[Dict[str, Any]] = None, + timeout_info: TimeoutInfoWideType = None, ) -> Optional[API_RESPONSE]: full_path = f"{self.base_path}/{path}" if path else self.base_path response = self._request( - method=http_methods.GET, path=full_path, url_params=options + method=http_methods.GET, + path=full_path, + url_params=options, + timeout_info=timeout_info, ) if isinstance(response, dict): return response return None def _put( - self, path: Optional[str] = None, document: Optional[API_RESPONSE] = None + self, + path: Optional[str] = None, + document: Optional[API_RESPONSE] = None, + timeout_info: TimeoutInfoWideType = None, ) -> API_RESPONSE: full_path = f"{self.base_path}/{path}" if path else self.base_path response = self._request( - method=http_methods.PUT, path=full_path, json_data=document + method=http_methods.PUT, + path=full_path, + json_data=document, + timeout_info=timeout_info, ) return response def _post( - self, path: Optional[str] = None, document: Optional[API_DOC] = None + self, + path: Optional[str] = None, + document: 
Optional[API_DOC] = None, + timeout_info: TimeoutInfoWideType = None, ) -> API_RESPONSE: full_path = f"{self.base_path}/{path}" if path else self.base_path response = self._request( - method=http_methods.POST, path=full_path, json_data=document + method=http_methods.POST, + path=full_path, + json_data=document, + timeout_info=timeout_info, ) return response @@ -276,15 +302,22 @@ def _recast_as_sort_projection( return sort, projection - def get(self, path: Optional[str] = None) -> Optional[API_RESPONSE]: + def get( + self, path: Optional[str] = None, timeout_info: TimeoutInfoWideType = None + ) -> Optional[API_RESPONSE]: """ Retrieve a document from the collection by its path. + Args: path (str, optional): The path of the document to retrieve. + timeout_info: a float, or a TimeoutInfo dict, for the HTTP request. + Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. + Returns: dict: The retrieved document. """ - return self._get(path=path) + return self._get(path=path, timeout_info=timeout_info) def find( self, @@ -292,14 +325,20 @@ def find( projection: Optional[Dict[str, Any]] = None, sort: Optional[Dict[str, Any]] = None, options: Optional[Dict[str, Any]] = None, + timeout_info: TimeoutInfoWideType = None, ) -> API_RESPONSE: """ Find documents in the collection that match the given filter. + Args: filter (dict, optional): Criteria to filter documents. projection (dict, optional): Specifies the fields to return. sort (dict, optional): Specifies the order in which to return matching documents. options (dict, optional): Additional options for the query. + timeout_info: a float, or a TimeoutInfo dict, for the HTTP request. + Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. + Returns: dict: The query response containing matched documents. 
""" @@ -311,9 +350,7 @@ def find( sort=sort, ) - response = self._post( - document=json_query, - ) + response = self._post(document=json_query, timeout_info=timeout_info) return response @@ -325,15 +362,21 @@ def vector_find( filter: Optional[Dict[str, Any]] = None, fields: Optional[List[str]] = None, include_similarity: bool = True, + timeout_info: TimeoutInfoWideType = None, ) -> List[API_DOC]: """ Perform a vector-based search in the collection. + Args: vector (list): The vector to search with. limit (int): The maximum number of documents to return. filter (dict, optional): Criteria to filter documents. fields (list, optional): Specifies the fields to return. include_similarity (bool, optional): Whether to include similarity score in the result. + timeout_info: a float, or a TimeoutInfo dict, for the HTTP request. + Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. + Returns: list: A list of documents matching the vector search criteria. """ @@ -356,6 +399,7 @@ def vector_find( "limit": limit, "includeSimilarity": include_similarity, }, + timeout_info=timeout_info, ) return cast(List[API_DOC], raw_find_result["data"]["documents"]) @@ -369,10 +413,12 @@ def paginate( ) -> Generator[API_DOC, None, None]: """ Generate paginated results for a given database query method. + Args: request_method (function): The database query method to paginate. options (dict, optional): Options for the database query. prefetched (int, optional): Number of pre-fetched documents. + Yields: dict: The next document in the paginated result set. """ @@ -425,15 +471,24 @@ def paginated_find( sort: Optional[Dict[str, Any]] = None, options: Optional[Dict[str, Any]] = None, prefetched: Optional[int] = None, + timeout_info: TimeoutInfoWideType = None, ) -> Iterator[API_DOC]: """ Perform a paginated search in the collection. + Args: filter (dict, optional): Criteria to filter documents. 
projection (dict, optional): Specifies the fields to return. sort (dict, optional): Specifies the order in which to return matching documents. options (dict, optional): Additional options for the query. prefetched (int, optional): Number of pre-fetched documents. + timeout_info: a float, or a TimeoutInfo dict, for each + single HTTP request. + This is a paginated method, that issues several requests as it + needs more data. This parameter controls a single request. + Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. + Returns: generator: A generator yielding documents in the paginated result set. """ @@ -442,6 +497,7 @@ def paginated_find( filter=filter, projection=projection, sort=sort, + timeout_info=timeout_info, ) return self.paginate( request_method=partialed_find, @@ -450,14 +506,23 @@ def paginated_find( ) def pop( - self, filter: Dict[str, Any], pop: Dict[str, Any], options: Dict[str, Any] + self, + filter: Dict[str, Any], + pop: Dict[str, Any], + options: Dict[str, Any], + timeout_info: TimeoutInfoWideType = None, ) -> API_RESPONSE: """ Pop the last data in the tags array + Args: filter (dict): Criteria to identify the document to update. pop (dict): The pop to apply to the tags. options (dict): Additional options for the update operation. + timeout_info: a float, or a TimeoutInfo dict, for the HTTP request. + Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. + Returns: dict: The original document before the update. 
""" @@ -472,19 +537,29 @@ def pop( method=http_methods.POST, path=self.base_path, json_data=json_query, + timeout_info=timeout_info, ) return response def push( - self, filter: Dict[str, Any], push: Dict[str, Any], options: Dict[str, Any] + self, + filter: Dict[str, Any], + push: Dict[str, Any], + options: Dict[str, Any], + timeout_info: TimeoutInfoWideType = None, ) -> API_RESPONSE: """ Push new data to the tags array + Args: filter (dict): Criteria to identify the document to update. push (dict): The push to apply to the tags. options (dict): Additional options for the update operation. + timeout_info: a float, or a TimeoutInfo dict, for the HTTP request. + Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. + Returns: dict: The result of the update operation. """ @@ -499,6 +574,7 @@ def push( method=http_methods.POST, path=self.base_path, json_data=json_query, + timeout_info=timeout_info, ) return response @@ -511,14 +587,20 @@ def find_one_and_replace( projection: Optional[Dict[str, Any]] = None, sort: Optional[Dict[str, Any]] = None, options: Optional[Dict[str, Any]] = None, + timeout_info: TimeoutInfoWideType = None, ) -> API_RESPONSE: """ Find a single document and replace it. + Args: replacement (dict): The new document to replace the existing one. filter (dict, optional): Criteria to filter documents. sort (dict, optional): Specifies the order in which to find the document. options (dict, optional): Additional options for the operation. + timeout_info: a float, or a TimeoutInfo dict, for the HTTP request. + Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. + Returns: dict: The result of the find and replace operation. 
""" @@ -532,7 +614,10 @@ def find_one_and_replace( ) response = self._request( - method=http_methods.POST, path=f"{self.base_path}", json_data=json_query + method=http_methods.POST, + path=f"{self.base_path}", + json_data=json_query, + timeout_info=timeout_info, ) return response @@ -544,14 +629,20 @@ def vector_find_one_and_replace( *, filter: Optional[Dict[str, Any]] = None, fields: Optional[List[str]] = None, + timeout_info: TimeoutInfoWideType = None, ) -> Union[API_DOC, None]: """ Perform a vector-based search and replace the first matched document. + Args: vector (dict): The vector to search with. replacement (dict): The new document to replace the existing one. filter (dict, optional): Criteria to filter documents. fields (list, optional): Specifies the fields to return in the result. + timeout_info: a float, or a TimeoutInfo dict, for the HTTP request. + Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. + Returns: dict or None: either the matched document or None if nothing found """ @@ -567,6 +658,7 @@ def vector_find_one_and_replace( filter=filter, projection=projection, sort=sort, + timeout_info=timeout_info, ) return cast(Union[API_DOC, None], raw_find_result["data"]["document"]) @@ -578,14 +670,20 @@ def find_one_and_update( filter: Optional[Dict[str, Any]] = None, options: Optional[Dict[str, Any]] = None, projection: Optional[Dict[str, Any]] = None, + timeout_info: TimeoutInfoWideType = None, ) -> API_RESPONSE: """ Find a single document and update it. + Args: update (dict): The update to apply to the document. sort (dict, optional): Specifies the order in which to find the document. filter (dict, optional): Criteria to filter documents. options (dict, optional): Additional options for the operation. + timeout_info: a float, or a TimeoutInfo dict, for the HTTP request. 
+ Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. + Returns: dict: The result of the find and update operation. """ @@ -602,6 +700,7 @@ def find_one_and_update( method=http_methods.POST, path=f"{self.base_path}", json_data=json_query, + timeout_info=timeout_info, ) return response @@ -613,14 +712,20 @@ def vector_find_one_and_update( *, filter: Optional[Dict[str, Any]] = None, fields: Optional[List[str]] = None, + timeout_info: TimeoutInfoWideType = None, ) -> Union[API_DOC, None]: """ Perform a vector-based search and update the first matched document. + Args: vector (list): The vector to search with. update (dict): The update to apply to the matched document. filter (dict, optional): Criteria to filter documents before applying the vector search. fields (list, optional): Specifies the fields to return in the updated document. + timeout_info: a float, or a TimeoutInfo dict, for the HTTP request. + Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. + Returns: dict or None: The result of the vector-based find and update operation, or None if nothing found @@ -637,6 +742,7 @@ def vector_find_one_and_update( filter=filter, sort=sort, projection=projection, + timeout_info=timeout_info, ) return cast(Union[API_DOC, None], raw_find_result["data"]["document"]) @@ -646,13 +752,19 @@ def find_one_and_delete( sort: Optional[Dict[str, Any]] = {}, filter: Optional[Dict[str, Any]] = None, projection: Optional[Dict[str, Any]] = None, + timeout_info: TimeoutInfoWideType = None, ) -> API_RESPONSE: """ Find a single document and delete it. + Args: sort (dict, optional): Specifies the order in which to find the document. filter (dict, optional): Criteria to filter documents. projection (dict, optional): Specifies the fields to return. + timeout_info: a float, or a TimeoutInfo dict, for the HTTP request. 
+ Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. + Returns: dict: The result of the find and delete operation. """ @@ -667,18 +779,23 @@ def find_one_and_delete( method=http_methods.POST, path=f"{self.base_path}", json_data=json_query, + timeout_info=timeout_info, ) return response def count_documents( - self, - filter: Dict[str, Any] = {}, + self, filter: Dict[str, Any] = {}, timeout_info: TimeoutInfoWideType = None ) -> API_RESPONSE: """ Count documents matching a given predicate (expressed as filter). + Args: filter (dict, defaults to {}): Criteria to filter documents. + timeout_info: a float, or a TimeoutInfo dict, for the HTTP request. + Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. + Returns: dict: the response, either {"status": {"count": }} @@ -690,9 +807,7 @@ def count_documents( filter=filter, ) - response = self._post( - document=json_query, - ) + response = self._post(document=json_query, timeout_info=timeout_info) return response @@ -702,14 +817,20 @@ def find_one( projection: Optional[Dict[str, Any]] = {}, sort: Optional[Dict[str, Any]] = {}, options: Optional[Dict[str, Any]] = {}, + timeout_info: TimeoutInfoWideType = None, ) -> API_RESPONSE: """ Find a single document in the collection. + Args: filter (dict, optional): Criteria to filter documents. projection (dict, optional): Specifies the fields to return. sort (dict, optional): Specifies the order in which to return the document. options (dict, optional): Additional options for the query. + timeout_info: a float, or a TimeoutInfo dict, for the HTTP request. + Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. 
+ Returns: dict: the response, either {"data": {"document": }} @@ -725,9 +846,7 @@ def find_one( sort=sort, ) - response = self._post( - document=json_query, - ) + response = self._post(document=json_query, timeout_info=timeout_info) return response @@ -738,14 +857,20 @@ def vector_find_one( filter: Optional[Dict[str, Any]] = None, fields: Optional[List[str]] = None, include_similarity: bool = True, + timeout_info: TimeoutInfoWideType = None, ) -> Union[API_DOC, None]: """ Perform a vector-based search to find a single document in the collection. + Args: vector (list): The vector to search with. filter (dict, optional): Additional criteria to filter documents. fields (list, optional): Specifies the fields to return in the result. include_similarity (bool, optional): Whether to include similarity score in the result. + timeout_info: a float, or a TimeoutInfo dict, for the HTTP request. + Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. + Returns: dict or None: The found document or None if no matching document is found. """ @@ -761,18 +886,27 @@ def vector_find_one( projection=projection, sort=sort, options={"includeSimilarity": include_similarity}, + timeout_info=timeout_info, ) return cast(Union[API_DOC, None], raw_find_result["data"]["document"]) def insert_one( - self, document: API_DOC, failures_allowed: bool = False + self, + document: API_DOC, + failures_allowed: bool = False, + timeout_info: TimeoutInfoWideType = None, ) -> API_RESPONSE: """ Insert a single document into the collection. + Args: document (dict): The document to insert. failures_allowed (bool): Whether to allow failures in the insert operation. + timeout_info: a float, or a TimeoutInfo dict, for the HTTP request. + Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. + Returns: dict: The response from the database after the insert operation. 
""" @@ -783,6 +917,7 @@ def insert_one( path=self.base_path, json_data=json_query, skip_error_check=failures_allowed, + timeout_info=timeout_info, ) return response @@ -792,14 +927,20 @@ def insert_many( documents: List[API_DOC], options: Optional[Dict[str, Any]] = None, partial_failures_allowed: bool = False, + timeout_info: TimeoutInfoWideType = None, ) -> API_RESPONSE: """ Insert multiple documents into the collection. + Args: documents (list): A list of documents to insert. options (dict, optional): Additional options for the insert operation. partial_failures_allowed (bool, optional): Whether to allow partial failures through the insertion (i.e. on some documents). + timeout_info: a float, or a TimeoutInfo dict, for the HTTP request. + Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. + Returns: dict: The response from the database after the insert operation. """ @@ -813,6 +954,7 @@ def insert_many( path=f"{self.base_path}", json_data=json_query, skip_error_check=partial_failures_allowed, + timeout_info=timeout_info, ) return response @@ -824,10 +966,12 @@ def chunked_insert_many( partial_failures_allowed: bool = False, chunk_size: int = MAX_INSERT_NUM_DOCUMENTS, concurrency: int = 1, + timeout_info: TimeoutInfoWideType = None, ) -> List[Union[API_RESPONSE, Exception]]: """ Insert multiple documents into the collection, handling chunking and optionally with concurrent insertions. + Args: documents (list): A list of documents to insert. options (dict, optional): Additional options for the insert operation. @@ -837,6 +981,12 @@ def chunked_insert_many( chunk_size (int, optional): Override the default insertion chunk size. concurrency (int, optional): The number of concurrent chunk insertions. Default is no concurrency. + timeout_info: a float, or a TimeoutInfo dict, for each single HTTP request. + This method runs a number of HTTP requests as it works on chunked + data. 
The timeout refers to each individual such request. + Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. + Returns: list: The responses from the database after the chunked insert operation. This is a list of individual responses from the API: the caller @@ -860,6 +1010,7 @@ def chunked_insert_many( documents[i : i + chunk_size], options, partial_failures_allowed, + timeout_info=timeout_info, ) ) except APIRequestError as e: @@ -878,6 +1029,7 @@ def chunked_insert_many( documents[i : i + chunk_size], options, partial_failures_allowed, + timeout_info=timeout_info, ) for i in range(0, len(documents), chunk_size) ] @@ -895,13 +1047,21 @@ def chunked_insert_many( return results def update_one( - self, filter: Dict[str, Any], update: Dict[str, Any] + self, + filter: Dict[str, Any], + update: Dict[str, Any], + timeout_info: TimeoutInfoWideType = None, ) -> API_RESPONSE: """ Update a single document in the collection. + Args: filter (dict): Criteria to identify the document to update. update (dict): The update to apply to the document. + timeout_info: a float, or a TimeoutInfo dict, for the HTTP request. + Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. + Returns: dict: The response from the database after the update operation. """ @@ -911,6 +1071,7 @@ def update_one( method=http_methods.POST, path=f"{self.base_path}", json_data=json_query, + timeout_info=timeout_info, ) return response @@ -920,12 +1081,18 @@ def update_many( filter: Dict[str, Any], update: Dict[str, Any], options: Optional[Dict[str, Any]] = None, + timeout_info: TimeoutInfoWideType = None, ) -> API_RESPONSE: """ Updates multiple documents in the collection. + Args: filter (dict): Criteria to identify the document to update. update (dict): The update to apply to the document. + timeout_info: a float, or a TimeoutInfo dict, for the HTTP request. 
+ Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. + Returns: dict: The response from the database after the update operation. """ @@ -940,20 +1107,28 @@ def update_many( method=http_methods.POST, path=f"{self.base_path}", json_data=json_query, + timeout_info=timeout_info, ) return response - def replace(self, path: str, document: API_DOC) -> API_RESPONSE: + def replace( + self, path: str, document: API_DOC, timeout_info: TimeoutInfoWideType = None + ) -> API_RESPONSE: """ Replace a document in the collection. + Args: path (str): The path to the document to replace. document (dict): The new document to replace the existing one. + timeout_info: a float, or a TimeoutInfo dict, for the HTTP request. + Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. + Returns: dict: The response from the database after the replace operation. """ - return self._put(path=path, document=document) + return self._put(path=path, document=document, timeout_info=timeout_info) @deprecation.deprecated( # type: ignore deprecated_in="0.7.0", @@ -961,14 +1136,21 @@ def replace(self, path: str, document: API_DOC) -> API_RESPONSE: current_version=__version__, details="Use the 'delete_one' method instead", ) - def delete(self, id: str) -> API_RESPONSE: - return self.delete_one(id) + def delete(self, id: str, timeout_info: TimeoutInfoWideType = None) -> API_RESPONSE: + return self.delete_one(id, timeout_info=timeout_info) - def delete_one(self, id: str) -> API_RESPONSE: + def delete_one( + self, id: str, timeout_info: TimeoutInfoWideType = None + ) -> API_RESPONSE: """ Delete a single document from the collection based on its ID. + Args: id (str): The ID of the document to delete. + timeout_info: a float, or a TimeoutInfo dict, for the HTTP request. 
+ Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. + Returns: dict: The response from the database after the delete operation. """ @@ -979,16 +1161,26 @@ def delete_one(self, id: str) -> API_RESPONSE: } response = self._request( - method=http_methods.POST, path=f"{self.base_path}", json_data=json_query + method=http_methods.POST, + path=f"{self.base_path}", + json_data=json_query, + timeout_info=timeout_info, ) return response - def delete_one_by_predicate(self, filter: Dict[str, Any]) -> API_RESPONSE: + def delete_one_by_predicate( + self, filter: Dict[str, Any], timeout_info: TimeoutInfoWideType = None + ) -> API_RESPONSE: """ Delete a single document from the collection based on a filter clause + Args: filter: any filter dictionary + timeout_info: a float, or a TimeoutInfo dict, for the HTTP request. + Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. + Returns: dict: The response from the database after the delete operation. """ @@ -999,7 +1191,10 @@ def delete_one_by_predicate(self, filter: Dict[str, Any]) -> API_RESPONSE: } response = self._request( - method=http_methods.POST, path=f"{self.base_path}", json_data=json_query + method=http_methods.POST, + path=f"{self.base_path}", + json_data=json_query, + timeout_info=timeout_info, ) return response @@ -1008,13 +1203,19 @@ def delete_many( self, filter: Dict[str, Any], skip_error_check: bool = False, + timeout_info: TimeoutInfoWideType = None, ) -> API_RESPONSE: """ Delete many documents from the collection based on a filter condition + Args: filter (dict): Criteria to identify the documents to delete. skip_error_check (bool): whether to ignore the check for API error and return the response untouched. Default is False. + timeout_info: a float, or a TimeoutInfo dict, for the HTTP request. 
+ Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. + Returns: dict: The response from the database after the delete operation. """ @@ -1029,34 +1230,50 @@ def delete_many( path=f"{self.base_path}", json_data=json_query, skip_error_check=skip_error_check, + timeout_info=timeout_info, ) return response - def chunked_delete_many(self, filter: Dict[str, Any]) -> List[API_RESPONSE]: + def chunked_delete_many( + self, filter: Dict[str, Any], timeout_info: TimeoutInfoWideType = None + ) -> List[API_RESPONSE]: """ Delete many documents from the collection based on a filter condition, chaining several API calls until exhaustion of the documents to delete. + Args: filter (dict): Criteria to identify the documents to delete. + timeout_info: a float, or a TimeoutInfo dict, for each single HTTP request. + This method runs a number of HTTP requests as it works on a + pagination basis. The timeout refers to each individual such request. + Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. + Returns: List[dict]: The responses from the database from all the calls """ responses = [] must_proceed = True while must_proceed: - dm_response = self.delete_many(filter=filter) + dm_response = self.delete_many(filter=filter, timeout_info=timeout_info) responses.append(dm_response) must_proceed = dm_response.get("status", {}).get("moreData", False) return responses - def clear(self) -> API_RESPONSE: + def clear(self, timeout_info: TimeoutInfoWideType = None) -> API_RESPONSE: """ Clear the collection, deleting all documents + + Args: + timeout_info: a float, or a TimeoutInfo dict, for the HTTP request. + Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. + Returns: dict: The response from the database. 
""" - clear_response = self.delete_many(filter={}) + clear_response = self.delete_many(filter={}, timeout_info=timeout_info) if clear_response.get("status", {}).get("deletedCount") != -1: raise ValueError( @@ -1065,12 +1282,19 @@ def clear(self) -> API_RESPONSE: return clear_response - def delete_subdocument(self, id: str, subdoc: str) -> API_RESPONSE: + def delete_subdocument( + self, id: str, subdoc: str, timeout_info: TimeoutInfoWideType = None + ) -> API_RESPONSE: """ Delete a subdocument or field from a document in the collection. + Args: id (str): The ID of the document containing the subdocument. subdoc (str): The key of the subdocument or field to remove. + timeout_info: a float, or a TimeoutInfo dict, for the HTTP request. + Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. + Returns: dict: The response from the database after the update operation. """ @@ -1082,7 +1306,10 @@ def delete_subdocument(self, id: str, subdoc: str) -> API_RESPONSE: } response = self._request( - method=http_methods.POST, path=f"{self.base_path}", json_data=json_query + method=http_methods.POST, + path=f"{self.base_path}", + json_data=json_query, + timeout_info=timeout_info, ) return response @@ -1093,10 +1320,14 @@ def delete_subdocument(self, id: str, subdoc: str) -> API_RESPONSE: current_version=__version__, details="Use the 'upsert_one' method instead", ) - def upsert(self, document: API_DOC) -> str: - return self.upsert_one(document) + def upsert( + self, document: API_DOC, timeout_info: TimeoutInfoWideType = None + ) -> str: + return self.upsert_one(document, timeout_info=timeout_info) - def upsert_one(self, document: API_DOC) -> str: + def upsert_one( + self, document: API_DOC, timeout_info: TimeoutInfoWideType = None + ) -> str: """ Emulate an upsert operation for a single document in the collection. 
@@ -1105,12 +1336,19 @@ def upsert_one(self, document: API_DOC) -> str: Args: document (dict): The document to insert or update. + timeout_info: a float, or a TimeoutInfo dict, for the HTTP requests. + This method may issue one or two requests, depending on what + is detected on DB. This timeout controls each HTTP request individually. + Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. Returns: str: The _id of the inserted or updated document. """ # Build the payload for the insert attempt - result = self.insert_one(document, failures_allowed=True) + result = self.insert_one( + document, failures_allowed=True, timeout_info=timeout_info + ) # If the call failed because of preexisting doc, then we replace it if "errors" in result: @@ -1122,6 +1360,7 @@ def upsert_one(self, document: API_DOC) -> str: result = self.find_one_and_replace( replacement=document, filter={"_id": document["_id"]}, + timeout_info=timeout_info, ) upserted_id = cast(str, result["data"]["document"]["_id"]) else: @@ -1139,6 +1378,7 @@ def upsert_many( documents: list[API_DOC], concurrency: int = 1, partial_failures_allowed: bool = False, + timeout_info: TimeoutInfoWideType = None, ) -> List[Union[str, Exception]]: """ Emulate an upsert operation for multiple documents in the collection. @@ -1151,6 +1391,11 @@ def upsert_many( concurrency (int, optional): The number of concurrent upserts. partial_failures_allowed (bool, optional): Whether to allow partial failures in the batch. + timeout_info: a float, or a TimeoutInfo dict, for each HTTP request. + This method issues a separate HTTP request for each document to + insert: the timeout controls each such request individually. + Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. Returns: List[Union[str, Exception]]: A list of "_id"s of the inserted or updated documents. 
@@ -1161,7 +1406,7 @@ def upsert_many( if concurrency == 1: for document in documents: try: - results.append(self.upsert_one(document)) + results.append(self.upsert_one(document, timeout_info=timeout_info)) except Exception as e: results.append(e) return results @@ -1169,7 +1414,10 @@ def upsert_many( # Perform the bulk upsert with concurrency with ThreadPoolExecutor(max_workers=concurrency) as executor: # Submit the jobs - futures = [executor.submit(self.upsert, document) for document in documents] + futures = [ + executor.submit(self.upsert, document, timeout_info=timeout_info) + for document in documents + ] # Collect the results for future in futures: @@ -1197,6 +1445,7 @@ def __init__( ) -> None: """ Initialize an AstraDBCollection instance. + Args: collection_name (str): The name of the collection. astra_db (AstraDB, optional): An instance of Astra DB. @@ -1308,6 +1557,7 @@ async def _request( json_data: Optional[Dict[str, Any]] = None, url_params: Optional[Dict[str, Any]] = None, skip_error_check: bool = False, + timeout_info: TimeoutInfoWideType = None, **kwargs: Any, ) -> API_RESPONSE: adirect_response = await async_api_request( @@ -1322,43 +1572,65 @@ async def _request( skip_error_check=skip_error_check, caller_name=self.caller_name, caller_version=self.caller_version, + timeout=to_httpx_timeout(timeout_info), ) response = restore_from_api(adirect_response) return response - async def post_raw_request(self, body: Dict[str, Any]) -> API_RESPONSE: + async def post_raw_request( + self, body: Dict[str, Any], timeout_info: TimeoutInfoWideType = None + ) -> API_RESPONSE: return await self._request( method=http_methods.POST, path=self.base_path, json_data=body, + timeout_info=timeout_info, ) async def _get( - self, path: Optional[str] = None, options: Optional[Dict[str, Any]] = None + self, + path: Optional[str] = None, + options: Optional[Dict[str, Any]] = None, + timeout_info: TimeoutInfoWideType = None, ) -> Optional[API_RESPONSE]: full_path = 
f"{self.base_path}/{path}" if path else self.base_path response = await self._request( - method=http_methods.GET, path=full_path, url_params=options + method=http_methods.GET, + path=full_path, + url_params=options, + timeout_info=timeout_info, ) if isinstance(response, dict): return response return None async def _put( - self, path: Optional[str] = None, document: Optional[API_RESPONSE] = None + self, + path: Optional[str] = None, + document: Optional[API_RESPONSE] = None, + timeout_info: TimeoutInfoWideType = None, ) -> API_RESPONSE: full_path = f"{self.base_path}/{path}" if path else self.base_path response = await self._request( - method=http_methods.PUT, path=full_path, json_data=document + method=http_methods.PUT, + path=full_path, + json_data=document, + timeout_info=timeout_info, ) return response async def _post( - self, path: Optional[str] = None, document: Optional[API_DOC] = None + self, + path: Optional[str] = None, + document: Optional[API_DOC] = None, + timeout_info: TimeoutInfoWideType = None, ) -> API_RESPONSE: full_path = f"{self.base_path}/{path}" if path else self.base_path response = await self._request( - method=http_methods.POST, path=full_path, json_data=document + method=http_methods.POST, + path=full_path, + json_data=document, + timeout_info=timeout_info, ) return response @@ -1391,15 +1663,22 @@ def _recast_as_sort_projection( return sort, projection - async def get(self, path: Optional[str] = None) -> Optional[API_RESPONSE]: + async def get( + self, path: Optional[str] = None, timeout_info: TimeoutInfoWideType = None + ) -> Optional[API_RESPONSE]: """ Retrieve a document from the collection by its path. + Args: path (str, optional): The path of the document to retrieve. + timeout_info: a float, or a TimeoutInfo dict, for the HTTP request. + Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. + Returns: dict: The retrieved document. 
""" - return await self._get(path=path) + return await self._get(path=path, timeout_info=timeout_info) async def find( self, @@ -1407,14 +1686,20 @@ async def find( projection: Optional[Dict[str, Any]] = None, sort: Optional[Dict[str, Any]] = None, options: Optional[Dict[str, Any]] = None, + timeout_info: TimeoutInfoWideType = None, ) -> API_RESPONSE: """ Find documents in the collection that match the given filter. + Args: filter (dict, optional): Criteria to filter documents. projection (dict, optional): Specifies the fields to return. sort (dict, optional): Specifies the order in which to return matching documents. options (dict, optional): Additional options for the query. + timeout_info: a float, or a TimeoutInfo dict, for the HTTP request. + Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. + Returns: dict: The query response containing matched documents. """ @@ -1426,9 +1711,7 @@ async def find( sort=sort, ) - response = await self._post( - document=json_query, - ) + response = await self._post(document=json_query, timeout_info=timeout_info) return response @@ -1440,15 +1723,21 @@ async def vector_find( filter: Optional[Dict[str, Any]] = None, fields: Optional[List[str]] = None, include_similarity: bool = True, + timeout_info: TimeoutInfoWideType = None, ) -> List[API_DOC]: """ Perform a vector-based search in the collection. + Args: vector (list): The vector to search with. limit (int): The maximum number of documents to return. filter (dict, optional): Criteria to filter documents. fields (list, optional): Specifies the fields to return. include_similarity (bool, optional): Whether to include similarity score in the result. + timeout_info: a float, or a TimeoutInfo dict, for the HTTP request. + Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. 
+ Returns: list: A list of documents matching the vector search criteria. """ @@ -1471,6 +1760,7 @@ async def vector_find( "limit": limit, "includeSimilarity": include_similarity, }, + timeout_info=timeout_info, ) return cast(List[API_DOC], raw_find_result["data"]["documents"]) @@ -1481,13 +1771,19 @@ async def paginate( request_method: AsyncPaginableRequestMethod, options: Optional[Dict[str, Any]], prefetched: Optional[int] = None, + timeout_info: TimeoutInfoWideType = None, ) -> AsyncGenerator[API_DOC, None]: """ Generate paginated results for a given database query method. + Args: request_method (function): The database query method to paginate. options (dict, optional): Options for the database query. prefetched (int, optional): Number of pre-fetched documents. + timeout_info: a float, or a TimeoutInfo dict, for the HTTP request. + Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. + Yields: dict: The next document in the paginated result set. """ @@ -1536,15 +1832,24 @@ def paginated_find( sort: Optional[Dict[str, Any]] = None, options: Optional[Dict[str, Any]] = None, prefetched: Optional[int] = None, + timeout_info: TimeoutInfoWideType = None, ) -> AsyncIterator[API_DOC]: """ Perform a paginated search in the collection. + Args: filter (dict, optional): Criteria to filter documents. projection (dict, optional): Specifies the fields to return. sort (dict, optional): Specifies the order in which to return matching documents. options (dict, optional): Additional options for the query. prefetched (int, optional): Number of pre-fetched documents + timeout_info: a float, or a TimeoutInfo dict, for each + single HTTP request. + This is a paginated method, that issues several requests as it + needs more data. This parameter controls a single request. + Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. 
+ Returns: generator: A generator yielding documents in the paginated result set. """ @@ -1553,6 +1858,7 @@ def paginated_find( filter=filter, projection=projection, sort=sort, + timeout_info=timeout_info, ) return self.paginate( request_method=partialed_find, @@ -1561,14 +1867,23 @@ def paginated_find( ) async def pop( - self, filter: Dict[str, Any], pop: Dict[str, Any], options: Dict[str, Any] + self, + filter: Dict[str, Any], + pop: Dict[str, Any], + options: Dict[str, Any], + timeout_info: TimeoutInfoWideType = None, ) -> API_RESPONSE: """ Pop the last data in the tags array + Args: filter (dict): Criteria to identify the document to update. pop (dict): The pop to apply to the tags. options (dict): Additional options for the update operation. + timeout_info: a float, or a TimeoutInfo dict, for the HTTP request. + Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. + Returns: dict: The original document before the update. """ @@ -1583,19 +1898,29 @@ async def pop( method=http_methods.POST, path=self.base_path, json_data=json_query, + timeout_info=timeout_info, ) return response async def push( - self, filter: Dict[str, Any], push: Dict[str, Any], options: Dict[str, Any] + self, + filter: Dict[str, Any], + push: Dict[str, Any], + options: Dict[str, Any], + timeout_info: TimeoutInfoWideType = None, ) -> API_RESPONSE: """ Push new data to the tags array + Args: filter (dict): Criteria to identify the document to update. push (dict): The push to apply to the tags. options (dict): Additional options for the update operation. + timeout_info: a float, or a TimeoutInfo dict, for the HTTP request. + Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. + Returns: dict: The result of the update operation. 
""" @@ -1610,6 +1935,7 @@ async def push( method=http_methods.POST, path=self.base_path, json_data=json_query, + timeout_info=timeout_info, ) return response @@ -1622,14 +1948,20 @@ async def find_one_and_replace( projection: Optional[Dict[str, Any]] = None, sort: Optional[Dict[str, Any]] = None, options: Optional[Dict[str, Any]] = None, + timeout_info: TimeoutInfoWideType = None, ) -> API_RESPONSE: """ Find a single document and replace it. + Args: replacement (dict): The new document to replace the existing one. filter (dict, optional): Criteria to filter documents. sort (dict, optional): Specifies the order in which to find the document. options (dict, optional): Additional options for the operation. + timeout_info: a float, or a TimeoutInfo dict, for the HTTP request. + Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. + Returns: dict: The result of the find and replace operation. """ @@ -1643,7 +1975,10 @@ async def find_one_and_replace( ) response = await self._request( - method=http_methods.POST, path=f"{self.base_path}", json_data=json_query + method=http_methods.POST, + path=f"{self.base_path}", + json_data=json_query, + timeout_info=timeout_info, ) return response @@ -1655,14 +1990,20 @@ async def vector_find_one_and_replace( *, filter: Optional[Dict[str, Any]] = None, fields: Optional[List[str]] = None, + timeout_info: TimeoutInfoWideType = None, ) -> Union[API_DOC, None]: """ Perform a vector-based search and replace the first matched document. + Args: vector (dict): The vector to search with. replacement (dict): The new document to replace the existing one. filter (dict, optional): Criteria to filter documents. fields (list, optional): Specifies the fields to return in the result. + timeout_info: a float, or a TimeoutInfo dict, for the HTTP request. + Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. 
+ Returns: dict or None: either the matched document or None if nothing found """ @@ -1678,6 +2019,7 @@ async def vector_find_one_and_replace( filter=filter, projection=projection, sort=sort, + timeout_info=timeout_info, ) return cast(Union[API_DOC, None], raw_find_result["data"]["document"]) @@ -1689,14 +2031,20 @@ async def find_one_and_update( filter: Optional[Dict[str, Any]] = None, options: Optional[Dict[str, Any]] = None, projection: Optional[Dict[str, Any]] = None, + timeout_info: TimeoutInfoWideType = None, ) -> API_RESPONSE: """ Find a single document and update it. + Args: sort (dict, optional): Specifies the order in which to find the document. update (dict): The update to apply to the document. filter (dict, optional): Criteria to filter documents. options (dict, optional): Additional options for the operation. + timeout_info: a float, or a TimeoutInfo dict, for the HTTP request. + Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. + Returns: dict: The result of the find and update operation. """ @@ -1713,6 +2061,7 @@ async def find_one_and_update( method=http_methods.POST, path=f"{self.base_path}", json_data=json_query, + timeout_info=timeout_info, ) return response @@ -1724,14 +2073,20 @@ async def vector_find_one_and_update( *, filter: Optional[Dict[str, Any]] = None, fields: Optional[List[str]] = None, + timeout_info: TimeoutInfoWideType = None, ) -> Union[API_DOC, None]: """ Perform a vector-based search and update the first matched document. + Args: vector (list): The vector to search with. update (dict): The update to apply to the matched document. filter (dict, optional): Criteria to filter documents before applying the vector search. fields (list, optional): Specifies the fields to return in the updated document. + timeout_info: a float, or a TimeoutInfo dict, for the HTTP request. 
+ Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. + Returns: dict or None: The result of the vector-based find and update operation, or None if nothing found @@ -1748,6 +2103,7 @@ async def vector_find_one_and_update( filter=filter, sort=sort, projection=projection, + timeout_info=timeout_info, ) return cast(Union[API_DOC, None], raw_find_result["data"]["document"]) @@ -1757,13 +2113,19 @@ async def find_one_and_delete( sort: Optional[Dict[str, Any]] = {}, filter: Optional[Dict[str, Any]] = None, projection: Optional[Dict[str, Any]] = None, + timeout_info: TimeoutInfoWideType = None, ) -> API_RESPONSE: """ Find a single document and delete it. + Args: sort (dict, optional): Specifies the order in which to find the document. filter (dict, optional): Criteria to filter documents. projection (dict, optional): Specifies the fields to return. + timeout_info: a float, or a TimeoutInfo dict, for the HTTP request. + Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. + Returns: dict: The result of the find and delete operation. """ @@ -1778,18 +2140,23 @@ async def find_one_and_delete( method=http_methods.POST, path=f"{self.base_path}", json_data=json_query, + timeout_info=timeout_info, ) return response async def count_documents( - self, - filter: Dict[str, Any] = {}, + self, filter: Dict[str, Any] = {}, timeout_info: TimeoutInfoWideType = None ) -> API_RESPONSE: """ Count documents matching a given predicate (expressed as filter). + Args: filter (dict, defaults to {}): Criteria to filter documents. + timeout_info: a float, or a TimeoutInfo dict, for the HTTP request. + Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. 
+ Returns: dict: the response, either {"status": {"count": }} @@ -1801,9 +2168,7 @@ async def count_documents( filter=filter, ) - response = await self._post( - document=json_query, - ) + response = await self._post(document=json_query, timeout_info=timeout_info) return response @@ -1813,14 +2178,20 @@ async def find_one( projection: Optional[Dict[str, Any]] = {}, sort: Optional[Dict[str, Any]] = {}, options: Optional[Dict[str, Any]] = {}, + timeout_info: TimeoutInfoWideType = None, ) -> API_RESPONSE: """ Find a single document in the collection. + Args: filter (dict, optional): Criteria to filter documents. projection (dict, optional): Specifies the fields to return. sort (dict, optional): Specifies the order in which to return the document. options (dict, optional): Additional options for the query. + timeout_info: a float, or a TimeoutInfo dict, for the HTTP request. + Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. + Returns: dict: the response, either {"data": {"document": }} @@ -1836,9 +2207,7 @@ async def find_one( sort=sort, ) - response = await self._post( - document=json_query, - ) + response = await self._post(document=json_query, timeout_info=timeout_info) return response @@ -1849,14 +2218,20 @@ async def vector_find_one( filter: Optional[Dict[str, Any]] = None, fields: Optional[List[str]] = None, include_similarity: bool = True, + timeout_info: TimeoutInfoWideType = None, ) -> Union[API_DOC, None]: """ Perform a vector-based search to find a single document in the collection. + Args: vector (list): The vector to search with. filter (dict, optional): Additional criteria to filter documents. fields (list, optional): Specifies the fields to return in the result. include_similarity (bool, optional): Whether to include similarity score in the result. + timeout_info: a float, or a TimeoutInfo dict, for the HTTP request. 
+ Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. + Returns: dict or None: The found document or None if no matching document is found. """ @@ -1872,18 +2247,27 @@ async def vector_find_one( projection=projection, sort=sort, options={"includeSimilarity": include_similarity}, + timeout_info=timeout_info, ) return cast(Union[API_DOC, None], raw_find_result["data"]["document"]) async def insert_one( - self, document: API_DOC, failures_allowed: bool = False + self, + document: API_DOC, + failures_allowed: bool = False, + timeout_info: TimeoutInfoWideType = None, ) -> API_RESPONSE: """ Insert a single document into the collection. + Args: document (dict): The document to insert. failures_allowed (bool): Whether to allow failures in the insert operation. + timeout_info: a float, or a TimeoutInfo dict, for the HTTP request. + Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. + Returns: dict: The response from the database after the insert operation. """ @@ -1894,6 +2278,7 @@ async def insert_one( path=self.base_path, json_data=json_query, skip_error_check=failures_allowed, + timeout_info=timeout_info, ) return response @@ -1903,14 +2288,20 @@ async def insert_many( documents: List[API_DOC], options: Optional[Dict[str, Any]] = None, partial_failures_allowed: bool = False, + timeout_info: TimeoutInfoWideType = None, ) -> API_RESPONSE: """ Insert multiple documents into the collection. + Args: documents (list): A list of documents to insert. options (dict, optional): Additional options for the insert operation. partial_failures_allowed (bool, optional): Whether to allow partial failures through the insertion (i.e. on some documents). + timeout_info: a float, or a TimeoutInfo dict, for the HTTP request. 
+ Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. + Returns: dict: The response from the database after the insert operation. """ @@ -1923,6 +2314,7 @@ async def insert_many( path=f"{self.base_path}", json_data=json_query, skip_error_check=partial_failures_allowed, + timeout_info=timeout_info, ) return response @@ -1934,10 +2326,12 @@ async def chunked_insert_many( partial_failures_allowed: bool = False, chunk_size: int = MAX_INSERT_NUM_DOCUMENTS, concurrency: int = 1, + timeout_info: TimeoutInfoWideType = None, ) -> List[Union[API_RESPONSE, Exception]]: """ Insert multiple documents into the collection, handling chunking and optionally with concurrent insertions. + Args: documents (list): A list of documents to insert. options (dict, optional): Additional options for the insert operation. @@ -1947,6 +2341,12 @@ async def chunked_insert_many( chunk_size (int, optional): Override the default insertion chunk size. concurrency (int, optional): The number of concurrent chunk insertions. Default is no concurrency. + timeout_info: a float, or a TimeoutInfo dict, for each single HTTP request. + This method runs a number of HTTP requests as it works on chunked + data. The timeout refers to each individual such request. + Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. + Returns: list: The responses from the database after the chunked insert operation. 
This is a list of individual responses from the API: the caller @@ -1966,6 +2366,7 @@ async def concurrent_insert_many( documents=docs, options=options, partial_failures_allowed=partial_failures_allowed, + timeout_info=timeout_info, ) except APIRequestError as e: if partial_failures_allowed: @@ -1995,13 +2396,21 @@ async def concurrent_insert_many( return results async def update_one( - self, filter: Dict[str, Any], update: Dict[str, Any] + self, + filter: Dict[str, Any], + update: Dict[str, Any], + timeout_info: TimeoutInfoWideType = None, ) -> API_RESPONSE: """ Update a single document in the collection. + Args: filter (dict): Criteria to identify the document to update. update (dict): The update to apply to the document. + timeout_info: a float, or a TimeoutInfo dict, for the HTTP request. + Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. + Returns: dict: The response from the database after the update operation. """ @@ -2011,6 +2420,7 @@ async def update_one( method=http_methods.POST, path=f"{self.base_path}", json_data=json_query, + timeout_info=timeout_info, ) return response @@ -2020,12 +2430,18 @@ async def update_many( filter: Dict[str, Any], update: Dict[str, Any], options: Optional[Dict[str, Any]] = None, + timeout_info: TimeoutInfoWideType = None, ) -> API_RESPONSE: """ Updates multiple documents in the collection. + Args: filter (dict): Criteria to identify the document to update. update (dict): The update to apply to the document. + timeout_info: a float, or a TimeoutInfo dict, for the HTTP request. + Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. + Returns: dict: The response from the database after the update operation. 
""" @@ -2040,26 +2456,41 @@ async def update_many( method=http_methods.POST, path=f"{self.base_path}", json_data=json_query, + timeout_info=timeout_info, ) return response - async def replace(self, path: str, document: API_DOC) -> API_RESPONSE: + async def replace( + self, path: str, document: API_DOC, timeout_info: TimeoutInfoWideType = None + ) -> API_RESPONSE: """ Replace a document in the collection. + Args: path (str): The path to the document to replace. document (dict): The new document to replace the existing one. + timeout_info: a float, or a TimeoutInfo dict, for the HTTP request. + Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. + Returns: dict: The response from the database after the replace operation. """ - return await self._put(path=path, document=document) + return await self._put(path=path, document=document, timeout_info=timeout_info) - async def delete_one(self, id: str) -> API_RESPONSE: + async def delete_one( + self, id: str, timeout_info: TimeoutInfoWideType = None + ) -> API_RESPONSE: """ Delete a single document from the collection based on its ID. + Args: id (str): The ID of the document to delete. + timeout_info: a float, or a TimeoutInfo dict, for the HTTP request. + Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. + Returns: dict: The response from the database after the delete operation. 
""" @@ -2070,16 +2501,26 @@ async def delete_one(self, id: str) -> API_RESPONSE: } response = await self._request( - method=http_methods.POST, path=f"{self.base_path}", json_data=json_query + method=http_methods.POST, + path=f"{self.base_path}", + json_data=json_query, + timeout_info=timeout_info, ) return response - async def delete_one_by_predicate(self, filter: Dict[str, Any]) -> API_RESPONSE: + async def delete_one_by_predicate( + self, filter: Dict[str, Any], timeout_info: TimeoutInfoWideType = None + ) -> API_RESPONSE: """ Delete a single document from the collection based on a filter clause + Args: filter: any filter dictionary + timeout_info: a float, or a TimeoutInfo dict, for the HTTP request. + Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. + Returns: dict: The response from the database after the delete operation. """ @@ -2090,7 +2531,10 @@ async def delete_one_by_predicate(self, filter: Dict[str, Any]) -> API_RESPONSE: } response = await self._request( - method=http_methods.POST, path=f"{self.base_path}", json_data=json_query + method=http_methods.POST, + path=f"{self.base_path}", + json_data=json_query, + timeout_info=timeout_info, ) return response @@ -2099,13 +2543,19 @@ async def delete_many( self, filter: Dict[str, Any], skip_error_check: bool = False, + timeout_info: TimeoutInfoWideType = None, ) -> API_RESPONSE: """ Delete many documents from the collection based on a filter condition + Args: filter (dict): Criteria to identify the documents to delete. skip_error_check (bool): whether to ignore the check for API error and return the response untouched. Default is False. + timeout_info: a float, or a TimeoutInfo dict, for the HTTP request. + Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. + Returns: dict: The response from the database after the delete operation. 
""" @@ -2120,34 +2570,52 @@ async def delete_many( path=f"{self.base_path}", json_data=json_query, skip_error_check=skip_error_check, + timeout_info=timeout_info, ) return response - async def chunked_delete_many(self, filter: Dict[str, Any]) -> List[API_RESPONSE]: + async def chunked_delete_many( + self, filter: Dict[str, Any], timeout_info: TimeoutInfoWideType = None + ) -> List[API_RESPONSE]: """ Delete many documents from the collection based on a filter condition, chaining several API calls until exhaustion of the documents to delete. + Args: filter (dict): Criteria to identify the documents to delete. + timeout_info: a float, or a TimeoutInfo dict, for each single HTTP request. + This method runs a number of HTTP requests as it works on a + pagination basis. The timeout refers to each individual such request. + Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. + Returns: List[dict]: The responses from the database from all the calls """ responses = [] must_proceed = True while must_proceed: - dm_response = await self.delete_many(filter=filter) + dm_response = await self.delete_many( + filter=filter, timeout_info=timeout_info + ) responses.append(dm_response) must_proceed = dm_response.get("status", {}).get("moreData", False) return responses - async def clear(self) -> API_RESPONSE: + async def clear(self, timeout_info: TimeoutInfoWideType = None) -> API_RESPONSE: """ Clear the collection, deleting all documents + + Args: + timeout_info: a float, or a TimeoutInfo dict, for the HTTP request. + Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. + Returns: dict: The response from the database. 
""" - clear_response = await self.delete_many(filter={}) + clear_response = await self.delete_many(filter={}, timeout_info=timeout_info) if clear_response.get("status", {}).get("deletedCount") != -1: raise ValueError( @@ -2156,12 +2624,19 @@ async def clear(self) -> API_RESPONSE: return clear_response - async def delete_subdocument(self, id: str, subdoc: str) -> API_RESPONSE: + async def delete_subdocument( + self, id: str, subdoc: str, timeout_info: TimeoutInfoWideType = None + ) -> API_RESPONSE: """ Delete a subdocument or field from a document in the collection. + Args: id (str): The ID of the document containing the subdocument. subdoc (str): The key of the subdocument or field to remove. + timeout_info: a float, or a TimeoutInfo dict, for the HTTP request. + Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. + Returns: dict: The response from the database after the update operation. """ @@ -2173,7 +2648,10 @@ async def delete_subdocument(self, id: str, subdoc: str) -> API_RESPONSE: } response = await self._request( - method=http_methods.POST, path=f"{self.base_path}", json_data=json_query + method=http_methods.POST, + path=f"{self.base_path}", + json_data=json_query, + timeout_info=timeout_info, ) return response @@ -2184,10 +2662,16 @@ async def delete_subdocument(self, id: str, subdoc: str) -> API_RESPONSE: current_version=__version__, details="Use the 'upsert_one' method instead", ) - async def upsert(self, document: API_DOC) -> str: - return await self.upsert_one(document) + async def upsert( + self, document: API_DOC, timeout_info: TimeoutInfoWideType = None + ) -> str: + return await self.upsert_one(document, timeout_info=timeout_info) - async def upsert_one(self, document: API_DOC) -> str: + async def upsert_one( + self, + document: API_DOC, + timeout_info: TimeoutInfoWideType = None, + ) -> str: """ Emulate an upsert operation for a single document in the collection. 
@@ -2196,12 +2680,19 @@ async def upsert_one(self, document: API_DOC) -> str: Args: document (dict): The document to insert or update. + timeout_info: a float, or a TimeoutInfo dict, for the HTTP requests. + This method may issue one or two requests, depending on what + is detected on DB. This timeout controls each HTTP request individually. + Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. Returns: str: The _id of the inserted or updated document. """ # Build the payload for the insert attempt - result = await self.insert_one(document, failures_allowed=True) + result = await self.insert_one( + document, failures_allowed=True, timeout_info=timeout_info + ) # If the call failed because of preexisting doc, then we replace it if "errors" in result: @@ -2213,6 +2704,7 @@ async def upsert_one(self, document: API_DOC) -> str: result = await self.find_one_and_replace( replacement=document, filter={"_id": document["_id"]}, + timeout_info=timeout_info, ) upserted_id = cast(str, result["data"]["document"]["_id"]) else: @@ -2230,16 +2722,23 @@ async def upsert_many( documents: list[API_DOC], concurrency: int = 1, partial_failures_allowed: bool = False, + timeout_info: TimeoutInfoWideType = None, ) -> List[Union[str, Exception]]: """ Emulate an upsert operation for multiple documents in the collection. This method attempts to insert the documents. If a document with the same _id exists, it updates the existing document. + Args: documents (List[dict]): The documents to insert or update. concurrency (int, optional): The number of concurrent upserts. partial_failures_allowed (bool, optional): Whether to allow partial failures in the batch. + timeout_info: a float, or a TimeoutInfo dict, for each HTTP request. + This method issues a separate HTTP request for each document to + insert: the timeout controls each such request individually. 
+ Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. Returns: List[Union[str, Exception]]: A list of "_id"s of the inserted or updated documents. @@ -2248,7 +2747,7 @@ async def upsert_many( async def concurrent_upsert(doc: API_DOC) -> str: async with sem: - return await self.upsert_one(document=doc) + return await self.upsert_one(document=doc, timeout_info=timeout_info) tasks = [asyncio.create_task(concurrent_upsert(doc)) for doc in documents] results = await asyncio.gather( @@ -2276,6 +2775,7 @@ def __init__( ) -> None: """ Initialize an Astra DB instance. + Args: token (str): Authentication token for Astra DB. api_endpoint (str): API endpoint URL. @@ -2380,6 +2880,7 @@ def _request( json_data: Optional[Dict[str, Any]] = None, url_params: Optional[Dict[str, Any]] = None, skip_error_check: bool = False, + timeout_info: TimeoutInfoWideType = None, ) -> API_RESPONSE: direct_response = api_request( client=self.client, @@ -2393,32 +2894,47 @@ def _request( skip_error_check=skip_error_check, caller_name=self.caller_name, caller_version=self.caller_version, + timeout=to_httpx_timeout(timeout_info), ) response = restore_from_api(direct_response) return response - def post_raw_request(self, body: Dict[str, Any]) -> API_RESPONSE: + def post_raw_request( + self, body: Dict[str, Any], timeout_info: TimeoutInfoWideType = None + ) -> API_RESPONSE: return self._request( method=http_methods.POST, path=self.base_path, json_data=body, + timeout_info=timeout_info, ) def collection(self, collection_name: str) -> AstraDBCollection: """ Retrieve a collection from the database. + Args: collection_name (str): The name of the collection to retrieve. + Returns: AstraDBCollection: The collection object. 
""" return AstraDBCollection(collection_name=collection_name, astra_db=self) - def get_collections(self, options: Optional[Dict[str, Any]] = None) -> API_RESPONSE: + def get_collections( + self, + options: Optional[Dict[str, Any]] = None, + timeout_info: TimeoutInfoWideType = None, + ) -> API_RESPONSE: """ Retrieve a list of collections from the database. + Args: options (dict, optional): Options to get the collection list + timeout_info: a float, or a TimeoutInfo dict, for the HTTP request. + Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. + Returns: dict: An object containing the list of collections in the database: {"status": {"collections": [...]}} @@ -2436,6 +2952,7 @@ def get_collections(self, options: Optional[Dict[str, Any]] = None) -> API_RESPO method=http_methods.POST, path=self.base_path, json_data=json_query, + timeout_info=timeout_info, ) return response @@ -2447,14 +2964,20 @@ def create_collection( options: Optional[Dict[str, Any]] = None, dimension: Optional[int] = None, metric: Optional[str] = None, + timeout_info: TimeoutInfoWideType = None, ) -> AstraDBCollection: """ Create a new collection in the database. + Args: collection_name (str): The name of the collection to create. options (dict, optional): Options for the collection. dimension (int, optional): Dimension for vector search. metric (str, optional): Metric choice for vector search. + timeout_info: a float, or a TimeoutInfo dict, for the HTTP request. + Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. + Returns: AstraDBCollection: The created collection object. 
""" @@ -2502,16 +3025,24 @@ def create_collection( method=http_methods.POST, path=f"{self.base_path}", json_data={"createCollection": jsondata}, + timeout_info=timeout_info, ) # Get the instance object as the return of the call return AstraDBCollection(astra_db=self, collection_name=collection_name) - def delete_collection(self, collection_name: str) -> API_RESPONSE: + def delete_collection( + self, collection_name: str, timeout_info: TimeoutInfoWideType = None + ) -> API_RESPONSE: """ Delete a collection from the database. + Args: collection_name (str): The name of the collection to delete. + timeout_info: a float, or a TimeoutInfo dict, for the HTTP request. + Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. + Returns: dict: The response from the database. """ @@ -2523,6 +3054,7 @@ def delete_collection(self, collection_name: str) -> API_RESPONSE: method=http_methods.POST, path=f"{self.base_path}", json_data={"deleteCollection": {"name": collection_name}}, + timeout_info=timeout_info, ) return response @@ -2533,11 +3065,18 @@ def delete_collection(self, collection_name: str) -> API_RESPONSE: current_version=__version__, details="Use the 'AstraDBCollection.clear()' method instead", ) - def truncate_collection(self, collection_name: str) -> AstraDBCollection: + def truncate_collection( + self, collection_name: str, timeout_info: TimeoutInfoWideType = None + ) -> AstraDBCollection: """ Clear a collection in the database, deleting all stored documents. + Args: collection_name (str): The name of the collection to clear. + timeout_info: a float, or a TimeoutInfo dict, for the HTTP request. + Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. 
+
         Returns:
             collection: an AstraDBCollection instance
         """
@@ -2545,7 +3084,7 @@ def truncate_collection(self, collection_name: str) -> AstraDBCollection:
             collection_name=collection_name,
             astra_db=self,
         )
-        clear_response = collection.clear()
+        clear_response = collection.clear(timeout_info=timeout_info)
 
         if clear_response.get("status", {}).get("deletedCount") != -1:
             raise ValueError(
@@ -2569,6 +3108,7 @@ def __init__(
     ) -> None:
         """
         Initialize an Astra DB instance.
+
         Args:
             token (str): Authentication token for Astra DB.
             api_endpoint (str): API endpoint URL.
@@ -2685,6 +3225,7 @@ async def _request(
         json_data: Optional[Dict[str, Any]] = None,
         url_params: Optional[Dict[str, Any]] = None,
         skip_error_check: bool = False,
+        timeout_info: TimeoutInfoWideType = None,
    ) -> API_RESPONSE:
         adirect_response = await async_api_request(
             client=self.client,
@@ -2698,34 +3239,50 @@ async def _request(
             skip_error_check=skip_error_check,
             caller_name=self.caller_name,
             caller_version=self.caller_version,
+            timeout=to_httpx_timeout(timeout_info),
         )
         response = restore_from_api(adirect_response)
         return response
 
-    async def post_raw_request(self, body: Dict[str, Any]) -> API_RESPONSE:
+    async def post_raw_request(
+        self, body: Dict[str, Any], timeout_info: TimeoutInfoWideType = None
+    ) -> API_RESPONSE:
         return await self._request(
             method=http_methods.POST,
             path=self.base_path,
             json_data=body,
+            timeout_info=timeout_info,
        )
 
     async def collection(self, collection_name: str) -> AsyncAstraDBCollection:
         """
         Retrieve a collection from the database.
+
        Args:
            collection_name (str): The name of the collection to retrieve.
+
+        Note:
+            This method issues no HTTP request, hence no timeout applies.
+
        Returns:
            AstraDBCollection: The collection object.
""" return AsyncAstraDBCollection(collection_name=collection_name, astra_db=self) async def get_collections( - self, options: Optional[Dict[str, Any]] = None + self, + options: Optional[Dict[str, Any]] = None, + timeout_info: TimeoutInfoWideType = None, ) -> API_RESPONSE: """ Retrieve a list of collections from the database. + Args: options (dict, optional): Options to get the collection list + timeout_info: a float, or a TimeoutInfo dict, for the HTTP request. + Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. + Returns: dict: An object containing the list of collections in the database: {"status": {"collections": [...]}} @@ -2743,6 +3300,7 @@ async def get_collections( method=http_methods.POST, path=self.base_path, json_data=json_query, + timeout_info=timeout_info, ) return response @@ -2754,14 +3312,20 @@ async def create_collection( options: Optional[Dict[str, Any]] = None, dimension: Optional[int] = None, metric: Optional[str] = None, + timeout_info: TimeoutInfoWideType = None, ) -> AsyncAstraDBCollection: """ Create a new collection in the database. + Args: collection_name (str): The name of the collection to create. options (dict, optional): Options for the collection. dimension (int, optional): Dimension for vector search. metric (str, optional): Metric choice for vector search. + timeout_info: a float, or a TimeoutInfo dict, for the HTTP request. + Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. + Returns: AsyncAstraDBCollection: The created collection object. 
""" @@ -2809,16 +3373,24 @@ async def create_collection( method=http_methods.POST, path=f"{self.base_path}", json_data={"createCollection": jsondata}, + timeout_info=timeout_info, ) # Get the instance object as the return of the call return AsyncAstraDBCollection(astra_db=self, collection_name=collection_name) - async def delete_collection(self, collection_name: str) -> API_RESPONSE: + async def delete_collection( + self, collection_name: str, timeout_info: TimeoutInfoWideType = None + ) -> API_RESPONSE: """ Delete a collection from the database. + Args: collection_name (str): The name of the collection to delete. + timeout_info: a float, or a TimeoutInfo dict, for the HTTP request. + Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. + Returns: dict: The response from the database. """ @@ -2830,6 +3402,7 @@ async def delete_collection(self, collection_name: str) -> API_RESPONSE: method=http_methods.POST, path=f"{self.base_path}", json_data={"deleteCollection": {"name": collection_name}}, + timeout_info=timeout_info, ) return response @@ -2840,11 +3413,18 @@ async def delete_collection(self, collection_name: str) -> API_RESPONSE: current_version=__version__, details="Use the 'AsyncAstraDBCollection.clear()' method instead", ) - async def truncate_collection(self, collection_name: str) -> AsyncAstraDBCollection: + async def truncate_collection( + self, collection_name: str, timeout_info: TimeoutInfoWideType = None + ) -> AsyncAstraDBCollection: """ Clear a collection in the database, deleting all stored documents. + Args: collection_name (str): The name of the collection to clear. + timeout_info: a float, or a TimeoutInfo dict, for the HTTP request. + Note that a 'read' timeout event will not block the action taken + by the API server if it has received the request already. 
+ Returns: collection: an AsyncAstraDBCollection instance """ @@ -2853,7 +3433,7 @@ async def truncate_collection(self, collection_name: str) -> AsyncAstraDBCollect collection_name=collection_name, astra_db=self, ) - clear_response = await collection.clear() + clear_response = await collection.clear(timeout_info=timeout_info) if clear_response.get("status", {}).get("deletedCount") != -1: raise ValueError( diff --git a/astrapy/core/ops.py b/astrapy/core/ops.py index 7928c877..fa42396b 100644 --- a/astrapy/core/ops.py +++ b/astrapy/core/ops.py @@ -19,7 +19,11 @@ import httpx from astrapy.core.api import api_request, raw_api_request -from astrapy.core.utils import http_methods +from astrapy.core.utils import ( + http_methods, + to_httpx_timeout, + TimeoutInfoWideType, +) from astrapy.core.defaults import ( DEFAULT_DEV_OPS_AUTH_HEADER, DEFAULT_DEV_OPS_API_VERSION, @@ -116,6 +120,7 @@ def _ops_request( path: str, options: Optional[Dict[str, Any]] = None, json_data: Optional[Dict[str, Any]] = None, + timeout_info: TimeoutInfoWideType = None, ) -> httpx.Response: _options = {} if options is None else options @@ -130,6 +135,7 @@ def _ops_request( path=path, caller_name=self.caller_name, caller_version=self.caller_version, + timeout=to_httpx_timeout(timeout_info), ) return raw_response @@ -139,6 +145,7 @@ def _json_ops_request( path: str, options: Optional[Dict[str, Any]] = None, json_data: Optional[Dict[str, Any]] = None, + timeout_info: TimeoutInfoWideType = None, ) -> OPS_API_RESPONSE: _options = {} if options is None else options @@ -154,11 +161,14 @@ def _json_ops_request( skip_error_check=False, caller_name=None, caller_version=None, + timeout=to_httpx_timeout(timeout_info), ) return response def get_databases( - self, options: Optional[Dict[str, Any]] = None + self, + options: Optional[Dict[str, Any]] = None, + timeout_info: TimeoutInfoWideType = None, ) -> OPS_API_RESPONSE: """ Retrieve a list of databases. 
@@ -170,25 +180,34 @@ def get_databases( list: a JSON list of dictionaries, one per database. """ response = self._json_ops_request( - method=http_methods.GET, path="/databases", options=options + method=http_methods.GET, + path="/databases", + options=options, + timeout_info=timeout_info, ) return response def create_database( - self, database_definition: Optional[Dict[str, Any]] = None + self, + database_definition: Optional[Dict[str, Any]] = None, + timeout_info: TimeoutInfoWideType = None, ) -> Optional[Dict[str, str]]: """ Create a new database. Args: database_definition (dict, optional): A dictionary defining the properties of the database to be created. + timeout_info: either a float (seconds) or a TimeoutInfo dict (see) Returns: dict: A dictionary containing the ID of the created database, or None if creation was unsuccessful. """ r = self._ops_request( - method=http_methods.POST, path="/databases", json_data=database_definition + method=http_methods.POST, + path="/databases", + json_data=database_definition, + timeout_info=timeout_info, ) if r.status_code == 201: @@ -196,18 +215,23 @@ def create_database( return None - def terminate_database(self, database: str = "") -> Optional[str]: + def terminate_database( + self, database: str = "", timeout_info: TimeoutInfoWideType = None + ) -> Optional[str]: """ Terminate an existing database. Args: database (str): The identifier of the database to terminate. + timeout_info: either a float (seconds) or a TimeoutInfo dict (see) Returns: str: The identifier of the terminated database, or None if termination was unsuccessful. 
""" r = self._ops_request( - method=http_methods.POST, path=f"/databases/{database}/terminate" + method=http_methods.POST, + path=f"/databases/{database}/terminate", + timeout_info=timeout_info, ) if r.status_code == 202: @@ -216,7 +240,10 @@ def terminate_database(self, database: str = "") -> Optional[str]: return None def get_database( - self, database: str = "", options: Optional[Dict[str, Any]] = None + self, + database: str = "", + options: Optional[Dict[str, Any]] = None, + timeout_info: TimeoutInfoWideType = None, ) -> API_RESPONSE: """ Retrieve details of a specific database. @@ -234,16 +261,23 @@ def get_database( method=http_methods.GET, path=f"/databases/{database}", options=options, + timeout_info=timeout_info, ), ) - def create_keyspace(self, database: str = "", keyspace: str = "") -> httpx.Response: + def create_keyspace( + self, + database: str = "", + keyspace: str = "", + timeout_info: TimeoutInfoWideType = None, + ) -> httpx.Response: """ Create a keyspace in a specified database. Args: database (str): The identifier of the database where the keyspace will be created. keyspace (str): The name of the keyspace to create. + timeout_info: either a float (seconds) or a TimeoutInfo dict (see) Returns: requests.Response: The response object from the HTTP request. @@ -251,9 +285,12 @@ def create_keyspace(self, database: str = "", keyspace: str = "") -> httpx.Respo return self._ops_request( method=http_methods.POST, path=f"/databases/{database}/keyspaces/{keyspace}", + timeout_info=timeout_info, ) - def park_database(self, database: str = "") -> OPS_API_RESPONSE: + def park_database( + self, database: str = "", timeout_info: TimeoutInfoWideType = None + ) -> OPS_API_RESPONSE: """ Park a specific database, making it inactive. @@ -264,10 +301,14 @@ def park_database(self, database: str = "") -> OPS_API_RESPONSE: dict: The response from the server after parking the database. 
""" return self._json_ops_request( - method=http_methods.POST, path=f"/databases/{database}/park" + method=http_methods.POST, + path=f"/databases/{database}/park", + timeout_info=timeout_info, ) - def unpark_database(self, database: str = "") -> OPS_API_RESPONSE: + def unpark_database( + self, database: str = "", timeout_info: TimeoutInfoWideType = None + ) -> OPS_API_RESPONSE: """ Unpark a specific database, making it active again. @@ -278,11 +319,16 @@ def unpark_database(self, database: str = "") -> OPS_API_RESPONSE: dict: The response from the server after unparking the database. """ return self._json_ops_request( - method=http_methods.POST, path=f"/databases/{database}/unpark" + method=http_methods.POST, + path=f"/databases/{database}/unpark", + timeout_info=timeout_info, ) def resize_database( - self, database: str = "", options: Optional[Dict[str, Any]] = None + self, + database: str = "", + options: Optional[Dict[str, Any]] = None, + timeout_info: TimeoutInfoWideType = None, ) -> OPS_API_RESPONSE: """ Resize a specific database according to provided options. @@ -298,10 +344,14 @@ def resize_database( method=http_methods.POST, path=f"/databases/{database}/resize", json_data=options, + timeout_info=timeout_info, ) def reset_database_password( - self, database: str = "", options: Optional[Dict[str, Any]] = None + self, + database: str = "", + options: Optional[Dict[str, Any]] = None, + timeout_info: TimeoutInfoWideType = None, ) -> OPS_API_RESPONSE: """ Reset the password for a specific database. @@ -317,9 +367,12 @@ def reset_database_password( method=http_methods.POST, path=f"/databases/{database}/resetPassword", json_data=options, + timeout_info=timeout_info, ) - def get_secure_bundle(self, database: str = "") -> OPS_API_RESPONSE: + def get_secure_bundle( + self, database: str = "", timeout_info: TimeoutInfoWideType = None + ) -> OPS_API_RESPONSE: """ Retrieve a secure bundle URL for a specific database. 
@@ -332,9 +385,12 @@ def get_secure_bundle(self, database: str = "") -> OPS_API_RESPONSE: return self._json_ops_request( method=http_methods.POST, path=f"/databases/{database}/secureBundleURL", + timeout_info=timeout_info, ) - def get_datacenters(self, database: str = "") -> OPS_API_RESPONSE: + def get_datacenters( + self, database: str = "", timeout_info: TimeoutInfoWideType = None + ) -> OPS_API_RESPONSE: """ Get a list of datacenters associated with a specific database. @@ -347,10 +403,14 @@ def get_datacenters(self, database: str = "") -> OPS_API_RESPONSE: return self._json_ops_request( method=http_methods.GET, path=f"/databases/{database}/datacenters", + timeout_info=timeout_info, ) def create_datacenter( - self, database: str = "", options: Optional[Dict[str, Any]] = None + self, + database: str = "", + options: Optional[Dict[str, Any]] = None, + timeout_info: TimeoutInfoWideType = None, ) -> OPS_API_RESPONSE: """ Create a new datacenter for a specific database. @@ -366,10 +426,14 @@ def create_datacenter( method=http_methods.POST, path=f"/databases/{database}/datacenters", json_data=options, + timeout_info=timeout_info, ) def terminate_datacenter( - self, database: str = "", datacenter: str = "" + self, + database: str = "", + datacenter: str = "", + timeout_info: TimeoutInfoWideType = None, ) -> OPS_API_RESPONSE: """ Terminate a specific datacenter in a database. @@ -384,9 +448,12 @@ def terminate_datacenter( return self._json_ops_request( method=http_methods.POST, path=f"/databases/{database}/datacenters/{datacenter}/terminate", + timeout_info=timeout_info, ) - def get_access_list(self, database: str = "") -> OPS_API_RESPONSE: + def get_access_list( + self, database: str = "", timeout_info: TimeoutInfoWideType = None + ) -> OPS_API_RESPONSE: """ Retrieve the access list for a specific database. 
@@ -399,10 +466,14 @@ def get_access_list(self, database: str = "") -> OPS_API_RESPONSE: return self._json_ops_request( method=http_methods.GET, path=f"/databases/{database}/access-list", + timeout_info=timeout_info, ) def replace_access_list( - self, database: str = "", access_list: Optional[Dict[str, Any]] = None + self, + database: str = "", + access_list: Optional[Dict[str, Any]] = None, + timeout_info: TimeoutInfoWideType = None, ) -> OPS_API_RESPONSE: """ Replace the entire access list for a specific database. @@ -418,10 +489,14 @@ def replace_access_list( method=http_methods.PUT, path=f"/databases/{database}/access-list", json_data=access_list, + timeout_info=timeout_info, ) def update_access_list( - self, database: str = "", access_list: Optional[Dict[str, Any]] = None + self, + database: str = "", + access_list: Optional[Dict[str, Any]] = None, + timeout_info: TimeoutInfoWideType = None, ) -> OPS_API_RESPONSE: """ Update the access list for a specific database. @@ -437,10 +512,14 @@ def update_access_list( method=http_methods.PATCH, path=f"/databases/{database}/access-list", json_data=access_list, + timeout_info=timeout_info, ) def add_access_list_address( - self, database: str = "", address: Optional[Dict[str, Any]] = None + self, + database: str = "", + address: Optional[Dict[str, Any]] = None, + timeout_info: TimeoutInfoWideType = None, ) -> OPS_API_RESPONSE: """ Add a new address to the access list for a specific database. @@ -456,9 +535,12 @@ def add_access_list_address( method=http_methods.POST, path=f"/databases/{database}/access-list", json_data=address, + timeout_info=timeout_info, ) - def delete_access_list(self, database: str = "") -> OPS_API_RESPONSE: + def delete_access_list( + self, database: str = "", timeout_info: TimeoutInfoWideType = None + ) -> OPS_API_RESPONSE: """ Delete the access list for a specific database. 
@@ -471,9 +553,12 @@ def delete_access_list(self, database: str = "") -> OPS_API_RESPONSE: return self._json_ops_request( method=http_methods.DELETE, path=f"/databases/{database}/access-list", + timeout_info=timeout_info, ) - def get_private_link(self, database: str = "") -> OPS_API_RESPONSE: + def get_private_link( + self, database: str = "", timeout_info: TimeoutInfoWideType = None + ) -> OPS_API_RESPONSE: """ Retrieve the private link information for a specified database. @@ -486,10 +571,14 @@ def get_private_link(self, database: str = "") -> OPS_API_RESPONSE: return self._json_ops_request( method=http_methods.GET, path=f"/organizations/clusters/{database}/private-link", + timeout_info=timeout_info, ) def get_datacenter_private_link( - self, database: str = "", datacenter: str = "" + self, + database: str = "", + datacenter: str = "", + timeout_info: TimeoutInfoWideType = None, ) -> OPS_API_RESPONSE: """ Retrieve the private link information for a specific datacenter in a database. @@ -504,6 +593,7 @@ def get_datacenter_private_link( return self._json_ops_request( method=http_methods.GET, path=f"/organizations/clusters/{database}/datacenters/{datacenter}/private-link", + timeout_info=timeout_info, ) def create_datacenter_private_link( @@ -511,6 +601,7 @@ def create_datacenter_private_link( database: str = "", datacenter: str = "", private_link: Optional[Dict[str, Any]] = None, + timeout_info: TimeoutInfoWideType = None, ) -> OPS_API_RESPONSE: """ Create a private link for a specific datacenter in a database. 
@@ -527,6 +618,7 @@ def create_datacenter_private_link( method=http_methods.POST, path=f"/organizations/clusters/{database}/datacenters/{datacenter}/private-link", json_data=private_link, + timeout_info=timeout_info, ) def create_datacenter_endpoint( @@ -534,6 +626,7 @@ def create_datacenter_endpoint( database: str = "", datacenter: str = "", endpoint: Optional[Dict[str, Any]] = None, + timeout_info: TimeoutInfoWideType = None, ) -> OPS_API_RESPONSE: """ Create an endpoint for a specific datacenter in a database. @@ -550,10 +643,15 @@ def create_datacenter_endpoint( method=http_methods.POST, path=f"/organizations/clusters/{database}/datacenters/{datacenter}/endpoint", json_data=endpoint, + timeout_info=timeout_info, ) def update_datacenter_endpoint( - self, database: str = "", datacenter: str = "", endpoint: Dict[str, Any] = {} + self, + database: str = "", + datacenter: str = "", + endpoint: Dict[str, Any] = {}, + timeout_info: TimeoutInfoWideType = None, ) -> OPS_API_RESPONSE: """ Update an existing endpoint for a specific datacenter in a database. @@ -570,10 +668,15 @@ def update_datacenter_endpoint( method=http_methods.PUT, path=f"/organizations/clusters/{database}/datacenters/{datacenter}/endpoints/{endpoint['id']}", json_data=endpoint, + timeout_info=timeout_info, ) def get_datacenter_endpoint( - self, database: str = "", datacenter: str = "", endpoint: str = "" + self, + database: str = "", + datacenter: str = "", + endpoint: str = "", + timeout_info: TimeoutInfoWideType = None, ) -> OPS_API_RESPONSE: """ Retrieve information about a specific endpoint in a datacenter of a database. 
@@ -589,10 +692,15 @@ def get_datacenter_endpoint( return self._json_ops_request( method=http_methods.GET, path=f"/organizations/clusters/{database}/datacenters/{datacenter}/endpoints/{endpoint}", + timeout_info=timeout_info, ) def delete_datacenter_endpoint( - self, database: str = "", datacenter: str = "", endpoint: str = "" + self, + database: str = "", + datacenter: str = "", + endpoint: str = "", + timeout_info: TimeoutInfoWideType = None, ) -> OPS_API_RESPONSE: """ Delete a specific endpoint in a datacenter of a database. @@ -608,18 +716,25 @@ def delete_datacenter_endpoint( return self._json_ops_request( method=http_methods.DELETE, path=f"/organizations/clusters/{database}/datacenters/{datacenter}/endpoints/{endpoint}", + timeout_info=timeout_info, ) - def get_available_classic_regions(self) -> OPS_API_RESPONSE: + def get_available_classic_regions( + self, timeout_info: TimeoutInfoWideType = None + ) -> OPS_API_RESPONSE: """ Retrieve a list of available classic regions. Returns: dict: A list of available classic regions. """ - return self._json_ops_request(method=http_methods.GET, path="/availableRegions") + return self._json_ops_request( + method=http_methods.GET, path="/availableRegions", timeout_info=timeout_info + ) - def get_available_regions(self) -> OPS_API_RESPONSE: + def get_available_regions( + self, timeout_info: TimeoutInfoWideType = None + ) -> OPS_API_RESPONSE: """ Retrieve a list of available regions for serverless deployment. @@ -627,10 +742,12 @@ def get_available_regions(self) -> OPS_API_RESPONSE: dict: A list of available regions for serverless deployment. """ return self._json_ops_request( - method=http_methods.GET, path="/regions/serverless" + method=http_methods.GET, + path="/regions/serverless", + timeout_info=timeout_info, ) - def get_roles(self) -> OPS_API_RESPONSE: + def get_roles(self, timeout_info: TimeoutInfoWideType = None) -> OPS_API_RESPONSE: """ Retrieve a list of roles within the organization. 
@@ -638,11 +755,15 @@ def get_roles(self) -> OPS_API_RESPONSE: dict: A list of roles within the organization. """ return self._json_ops_request( - method=http_methods.GET, path="/organizations/roles" + method=http_methods.GET, + path="/organizations/roles", + timeout_info=timeout_info, ) def create_role( - self, role_definition: Optional[Dict[str, Any]] = None + self, + role_definition: Optional[Dict[str, Any]] = None, + timeout_info: TimeoutInfoWideType = None, ) -> OPS_API_RESPONSE: """ Create a new role within the organization. @@ -657,9 +778,12 @@ def create_role( method=http_methods.POST, path="/organizations/roles", json_data=role_definition, + timeout_info=timeout_info, ) - def get_role(self, role: str = "") -> OPS_API_RESPONSE: + def get_role( + self, role: str = "", timeout_info: TimeoutInfoWideType = None + ) -> OPS_API_RESPONSE: """ Retrieve details of a specific role within the organization. @@ -670,11 +794,16 @@ def get_role(self, role: str = "") -> OPS_API_RESPONSE: dict: The details of the specified role. """ return self._json_ops_request( - method=http_methods.GET, path=f"/organizations/roles/{role}" + method=http_methods.GET, + path=f"/organizations/roles/{role}", + timeout_info=timeout_info, ) def update_role( - self, role: str = "", role_definition: Optional[Dict[str, Any]] = None + self, + role: str = "", + role_definition: Optional[Dict[str, Any]] = None, + timeout_info: TimeoutInfoWideType = None, ) -> OPS_API_RESPONSE: """ Update the definition of an existing role within the organization. @@ -690,9 +819,12 @@ def update_role( method=http_methods.PUT, path=f"/organizations/roles/{role}", json_data=role_definition, + timeout_info=timeout_info, ) - def delete_role(self, role: str = "") -> OPS_API_RESPONSE: + def delete_role( + self, role: str = "", timeout_info: TimeoutInfoWideType = None + ) -> OPS_API_RESPONSE: """ Delete a specific role from the organization. 
@@ -703,11 +835,15 @@ def delete_role(self, role: str = "") -> OPS_API_RESPONSE: dict: The response from the server after deleting the role. """ return self._json_ops_request( - method=http_methods.DELETE, path=f"/organizations/roles/{role}" + method=http_methods.DELETE, + path=f"/organizations/roles/{role}", + timeout_info=timeout_info, ) def invite_user( - self, user_definition: Optional[Dict[str, Any]] = None + self, + user_definition: Optional[Dict[str, Any]] = None, + timeout_info: TimeoutInfoWideType = None, ) -> OPS_API_RESPONSE: """ Invite a new user to the organization. @@ -722,9 +858,10 @@ def invite_user( method=http_methods.PUT, path="/organizations/users", json_data=user_definition, + timeout_info=timeout_info, ) - def get_users(self) -> OPS_API_RESPONSE: + def get_users(self, timeout_info: TimeoutInfoWideType = None) -> OPS_API_RESPONSE: """ Retrieve a list of users within the organization. @@ -732,10 +869,14 @@ def get_users(self) -> OPS_API_RESPONSE: dict: A list of users within the organization. """ return self._json_ops_request( - method=http_methods.GET, path="/organizations/users" + method=http_methods.GET, + path="/organizations/users", + timeout_info=timeout_info, ) - def get_user(self, user: str = "") -> OPS_API_RESPONSE: + def get_user( + self, user: str = "", timeout_info: TimeoutInfoWideType = None + ) -> OPS_API_RESPONSE: """ Retrieve details of a specific user within the organization. @@ -746,10 +887,14 @@ def get_user(self, user: str = "") -> OPS_API_RESPONSE: dict: The details of the specified user. """ return self._json_ops_request( - method=http_methods.GET, path=f"/organizations/users/{user}" + method=http_methods.GET, + path=f"/organizations/users/{user}", + timeout_info=timeout_info, ) - def remove_user(self, user: str = "") -> OPS_API_RESPONSE: + def remove_user( + self, user: str = "", timeout_info: TimeoutInfoWideType = None + ) -> OPS_API_RESPONSE: """ Remove a user from the organization. 
@@ -760,11 +905,16 @@ def remove_user(self, user: str = "") -> OPS_API_RESPONSE: dict: The response from the server after removing the user. """ return self._json_ops_request( - method=http_methods.DELETE, path=f"/organizations/users/{user}" + method=http_methods.DELETE, + path=f"/organizations/users/{user}", + timeout_info=timeout_info, ) def update_user_roles( - self, user: str = "", roles: Optional[Dict[str, Any]] = None + self, + user: str = "", + roles: Optional[Dict[str, Any]] = None, + timeout_info: TimeoutInfoWideType = None, ) -> OPS_API_RESPONSE: """ Update the roles assigned to a specific user within the organization. @@ -780,18 +930,25 @@ def update_user_roles( method=http_methods.PUT, path=f"/organizations/users/{user}/roles", json_data=roles, + timeout_info=timeout_info, ) - def get_clients(self) -> OPS_API_RESPONSE: + def get_clients(self, timeout_info: TimeoutInfoWideType = None) -> OPS_API_RESPONSE: """ Retrieve a list of client IDs and secrets associated with the organization. Returns: dict: A list of client IDs and their associated secrets. """ - return self._json_ops_request(method=http_methods.GET, path="/clientIdSecrets") + return self._json_ops_request( + method=http_methods.GET, path="/clientIdSecrets", timeout_info=timeout_info + ) - def create_token(self, roles: Optional[Dict[str, Any]] = None) -> OPS_API_RESPONSE: + def create_token( + self, + roles: Optional[Dict[str, Any]] = None, + timeout_info: TimeoutInfoWideType = None, + ) -> OPS_API_RESPONSE: """ Create a new token with specific roles. @@ -806,9 +963,12 @@ def create_token(self, roles: Optional[Dict[str, Any]] = None) -> OPS_API_RESPON method=http_methods.POST, path="/clientIdSecrets", json_data=roles, + timeout_info=timeout_info, ) - def delete_token(self, token: str = "") -> OPS_API_RESPONSE: + def delete_token( + self, token: str = "", timeout_info: TimeoutInfoWideType = None + ) -> OPS_API_RESPONSE: """ Delete a specific token. 
@@ -819,28 +979,40 @@ def delete_token(self, token: str = "") -> OPS_API_RESPONSE: dict: The response from the server after deleting the token. """ return self._json_ops_request( - method=http_methods.DELETE, path=f"/clientIdSecret/{token}" + method=http_methods.DELETE, + path=f"/clientIdSecret/{token}", + timeout_info=timeout_info, ) - def get_organization(self) -> OPS_API_RESPONSE: + def get_organization( + self, timeout_info: TimeoutInfoWideType = None + ) -> OPS_API_RESPONSE: """ Retrieve details of the current organization. Returns: dict: The details of the organization. """ - return self._json_ops_request(method=http_methods.GET, path="/currentOrg") + return self._json_ops_request( + method=http_methods.GET, path="/currentOrg", timeout_info=timeout_info + ) - def get_access_lists(self) -> OPS_API_RESPONSE: + def get_access_lists( + self, timeout_info: TimeoutInfoWideType = None + ) -> OPS_API_RESPONSE: """ Retrieve a list of access lists for the organization. Returns: dict: A list of access lists. """ - return self._json_ops_request(method=http_methods.GET, path="/access-lists") + return self._json_ops_request( + method=http_methods.GET, path="/access-lists", timeout_info=timeout_info + ) - def get_access_list_template(self) -> OPS_API_RESPONSE: + def get_access_list_template( + self, timeout_info: TimeoutInfoWideType = None + ) -> OPS_API_RESPONSE: """ Retrieve a template for creating an access list. @@ -848,10 +1020,14 @@ def get_access_list_template(self) -> OPS_API_RESPONSE: dict: An access list template. """ return self._json_ops_request( - method=http_methods.GET, path="/access-list/template" + method=http_methods.GET, + path="/access-list/template", + timeout_info=timeout_info, ) - def validate_access_list(self) -> OPS_API_RESPONSE: + def validate_access_list( + self, timeout_info: TimeoutInfoWideType = None + ) -> OPS_API_RESPONSE: """ Validate the configuration of the access list. 
@@ -859,10 +1035,14 @@ def validate_access_list(self) -> OPS_API_RESPONSE: dict: The validation result of the access list configuration. """ return self._json_ops_request( - method=http_methods.POST, path="/access-list/validate" + method=http_methods.POST, + path="/access-list/validate", + timeout_info=timeout_info, ) - def get_private_links(self) -> OPS_API_RESPONSE: + def get_private_links( + self, timeout_info: TimeoutInfoWideType = None + ) -> OPS_API_RESPONSE: """ Retrieve a list of private link connections for the organization. @@ -870,10 +1050,14 @@ def get_private_links(self) -> OPS_API_RESPONSE: dict: A list of private link connections. """ return self._json_ops_request( - method=http_methods.GET, path="/organizations/private-link" + method=http_methods.GET, + path="/organizations/private-link", + timeout_info=timeout_info, ) - def get_streaming_providers(self) -> OPS_API_RESPONSE: + def get_streaming_providers( + self, timeout_info: TimeoutInfoWideType = None + ) -> OPS_API_RESPONSE: """ Retrieve a list of streaming service providers. @@ -881,10 +1065,14 @@ def get_streaming_providers(self) -> OPS_API_RESPONSE: dict: A list of available streaming service providers. """ return self._json_ops_request( - method=http_methods.GET, path="/streaming/providers" + method=http_methods.GET, + path="/streaming/providers", + timeout_info=timeout_info, ) - def get_streaming_tenants(self) -> OPS_API_RESPONSE: + def get_streaming_tenants( + self, timeout_info: TimeoutInfoWideType = None + ) -> OPS_API_RESPONSE: """ Retrieve a list of streaming tenants. @@ -892,11 +1080,15 @@ def get_streaming_tenants(self) -> OPS_API_RESPONSE: dict: A list of streaming tenants and their details. 
""" return self._json_ops_request( - method=http_methods.GET, path="/streaming/tenants" + method=http_methods.GET, + path="/streaming/tenants", + timeout_info=timeout_info, ) def create_streaming_tenant( - self, tenant: Optional[Dict[str, Any]] = None + self, + tenant: Optional[Dict[str, Any]] = None, + timeout_info: TimeoutInfoWideType = None, ) -> OPS_API_RESPONSE: """ Create a new streaming tenant. @@ -911,15 +1103,22 @@ def create_streaming_tenant( method=http_methods.POST, path="/streaming/tenants", json_data=tenant, + timeout_info=timeout_info, ) - def delete_streaming_tenant(self, tenant: str = "", cluster: str = "") -> None: + def delete_streaming_tenant( + self, + tenant: str = "", + cluster: str = "", + timeout_info: TimeoutInfoWideType = None, + ) -> None: """ Delete a specific streaming tenant from a cluster. Args: tenant (str): The identifier of the tenant to delete. cluster (str): The identifier of the cluster from which the tenant is to be deleted. + timeout_info: either a float (seconds) or a TimeoutInfo dict (see) Returns: dict: The response from the server after deleting the streaming tenant. @@ -927,6 +1126,7 @@ def delete_streaming_tenant(self, tenant: str = "", cluster: str = "") -> None: r = self._ops_request( method=http_methods.DELETE, path=f"/streaming/tenants/{tenant}/clusters/{cluster}", + timeout_info=timeout_info, ) if r.status_code == 202: # 'Accepted' @@ -934,7 +1134,9 @@ def delete_streaming_tenant(self, tenant: str = "", cluster: str = "") -> None: else: raise ValueError(r.text) - def get_streaming_tenant(self, tenant: str = "") -> OPS_API_RESPONSE: + def get_streaming_tenant( + self, tenant: str = "", timeout_info: TimeoutInfoWideType = None + ) -> OPS_API_RESPONSE: """ Retrieve information about the limits and usage of a specific streaming tenant. 
@@ -947,4 +1149,5 @@ def get_streaming_tenant(self, tenant: str = "") -> OPS_API_RESPONSE: return self._json_ops_request( method=http_methods.GET, path=f"/streaming/tenants/{tenant}/limits", + timeout_info=timeout_info, ) diff --git a/astrapy/core/utils.py b/astrapy/core/utils.py index 2cc3767c..ae9a6afe 100644 --- a/astrapy/core/utils.py +++ b/astrapy/core/utils.py @@ -1,5 +1,14 @@ from __future__ import annotations -from typing import Any, cast, Dict, Iterable, List, Optional, Union +from typing import ( + Any, + cast, + Dict, + Iterable, + List, + Optional, + TypedDict, + Union, +) import time import datetime import logging @@ -119,6 +128,29 @@ def compose_user_agent( return " ".join(all_user_agents) +class TimeoutInfo(TypedDict, total=False): + read: float + write: float + base: float + + +TimeoutInfoWideType = Union[TimeoutInfo, float, None] + + +def to_httpx_timeout(timeout_info: TimeoutInfoWideType) -> Union[httpx.Timeout, None]: + if timeout_info is None: + return None + if isinstance(timeout_info, float): + return httpx.Timeout(timeout_info) + elif isinstance(timeout_info, dict): + _base = timeout_info.get("base") or DEFAULT_TIMEOUT + _read = timeout_info.get("read") or _base + _write = timeout_info.get("write") or _base + return httpx.Timeout(_base, read=_read, write=_write) + else: + raise ValueError("Invalid timeout info provided.") + + def make_request( client: httpx.Client, base_url: str, @@ -130,6 +162,7 @@ def make_request( path: Optional[str], caller_name: Optional[str], caller_version: Optional[str], + timeout: Optional[Union[httpx.Timeout, float]], ) -> httpx.Response: """ Make an HTTP request to a specified URL. 
@@ -162,7 +195,7 @@ def make_request( url=f"{base_url}{path}", params=url_params, json=json_data, - timeout=DEFAULT_TIMEOUT, + timeout=timeout or DEFAULT_TIMEOUT, headers=request_headers, ) @@ -183,6 +216,7 @@ async def amake_request( url_params: Optional[Dict[str, Any]], caller_name: Optional[str], caller_version: Optional[str], + timeout: Optional[Union[httpx.Timeout, float]], ) -> httpx.Response: """ Make an HTTP request to a specified URL. @@ -215,7 +249,7 @@ async def amake_request( url=f"{base_url}{path}", params=url_params, json=json_data, - timeout=DEFAULT_TIMEOUT, + timeout=timeout or DEFAULT_TIMEOUT, headers=request_headers, ) diff --git a/tests/core/test_async_db_dml.py b/tests/core/test_async_db_dml.py index 3212cc60..e769e5b7 100644 --- a/tests/core/test_async_db_dml.py +++ b/tests/core/test_async_db_dml.py @@ -583,7 +583,6 @@ async def test_chunked_insert_many_failures( chunk_size=2, concurrency=2, ) - assert len((await async_empty_v_collection.find({}))["data"]["documents"]) >= 2 await async_empty_v_collection.delete_many({}) with pytest.raises(TypeError): @@ -605,7 +604,6 @@ async def test_chunked_insert_many_failures( chunk_size=2, concurrency=2, ) - assert len((await async_empty_v_collection.find({}))["data"]["documents"]) >= 2 await async_empty_v_collection.delete_many({}) with pytest.raises(APIRequestError): diff --git a/tests/core/test_db_dml.py b/tests/core/test_db_dml.py index 69fec531..1378b006 100644 --- a/tests/core/test_db_dml.py +++ b/tests/core/test_db_dml.py @@ -554,7 +554,6 @@ def test_chunked_insert_many_failures( chunk_size=2, concurrency=2, ) - assert len(empty_v_collection.find({})["data"]["documents"]) >= 2 empty_v_collection.delete_many({}) with pytest.raises(TypeError): @@ -576,7 +575,6 @@ def test_chunked_insert_many_failures( chunk_size=2, concurrency=2, ) - assert len(empty_v_collection.find({})["data"]["documents"]) >= 2 empty_v_collection.delete_many({}) with pytest.raises(APIRequestError): From 
cc9439f0f98888065c9b8ed7553bc39808439835 Mon Sep 17 00:00:00 2001 From: Stefano Lottini Date: Thu, 14 Mar 2024 18:41:31 +0100 Subject: [PATCH 02/13] test for timeout behaviour to db/collection, sync/async, read/write --- astrapy/core/utils.py | 2 +- tests/core/test_timeouts.py | 119 ++++++++++++++++++++++++++++++++++++ 2 files changed, 120 insertions(+), 1 deletion(-) create mode 100644 tests/core/test_timeouts.py diff --git a/astrapy/core/utils.py b/astrapy/core/utils.py index ae9a6afe..23409947 100644 --- a/astrapy/core/utils.py +++ b/astrapy/core/utils.py @@ -140,7 +140,7 @@ class TimeoutInfo(TypedDict, total=False): def to_httpx_timeout(timeout_info: TimeoutInfoWideType) -> Union[httpx.Timeout, None]: if timeout_info is None: return None - if isinstance(timeout_info, float): + if isinstance(timeout_info, float) or isinstance(timeout_info, int): return httpx.Timeout(timeout_info) elif isinstance(timeout_info, dict): _base = timeout_info.get("base") or DEFAULT_TIMEOUT diff --git a/tests/core/test_timeouts.py b/tests/core/test_timeouts.py new file mode 100644 index 00000000..25e109fa --- /dev/null +++ b/tests/core/test_timeouts.py @@ -0,0 +1,119 @@ +# Copyright DataStax, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+"""
+Tests for the timeout behaviour of databases and collections (sync and async).
+"""
+
+import logging
+
+import pytest
+import httpx
+
+from ..conftest import AstraDBCredentials
+from .conftest import TEST_SKIP_COLLECTION_DELETE
+from astrapy.core.db import (
+    AstraDB,
+    AstraDBCollection,
+    AsyncAstraDB,
+    AsyncAstraDBCollection,
+)
+from astrapy.core.defaults import DEFAULT_KEYSPACE_NAME
+
+TEST_CREATE_DELETE_VECTOR_COLLECTION_NAME = "ephemeral_v_col"
+TEST_CREATE_DELETE_NONVECTOR_COLLECTION_NAME = "ephemeral_non_v_col"
+
+logger = logging.getLogger(__name__)
+
+
+@pytest.mark.describe("should obey timeout requirements for databases")
+def test_db_timeout_sync(db: AstraDB) -> None:
+    db.get_collections()
+    db.get_collections(timeout_info=10)
+    with pytest.raises(httpx.TimeoutException):
+        db.get_collections(timeout_info=0.0001)
+    with pytest.raises(httpx.TimeoutException):
+        db.get_collections(timeout_info={"read": 0.0001})
+
+
+@pytest.mark.describe("should obey timeout requirements for databases (async)")
+async def test_db_timeout_async(async_db: AsyncAstraDB) -> None:
+    await async_db.get_collections()
+    await async_db.get_collections(timeout_info=10)
+    with pytest.raises(httpx.TimeoutException):
+        await async_db.get_collections(timeout_info=0.0001)
+    with pytest.raises(httpx.TimeoutException):
+        await async_db.get_collections(timeout_info={"read": 0.0001})
+
+
+@pytest.mark.describe("should obey timeout requirements for collection reads")
+def test_collection_read_timeout_sync(
+    pagination_v_collection: AstraDBCollection,
+) -> None:
+    pagination_v_collection.vector_find([0.1, -0.1], limit=200)
+    pagination_v_collection.vector_find([0.1, -0.1], limit=200, timeout_info=10)
+    with pytest.raises(httpx.TimeoutException):
+        pagination_v_collection.vector_find([0.1, -0.1], limit=200, timeout_info=0.0001)
+    with pytest.raises(httpx.TimeoutException):
+        pagination_v_collection.vector_find(
+            [0.1, -0.1], limit=200, timeout_info={"read": 0.0001}
+        )
+
+
+@pytest.mark.describe("should obey timeout requirements for collection reads (async)") +async def test_collection_read_timeout_async( + async_pagination_v_collection: AsyncAstraDBCollection, +) -> None: + await async_pagination_v_collection.vector_find([0.1, -0.1], limit=200) + await async_pagination_v_collection.vector_find( + [0.1, -0.1], limit=200, timeout_info=10 + ) + with pytest.raises(httpx.TimeoutException): + await async_pagination_v_collection.vector_find( + [0.1, -0.1], limit=200, timeout_info=0.0001 + ) + with pytest.raises(httpx.TimeoutException): + await async_pagination_v_collection.vector_find( + [0.1, -0.1], limit=200, timeout_info={"read": 0.0001} + ) + + +@pytest.mark.describe("should obey timeout requirements for collection writes") +def test_collection_write_timeout_sync( + writable_v_collection: AstraDBCollection, +) -> None: + documents = [{"a": ["a" * 10] * 1000}] * 20 + + writable_v_collection.insert_many(documents) + writable_v_collection.insert_many(documents, timeout_info=10) + with pytest.raises(httpx.TimeoutException): + writable_v_collection.insert_many(documents, timeout_info=0.0001) + with pytest.raises(httpx.TimeoutException): + writable_v_collection.insert_many(documents, timeout_info={"write": 0.0001}) + + +@pytest.mark.describe("should obey timeout requirements for collection writes (async)") +async def test_collection_write_timeout_async( + async_writable_v_collection: AsyncAstraDBCollection, +) -> None: + documents = [{"a": ["a" * 10] * 1000}] * 20 + + await async_writable_v_collection.insert_many(documents) + await async_writable_v_collection.insert_many(documents, timeout_info=10) + with pytest.raises(httpx.TimeoutException): + await async_writable_v_collection.insert_many(documents, timeout_info=0.000001) + with pytest.raises(httpx.TimeoutException): + await async_writable_v_collection.insert_many( + documents, timeout_info={"write": 0.000001} + ) From 0ad4a149dbf235e5799337c3ba2b7b0b4f02252f Mon Sep 17 00:00:00 2001 From: 
Stefano Lottini Date: Fri, 15 Mar 2024 01:45:11 +0100 Subject: [PATCH 03/13] full max_time_ms support for all single-request methods of db and coll --- astrapy/collection.py | 134 +++++++++++++++--- astrapy/cursors.py | 52 +++++++ astrapy/database.py | 120 ++++++++++++---- astrapy/exceptions.py | 58 +++++++- astrapy/info.py | 20 ++- tests/core/test_timeouts.py | 3 - .../integration/test_timeout_async.py | 77 ++++++++++ .../integration/test_timeout_sync.py | 75 ++++++++++ 8 files changed, 484 insertions(+), 55 deletions(-) create mode 100644 tests/idiomatic/integration/test_timeout_async.py create mode 100644 tests/idiomatic/integration/test_timeout_sync.py diff --git a/astrapy/collection.py b/astrapy/collection.py index 1f15e7c2..c4478f0d 100644 --- a/astrapy/collection.py +++ b/astrapy/collection.py @@ -37,6 +37,7 @@ UpdateManyException, recast_method_sync, recast_method_async, + base_timeout_info, ) from astrapy.constants import ( DocumentType, @@ -265,7 +266,7 @@ def set_caller( caller_version=caller_version, ) - def options(self) -> Dict[str, Any]: + def options(self, max_time_ms: Optional[int] = None) -> Dict[str, Any]: """ Get the collection options, i.e. its configuration as read from the database. @@ -273,6 +274,9 @@ def options(self) -> Dict[str, Any]: without caching mechanisms: this ensures up-to-date information for usages such as real-time collection validation by the application. + Args: + max_time_ms: a timeout, in milliseconds, for the underlying HTTP request. + Returns: a dictionary expressing the collection as a set of key-value pairs matching the arguments of a `create_collection` call. 
@@ -281,7 +285,7 @@ def options(self) -> Dict[str, Any]: self_dicts = [ coll_dict - for coll_dict in self.database.list_collections() + for coll_dict in self.database.list_collections(max_time_ms=max_time_ms) if coll_dict["name"] == self.name ] if self_dicts: @@ -348,6 +352,7 @@ def full_name(self) -> str: def insert_one( self, document: DocumentType, + max_time_ms: Optional[int] = None, ) -> InsertOneResult: """ Insert a single document in the collection in an atomic operation. @@ -356,6 +361,7 @@ def insert_one( document: the dictionary expressing the document to insert. The `_id` field of the document can be left out, in which case it will be created automatically. + max_time_ms: a timeout, in milliseconds, for the underlying HTTP request. Returns: an InsertOneResult object. @@ -383,7 +389,10 @@ def insert_one( the insertion fails. """ - io_response = self._astra_db_collection.insert_one(document) + io_response = self._astra_db_collection.insert_one( + document, + timeout_info=base_timeout_info(max_time_ms), + ) if "insertedIds" in io_response.get("status", {}): if io_response["status"]["insertedIds"]: inserted_id = io_response["status"]["insertedIds"][0] @@ -572,6 +581,7 @@ def find( skip: Optional[int] = None, limit: Optional[int] = None, sort: Optional[SortType] = None, + max_time_ms: Optional[int] = None, ) -> Cursor: """ Find documents on the collection, matching a certain provided filter. @@ -606,6 +616,9 @@ def find( for lack of matching documents), nothing more is returned. sort: with this dictionary parameter one can control the order the documents are returned. See the Note about sorting for details. + max_time_ms: a timeout, in milliseconds, for each single one + of the underlying HTTP requests used to fetch documents as the + cursor is iterated over. 
Returns: a Cursor object representing iterations over the matching documents @@ -636,6 +649,7 @@ def find( collection=self, filter=filter, projection=projection, + max_time_ms=max_time_ms, ) .skip(skip) .limit(limit) @@ -648,6 +662,7 @@ def find_one( *, projection: Optional[ProjectionType] = None, sort: Optional[SortType] = None, + max_time_ms: Optional[int] = None, ) -> Union[DocumentType, None]: """ Run a search, returning the first document in the collection that matches @@ -669,6 +684,7 @@ def find_one( The default is to return the whole documents. sort: with this dictionary parameter one can control the order the documents are returned. See the Note about sorting for details. + max_time_ms: a timeout, in milliseconds, for the underlying HTTP request. Returns: a dictionary expressing the required document, otherwise None. @@ -684,6 +700,7 @@ def find_one( skip=None, limit=1, sort=sort, + max_time_ms=max_time_ms, ) try: document = fo_cursor.__next__() @@ -749,6 +766,7 @@ def count_documents( self, filter: Dict[str, Any], upper_bound: int, + max_time_ms: Optional[int] = None, ) -> int: """ Count the documents in the collection matching the specified filter. @@ -767,6 +785,7 @@ def count_documents( Furthermore, if the actual number of documents exceeds the maximum count that the Data API can reach (regardless of upper_bound), an exception will be raised. + max_time_ms: a timeout, in milliseconds, for the underlying HTTP request. Returns: the exact count of matching documents. @@ -782,7 +801,10 @@ def count_documents( by this method if this limit is encountered. 
""" - cd_response = self._astra_db_collection.count_documents(filter=filter) + cd_response = self._astra_db_collection.count_documents( + filter=filter, + timeout_info=base_timeout_info(max_time_ms), + ) if "count" in cd_response.get("status", {}): count: int = cd_response["status"]["count"] if cd_response["status"].get("moreData", False): @@ -814,6 +836,7 @@ def find_one_and_replace( sort: Optional[SortType] = None, upsert: bool = False, return_document: str = ReturnDocument.BEFORE, + max_time_ms: Optional[int] = None, ) -> Union[DocumentType, None]: """ Find a document on the collection and replace it entirely with a new one, @@ -847,6 +870,7 @@ def find_one_and_replace( the document found on database is returned; if set to `ReturnDocument.AFTER`, or the string "after", the new document is returned. The default is "before". + max_time_ms: a timeout, in milliseconds, for the underlying HTTP request. Returns: A document (or a projection thereof, as required), either the one @@ -866,6 +890,7 @@ def find_one_and_replace( projection=normalize_optional_projection(projection), sort=sort, options=options, + timeout_info=base_timeout_info(max_time_ms), ) if "document" in fo_response.get("data", {}): ret_document = fo_response.get("data", {}).get("document") @@ -886,6 +911,7 @@ def replace_one( replacement: DocumentType, *, upsert: bool = False, + max_time_ms: Optional[int] = None, ) -> UpdateResult: """ Replace a single document on the collection with a new one, @@ -904,6 +930,7 @@ def replace_one( If True, `replacement` is inserted as a new document if no matches are found on the collection. If False, the operation silently does nothing in case of no matches. + max_time_ms: a timeout, in milliseconds, for the underlying HTTP request. Returns: an UpdateResult object summarizing the outcome of the replace operation. 
@@ -916,6 +943,7 @@ def replace_one( replacement=replacement, filter=filter, options=options, + timeout_info=base_timeout_info(max_time_ms), ) if "document" in fo_response.get("data", {}): fo_status = fo_response.get("status") or {} @@ -940,6 +968,7 @@ def find_one_and_update( sort: Optional[SortType] = None, upsert: bool = False, return_document: str = ReturnDocument.BEFORE, + max_time_ms: Optional[int] = None, ) -> Union[DocumentType, None]: """ Find a document on the collection and update it as requested, @@ -979,6 +1008,7 @@ def find_one_and_update( the document found on database is returned; if set to `ReturnDocument.AFTER`, or the string "after", the new document is returned. The default is "before". + max_time_ms: a timeout, in milliseconds, for the underlying HTTP request. Returns: A document (or a projection thereof, as required), either the one @@ -998,6 +1028,7 @@ def find_one_and_update( projection=normalize_optional_projection(projection), sort=sort, options=options, + timeout_info=base_timeout_info(max_time_ms), ) if "document" in fo_response.get("data", {}): ret_document = fo_response.get("data", {}).get("document") @@ -1018,6 +1049,7 @@ def update_one( update: Dict[str, Any], *, upsert: bool = False, + max_time_ms: Optional[int] = None, ) -> UpdateResult: """ Update a single document on the collection as requested, @@ -1042,6 +1074,7 @@ def update_one( to an empty document) is inserted if no matches are found on the collection. If False, the operation silently does nothing in case of no matches. + max_time_ms: a timeout, in milliseconds, for the underlying HTTP request. Returns: an UpdateResult object summarizing the outcome of the update operation. 
@@ -1054,6 +1087,7 @@ def update_one( update=update, filter=filter, options=options, + timeout_info=base_timeout_info(max_time_ms), ) if "document" in fo_response.get("data", {}): fo_status = fo_response.get("status") or {} @@ -1161,6 +1195,7 @@ def find_one_and_delete( *, projection: Optional[ProjectionType] = None, sort: Optional[SortType] = None, + max_time_ms: Optional[int] = None, ) -> Union[DocumentType, None]: """ Find a document in the collection and delete it. The deleted document, @@ -1186,6 +1221,7 @@ def find_one_and_delete( order of the documents matching the filter, effectively determining what document will come first and hence be the deleted one. See the `find` method for more on sorting. + max_time_ms: a timeout, in milliseconds, for the underlying HTTP request. Returns: Either the document (or a projection thereof, as requested), or None @@ -1197,6 +1233,7 @@ def find_one_and_delete( sort=sort, filter=filter, projection=_projection, + timeout_info=base_timeout_info(max_time_ms), ) if "document" in fo_response.get("data", {}): document = fo_response["data"]["document"] @@ -1215,6 +1252,7 @@ def find_one_and_delete( def delete_one( self, filter: Dict[str, Any], + max_time_ms: Optional[int] = None, ) -> DeleteResult: """ Delete one document matching a provided filter. @@ -1229,12 +1267,15 @@ def delete_one( {"price": {"$le": 100}} {"$and": [{"name": "John"}, {"price": {"$le": 100}}]} See the Data API documentation for the full set of operators. + max_time_ms: a timeout, in milliseconds, for the underlying HTTP request. Returns: a DeleteResult object summarizing the outcome of the delete operation. 
""" - do_response = self._astra_db_collection.delete_one_by_predicate(filter=filter) + do_response = self._astra_db_collection.delete_one_by_predicate( + filter=filter, timeout_info=base_timeout_info(max_time_ms) + ) if "deletedCount" in do_response.get("status", {}): deleted_count = do_response["status"]["deletedCount"] if deleted_count == -1: @@ -1327,10 +1368,13 @@ def delete_many( ) @recast_method_sync - def delete_all(self) -> Dict[str, Any]: + def delete_all(self, max_time_ms: Optional[int] = None) -> Dict[str, Any]: """ Delete all documents in a collection. + Args: + max_time_ms: a timeout, in milliseconds, for the underlying HTTP request. + Returns: a dictionary of the form {"ok": 1} to signal successful deletion. @@ -1338,7 +1382,9 @@ def delete_all(self) -> Dict[str, Any]: Use with caution. """ - dm_response = self._astra_db_collection.delete_many(filter={}) + dm_response = self._astra_db_collection.delete_many( + filter={}, timeout_info=base_timeout_info(max_time_ms) + ) deleted_count = dm_response["status"]["deletedCount"] if deleted_count == -1: return {"ok": 1} @@ -1481,11 +1527,14 @@ def _execute_as_either( else: return reduce_bulk_write_results(bulk_write_successes) - def drop(self) -> Dict[str, Any]: + def drop(self, max_time_ms: Optional[int] = None) -> Dict[str, Any]: """ Drop the collection, i.e. delete it from the database along with all the documents it contains. + Args: + max_time_ms: a timeout, in milliseconds, for the underlying HTTP request. + Returns: a dictionary of the form {"ok": 1} to signal successful deletion. @@ -1493,7 +1542,7 @@ def drop(self) -> Dict[str, Any]: Use with caution. 
""" - return self.database.drop_collection(self) # type: ignore[no-any-return] + return self.database.drop_collection(self, max_time_ms=max_time_ms) # type: ignore[no-any-return] class AsyncCollection: @@ -1665,7 +1714,7 @@ def set_caller( caller_version=caller_version, ) - async def options(self) -> Dict[str, Any]: + async def options(self, max_time_ms: Optional[int] = None) -> Dict[str, Any]: """ Get the collection options, i.e. its configuration as read from the database. @@ -1673,6 +1722,9 @@ async def options(self) -> Dict[str, Any]: without caching mechanisms: this ensures up-to-date information for usages such as real-time collection validation by the application. + Args: + max_time_ms: a timeout, in milliseconds, for the underlying HTTP request. + Returns: a dictionary expressing the collection as a set of key-value pairs matching the arguments of a `create_collection` call. @@ -1681,7 +1733,9 @@ async def options(self) -> Dict[str, Any]: self_dicts = [ coll_dict - async for coll_dict in self.database.list_collections() + async for coll_dict in self.database.list_collections( + max_time_ms=max_time_ms + ) if coll_dict["name"] == self.name ] if self_dicts: @@ -1748,6 +1802,7 @@ def full_name(self) -> str: async def insert_one( self, document: DocumentType, + max_time_ms: Optional[int] = None, ) -> InsertOneResult: """ Insert a single document in the collection in an atomic operation. @@ -1756,6 +1811,7 @@ async def insert_one( document: the dictionary expressing the document to insert. The `_id` field of the document can be left out, in which case it will be created automatically. + max_time_ms: a timeout, in milliseconds, for the underlying HTTP request. Returns: an InsertOneResult object. @@ -1766,7 +1822,10 @@ async def insert_one( the insertion fails. 
""" - io_response = await self._astra_db_collection.insert_one(document) + io_response = await self._astra_db_collection.insert_one( + document, + timeout_info=base_timeout_info(max_time_ms), + ) if "insertedIds" in io_response.get("status", {}): if io_response["status"]["insertedIds"]: inserted_id = io_response["status"]["insertedIds"][0] @@ -1957,6 +2016,7 @@ def find( skip: Optional[int] = None, limit: Optional[int] = None, sort: Optional[SortType] = None, + max_time_ms: Optional[int] = None, ) -> AsyncCursor: """ Find documents on the collection, matching a certain provided filter. @@ -1991,6 +2051,9 @@ def find( for lack of matching documents), nothing more is returned. sort: with this dictionary parameter one can control the order the documents are returned. See the Note about sorting for details. + max_time_ms: a timeout, in milliseconds, for each single one + of the underlying HTTP requests used to fetch documents as the + cursor is iterated over. Returns: an AsyncCursor object representing iterations over the matching documents @@ -2021,6 +2084,7 @@ def find( collection=self, filter=filter, projection=projection, + max_time_ms=max_time_ms, ) .skip(skip) .limit(limit) @@ -2033,6 +2097,7 @@ async def find_one( *, projection: Optional[ProjectionType] = None, sort: Optional[SortType] = None, + max_time_ms: Optional[int] = None, ) -> Union[DocumentType, None]: """ Run a search, returning the first document in the collection that matches @@ -2054,6 +2119,7 @@ async def find_one( The default is to return the whole documents. sort: with this dictionary parameter one can control the order the documents are returned. See the Note about sorting for details. + max_time_ms: a timeout, in milliseconds, for the underlying HTTP request. Returns: a dictionary expressing the required document, otherwise None. 
@@ -2069,6 +2135,7 @@ async def find_one( skip=None, limit=1, sort=sort, + max_time_ms=max_time_ms, ) try: document = await fo_cursor.__anext__() @@ -2127,6 +2194,7 @@ async def count_documents( self, filter: Dict[str, Any], upper_bound: int, + max_time_ms: Optional[int] = None, ) -> int: """ Count the documents in the collection matching the specified filter. @@ -2145,6 +2213,7 @@ async def count_documents( Furthermore, if the actual number of documents exceeds the maximum count that the Data API can reach (regardless of upper_bound), an exception will be raised. + max_time_ms: a timeout, in milliseconds, for the underlying HTTP request. Returns: the exact count of matching documents. @@ -2160,7 +2229,10 @@ async def count_documents( by this method if this limit is encountered. """ - cd_response = await self._astra_db_collection.count_documents(filter=filter) + cd_response = await self._astra_db_collection.count_documents( + filter=filter, + timeout_info=base_timeout_info(max_time_ms), + ) if "count" in cd_response.get("status", {}): count: int = cd_response["status"]["count"] if cd_response["status"].get("moreData", False): @@ -2192,6 +2264,7 @@ async def find_one_and_replace( sort: Optional[SortType] = None, upsert: bool = False, return_document: str = ReturnDocument.BEFORE, + max_time_ms: Optional[int] = None, ) -> Union[DocumentType, None]: """ Find a document on the collection and replace it entirely with a new one, @@ -2226,6 +2299,7 @@ async def find_one_and_replace( the document found on database is returned; if set to `ReturnDocument.AFTER`, or the string "after", the new document is returned. The default is "before". + max_time_ms: a timeout, in milliseconds, for the underlying HTTP request. 
Returns: A document, either the one before the replace operation or the @@ -2244,6 +2318,7 @@ async def find_one_and_replace( projection=normalize_optional_projection(projection), sort=sort, options=options, + timeout_info=base_timeout_info(max_time_ms), ) if "document" in fo_response.get("data", {}): ret_document = fo_response.get("data", {}).get("document") @@ -2264,6 +2339,7 @@ async def replace_one( replacement: DocumentType, *, upsert: bool = False, + max_time_ms: Optional[int] = None, ) -> UpdateResult: """ Replace a single document on the collection with a new one, @@ -2282,6 +2358,7 @@ async def replace_one( If True, `replacement` is inserted as a new document if no matches are found on the collection. If False, the operation silently does nothing in case of no matches. + max_time_ms: a timeout, in milliseconds, for the underlying HTTP request. Returns: an UpdateResult object summarizing the outcome of the replace operation. @@ -2294,6 +2371,7 @@ async def replace_one( replacement=replacement, filter=filter, options=options, + timeout_info=base_timeout_info(max_time_ms), ) if "document" in fo_response.get("data", {}): fo_status = fo_response.get("status") or {} @@ -2318,6 +2396,7 @@ async def find_one_and_update( sort: Optional[SortType] = None, upsert: bool = False, return_document: str = ReturnDocument.BEFORE, + max_time_ms: Optional[int] = None, ) -> Union[DocumentType, None]: """ Find a document on the collection and update it as requested, @@ -2357,6 +2436,7 @@ async def find_one_and_update( the document found on database is returned; if set to `ReturnDocument.AFTER`, or the string "after", the new document is returned. The default is "before". + max_time_ms: a timeout, in milliseconds, for the underlying HTTP request. 
Returns: A document (or a projection thereof, as required), either the one @@ -2376,6 +2456,7 @@ async def find_one_and_update( projection=normalize_optional_projection(projection), sort=sort, options=options, + timeout_info=base_timeout_info(max_time_ms), ) if "document" in fo_response.get("data", {}): ret_document = fo_response.get("data", {}).get("document") @@ -2396,6 +2477,7 @@ async def update_one( update: Dict[str, Any], *, upsert: bool = False, + max_time_ms: Optional[int] = None, ) -> UpdateResult: """ Update a single document on the collection as requested, @@ -2420,6 +2502,7 @@ async def update_one( to an empty document) is inserted if no matches are found on the collection. If False, the operation silently does nothing in case of no matches. + max_time_ms: a timeout, in milliseconds, for the underlying HTTP request. Returns: an UpdateResult object summarizing the outcome of the update operation. @@ -2432,6 +2515,7 @@ async def update_one( update=update, filter=filter, options=options, + timeout_info=base_timeout_info(max_time_ms), ) if "document" in fo_response.get("data", {}): fo_status = fo_response.get("status") or {} @@ -2539,6 +2623,7 @@ async def find_one_and_delete( *, projection: Optional[ProjectionType] = None, sort: Optional[SortType] = None, + max_time_ms: Optional[int] = None, ) -> Union[DocumentType, None]: """ Find a document in the collection and delete it. The deleted document, @@ -2564,6 +2649,7 @@ async def find_one_and_delete( order of the documents matching the filter, effectively determining what document will come first and hence be the deleted one. See the `find` method for more on sorting. + max_time_ms: a timeout, in milliseconds, for the underlying HTTP request. 
Returns: Either the document (or a projection thereof, as requested), or None @@ -2575,6 +2661,7 @@ async def find_one_and_delete( sort=sort, filter=filter, projection=_projection, + timeout_info=base_timeout_info(max_time_ms), ) if "document" in fo_response.get("data", {}): document = fo_response["data"]["document"] @@ -2593,6 +2680,7 @@ async def find_one_and_delete( async def delete_one( self, filter: Dict[str, Any], + max_time_ms: Optional[int] = None, ) -> DeleteResult: """ Delete one document matching a provided filter. @@ -2607,13 +2695,15 @@ async def delete_one( {"price": {"$le": 100}} {"$and": [{"name": "John"}, {"price": {"$le": 100}}]} See the Data API documentation for the full set of operators. + max_time_ms: a timeout, in milliseconds, for the underlying HTTP request. Returns: a DeleteResult object summarizing the outcome of the delete operation. """ do_response = await self._astra_db_collection.delete_one_by_predicate( - filter=filter + filter=filter, + timeout_info=base_timeout_info(max_time_ms), ) if "deletedCount" in do_response.get("status", {}): deleted_count = do_response["status"]["deletedCount"] @@ -2709,10 +2799,13 @@ async def delete_many( ) @recast_method_async - async def delete_all(self) -> Dict[str, Any]: + async def delete_all(self, max_time_ms: Optional[int] = None) -> Dict[str, Any]: """ Delete all documents in a collection. + Args: + max_time_ms: a timeout, in milliseconds, for the underlying HTTP request. + Returns: a dictionary of the form {"ok": 1} to signal successful deletion. @@ -2720,7 +2813,9 @@ async def delete_all(self) -> Dict[str, Any]: Use with caution. 
""" - dm_response = await self._astra_db_collection.delete_many(filter={}) + dm_response = await self._astra_db_collection.delete_many( + filter={}, timeout_info=base_timeout_info(max_time_ms) + ) deleted_count = dm_response["status"]["deletedCount"] if deleted_count == -1: return {"ok": 1} @@ -2856,11 +2951,14 @@ async def _concurrent_execute_as_either( else: return reduce_bulk_write_results(bulk_write_successes) - async def drop(self) -> Dict[str, Any]: + async def drop(self, max_time_ms: Optional[int] = None) -> Dict[str, Any]: """ Drop the collection, i.e. delete it from the database along with all the documents it contains. + Args: + max_time_ms: a timeout, in milliseconds, for the underlying HTTP request. + Returns: a dictionary of the form {"ok": 1} to signal successful deletion. @@ -2868,4 +2966,4 @@ async def drop(self) -> Dict[str, Any]: Use with caution. """ - return await self.database.drop_collection(self) # type: ignore[no-any-return] + return await self.database.drop_collection(self, max_time_ms=max_time_ms) # type: ignore[no-any-return] diff --git a/astrapy/cursors.py b/astrapy/cursors.py index 2d074fd8..2270b63d 100644 --- a/astrapy/cursors.py +++ b/astrapy/cursors.py @@ -36,6 +36,7 @@ CursorIsStartedException, recast_method_sync, recast_method_async, + base_timeout_info, ) from astrapy.constants import ( DocumentType, @@ -144,11 +145,14 @@ class BaseCursor: """ Represents a generic Cursor over query results, regardless of whether synchronous or asynchronous. It cannot be instantiated. + + See classes Cursor and AsyncCursor for more information. 
""" _collection: Union[Collection, AsyncCollection] _filter: Optional[Dict[str, Any]] _projection: Optional[ProjectionType] + _max_time_ms: Optional[int] _limit: Optional[int] _skip: Optional[int] _sort: Optional[Dict[str, Any]] @@ -164,6 +168,7 @@ def __init__( collection: Union[Collection, AsyncCollection], filter: Optional[Dict[str, Any]], projection: Optional[ProjectionType], + max_time_ms: Optional[int], ) -> None: raise NotImplementedError @@ -216,6 +221,7 @@ def _copy( self: BC, *, projection: Optional[ProjectionType] = None, + max_time_ms: Optional[int] = None, limit: Optional[int] = None, skip: Optional[int] = None, started: Optional[bool] = None, @@ -225,6 +231,7 @@ def _copy( collection=self._collection, filter=self._filter, projection=projection or self._projection, + max_time_ms=max_time_ms or self._max_time_ms, ) # Cursor treated as mutable within this function scope: new_cursor._limit = limit if limit is not None else self._limit @@ -382,6 +389,25 @@ class Cursor(BaseCursor): Generally cursors are not supposed to be instantiated directly, rather they are obtained by invoking the `find` method on a collection. + + Attributes: + collection: the collection to find documents in + filter: a predicate expressed as a dictionary according to the + Data API filter syntax. Examples are: + {} + {"name": "John"} + {"price": {"$le": 100}} + {"$and": [{"name": "John"}, {"price": {"$le": 100}}]} + See the Data API documentation for the full set of operators. + projection: used to select a subset of fields in the document being + returned. The projection can be: an iterable over the field names + to return; a dictionary {field_name: True} to positively select + certain fields; or a dictionary {field_name: False} if one wants + to discard some fields from the response. + The default is to return the whole documents. 
+ max_time_ms: a timeout, in milliseconds, for each single one + of the underlying HTTP requests used to fetch documents as the + cursor is iterated over. """ def __init__( @@ -389,10 +415,12 @@ def __init__( collection: Collection, filter: Optional[Dict[str, Any]], projection: Optional[ProjectionType], + max_time_ms: Optional[int], ) -> None: self._collection: Collection = collection self._filter = filter self._projection = projection + self._max_time_ms = max_time_ms self._limit: Optional[int] = None self._skip: Optional[int] = None self._sort: Optional[Dict[str, Any]] = None @@ -462,6 +490,7 @@ def _create_iterator(self) -> Iterator[DocumentType]: sort=pf_sort, options=_options, prefetched=FIND_PREFETCH, + timeout_info=base_timeout_info(self._max_time_ms), ) return iterator @@ -526,6 +555,25 @@ class AsyncCursor(BaseCursor): Generally cursors are not supposed to be instantiated directly, rather they are obtained by invoking the `find` method on a collection. + + Attributes: + collection: the collection to find documents in + filter: a predicate expressed as a dictionary according to the + Data API filter syntax. Examples are: + {} + {"name": "John"} + {"price": {"$le": 100}} + {"$and": [{"name": "John"}, {"price": {"$le": 100}}]} + See the Data API documentation for the full set of operators. + projection: used to select a subset of fields in the document being + returned. The projection can be: an iterable over the field names + to return; a dictionary {field_name: True} to positively select + certain fields; or a dictionary {field_name: False} if one wants + to discard some fields from the response. + The default is to return the whole documents. + max_time_ms: a timeout, in milliseconds, for each single one + of the underlying HTTP requests used to fetch documents as the + cursor is iterated over. 
""" def __init__( @@ -533,10 +581,12 @@ def __init__( collection: AsyncCollection, filter: Optional[Dict[str, Any]], projection: Optional[ProjectionType], + max_time_ms: Optional[int], ) -> None: self._collection: AsyncCollection = collection self._filter = filter self._projection = projection + self._max_time_ms = max_time_ms self._limit: Optional[int] = None self._skip: Optional[int] = None self._sort: Optional[Dict[str, Any]] = None @@ -606,6 +656,7 @@ def _create_iterator(self) -> AsyncIterator[DocumentType]: sort=pf_sort, options=_options, prefetched=FIND_PREFETCH, + timeout_info=base_timeout_info(self._max_time_ms), ) return iterator @@ -621,6 +672,7 @@ def _to_sync( collection=self._collection.to_sync(), filter=self._filter, projection=self._projection, + max_time_ms=self._max_time_ms, ) # Cursor treated as mutable within this function scope: new_cursor._limit = limit if limit is not None else self._limit diff --git a/astrapy/database.py b/astrapy/database.py index c34e3963..69611acf 100644 --- a/astrapy/database.py +++ b/astrapy/database.py @@ -23,6 +23,7 @@ DataAPIFaultyResponseException, recast_method_sync, recast_method_async, + base_timeout_info, ) from astrapy.cursors import AsyncCommandCursor, CommandCursor from astrapy.info import DatabaseInfo, get_database_info @@ -339,6 +340,7 @@ def create_collection( indexing: Optional[Dict[str, Any]] = None, additional_options: Optional[Dict[str, Any]] = None, check_exists: Optional[bool] = None, + max_time_ms: Optional[int] = None, ) -> Collection: """ Creates a collection on the database and return the Collection @@ -373,6 +375,7 @@ def create_collection( If it is False, the creation is attempted. In this case, for preexisting collections, the command will succeed or fail depending on whether the options match or not. + max_time_ms: a timeout, in milliseconds, for the underlying HTTP request. 
Returns: a (synchronous) Collection instance, representing the @@ -396,7 +399,9 @@ def create_collection( _check_exists = check_exists existing_names: List[str] if _check_exists: - existing_names = self.list_collection_names(namespace=namespace) + existing_names = self.list_collection_names( + namespace=namespace, max_time_ms=max_time_ms + ) else: existing_names = [] driver_db = self._astra_db.copy(namespace=namespace) @@ -412,19 +417,23 @@ def create_collection( options=_options, dimension=dimension, metric=metric, + timeout_info=base_timeout_info(max_time_ms), ) return self.get_collection(name, namespace=namespace) @recast_method_sync def drop_collection( - self, name_or_collection: Union[str, Collection] + self, + name_or_collection: Union[str, Collection], + max_time_ms: Optional[int] = None, ) -> Dict[str, Any]: """ Drop a collection from the database, along with all documents therein. Args: name_or_collection: either the name of a collection or - a Collection instance. + a Collection instance. + max_time_ms: a timeout, in milliseconds, for the underlying HTTP request. Returns: a dictionary in the form {"ok": 1} if the command succeeds. @@ -441,11 +450,15 @@ def drop_collection( _namespace = name_or_collection.namespace _name: str = name_or_collection.name dc_response = self._astra_db.copy(namespace=_namespace).delete_collection( - _name + _name, + timeout_info=base_timeout_info(max_time_ms), ) return dc_response.get("status", {}) # type: ignore[no-any-return] else: - dc_response = self._astra_db.delete_collection(name_or_collection) + dc_response = self._astra_db.delete_collection( + name_or_collection, + timeout_info=base_timeout_info(max_time_ms), + ) return dc_response.get("status", {}) # type: ignore[no-any-return] @recast_method_sync @@ -453,13 +466,15 @@ def list_collections( self, *, namespace: Optional[str] = None, + max_time_ms: Optional[int] = None, ) -> CommandCursor[Dict[str, Any]]: """ List all collections in a given namespace for this database. 
Args: namespace: the namespace to be inspected. If not specified, - the general setting for this database is assumed. + the general setting for this database is assumed. + max_time_ms: a timeout, in milliseconds, for the underlying HTTP request. Returns: a `CommandCursor` to iterate over dictionaries, each @@ -471,7 +486,9 @@ def list_collections( _client = self._astra_db.copy(namespace=namespace) else: _client = self._astra_db - gc_response = _client.get_collections(options={"explain": True}) + gc_response = _client.get_collections( + options={"explain": True}, timeout_info=base_timeout_info(max_time_ms) + ) if "collections" not in gc_response.get("status", {}): raise DataAPIFaultyResponseException( text="Faulty response from get_collections API command.", @@ -492,13 +509,15 @@ def list_collection_names( self, *, namespace: Optional[str] = None, + max_time_ms: Optional[int] = None, ) -> List[str]: """ List the names of all collections in a given namespace of this database. Args: namespace: the namespace to be inspected. If not specified, - the general setting for this database is assumed. + the general setting for this database is assumed. + max_time_ms: a timeout, in milliseconds, for the underlying HTTP request. Returns: a list of the collection names as strings, in no particular order. 
@@ -508,7 +527,9 @@ def list_collection_names( _client = self._astra_db.copy(namespace=namespace) else: _client = self._astra_db - gc_response = _client.get_collections() + gc_response = _client.get_collections( + timeout_info=base_timeout_info(max_time_ms) + ) if "collections" not in gc_response.get("status", {}): raise DataAPIFaultyResponseException( text="Faulty response from get_collections API command.", @@ -525,6 +546,7 @@ def command( *, namespace: Optional[str] = None, collection_name: Optional[str] = None, + max_time_ms: Optional[int] = None, ) -> Dict[str, Any]: """ Send a POST request to the Data API for this database with @@ -534,10 +556,11 @@ def command( body: a JSON-serializable dictionary, the payload of the request. Args: namespace: the namespace to use. Requests always target a namespace: - if not specified, the general setting for this database is assumed. - collection_name: if provided, the collection name is appended at the end - of the endpoint. In this way, this method allows collection-level - arbitrary POST requests as well. + if not specified, the general setting for this database is assumed. + collection_name: if provided, the collection name is appended at the end + of the endpoint. In this way, this method allows collection-level + arbitrary POST requests as well. + max_time_ms: a timeout, in milliseconds, for the underlying HTTP request. Returns: a dictionary with the response of the HTTP request. 
@@ -549,9 +572,15 @@ def command( _client = self._astra_db if collection_name: _collection = _client.collection(collection_name) - return _collection.post_raw_request(body=body) + return _collection.post_raw_request( + body=body, + timeout_info=base_timeout_info(max_time_ms), + ) else: - return _client.post_raw_request(body=body) + return _client.post_raw_request( + body=body, + timeout_info=base_timeout_info(max_time_ms), + ) class AsyncDatabase: @@ -832,6 +861,7 @@ async def create_collection( indexing: Optional[Dict[str, Any]] = None, additional_options: Optional[Dict[str, Any]] = None, check_exists: Optional[bool] = None, + max_time_ms: Optional[int] = None, ) -> AsyncCollection: """ Creates a collection on the database and return the AsyncCollection @@ -866,6 +896,7 @@ async def create_collection( If it is False, the creation is attempted. In this case, for preexisting collections, the command will succeed or fail depending on whether the options match or not. + max_time_ms: a timeout, in milliseconds, for the underlying HTTP request. Returns: an AsyncCollection instance, representing the newly-created collection. 
@@ -888,7 +919,9 @@ async def create_collection( _check_exists = check_exists existing_names: List[str] if _check_exists: - existing_names = await self.list_collection_names(namespace=namespace) + existing_names = await self.list_collection_names( + namespace=namespace, max_time_ms=max_time_ms + ) else: existing_names = [] driver_db = self._astra_db.copy(namespace=namespace) @@ -904,19 +937,23 @@ async def create_collection( options=_options, dimension=dimension, metric=metric, + timeout_info=base_timeout_info(max_time_ms), ) return await self.get_collection(name, namespace=namespace) @recast_method_async async def drop_collection( - self, name_or_collection: Union[str, AsyncCollection] + self, + name_or_collection: Union[str, AsyncCollection], + max_time_ms: Optional[int] = None, ) -> Dict[str, Any]: """ Drop a collection from the database, along with all documents therein. Args: name_or_collection: either the name of a collection or - an AsyncCollection instance. + an AsyncCollection instance. + max_time_ms: a timeout, in milliseconds, for the underlying HTTP request. Returns: a dictionary in the form {"ok": 1} if the command succeeds. 
@@ -934,10 +971,16 @@ async def drop_collection( _name = name_or_collection.name dc_response = await self._astra_db.copy( namespace=_namespace - ).delete_collection(_name) + ).delete_collection( + _name, + timeout_info=base_timeout_info(max_time_ms), + ) return dc_response.get("status", {}) # type: ignore[no-any-return] else: - dc_response = await self._astra_db.delete_collection(name_or_collection) + dc_response = await self._astra_db.delete_collection( + name_or_collection, + timeout_info=base_timeout_info(max_time_ms), + ) return dc_response.get("status", {}) # type: ignore[no-any-return] @recast_method_sync @@ -945,13 +988,15 @@ def list_collections( self, *, namespace: Optional[str] = None, + max_time_ms: Optional[int] = None, ) -> AsyncCommandCursor[Dict[str, Any]]: """ List all collections in a given namespace for this database. Args: namespace: the namespace to be inspected. If not specified, - the general setting for this database is assumed. + the general setting for this database is assumed. + max_time_ms: a timeout, in milliseconds, for the underlying HTTP request. Returns: an `AsyncCommandCursor` to iterate over dictionaries, each @@ -964,7 +1009,10 @@ def list_collections( _client = self._astra_db.copy(namespace=namespace) else: _client = self._astra_db - gc_response = _client.to_sync().get_collections(options={"explain": True}) + gc_response = _client.to_sync().get_collections( + options={"explain": True}, + timeout_info=base_timeout_info(max_time_ms), + ) if "collections" not in gc_response.get("status", {}): raise DataAPIFaultyResponseException( text="Faulty response from get_collections API command.", @@ -985,19 +1033,23 @@ async def list_collection_names( self, *, namespace: Optional[str] = None, + max_time_ms: Optional[int] = None, ) -> List[str]: """ List the names of all collections in a given namespace of this database. Args: namespace: the namespace to be inspected. If not specified, - the general setting for this database is assumed. 
+ the general setting for this database is assumed. + max_time_ms: a timeout, in milliseconds, for the underlying HTTP request. Returns: a list of the collection names as strings, in no particular order. """ - gc_response = await self._astra_db.copy(namespace=namespace).get_collections() + gc_response = await self._astra_db.copy(namespace=namespace).get_collections( + timeout_info=base_timeout_info(max_time_ms) + ) if "collections" not in gc_response.get("status", {}): raise DataAPIFaultyResponseException( text="Faulty response from get_collections API command.", @@ -1014,6 +1066,7 @@ async def command( *, namespace: Optional[str] = None, collection_name: Optional[str] = None, + max_time_ms: Optional[int] = None, ) -> Dict[str, Any]: """ Send a POST request to the Data API for this database with @@ -1023,10 +1076,11 @@ async def command( body: a JSON-serializable dictionary, the payload of the request. Args: namespace: the namespace to use. Requests always target a namespace: - if not specified, the general setting for this database is assumed. - collection_name: if provided, the collection name is appended at the end - of the endpoint. In this way, this method allows collection-level - arbitrary POST requests as well. + if not specified, the general setting for this database is assumed. + collection_name: if provided, the collection name is appended at the end + of the endpoint. In this way, this method allows collection-level + arbitrary POST requests as well. + max_time_ms: a timeout, in milliseconds, for the underlying HTTP request. Returns: a dictionary with the response of the HTTP request. 
@@ -1038,6 +1092,12 @@ async def command(
             _client = self._astra_db
         if collection_name:
             _collection = await _client.collection(collection_name)
-            return await _collection.post_raw_request(body=body)
+            return await _collection.post_raw_request(
+                body=body,
+                timeout_info=base_timeout_info(max_time_ms),
+            )
         else:
-            return await _client.post_raw_request(body=body)
+            return await _client.post_raw_request(
+                body=body,
+                timeout_info=base_timeout_info(max_time_ms),
+            )
diff --git a/astrapy/exceptions.py b/astrapy/exceptions.py
index b74d1579..cea76191 100644
--- a/astrapy/exceptions.py
+++ b/astrapy/exceptions.py
@@ -15,11 +15,13 @@
 from __future__ import annotations
 
 from functools import wraps
-from typing import Any, Awaitable, Callable, Dict, List, Optional
+from typing import Any, Awaitable, Callable, Dict, List, Optional, Union
 from dataclasses import dataclass
 
+import httpx
 
 from astrapy.core.api import APIRequestError
+from astrapy.core.utils import TimeoutInfo
 from astrapy.results import (
     BulkWriteResult,
     DeleteResult,
@@ -104,6 +106,18 @@ class DataAPIException(ValueError):
     pass
 
 
+@dataclass
+class DataAPITimeoutException(DataAPIException):
+    """
+    A Data API operation timed out (while connecting, reading, writing or waiting for a connection from the pool).
+    """
+
+    text: str
+    timeout_type: str
+    endpoint: Optional[str]
+    raw_payload: Optional[str]
+
+
 @dataclass
 class CursorIsStartedException(DataAPIException):
     """
@@ -422,6 +436,37 @@ class BulkWriteException(DataAPIResponseException):
     exceptions: List[DataAPIResponseException]
 
 
+def to_dataapi_timeout_exception(
+    httpx_timeout: httpx.TimeoutException,
+) -> DataAPITimeoutException:
+    text = str(httpx_timeout)
+    if isinstance(httpx_timeout, httpx.ConnectTimeout):
+        timeout_type = "connect"
+    elif isinstance(httpx_timeout, httpx.ReadTimeout):
+        timeout_type = "read"
+    elif isinstance(httpx_timeout, httpx.WriteTimeout):
+        timeout_type = "write"
+    elif isinstance(httpx_timeout, httpx.PoolTimeout):
+        timeout_type = "pool"
+    else:
+        timeout_type = "generic"
+    if httpx_timeout.request:
+        endpoint = 
str(httpx_timeout.request.url) + if isinstance(httpx_timeout.request.content, bytes): + raw_payload = httpx_timeout.request.content.decode() + else: + raw_payload = None + else: + endpoint = None + raw_payload = None + return DataAPITimeoutException( + text=text, + timeout_type=timeout_type, + endpoint=endpoint, + raw_payload=raw_payload, + ) + + def recast_method_sync(method: Callable[..., Any]) -> Callable[..., Any]: """ Decorator for a sync method liable to generate the core APIRequestError. @@ -436,6 +481,8 @@ def _wrapped_sync(*pargs: Any, **kwargs: Any) -> Any: raise DataAPIResponseException.from_response( command=exc.payload, raw_response=exc.response.json() ) + except httpx.TimeoutException as texc: + raise to_dataapi_timeout_exception(texc) return _wrapped_sync @@ -456,5 +503,14 @@ async def _wrapped_async(*pargs: Any, **kwargs: Any) -> Any: raise DataAPIResponseException.from_response( command=exc.payload, raw_response=exc.response.json() ) + except httpx.TimeoutException as texc: + raise to_dataapi_timeout_exception(texc) return _wrapped_async + + +def base_timeout_info(max_time_ms: Optional[int]) -> Union[TimeoutInfo, None]: + if max_time_ms is not None: + return {"base": max_time_ms / 1000.0} + else: + return None diff --git a/astrapy/info.py b/astrapy/info.py index d306ce22..ac259183 100644 --- a/astrapy/info.py +++ b/astrapy/info.py @@ -18,8 +18,14 @@ from dataclasses import dataclass from typing import Any, Dict, Optional +import httpx + from astrapy.core.ops import AstraDBOps -from astrapy.exceptions import DevOpsAPIException +from astrapy.exceptions import ( + DevOpsAPIException, + base_timeout_info, + to_dataapi_timeout_exception, +) database_id_finder = re.compile( @@ -45,7 +51,9 @@ def find_database_id(api_endpoint: str) -> Optional[str]: return None -def get_database_info(api_endpoint: str, token: str, namespace: str) -> DatabaseInfo: +def get_database_info( + api_endpoint: str, token: str, namespace: str, max_time_ms: Optional[int] = None +) 
-> DatabaseInfo: """ Fetch the relevant information through the DevOps API. @@ -53,6 +61,7 @@ def get_database_info(api_endpoint: str, token: str, namespace: str) -> Database api_endpoint: a full API endpoint for the Data Api. token: a valid token to access the database. namespace: the desired namespace that will be used in the result. + max_time_ms: a timeout, in milliseconds, for waiting on a response. Returns: A DatabaseInfo object. @@ -66,7 +75,12 @@ def get_database_info(api_endpoint: str, token: str, namespace: str) -> Database astra_db_ops = AstraDBOps(token=token) database_id = find_database_id(api_endpoint) if database_id: - gd_response = astra_db_ops.get_database(database=database_id) + try: + gd_response = astra_db_ops.get_database( + database=database_id, timeout_info=base_timeout_info(max_time_ms) + ) + except httpx.TimeoutException as texc: + raise to_dataapi_timeout_exception(texc) raw_info = gd_response["info"] if namespace not in raw_info["keyspaces"]: raise DevOpsAPIException(f"Namespace {namespace} not found on DB.") diff --git a/tests/core/test_timeouts.py b/tests/core/test_timeouts.py index 25e109fa..a51dc0c1 100644 --- a/tests/core/test_timeouts.py +++ b/tests/core/test_timeouts.py @@ -21,15 +21,12 @@ import pytest import httpx -from ..conftest import AstraDBCredentials -from .conftest import TEST_SKIP_COLLECTION_DELETE from astrapy.core.db import ( AstraDB, AstraDBCollection, AsyncAstraDB, AsyncAstraDBCollection, ) -from astrapy.core.defaults import DEFAULT_KEYSPACE_NAME TEST_CREATE_DELETE_VECTOR_COLLECTION_NAME = "ephemeral_v_col" TEST_CREATE_DELETE_NONVECTOR_COLLECTION_NAME = "ephemeral_non_v_col" diff --git a/tests/idiomatic/integration/test_timeout_async.py b/tests/idiomatic/integration/test_timeout_async.py new file mode 100644 index 00000000..e2010b91 --- /dev/null +++ b/tests/idiomatic/integration/test_timeout_async.py @@ -0,0 +1,77 @@ +# Copyright DataStax, Inc. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +from astrapy import AsyncCollection, AsyncDatabase + +from astrapy.exceptions import DataAPITimeoutException +from astrapy.info import get_database_info + + +class TestTimeoutAsync: + @pytest.mark.describe("test of collection count_documents timeout, async") + async def test_collection_count_documents_timeout_async( + self, + async_empty_collection: AsyncCollection, + ) -> None: + await async_empty_collection.insert_many([{"a": 1}] * 100) + assert await async_empty_collection.count_documents({}, upper_bound=150) == 100 + + with pytest.raises(DataAPITimeoutException) as exc: + await async_empty_collection.count_documents( + {}, upper_bound=150, max_time_ms=1 + ) + assert exc.value.timeout_type == "read" + assert exc.value.endpoint is not None + assert exc.value.raw_payload is not None + + @pytest.mark.describe("test of database info timeout, async") + async def test_database_info_timeout_async( + self, + async_database: AsyncDatabase, + ) -> None: + get_database_info( + async_database._astra_db.api_endpoint, + token=async_database._astra_db.token, + namespace=async_database.namespace, + ) + + with pytest.raises(DataAPITimeoutException) as exc: + get_database_info( + async_database._astra_db.api_endpoint, + token=async_database._astra_db.token, + namespace=async_database.namespace, + max_time_ms=1, + ) + assert exc.value.timeout_type == "read" + assert exc.value.endpoint is not None + assert 
exc.value.raw_payload is not None + + @pytest.mark.describe("test of cursor-based timeouts, async") + async def test_cursor_timeouts_async( + self, + async_empty_collection: AsyncCollection, + ) -> None: + await async_empty_collection.insert_one({"a": 1}) + + cur0 = async_empty_collection.find({}) + cur1 = async_empty_collection.find({}, max_time_ms=1) + await cur0.__anext__() + with pytest.raises(DataAPITimeoutException): + await cur1.__anext__() + + await async_empty_collection.find_one({}) + with pytest.raises(DataAPITimeoutException): + await async_empty_collection.find_one({}, max_time_ms=1) diff --git a/tests/idiomatic/integration/test_timeout_sync.py b/tests/idiomatic/integration/test_timeout_sync.py new file mode 100644 index 00000000..86fada1b --- /dev/null +++ b/tests/idiomatic/integration/test_timeout_sync.py @@ -0,0 +1,75 @@ +# Copyright DataStax, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import pytest + +from astrapy import Collection, Database + +from astrapy.exceptions import DataAPITimeoutException +from astrapy.info import get_database_info + + +class TestTimeoutSync: + @pytest.mark.describe("test of collection count_documents timeout, sync") + def test_collection_count_documents_timeout_sync( + self, + sync_empty_collection: Collection, + ) -> None: + sync_empty_collection.insert_many([{"a": 1}] * 100) + assert sync_empty_collection.count_documents({}, upper_bound=150) == 100 + + with pytest.raises(DataAPITimeoutException) as exc: + sync_empty_collection.count_documents({}, upper_bound=150, max_time_ms=1) + assert exc.value.timeout_type == "read" + assert exc.value.endpoint is not None + assert exc.value.raw_payload is not None + + @pytest.mark.describe("test of database info timeout, sync") + def test_database_info_timeout_sync( + self, + sync_database: Database, + ) -> None: + get_database_info( + sync_database._astra_db.api_endpoint, + token=sync_database._astra_db.token, + namespace=sync_database.namespace, + ) + + with pytest.raises(DataAPITimeoutException) as exc: + get_database_info( + sync_database._astra_db.api_endpoint, + token=sync_database._astra_db.token, + namespace=sync_database.namespace, + max_time_ms=1, + ) + assert exc.value.timeout_type == "read" + assert exc.value.endpoint is not None + assert exc.value.raw_payload is not None + + @pytest.mark.describe("test of cursor-based timeouts, sync") + def test_cursor_timeouts_sync( + self, + sync_empty_collection: Collection, + ) -> None: + sync_empty_collection.insert_one({"a": 1}) + + cur0 = sync_empty_collection.find({}) + cur1 = sync_empty_collection.find({}, max_time_ms=1) + cur0.__next__() + with pytest.raises(DataAPITimeoutException): + cur1.__next__() + + sync_empty_collection.find_one({}) + with pytest.raises(DataAPITimeoutException): + sync_empty_collection.find_one({}, max_time_ms=1) From 08b376c80567cff1375e3e6f01db28986f600570 Mon Sep 17 00:00:00 2001 From: Stefano 
Lottini Date: Fri, 15 Mar 2024 10:48:33 +0100 Subject: [PATCH 04/13] cursors, find and their distinct() support timeout --- astrapy/collection.py | 21 ++++-- astrapy/cursors.py | 66 ++++++++++++++++--- .../integration/test_timeout_async.py | 18 +++++ .../integration/test_timeout_sync.py | 18 +++++ 4 files changed, 111 insertions(+), 12 deletions(-) diff --git a/astrapy/collection.py b/astrapy/collection.py index c4478f0d..7f864bdb 100644 --- a/astrapy/collection.py +++ b/astrapy/collection.py @@ -650,6 +650,7 @@ def find( filter=filter, projection=projection, max_time_ms=max_time_ms, + overall_max_time_ms=None, ) .skip(skip) .limit(limit) @@ -713,6 +714,7 @@ def distinct( key: str, *, filter: Optional[FilterType] = None, + max_time_ms: Optional[int] = None, ) -> List[Any]: """ Return a list of the unique values of `key` across the documents @@ -743,6 +745,7 @@ def distinct( {"price": {"$le": 100}} {"$and": [{"name": "John"}, {"price": {"$le": 100}}]} See the Data API documentation for the full set of operators. + max_time_ms: a timeout, in milliseconds, for the operation. Returns: a list of all different values for `key` found across the documents @@ -756,10 +759,14 @@ def distinct( billing implications if the amount of matching documents is large. 
""" - return self.find( # type: ignore[no-any-return] + f_cursor = Cursor( + collection=self, filter=filter, projection={key: True}, - ).distinct(key) + max_time_ms=None, + overall_max_time_ms=max_time_ms, + ) + return f_cursor.distinct(key) # type: ignore[no-any-return] @recast_method_sync def count_documents( @@ -2085,6 +2092,7 @@ def find( filter=filter, projection=projection, max_time_ms=max_time_ms, + overall_max_time_ms=None, ) .skip(skip) .limit(limit) @@ -2148,6 +2156,7 @@ async def distinct( key: str, *, filter: Optional[FilterType] = None, + max_time_ms: Optional[int] = None, ) -> List[Any]: """ Return a list of the unique values of `key` across the documents @@ -2170,6 +2179,7 @@ async def distinct( {"price": {"$le": 100}} {"$and": [{"name": "John"}, {"price": {"$le": 100}}]} See the Data API documentation for the full set of operators. + max_time_ms: a timeout, in milliseconds, for the operation. Returns: a list of all different values for `key` found across the documents @@ -2183,11 +2193,14 @@ async def distinct( billing implications if the amount of matching documents is large. 
""" - cursor = self.find( + f_cursor = AsyncCursor( + collection=self, filter=filter, projection={key: True}, + max_time_ms=None, + overall_max_time_ms=max_time_ms, ) - return await cursor.distinct(key) # type: ignore[no-any-return] + return await f_cursor.distinct(key) # type: ignore[no-any-return] @recast_method_async async def count_documents( diff --git a/astrapy/cursors.py b/astrapy/cursors.py index 2270b63d..3c4a3777 100644 --- a/astrapy/cursors.py +++ b/astrapy/cursors.py @@ -16,6 +16,7 @@ import hashlib import json +import time from collections.abc import Iterator, AsyncIterator from typing import ( Any, @@ -34,6 +35,7 @@ from astrapy.core.utils import _normalize_payload_value from astrapy.exceptions import ( CursorIsStartedException, + DataAPITimeoutException, recast_method_sync, recast_method_async, base_timeout_info, @@ -153,6 +155,8 @@ class BaseCursor: _filter: Optional[Dict[str, Any]] _projection: Optional[ProjectionType] _max_time_ms: Optional[int] + _overall_max_time_ms: Optional[int] + _started_time_s: Optional[float] _limit: Optional[int] _skip: Optional[int] _sort: Optional[Dict[str, Any]] @@ -169,6 +173,7 @@ def __init__( filter: Optional[Dict[str, Any]], projection: Optional[ProjectionType], max_time_ms: Optional[int], + overall_max_time_ms: Optional[int], ) -> None: raise NotImplementedError @@ -222,6 +227,7 @@ def _copy( *, projection: Optional[ProjectionType] = None, max_time_ms: Optional[int] = None, + overall_max_time_ms: Optional[int] = None, limit: Optional[int] = None, skip: Optional[int] = None, started: Optional[bool] = None, @@ -232,6 +238,7 @@ def _copy( filter=self._filter, projection=projection or self._projection, max_time_ms=max_time_ms or self._max_time_ms, + overall_max_time_ms=overall_max_time_ms or self._overall_max_time_ms, ) # Cursor treated as mutable within this function scope: new_cursor._limit = limit if limit is not None else self._limit @@ -416,11 +423,16 @@ def __init__( filter: Optional[Dict[str, Any]], projection: 
Optional[ProjectionType], max_time_ms: Optional[int], + overall_max_time_ms: Optional[int], ) -> None: self._collection: Collection = collection self._filter = filter self._projection = projection - self._max_time_ms = max_time_ms + self._overall_max_time_ms = overall_max_time_ms + if overall_max_time_ms is not None and max_time_ms is not None: + self._max_time_ms = min(max_time_ms, overall_max_time_ms) + else: + self._max_time_ms = max_time_ms self._limit: Optional[int] = None self._skip: Optional[int] = None self._sort: Optional[Dict[str, Any]] = None @@ -445,6 +457,16 @@ def __next__(self) -> DocumentType: if self._iterator is None: self._iterator = self._create_iterator() self._started = True + # check for overall timing out + if self._overall_max_time_ms is not None: + _elapsed = time.time() - self._started_time_s # type: ignore[operator] + if _elapsed > (self._overall_max_time_ms / 1000.0): + raise DataAPITimeoutException( + text="Cursor timed out.", + timeout_type="generic", + endpoint=None, + raw_payload=None, + ) try: next_item = self._iterator.__next__() self._retrieved = self._retrieved + 1 @@ -489,9 +511,10 @@ def _create_iterator(self) -> Iterator[DocumentType]: projection=pf_projection, sort=pf_sort, options=_options, - prefetched=FIND_PREFETCH, + prefetched=0, timeout_info=base_timeout_info(self._max_time_ms), ) + self._started_time_s = time.time() return iterator @property @@ -503,7 +526,7 @@ def collection(self) -> Collection: return self._collection @recast_method_sync - def distinct(self, key: str) -> List[Any]: + def distinct(self, key: str, max_time_ms: Optional[int] = None) -> List[Any]: """ Compute a list of unique values for a specific field across all documents the cursor iterates through. @@ -521,6 +544,7 @@ def distinct(self, key: str) -> List[Any]: "field.3.subfield" if lists are encountered and no numeric index is specified, all items in the list are visited. + max_time_ms: a timeout, in milliseconds, for the operation. 
Note: this operation works at client-side by scrolling through all @@ -536,7 +560,11 @@ def distinct(self, key: str) -> List[Any]: _extractor = _create_document_key_extractor(key) _key = _reduce_distinct_key_to_safe(key) - d_cursor = self._copy(projection={_key: True}, started=False) + d_cursor = self._copy( + projection={_key: True}, + started=False, + overall_max_time_ms=max_time_ms, + ) for document in d_cursor: for item in _extractor(document): _item_hash = _hash_document(item) @@ -582,11 +610,16 @@ def __init__( filter: Optional[Dict[str, Any]], projection: Optional[ProjectionType], max_time_ms: Optional[int], + overall_max_time_ms: Optional[int], ) -> None: self._collection: AsyncCollection = collection self._filter = filter self._projection = projection - self._max_time_ms = max_time_ms + self._overall_max_time_ms = overall_max_time_ms + if overall_max_time_ms is not None and max_time_ms is not None: + self._max_time_ms = min(max_time_ms, overall_max_time_ms) + else: + self._max_time_ms = max_time_ms self._limit: Optional[int] = None self._skip: Optional[int] = None self._sort: Optional[Dict[str, Any]] = None @@ -611,6 +644,16 @@ async def __anext__(self) -> DocumentType: if self._iterator is None: self._iterator = self._create_iterator() self._started = True + # check for overall timing out + if self._overall_max_time_ms is not None: + _elapsed = time.time() - self._started_time_s # type: ignore[operator] + if _elapsed > (self._overall_max_time_ms / 1000.0): + raise DataAPITimeoutException( + text="Cursor timed out.", + timeout_type="generic", + endpoint=None, + raw_payload=None, + ) try: next_item = await self._iterator.__anext__() self._retrieved = self._retrieved + 1 @@ -655,9 +698,10 @@ def _create_iterator(self) -> AsyncIterator[DocumentType]: projection=pf_projection, sort=pf_sort, options=_options, - prefetched=FIND_PREFETCH, + prefetched=0, timeout_info=base_timeout_info(self._max_time_ms), ) + self._started_time_s = time.time() return iterator def 
_to_sync( @@ -673,6 +717,7 @@ def _to_sync( filter=self._filter, projection=self._projection, max_time_ms=self._max_time_ms, + overall_max_time_ms=self._overall_max_time_ms, ) # Cursor treated as mutable within this function scope: new_cursor._limit = limit if limit is not None else self._limit @@ -696,7 +741,7 @@ def collection(self) -> AsyncCollection: return self._collection @recast_method_async - async def distinct(self, key: str) -> List[Any]: + async def distinct(self, key: str, max_time_ms: Optional[int] = None) -> List[Any]: """ Compute a list of unique values for a specific field across all documents the cursor iterates through. @@ -714,6 +759,7 @@ async def distinct(self, key: str) -> List[Any]: "field.3.subfield" if lists are encountered and no numeric index is specified, all items in the list are visited. + max_time_ms: a timeout, in milliseconds, for the operation. Note: this operation works at client-side by scrolling through all @@ -729,7 +775,11 @@ async def distinct(self, key: str) -> List[Any]: _extractor = _create_document_key_extractor(key) _key = _reduce_distinct_key_to_safe(key) - d_cursor = self._copy(projection={_key: True}, started=False) + d_cursor = self._copy( + projection={_key: True}, + started=False, + overall_max_time_ms=max_time_ms, + ) async for document in d_cursor: for item in _extractor(document): _item_hash = _hash_document(item) diff --git a/tests/idiomatic/integration/test_timeout_async.py b/tests/idiomatic/integration/test_timeout_async.py index e2010b91..7d85437f 100644 --- a/tests/idiomatic/integration/test_timeout_async.py +++ b/tests/idiomatic/integration/test_timeout_async.py @@ -75,3 +75,21 @@ async def test_cursor_timeouts_async( await async_empty_collection.find_one({}) with pytest.raises(DataAPITimeoutException): await async_empty_collection.find_one({}, max_time_ms=1) + + @pytest.mark.describe("test of cursor-based overall timeouts, async") + async def test_cursor_overalltimeout_exceptions_async( + self, + 
async_empty_collection: AsyncCollection, + ) -> None: + acol = async_empty_collection + await acol.insert_many([{"a": 1}] * 1000) + + await acol.distinct("a", max_time_ms=5000) + with pytest.raises(DataAPITimeoutException): + await acol.distinct("a", max_time_ms=1) + + cur1 = acol.find({}) + cur2 = acol.find({}) + await cur1.distinct("a", max_time_ms=5000) + with pytest.raises(DataAPITimeoutException): + await cur2.distinct("a", max_time_ms=1) diff --git a/tests/idiomatic/integration/test_timeout_sync.py b/tests/idiomatic/integration/test_timeout_sync.py index 86fada1b..0e6c7d48 100644 --- a/tests/idiomatic/integration/test_timeout_sync.py +++ b/tests/idiomatic/integration/test_timeout_sync.py @@ -73,3 +73,21 @@ def test_cursor_timeouts_sync( sync_empty_collection.find_one({}) with pytest.raises(DataAPITimeoutException): sync_empty_collection.find_one({}, max_time_ms=1) + + @pytest.mark.describe("test of cursor-based overall timeouts, sync") + def test_cursor_overalltimeout_exceptions_sync( + self, + sync_empty_collection: Collection, + ) -> None: + col = sync_empty_collection + col.insert_many([{"a": 1}] * 1000) + + col.distinct("a", max_time_ms=5000) + with pytest.raises(DataAPITimeoutException): + col.distinct("a", max_time_ms=1) + + cur1 = col.find({}) + cur2 = col.find({}) + cur1.distinct("a", max_time_ms=5000) + with pytest.raises(DataAPITimeoutException): + cur2.distinct("a", max_time_ms=1) From 4a6142e85bf18ceb421cc35422e23a4391f76b2e Mon Sep 17 00:00:00 2001 From: Stefano Lottini Date: Mon, 18 Mar 2024 12:03:33 +0100 Subject: [PATCH 05/13] docstring for DataAPITimeoutException --- astrapy/exceptions.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/astrapy/exceptions.py b/astrapy/exceptions.py index cea76191..23d94107 100644 --- a/astrapy/exceptions.py +++ b/astrapy/exceptions.py @@ -109,7 +109,19 @@ class DataAPIException(ValueError): @dataclass class DataAPITimeoutException(DataAPIException): """ - TODO + A Data API 
operation timed out. This can be a request timeout occurring + during a specific HTTP request, or can happen over the course of a method + involving several requests in a row, such as a paginated find. + + Attributes: + text: a textual description of the error + timeout_type: this denotes the phase of the HTTP request when the event + occurred ("connect", "read", "write", "pool") or "generic" if there is + not a specific request associated to the exception. + endpoint: if the timeout is tied to a specific request, this is the + URL that the request was targeting. + raw_payload: if the timeout is tied to a specific request, this is the + associated payload (as a string). """ text: str From 7302804fb09e8351b8ba0607a8fd4c22e2bfb90e Mon Sep 17 00:00:00 2001 From: Stefano Lottini Date: Mon, 18 Mar 2024 15:08:08 +0100 Subject: [PATCH 06/13] insert_many, delete_many, update_many support method-wide timeout --- astrapy/collection.py | 58 ++++++++++++++--- astrapy/exceptions.py | 50 +++++++++++++++ .../integration/test_timeout_async.py | 64 +++++++++++++++++++ .../integration/test_timeout_sync.py | 62 ++++++++++++++++++ tests/idiomatic/unit/test_timeouts.py | 38 +++++++++++ 5 files changed, 262 insertions(+), 10 deletions(-) create mode 100644 tests/idiomatic/unit/test_timeouts.py diff --git a/astrapy/collection.py b/astrapy/collection.py index 7f864bdb..145125bf 100644 --- a/astrapy/collection.py +++ b/astrapy/collection.py @@ -17,7 +17,6 @@ import asyncio import json from concurrent.futures import ThreadPoolExecutor -from functools import partial from typing import Any, Dict, Iterable, List, Optional, Tuple, Union, TYPE_CHECKING from astrapy.core.db import ( @@ -33,6 +32,7 @@ DataAPIResponseException, DeleteManyException, InsertManyException, + MultiCallTimeoutManager, TooManyDocumentsToCountException, UpdateManyException, recast_method_sync, @@ -411,6 +411,7 @@ def insert_one( raw_response=io_response, ) + @recast_method_sync def insert_many( self, documents: 
Iterable[DocumentType], @@ -418,6 +419,7 @@ def insert_many( ordered: bool = True, chunk_size: Optional[int] = None, concurrency: Optional[int] = None, + max_time_ms: Optional[int] = None, ) -> InsertManyResult: """ Insert a list of documents into the collection. @@ -434,6 +436,7 @@ def insert_many( Leave it unspecified (recommended) to use the system default. concurrency: maximum number of concurrent requests to the API at a given time. It cannot be more than one for ordered insertions. + max_time_ms: a timeout, in milliseconds, for the operation. Returns: an InsertManyResult object. @@ -481,6 +484,7 @@ def insert_many( _documents = list(documents) # TODO make this a chunked iterator # TODO handle the auto-inserted-ids here (chunk-wise better) raw_results: List[Dict[str, Any]] = [] + timeout_manager = MultiCallTimeoutManager(overall_max_time_ms=max_time_ms) if ordered: options = {"ordered": True} inserted_ids: List[Any] = [] @@ -489,6 +493,7 @@ def insert_many( documents=_documents[i : i + _chunk_size], options=options, partial_failures_allowed=True, + timeout_info=timeout_manager.check_remaining_timeout(), ) # accumulate the results in this call chunk_inserted_ids = (chunk_response.get("status") or {}).get( @@ -520,11 +525,17 @@ def insert_many( options = {"ordered": False} if _concurrency > 1: with ThreadPoolExecutor(max_workers=_concurrency) as executor: - _chunk_insertor = partial( - self._astra_db_collection.insert_many, - options=options, - partial_failures_allowed=True, - ) + + def _chunk_insertor( + document_chunk: List[Dict[str, Any]] + ) -> Dict[str, Any]: + return self._astra_db_collection.insert_many( + documents=document_chunk, + options=options, + partial_failures_allowed=True, + timeout_info=timeout_manager.check_remaining_timeout(), + ) + raw_results = list( executor.map( _chunk_insertor, @@ -540,6 +551,7 @@ def insert_many( _documents[i : i + _chunk_size], options=options, partial_failures_allowed=True, + 
timeout_info=timeout_manager.check_remaining_timeout(), ) for i in range(0, len(_documents), _chunk_size) ] @@ -1109,12 +1121,14 @@ def update_one( raw_response=fo_response, ) + @recast_method_sync def update_many( self, filter: Dict[str, Any], update: Dict[str, Any], *, upsert: bool = False, + max_time_ms: Optional[int] = None, ) -> UpdateResult: """ Apply an update operations to all documents matching a condition, @@ -1139,6 +1153,7 @@ def update_many( to an empty document) is inserted if no matches are found on the collection. If False, the operation silently does nothing in case of no matches. + max_time_ms: a timeout, in milliseconds, for the operation. Returns: an UpdateResult object summarizing the outcome of the update operation. @@ -1151,12 +1166,14 @@ def update_many( um_responses: List[Dict[str, Any]] = [] um_statuses: List[Dict[str, Any]] = [] must_proceed = True + timeout_manager = MultiCallTimeoutManager(overall_max_time_ms=max_time_ms) while must_proceed: options = {**base_options, **page_state_options} this_um_response = self._astra_db_collection.update_many( update=update, filter=filter, options=options, + timeout_info=timeout_manager.check_remaining_timeout(), ) this_um_status = this_um_response.get("status") or {} # @@ -1302,9 +1319,11 @@ def delete_one( raw_response=do_response, ) + @recast_method_sync def delete_many( self, filter: Dict[str, Any], + max_time_ms: Optional[int] = None, ) -> DeleteResult: """ Delete all documents matching a provided filter. @@ -1317,8 +1336,9 @@ def delete_many( {"price": {"$le": 100}} {"$and": [{"name": "John"}, {"price": {"$le": 100}}]} See the Data API documentation for the full set of operators. - The `delete_many` method does not accept an empty filter: see - `delete_all` to completely erase all contents of a collection + The `delete_many` method does not accept an empty filter: see + `delete_all` to completely erase all contents of a collection + max_time_ms: a timeout, in milliseconds, for the operation. 
Returns: a DeleteResult object summarizing the outcome of the delete operation. @@ -1341,10 +1361,12 @@ def delete_many( dm_responses: List[Dict[str, Any]] = [] deleted_count = 0 must_proceed = True + timeout_manager = MultiCallTimeoutManager(overall_max_time_ms=max_time_ms) while must_proceed: this_dm_response = self._astra_db_collection.delete_many( filter=filter, skip_error_check=True, + timeout_info=timeout_manager.check_remaining_timeout(), ) # if errors, quit early if this_dm_response.get("errors", []): @@ -1851,6 +1873,7 @@ async def insert_one( f"(gotten '${json.dumps(io_response)}')" ) + @recast_method_async async def insert_many( self, documents: Iterable[DocumentType], @@ -1858,6 +1881,7 @@ async def insert_many( ordered: bool = True, chunk_size: Optional[int] = None, concurrency: Optional[int] = None, + max_time_ms: Optional[int] = None, ) -> InsertManyResult: """ Insert a list of documents into the collection. @@ -1874,6 +1898,7 @@ async def insert_many( Leave it unspecified (recommended) to use the system default. concurrency: maximum number of concurrent requests to the API at a given time. It cannot be more than one for ordered insertions. + max_time_ms: a timeout, in milliseconds, for the operation. Returns: an InsertManyResult object. 
@@ -1921,6 +1946,7 @@ async def insert_many( _documents = list(documents) # TODO make this a chunked iterator # TODO handle the auto-inserted-ids here (chunk-wise better) raw_results: List[Dict[str, Any]] = [] + timeout_manager = MultiCallTimeoutManager(overall_max_time_ms=max_time_ms) if ordered: options = {"ordered": True} inserted_ids: List[Any] = [] @@ -1929,6 +1955,7 @@ async def insert_many( documents=_documents[i : i + _chunk_size], options=options, partial_failures_allowed=True, + timeout_info=timeout_manager.check_remaining_timeout(), ) # accumulate the results in this call chunk_inserted_ids = (chunk_response.get("status") or {}).get( @@ -1969,6 +1996,7 @@ async def concurrent_insert_chunk( document_chunk, options=options, partial_failures_allowed=True, + timeout_info=timeout_manager.check_remaining_timeout(), ) if _concurrency > 1: @@ -2543,12 +2571,14 @@ async def update_one( raw_response=fo_response, ) + @recast_method_async async def update_many( self, filter: Dict[str, Any], update: Dict[str, Any], *, upsert: bool = False, + max_time_ms: Optional[int] = None, ) -> UpdateResult: """ Apply an update operations to all documents matching a condition, @@ -2573,6 +2603,7 @@ async def update_many( to an empty document) is inserted if no matches are found on the collection. If False, the operation silently does nothing in case of no matches. + max_time_ms: a timeout, in milliseconds, for the operation. Returns: an UpdateResult object summarizing the outcome of the update operation. 
@@ -2585,12 +2616,14 @@ async def update_many( um_responses: List[Dict[str, Any]] = [] um_statuses: List[Dict[str, Any]] = [] must_proceed = True + timeout_manager = MultiCallTimeoutManager(overall_max_time_ms=max_time_ms) while must_proceed: options = {**base_options, **page_state_options} this_um_response = await self._astra_db_collection.update_many( update=update, filter=filter, options=options, + timeout_info=timeout_manager.check_remaining_timeout(), ) this_um_status = this_um_response.get("status") or {} # @@ -2737,11 +2770,13 @@ async def delete_one( raw_response=do_response, ) + @recast_method_async async def delete_many( self, filter: Dict[str, Any], *, let: Optional[int] = None, + max_time_ms: Optional[int] = None, ) -> DeleteResult: """ Delete all documents matching a provided filter. @@ -2754,8 +2789,9 @@ async def delete_many( {"price": {"$le": 100}} {"$and": [{"name": "John"}, {"price": {"$le": 100}}]} See the Data API documentation for the full set of operators. - The `delete_many` method does not accept an empty filter: see - `delete_all` to completely erase all contents of a collection + The `delete_many` method does not accept an empty filter: see + `delete_all` to completely erase all contents of a collection + max_time_ms: a timeout, in milliseconds, for the operation. Returns: a DeleteResult object summarizing the outcome of the delete operation. 
@@ -2778,10 +2814,12 @@ async def delete_many( dm_responses: List[Dict[str, Any]] = [] deleted_count = 0 must_proceed = True + timeout_manager = MultiCallTimeoutManager(overall_max_time_ms=max_time_ms) while must_proceed: this_dm_response = await self._astra_db_collection.delete_many( filter=filter, skip_error_check=True, + timeout_info=timeout_manager.check_remaining_timeout(), ) # if errors, quit early if this_dm_response.get("errors", []): diff --git a/astrapy/exceptions.py b/astrapy/exceptions.py index 23d94107..3e517979 100644 --- a/astrapy/exceptions.py +++ b/astrapy/exceptions.py @@ -14,6 +14,7 @@ from __future__ import annotations +import time from functools import wraps from typing import Any, Awaitable, Callable, Dict, List, Optional, Union from dataclasses import dataclass @@ -526,3 +527,52 @@ def base_timeout_info(max_time_ms: Optional[int]) -> Union[TimeoutInfo, None]: return {"base": max_time_ms / 1000.0} else: return None + + +class MultiCallTimeoutManager: + """ + A helper class to keep track of timing and timeouts + in a multi-call method context. + + Args: + overall_max_time_ms: an optional max duration to track (milliseconds) + + Attributes: + overall_max_time_ms: an optional max duration to track (milliseconds) + started_ms: timestamp of the instance construction (milliseconds) + deadline_ms: optional deadline in milliseconds (computed by the class). + """ + + overall_max_time_ms: Optional[int] + started_ms: int = -1 + deadline_ms: Optional[int] + + def __init__(self, overall_max_time_ms: Optional[int]) -> None: + self.started_ms = int(time.time() * 1000) + self.overall_max_time_ms = overall_max_time_ms + if self.overall_max_time_ms is not None: + self.deadline_ms = self.started_ms + self.overall_max_time_ms + else: + self.deadline_ms = None + + def check_remaining_timeout(self) -> Union[TimeoutInfo, None]: + """ + Ensure the deadline, if any, is not yet in the past. + If it is, raise an appropriate timeout error. 
+ If it is not, or there is no deadline, return a suitable TimeoutInfo + for use within the multi-call method. + """ + now_ms = int(time.time() * 1000) + if self.deadline_ms is not None: + if now_ms < self.deadline_ms: + remaining_ms = self.deadline_ms - now_ms + return base_timeout_info(max_time_ms=remaining_ms) + else: + raise DataAPITimeoutException( + text="Operation timed out.", + timeout_type="generic", + endpoint=None, + raw_payload=None, + ) + else: + return None diff --git a/tests/idiomatic/integration/test_timeout_async.py b/tests/idiomatic/integration/test_timeout_async.py index 7d85437f..22561af5 100644 --- a/tests/idiomatic/integration/test_timeout_async.py +++ b/tests/idiomatic/integration/test_timeout_async.py @@ -93,3 +93,67 @@ async def test_cursor_overalltimeout_exceptions_async( await cur1.distinct("a", max_time_ms=5000) with pytest.raises(DataAPITimeoutException): await cur2.distinct("a", max_time_ms=1) + + @pytest.mark.describe("test of insert_many timeouts, async") + async def test_insert_many_timeout_exceptions_async( + self, + async_collection: AsyncCollection, + ) -> None: + fifty_docs = [{"seq": i} for i in range(50)] + await async_collection.insert_many(fifty_docs, ordered=True, max_time_ms=20000) + await async_collection.insert_many( + fifty_docs, ordered=False, concurrency=1, max_time_ms=20000 + ) + await async_collection.insert_many( + fifty_docs, ordered=False, concurrency=2, max_time_ms=20000 + ) + + with pytest.raises(DataAPITimeoutException): + await async_collection.insert_many( + fifty_docs, ordered=True, max_time_ms=200 + ) + with pytest.raises(DataAPITimeoutException): + await async_collection.insert_many( + fifty_docs, ordered=False, concurrency=1, max_time_ms=200 + ) + with pytest.raises(DataAPITimeoutException): + await async_collection.insert_many( + fifty_docs, ordered=False, concurrency=2, max_time_ms=200 + ) + + @pytest.mark.describe("test of update_many timeouts, async") + async def 
test_update_many_timeout_exceptions_async( + self, + async_collection: AsyncCollection, + ) -> None: + fifty_docs = [{"seq": i, "f": "update_many"} for i in range(50)] + await async_collection.insert_many(fifty_docs, ordered=False, concurrency=3) + + await async_collection.update_many({"f": "update_many"}, {"$inc": {"seq": 100}}) + await async_collection.update_many( + {"f": "update_many"}, {"$inc": {"seq": 100}}, max_time_ms=20000 + ) + + with pytest.raises(DataAPITimeoutException): + await async_collection.update_many( + {"f": "update_many"}, {"$inc": {"seq": 100}}, max_time_ms=200 + ) + + @pytest.mark.describe("test of delete_many timeouts, async") + async def test_delete_many_timeout_exceptions_async( + self, + async_collection: AsyncCollection, + ) -> None: + fifty_docs1 = [{"seq": i, "f": "delete_many1"} for i in range(50)] + fifty_docs2 = [{"seq": i, "f": "delete_many2"} for i in range(50)] + fifty_docs3 = [{"seq": i, "f": "delete_many3"} for i in range(50)] + await async_collection.insert_many( + fifty_docs1 + fifty_docs2 + fifty_docs3, + ordered=False, + concurrency=5, + ) + + await async_collection.delete_many({"f": "delete_many1"}) + await async_collection.delete_many({"f": "delete_many2"}, max_time_ms=20000) + with pytest.raises(DataAPITimeoutException): + await async_collection.delete_many({"f": "delete_many3"}, max_time_ms=200) diff --git a/tests/idiomatic/integration/test_timeout_sync.py b/tests/idiomatic/integration/test_timeout_sync.py index 0e6c7d48..c7cd8c5d 100644 --- a/tests/idiomatic/integration/test_timeout_sync.py +++ b/tests/idiomatic/integration/test_timeout_sync.py @@ -91,3 +91,65 @@ def test_cursor_overalltimeout_exceptions_sync( cur1.distinct("a", max_time_ms=5000) with pytest.raises(DataAPITimeoutException): cur2.distinct("a", max_time_ms=1) + + @pytest.mark.describe("test of insert_many timeouts, sync") + def test_insert_many_timeout_exceptions_sync( + self, + sync_collection: Collection, + ) -> None: + fifty_docs = [{"seq": i} for i 
in range(50)] + sync_collection.insert_many(fifty_docs, ordered=True, max_time_ms=20000) + sync_collection.insert_many( + fifty_docs, ordered=False, concurrency=1, max_time_ms=20000 + ) + sync_collection.insert_many( + fifty_docs, ordered=False, concurrency=2, max_time_ms=20000 + ) + + with pytest.raises(DataAPITimeoutException): + sync_collection.insert_many(fifty_docs, ordered=True, max_time_ms=200) + with pytest.raises(DataAPITimeoutException): + sync_collection.insert_many( + fifty_docs, ordered=False, concurrency=1, max_time_ms=200 + ) + with pytest.raises(DataAPITimeoutException): + sync_collection.insert_many( + fifty_docs, ordered=False, concurrency=2, max_time_ms=200 + ) + + @pytest.mark.describe("test of update_many timeouts, sync") + def test_update_many_timeout_exceptions_sync( + self, + sync_collection: Collection, + ) -> None: + fifty_docs = [{"seq": i, "f": "update_many"} for i in range(50)] + sync_collection.insert_many(fifty_docs, ordered=False, concurrency=3) + + sync_collection.update_many({"f": "update_many"}, {"$inc": {"seq": 100}}) + sync_collection.update_many( + {"f": "update_many"}, {"$inc": {"seq": 100}}, max_time_ms=20000 + ) + + with pytest.raises(DataAPITimeoutException): + sync_collection.update_many( + {"f": "update_many"}, {"$inc": {"seq": 100}}, max_time_ms=200 + ) + + @pytest.mark.describe("test of delete_many timeouts, sync") + def test_delete_many_timeout_exceptions_sync( + self, + sync_collection: Collection, + ) -> None: + fifty_docs1 = [{"seq": i, "f": "delete_many1"} for i in range(50)] + fifty_docs2 = [{"seq": i, "f": "delete_many2"} for i in range(50)] + fifty_docs3 = [{"seq": i, "f": "delete_many3"} for i in range(50)] + sync_collection.insert_many( + fifty_docs1 + fifty_docs2 + fifty_docs3, + ordered=False, + concurrency=5, + ) + + sync_collection.delete_many({"f": "delete_many1"}) + sync_collection.delete_many({"f": "delete_many2"}, max_time_ms=20000) + with pytest.raises(DataAPITimeoutException): + 
sync_collection.delete_many({"f": "delete_many3"}, max_time_ms=200) diff --git a/tests/idiomatic/unit/test_timeouts.py b/tests/idiomatic/unit/test_timeouts.py new file mode 100644 index 00000000..9998d217 --- /dev/null +++ b/tests/idiomatic/unit/test_timeouts.py @@ -0,0 +1,38 @@ +# Copyright DataStax, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import time +import pytest + +from astrapy.exceptions import DataAPITimeoutException, MultiCallTimeoutManager + + +@pytest.mark.describe("test MultiCallTimeoutManager") +def test_multicalltimeoutmanager() -> None: + mgr_n = MultiCallTimeoutManager(overall_max_time_ms=None) + assert mgr_n.check_remaining_timeout() is None + time.sleep(0.5) + assert mgr_n.check_remaining_timeout() is None + + mgr_1 = MultiCallTimeoutManager(overall_max_time_ms=1000) + crt_1 = mgr_1.check_remaining_timeout() + assert crt_1 is not None + assert crt_1["base"] > 0 + time.sleep(0.6) + crt_2 = mgr_1.check_remaining_timeout() + assert crt_2 is not None + assert crt_2["base"] > 0 + time.sleep(0.6) + with pytest.raises(DataAPITimeoutException): + mgr_1.check_remaining_timeout() From f4725d5fcdb14c2d5f2fb7703efd39d8279ad48d Mon Sep 17 00:00:00 2001 From: Stefano Lottini Date: Mon, 18 Mar 2024 15:59:39 +0100 Subject: [PATCH 07/13] add sort to core update_one, delete_one, delete_one_by_predicate; add sort to idiomatic replace_one, update_one, delete_one + tests --- astrapy/collection.py | 39 +++++++++- astrapy/core/db.py | 76 ++++++++++++------- 
tests/idiomatic/integration/test_dml_async.py | 33 ++++++++ tests/idiomatic/integration/test_dml_sync.py | 21 +++++ 4 files changed, 142 insertions(+), 27 deletions(-) diff --git a/astrapy/collection.py b/astrapy/collection.py index 145125bf..5eba069b 100644 --- a/astrapy/collection.py +++ b/astrapy/collection.py @@ -929,6 +929,7 @@ def replace_one( filter: Dict[str, Any], replacement: DocumentType, *, + sort: Optional[SortType] = None, upsert: bool = False, max_time_ms: Optional[int] = None, ) -> UpdateResult: @@ -945,6 +946,10 @@ def replace_one( {"$and": [{"name": "John"}, {"price": {"$le": 100}}]} See the Data API documentation for the full set of operators. replacement: the new document to write into the collection. + sort: with this dictionary parameter one can control the sorting + order of the documents matching the filter, effectively + determining what document will come first and hence be the + replaced one. See the `find` method for more on sorting. upsert: this parameter controls the behavior in absence of matches. If True, `replacement` is inserted as a new document if no matches are found on the collection. If False, @@ -961,6 +966,7 @@ def replace_one( fo_response = self._astra_db_collection.find_one_and_replace( replacement=replacement, filter=filter, + sort=sort, options=options, timeout_info=base_timeout_info(max_time_ms), ) @@ -1067,6 +1073,7 @@ def update_one( filter: Dict[str, Any], update: Dict[str, Any], *, + sort: Optional[SortType] = None, upsert: bool = False, max_time_ms: Optional[int] = None, ) -> UpdateResult: @@ -1088,6 +1095,10 @@ def update_one( {"$inc": {"counter": 10}} {"$unset": {"field": ""}} See the Data API documentation for the full syntax. + sort: with this dictionary parameter one can control the sorting + order of the documents matching the filter, effectively + determining what document will come first and hence be the + updated one. See the `find` method for more on sorting. 
upsert: this parameter controls the behavior in absence of matches. If True, a new document (resulting from applying the `update` to an empty document) is inserted if no matches are found on @@ -1104,6 +1115,7 @@ def update_one( } fo_response = self._astra_db_collection.find_one_and_update( update=update, + sort=sort, filter=filter, options=options, timeout_info=base_timeout_info(max_time_ms), @@ -1276,6 +1288,8 @@ def find_one_and_delete( def delete_one( self, filter: Dict[str, Any], + *, + sort: Optional[SortType] = None, max_time_ms: Optional[int] = None, ) -> DeleteResult: """ @@ -1291,6 +1305,10 @@ def delete_one( {"price": {"$le": 100}} {"$and": [{"name": "John"}, {"price": {"$le": 100}}]} See the Data API documentation for the full set of operators. + sort: with this dictionary parameter one can control the sorting + order of the documents matching the filter, effectively + determining what document will come first and hence be the + deleted one. See the `find` method for more on sorting. max_time_ms: a timeout, in milliseconds, for the underlying HTTP request. Returns: @@ -1298,7 +1316,7 @@ def delete_one( """ do_response = self._astra_db_collection.delete_one_by_predicate( - filter=filter, timeout_info=base_timeout_info(max_time_ms) + filter=filter, timeout_info=base_timeout_info(max_time_ms), sort=sort ) if "deletedCount" in do_response.get("status", {}): deleted_count = do_response["status"]["deletedCount"] @@ -2379,6 +2397,7 @@ async def replace_one( filter: Dict[str, Any], replacement: DocumentType, *, + sort: Optional[SortType] = None, upsert: bool = False, max_time_ms: Optional[int] = None, ) -> UpdateResult: @@ -2395,6 +2414,10 @@ async def replace_one( {"$and": [{"name": "John"}, {"price": {"$le": 100}}]} See the Data API documentation for the full set of operators. replacement: the new document to write into the collection. 
+ sort: with this dictionary parameter one can control the sorting + order of the documents matching the filter, effectively + determining what document will come first and hence be the + replaced one. See the `find` method for more on sorting. upsert: this parameter controls the behavior in absence of matches. If True, `replacement` is inserted as a new document if no matches are found on the collection. If False, @@ -2411,6 +2434,7 @@ async def replace_one( fo_response = await self._astra_db_collection.find_one_and_replace( replacement=replacement, filter=filter, + sort=sort, options=options, timeout_info=base_timeout_info(max_time_ms), ) @@ -2517,6 +2541,7 @@ async def update_one( filter: Dict[str, Any], update: Dict[str, Any], *, + sort: Optional[SortType] = None, upsert: bool = False, max_time_ms: Optional[int] = None, ) -> UpdateResult: @@ -2538,6 +2563,10 @@ async def update_one( {"$inc": {"counter": 10}} {"$unset": {"field": ""}} See the Data API documentation for the full syntax. + sort: with this dictionary parameter one can control the sorting + order of the documents matching the filter, effectively + determining what document will come first and hence be the + updated one. See the `find` method for more on sorting. upsert: this parameter controls the behavior in absence of matches. 
If True, a new document (resulting from applying the `update` to an empty document) is inserted if no matches are found on @@ -2554,6 +2583,7 @@ async def update_one( } fo_response = await self._astra_db_collection.find_one_and_update( update=update, + sort=sort, filter=filter, options=options, timeout_info=base_timeout_info(max_time_ms), @@ -2726,6 +2756,8 @@ async def find_one_and_delete( async def delete_one( self, filter: Dict[str, Any], + *, + sort: Optional[SortType] = None, max_time_ms: Optional[int] = None, ) -> DeleteResult: """ @@ -2741,6 +2773,10 @@ async def delete_one( {"price": {"$le": 100}} {"$and": [{"name": "John"}, {"price": {"$le": 100}}]} See the Data API documentation for the full set of operators. + sort: with this dictionary parameter one can control the sorting + order of the documents matching the filter, effectively + determining what document will come first and hence be the + deleted one. See the `find` method for more on sorting. max_time_ms: a timeout, in milliseconds, for the underlying HTTP request. Returns: @@ -2750,6 +2786,7 @@ async def delete_one( do_response = await self._astra_db_collection.delete_one_by_predicate( filter=filter, timeout_info=base_timeout_info(max_time_ms), + sort=sort, ) if "deletedCount" in do_response.get("status", {}): deleted_count = do_response["status"]["deletedCount"] diff --git a/astrapy/core/db.py b/astrapy/core/db.py index daec4be9..f9c1b53a 100644 --- a/astrapy/core/db.py +++ b/astrapy/core/db.py @@ -1050,6 +1050,7 @@ def update_one( self, filter: Dict[str, Any], update: Dict[str, Any], + sort: Optional[Dict[str, Any]] = None, timeout_info: TimeoutInfoWideType = None, ) -> API_RESPONSE: """ @@ -1065,7 +1066,12 @@ def update_one( Returns: dict: The response from the database after the update operation. 
""" - json_query = make_payload(top_level="updateOne", filter=filter, update=update) + json_query = make_payload( + top_level="updateOne", + filter=filter, + update=update, + sort=sort, + ) response = self._request( method=http_methods.POST, @@ -1140,7 +1146,10 @@ def delete(self, id: str, timeout_info: TimeoutInfoWideType = None) -> API_RESPO return self.delete_one(id, timeout_info=timeout_info) def delete_one( - self, id: str, timeout_info: TimeoutInfoWideType = None + self, + id: str, + sort: Optional[Dict[str, Any]] = None, + timeout_info: TimeoutInfoWideType = None, ) -> API_RESPONSE: """ Delete a single document from the collection based on its ID. @@ -1154,11 +1163,11 @@ def delete_one( Returns: dict: The response from the database after the delete operation. """ - json_query = { - "deleteOne": { - "filter": {"_id": id}, - } - } + json_query = make_payload( + top_level="deleteOne", + filter={"_id": id}, + sort=sort, + ) response = self._request( method=http_methods.POST, @@ -1170,7 +1179,10 @@ def delete_one( return response def delete_one_by_predicate( - self, filter: Dict[str, Any], timeout_info: TimeoutInfoWideType = None + self, + filter: Dict[str, Any], + sort: Optional[Dict[str, Any]] = None, + timeout_info: TimeoutInfoWideType = None, ) -> API_RESPONSE: """ Delete a single document from the collection based on a filter clause @@ -1184,11 +1196,11 @@ def delete_one_by_predicate( Returns: dict: The response from the database after the delete operation. 
""" - json_query = { - "deleteOne": { - "filter": filter, - } - } + json_query = make_payload( + top_level="deleteOne", + filter=filter, + sort=sort, + ) response = self._request( method=http_methods.POST, @@ -2399,6 +2411,7 @@ async def update_one( self, filter: Dict[str, Any], update: Dict[str, Any], + sort: Optional[Dict[str, Any]] = None, timeout_info: TimeoutInfoWideType = None, ) -> API_RESPONSE: """ @@ -2414,7 +2427,12 @@ async def update_one( Returns: dict: The response from the database after the update operation. """ - json_query = make_payload(top_level="updateOne", filter=filter, update=update) + json_query = make_payload( + top_level="updateOne", + filter=filter, + update=update, + sort=sort, + ) response = await self._request( method=http_methods.POST, @@ -2480,7 +2498,10 @@ async def replace( return await self._put(path=path, document=document, timeout_info=timeout_info) async def delete_one( - self, id: str, timeout_info: TimeoutInfoWideType = None + self, + id: str, + sort: Optional[Dict[str, Any]] = None, + timeout_info: TimeoutInfoWideType = None, ) -> API_RESPONSE: """ Delete a single document from the collection based on its ID. @@ -2494,11 +2515,11 @@ async def delete_one( Returns: dict: The response from the database after the delete operation. 
""" - json_query = { - "deleteOne": { - "filter": {"_id": id}, - } - } + json_query = make_payload( + top_level="deleteOne", + filter={"_id": id}, + sort=sort, + ) response = await self._request( method=http_methods.POST, @@ -2510,7 +2531,10 @@ async def delete_one( return response async def delete_one_by_predicate( - self, filter: Dict[str, Any], timeout_info: TimeoutInfoWideType = None + self, + filter: Dict[str, Any], + sort: Optional[Dict[str, Any]] = None, + timeout_info: TimeoutInfoWideType = None, ) -> API_RESPONSE: """ Delete a single document from the collection based on a filter clause @@ -2524,11 +2548,11 @@ async def delete_one_by_predicate( Returns: dict: The response from the database after the delete operation. """ - json_query = { - "deleteOne": { - "filter": filter, - } - } + json_query = make_payload( + top_level="deleteOne", + filter=filter, + sort=sort, + ) response = await self._request( method=http_methods.POST, diff --git a/tests/idiomatic/integration/test_dml_async.py b/tests/idiomatic/integration/test_dml_async.py index 859ef898..f9260818 100644 --- a/tests/idiomatic/integration/test_dml_async.py +++ b/tests/idiomatic/integration/test_dml_async.py @@ -125,6 +125,16 @@ async def test_collection_delete_one_async( == 2 ) + # test of sort + await async_empty_collection.insert_many( + [{"ts": 1, "seq": i} for i in [2, 0, 1]] + ) + await async_empty_collection.delete_one({"ts": 1}, sort={"seq": 1}) + assert set(await async_empty_collection.distinct("seq", filter={"ts": 1})) == { + 1, + 2, + } + @pytest.mark.describe("test of collection delete_many, async") async def test_collection_delete_many_async( self, @@ -778,6 +788,18 @@ async def test_collection_replace_one_async( assert result4.update_info["nModified"] == 1 assert "upserted" not in result4.update_info + # test of sort + await async_empty_collection.insert_many( + [{"ts": 1, "seq": i} for i in [2, 0, 1]] + ) + await async_empty_collection.replace_one( + {"ts": 1}, {"ts": 1, "R": True}, 
sort={"seq": 1} + ) + assert set(await async_empty_collection.distinct("seq", filter={"ts": 1})) == { + 1, + 2, + } + @pytest.mark.describe("test of update_one, async") async def test_collection_update_one_async( self, @@ -813,6 +835,17 @@ async def test_collection_update_one_async( assert result4.update_info["nModified"] == 1 assert "upserted" not in result4.update_info + # test of sort + await async_empty_collection.insert_many( + [{"ts": 1, "seq": i} for i in [2, 0, 1]] + ) + await async_empty_collection.update_one( + {"ts": 1}, {"$set": {"U": True}}, sort={"seq": 1} + ) + updated = await async_empty_collection.find_one({"U": True}) + assert updated is not None + assert updated["seq"] == 0 + @pytest.mark.describe("test of update_many, async") async def test_collection_update_many_async( self, diff --git a/tests/idiomatic/integration/test_dml_sync.py b/tests/idiomatic/integration/test_dml_sync.py index b3eba349..695fbebe 100644 --- a/tests/idiomatic/integration/test_dml_sync.py +++ b/tests/idiomatic/integration/test_dml_sync.py @@ -99,6 +99,11 @@ def test_collection_delete_one_sync( assert do_result1.deleted_count == 1 assert sync_empty_collection.count_documents(filter={}, upper_bound=100) == 2 + # test of sort + sync_empty_collection.insert_many([{"ts": 1, "seq": i} for i in [2, 0, 1]]) + sync_empty_collection.delete_one({"ts": 1}, sort={"seq": 1}) + assert set(sync_empty_collection.distinct("seq", filter={"ts": 1})) == {1, 2} + @pytest.mark.describe("test of collection delete_many, sync") def test_collection_delete_many_sync( self, @@ -714,6 +719,13 @@ def test_collection_replace_one_sync( assert result4.update_info["nModified"] == 1 assert "upserted" not in result4.update_info + # test of sort + sync_empty_collection.insert_many([{"ts": 1, "seq": i} for i in [2, 0, 1]]) + sync_empty_collection.replace_one( + {"ts": 1}, {"ts": 1, "R": True}, sort={"seq": 1} + ) + assert set(sync_empty_collection.distinct("seq", filter={"ts": 1})) == {1, 2} + 
@pytest.mark.describe("test of update_one, sync") def test_collection_update_one_sync( self, @@ -749,6 +761,15 @@ def test_collection_update_one_sync( assert result4.update_info["nModified"] == 1 assert "upserted" not in result4.update_info + # test of sort + sync_empty_collection.insert_many([{"ts": 1, "seq": i} for i in [2, 0, 1]]) + sync_empty_collection.update_one( + {"ts": 1}, {"$set": {"U": True}}, sort={"seq": 1} + ) + updated = sync_empty_collection.find_one({"U": True}) + assert updated is not None + assert updated["seq"] == 0 + @pytest.mark.describe("test of update_many, sync") def test_collection_update_many_sync( self, From 21d59be3913e5158af9243aee1f168eb006832f2 Mon Sep 17 00:00:00 2001 From: Stefano Lottini Date: Mon, 18 Mar 2024 16:17:25 +0100 Subject: [PATCH 08/13] align signatures and make most params kwonly across collection/asynccollection/database/asyncdatabase --- astrapy/collection.py | 22 +++++++++++++++------- astrapy/database.py | 2 ++ 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/astrapy/collection.py b/astrapy/collection.py index 5eba069b..316d38a0 100644 --- a/astrapy/collection.py +++ b/astrapy/collection.py @@ -266,7 +266,7 @@ def set_caller( caller_version=caller_version, ) - def options(self, max_time_ms: Optional[int] = None) -> Dict[str, Any]: + def options(self, *, max_time_ms: Optional[int] = None) -> Dict[str, Any]: """ Get the collection options, i.e. its configuration as read from the database. 
@@ -352,6 +352,7 @@ def full_name(self) -> str: def insert_one( self, document: DocumentType, + *, max_time_ms: Optional[int] = None, ) -> InsertOneResult: """ @@ -784,6 +785,7 @@ def distinct( def count_documents( self, filter: Dict[str, Any], + *, upper_bound: int, max_time_ms: Optional[int] = None, ) -> int: @@ -1341,6 +1343,7 @@ def delete_one( def delete_many( self, filter: Dict[str, Any], + *, max_time_ms: Optional[int] = None, ) -> DeleteResult: """ @@ -1415,7 +1418,7 @@ def delete_many( ) @recast_method_sync - def delete_all(self, max_time_ms: Optional[int] = None) -> Dict[str, Any]: + def delete_all(self, *, max_time_ms: Optional[int] = None) -> Dict[str, Any]: """ Delete all documents in a collection. @@ -1574,13 +1577,15 @@ def _execute_as_either( else: return reduce_bulk_write_results(bulk_write_successes) - def drop(self, max_time_ms: Optional[int] = None) -> Dict[str, Any]: + def drop(self, *, max_time_ms: Optional[int] = None) -> Dict[str, Any]: """ Drop the collection, i.e. delete it from the database along with all the documents it contains. Args: max_time_ms: a timeout, in milliseconds, for the underlying HTTP request. + Remember there is not guarantee that a request that has + timed out us not in fact honored. Returns: a dictionary of the form {"ok": 1} to signal successful deletion. @@ -1761,7 +1766,7 @@ def set_caller( caller_version=caller_version, ) - async def options(self, max_time_ms: Optional[int] = None) -> Dict[str, Any]: + async def options(self, *, max_time_ms: Optional[int] = None) -> Dict[str, Any]: """ Get the collection options, i.e. its configuration as read from the database. 
@@ -1849,6 +1854,7 @@ def full_name(self) -> str: async def insert_one( self, document: DocumentType, + *, max_time_ms: Optional[int] = None, ) -> InsertOneResult: """ @@ -2252,6 +2258,7 @@ async def distinct( async def count_documents( self, filter: Dict[str, Any], + *, upper_bound: int, max_time_ms: Optional[int] = None, ) -> int: @@ -2812,7 +2819,6 @@ async def delete_many( self, filter: Dict[str, Any], *, - let: Optional[int] = None, max_time_ms: Optional[int] = None, ) -> DeleteResult: """ @@ -2887,7 +2893,7 @@ async def delete_many( ) @recast_method_async - async def delete_all(self, max_time_ms: Optional[int] = None) -> Dict[str, Any]: + async def delete_all(self, *, max_time_ms: Optional[int] = None) -> Dict[str, Any]: """ Delete all documents in a collection. @@ -3039,13 +3045,15 @@ async def _concurrent_execute_as_either( else: return reduce_bulk_write_results(bulk_write_successes) - async def drop(self, max_time_ms: Optional[int] = None) -> Dict[str, Any]: + async def drop(self, *, max_time_ms: Optional[int] = None) -> Dict[str, Any]: """ Drop the collection, i.e. delete it from the database along with all the documents it contains. Args: max_time_ms: a timeout, in milliseconds, for the underlying HTTP request. + Remember there is not guarantee that a request that has + timed out us not in fact honored. Returns: a dictionary of the form {"ok": 1} to signal successful deletion. 
diff --git a/astrapy/database.py b/astrapy/database.py index 69611acf..0fb18479 100644 --- a/astrapy/database.py +++ b/astrapy/database.py @@ -425,6 +425,7 @@ def create_collection( def drop_collection( self, name_or_collection: Union[str, Collection], + *, max_time_ms: Optional[int] = None, ) -> Dict[str, Any]: """ @@ -945,6 +946,7 @@ async def create_collection( async def drop_collection( self, name_or_collection: Union[str, AsyncCollection], + *, max_time_ms: Optional[int] = None, ) -> Dict[str, Any]: """ From fcc85c6412994efc9c52945845658368fd59ee36 Mon Sep 17 00:00:00 2001 From: Stefano Lottini Date: Mon, 18 Mar 2024 16:39:06 +0100 Subject: [PATCH 09/13] operation classes bear the full method signatures (except timeout) --- astrapy/operations.py | 70 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 67 insertions(+), 3 deletions(-) diff --git a/astrapy/operations.py b/astrapy/operations.py index 92454406..02922438 100644 --- a/astrapy/operations.py +++ b/astrapy/operations.py @@ -22,9 +22,10 @@ Dict, Iterable, List, + Optional, ) -from astrapy.constants import DocumentType +from astrapy.constants import DocumentType, SortType from astrapy.results import ( BulkWriteResult, DeleteResult, @@ -126,17 +127,30 @@ class InsertMany(BaseOperation): Attributes: documents: the list document to insert. ordered: whether the inserts should be done in sequence. + chunk_size: how many documents to include in a single API request. + Exceeding the server maximum allowed value results in an error. + Leave it unspecified (recommended) to use the system default. + concurrency: maximum number of concurrent requests to the API at + a given time. It cannot be more than one for ordered insertions. 
""" documents: Iterable[DocumentType] + ordered: bool + chunk_size: Optional[int] + concurrency: Optional[int] def __init__( self, documents: Iterable[DocumentType], + *, ordered: bool = True, + chunk_size: Optional[int] = None, + concurrency: Optional[int] = None, ) -> None: self.documents = documents self.ordered = ordered + self.chunk_size = chunk_size + self.concurrency = concurrency def execute( self, collection: Collection, index_in_bulk_write: int @@ -152,6 +166,8 @@ def execute( op_result: InsertManyResult = collection.insert_many( documents=self.documents, ordered=self.ordered, + chunk_size=self.chunk_size, + concurrency=self.concurrency, ) return op_result.to_bulk_write_result(index_in_bulk_write=index_in_bulk_write) @@ -165,11 +181,13 @@ class UpdateOne(BaseOperation): Attributes: filter: a filter condition to select a target document. update: an update prescription to apply to the document. + sort: controls ordering of results, hence which document is affected. upsert: controls what to do when no documents are found. """ filter: Dict[str, Any] update: Dict[str, Any] + sort: Optional[SortType] upsert: bool def __init__( @@ -177,10 +195,12 @@ def __init__( filter: Dict[str, Any], update: Dict[str, Any], *, + sort: Optional[SortType] = None, upsert: bool = False, ) -> None: self.filter = filter self.update = update + self.sort = sort self.upsert = upsert def execute( @@ -197,6 +217,7 @@ def execute( op_result: UpdateResult = collection.update_one( filter=self.filter, update=self.update, + sort=self.sort, upsert=self.upsert, ) return op_result.to_bulk_write_result(index_in_bulk_write=index_in_bulk_write) @@ -257,11 +278,13 @@ class ReplaceOne(BaseOperation): Attributes: filter: a filter condition to select a target document. replacement: the replacement document. + sort: controls ordering of results, hence which document is affected. upsert: controls what to do when no documents are found. 
""" filter: Dict[str, Any] replacement: DocumentType + sort: Optional[SortType] upsert: bool def __init__( @@ -269,10 +292,12 @@ def __init__( filter: Dict[str, Any], replacement: DocumentType, *, + sort: Optional[SortType] = None, upsert: bool = False, ) -> None: self.filter = filter self.replacement = replacement + self.sort = sort self.upsert = upsert def execute( @@ -289,6 +314,7 @@ def execute( op_result: UpdateResult = collection.replace_one( filter=self.filter, replacement=self.replacement, + sort=self.sort, upsert=self.upsert, ) return op_result.to_bulk_write_result(index_in_bulk_write=index_in_bulk_write) @@ -302,15 +328,20 @@ class DeleteOne(BaseOperation): Attributes: filter: a filter condition to select a target document. + sort: controls ordering of results, hence which document is affected. """ filter: Dict[str, Any] + sort: Optional[SortType] def __init__( self, filter: Dict[str, Any], + *, + sort: Optional[SortType] = None, ) -> None: self.filter = filter + self.sort = sort def execute( self, collection: Collection, index_in_bulk_write: int @@ -323,7 +354,9 @@ def execute( insert_in_bulk_write: the index in the list of bulkoperations """ - op_result: DeleteResult = collection.delete_one(filter=self.filter) + op_result: DeleteResult = collection.delete_one( + filter=self.filter, sort=self.sort + ) return op_result.to_bulk_write_result(index_in_bulk_write=index_in_bulk_write) @@ -414,18 +447,30 @@ class AsyncInsertMany(AsyncBaseOperation): Attributes: documents: the list document to insert. ordered: whether the inserts should be done in sequence. + chunk_size: how many documents to include in a single API request. + Exceeding the server maximum allowed value results in an error. + Leave it unspecified (recommended) to use the system default. + concurrency: maximum number of concurrent requests to the API at + a given time. It cannot be more than one for ordered insertions. 
""" documents: Iterable[DocumentType] ordered: bool + chunk_size: Optional[int] + concurrency: Optional[int] def __init__( self, documents: Iterable[DocumentType], + *, ordered: bool = True, + chunk_size: Optional[int] = None, + concurrency: Optional[int] = None, ) -> None: self.documents = documents self.ordered = ordered + self.chunk_size = chunk_size + self.concurrency = concurrency async def execute( self, collection: AsyncCollection, index_in_bulk_write: int @@ -441,6 +486,8 @@ async def execute( op_result: InsertManyResult = await collection.insert_many( documents=self.documents, ordered=self.ordered, + chunk_size=self.chunk_size, + concurrency=self.concurrency, ) return op_result.to_bulk_write_result(index_in_bulk_write=index_in_bulk_write) @@ -454,11 +501,13 @@ class AsyncUpdateOne(AsyncBaseOperation): Attributes: filter: a filter condition to select a target document. update: an update prescription to apply to the document. + sort: controls ordering of results, hence which document is affected. upsert: controls what to do when no documents are found. """ filter: Dict[str, Any] update: Dict[str, Any] + sort: Optional[SortType] upsert: bool def __init__( @@ -466,10 +515,12 @@ def __init__( filter: Dict[str, Any], update: Dict[str, Any], *, + sort: Optional[SortType] = None, upsert: bool = False, ) -> None: self.filter = filter self.update = update + self.sort = sort self.upsert = upsert async def execute( @@ -486,6 +537,7 @@ async def execute( op_result: UpdateResult = await collection.update_one( filter=self.filter, update=self.update, + sort=self.sort, upsert=self.upsert, ) return op_result.to_bulk_write_result(index_in_bulk_write=index_in_bulk_write) @@ -546,11 +598,13 @@ class AsyncReplaceOne(AsyncBaseOperation): Attributes: filter: a filter condition to select a target document. replacement: the replacement document. + sort: controls ordering of results, hence which document is affected. upsert: controls what to do when no documents are found. 
""" filter: Dict[str, Any] replacement: DocumentType + sort: Optional[SortType] upsert: bool def __init__( @@ -558,10 +612,12 @@ def __init__( filter: Dict[str, Any], replacement: DocumentType, *, + sort: Optional[SortType] = None, upsert: bool = False, ) -> None: self.filter = filter self.replacement = replacement + self.sort = sort self.upsert = upsert async def execute( @@ -578,6 +634,7 @@ async def execute( op_result: UpdateResult = await collection.replace_one( filter=self.filter, replacement=self.replacement, + sort=self.sort, upsert=self.upsert, ) return op_result.to_bulk_write_result(index_in_bulk_write=index_in_bulk_write) @@ -591,15 +648,20 @@ class AsyncDeleteOne(AsyncBaseOperation): Attributes: filter: a filter condition to select a target document. + sort: controls ordering of results, hence which document is affected. """ filter: Dict[str, Any] + sort: Optional[SortType] def __init__( self, filter: Dict[str, Any], + *, + sort: Optional[SortType] = None, ) -> None: self.filter = filter + self.sort = sort async def execute( self, collection: AsyncCollection, index_in_bulk_write: int @@ -612,7 +674,9 @@ async def execute( insert_in_bulk_write: the index in the list of bulkoperations """ - op_result: DeleteResult = await collection.delete_one(filter=self.filter) + op_result: DeleteResult = await collection.delete_one( + filter=self.filter, sort=self.sort + ) return op_result.to_bulk_write_result(index_in_bulk_write=index_in_bulk_write) From 0b2d004397d06d9cdbbb86925c794127d418bbe6 Mon Sep 17 00:00:00 2001 From: Stefano Lottini Date: Mon, 18 Mar 2024 17:44:57 +0100 Subject: [PATCH 10/13] bulk_write supports timeout + tests --- astrapy/collection.py | 54 ++++++--- astrapy/exceptions.py | 18 ++- astrapy/operations.py | 110 ++++++++++++++---- .../integration/test_timeout_async.py | 41 ++++++- .../integration/test_timeout_sync.py | 41 ++++++- 5 files changed, 220 insertions(+), 44 deletions(-) diff --git a/astrapy/collection.py b/astrapy/collection.py index 
316d38a0..0a8de9bc 100644 --- a/astrapy/collection.py +++ b/astrapy/collection.py @@ -494,7 +494,7 @@ def insert_many( documents=_documents[i : i + _chunk_size], options=options, partial_failures_allowed=True, - timeout_info=timeout_manager.check_remaining_timeout(), + timeout_info=timeout_manager.remaining_timeout_info(), ) # accumulate the results in this call chunk_inserted_ids = (chunk_response.get("status") or {}).get( @@ -534,7 +534,7 @@ def _chunk_insertor( documents=document_chunk, options=options, partial_failures_allowed=True, - timeout_info=timeout_manager.check_remaining_timeout(), + timeout_info=timeout_manager.remaining_timeout_info(), ) raw_results = list( @@ -552,7 +552,7 @@ def _chunk_insertor( _documents[i : i + _chunk_size], options=options, partial_failures_allowed=True, - timeout_info=timeout_manager.check_remaining_timeout(), + timeout_info=timeout_manager.remaining_timeout_info(), ) for i in range(0, len(_documents), _chunk_size) ] @@ -1187,7 +1187,7 @@ def update_many( update=update, filter=filter, options=options, - timeout_info=timeout_manager.check_remaining_timeout(), + timeout_info=timeout_manager.remaining_timeout_info(), ) this_um_status = this_um_response.get("status") or {} # @@ -1387,7 +1387,7 @@ def delete_many( this_dm_response = self._astra_db_collection.delete_many( filter=filter, skip_error_check=True, - timeout_info=timeout_manager.check_remaining_timeout(), + timeout_info=timeout_manager.remaining_timeout_info(), ) # if errors, quit early if this_dm_response.get("errors", []): @@ -1449,6 +1449,7 @@ def bulk_write( requests: Iterable[BaseOperation], *, ordered: bool = True, + max_time_ms: Optional[int] = None, ) -> BulkWriteResult: """ Execute an arbitrary amount of operations such as inserts, updates, deletes @@ -1468,6 +1469,10 @@ def bulk_write( in arbitrary order, possibly in a concurrent fashion. For performance reasons, `ordered=False` should be preferred when compatible with the needs of the application flow. 
+ max_time_ms: a timeout, in milliseconds, for the whole bulk write. + Remember that, if the method call times out, then there's no + guarantee about what portion of the bulk write has been received + and successfully executed by the Data API. Returns: A single BulkWriteResult summarizing the whole list of requested @@ -1479,11 +1484,16 @@ def bulk_write( # lazy importing here against circular-import error from astrapy.operations import reduce_bulk_write_results + timeout_manager = MultiCallTimeoutManager(overall_max_time_ms=max_time_ms) if ordered: bulk_write_results: List[BulkWriteResult] = [] for operation_i, operation in enumerate(requests): try: - this_bw_result = operation.execute(self, operation_i) + this_bw_result = operation.execute( + self, + index_in_bulk_write=operation_i, + bulk_write_timeout_ms=timeout_manager.remaining_timeout_ms(), + ) bulk_write_results.append(this_bw_result) except CumulativeOperationException as exc: partial_result = exc.partial_result @@ -1524,7 +1534,11 @@ def _execute_as_either( operation: BaseOperation, operation_i: int ) -> Tuple[Optional[BulkWriteResult], Optional[DataAPIResponseException]]: try: - ex_result = operation.execute(self, operation_i) + ex_result = operation.execute( + self, + index_in_bulk_write=operation_i, + bulk_write_timeout_ms=timeout_manager.remaining_timeout_ms(), + ) return (ex_result, None) except DataAPIResponseException as exc: return (None, exc) @@ -1979,7 +1993,7 @@ async def insert_many( documents=_documents[i : i + _chunk_size], options=options, partial_failures_allowed=True, - timeout_info=timeout_manager.check_remaining_timeout(), + timeout_info=timeout_manager.remaining_timeout_info(), ) # accumulate the results in this call chunk_inserted_ids = (chunk_response.get("status") or {}).get( @@ -2020,7 +2034,7 @@ async def concurrent_insert_chunk( document_chunk, options=options, partial_failures_allowed=True, - timeout_info=timeout_manager.check_remaining_timeout(), + 
timeout_info=timeout_manager.remaining_timeout_info(), ) if _concurrency > 1: @@ -2660,7 +2674,7 @@ async def update_many( update=update, filter=filter, options=options, - timeout_info=timeout_manager.check_remaining_timeout(), + timeout_info=timeout_manager.remaining_timeout_info(), ) this_um_status = this_um_response.get("status") or {} # @@ -2862,7 +2876,7 @@ async def delete_many( this_dm_response = await self._astra_db_collection.delete_many( filter=filter, skip_error_check=True, - timeout_info=timeout_manager.check_remaining_timeout(), + timeout_info=timeout_manager.remaining_timeout_info(), ) # if errors, quit early if this_dm_response.get("errors", []): @@ -2924,6 +2938,7 @@ async def bulk_write( requests: Iterable[AsyncBaseOperation], *, ordered: bool = True, + max_time_ms: Optional[int] = None, ) -> BulkWriteResult: """ Execute an arbitrary amount of operations such as inserts, updates, deletes @@ -2943,6 +2958,10 @@ async def bulk_write( in arbitrary order, possibly in a concurrent fashion. For performance reasons, `ordered=False` should be preferred when compatible with the needs of the application flow. + max_time_ms: a timeout, in milliseconds, for the whole bulk write. + Remember that, if the method call times out, then there's no + guarantee about what portion of the bulk write has been received + and successfully executed by the Data API. 
Returns: A single BulkWriteResult summarizing the whole list of requested @@ -2954,11 +2973,16 @@ async def bulk_write( # lazy importing here against circular-import error from astrapy.operations import reduce_bulk_write_results + timeout_manager = MultiCallTimeoutManager(overall_max_time_ms=max_time_ms) if ordered: bulk_write_results: List[BulkWriteResult] = [] for operation_i, operation in enumerate(requests): try: - this_bw_result = await operation.execute(self, operation_i) + this_bw_result = await operation.execute( + self, + index_in_bulk_write=operation_i, + bulk_write_timeout_ms=timeout_manager.remaining_timeout_ms(), + ) bulk_write_results.append(this_bw_result) except CumulativeOperationException as exc: partial_result = exc.partial_result @@ -3002,7 +3026,11 @@ async def _concurrent_execute_as_either( ) -> Tuple[Optional[BulkWriteResult], Optional[DataAPIResponseException]]: async with sem: try: - ex_result = await operation.execute(self, operation_i) + ex_result = await operation.execute( + self, + index_in_bulk_write=operation_i, + bulk_write_timeout_ms=timeout_manager.remaining_timeout_ms(), + ) return (ex_result, None) except DataAPIResponseException as exc: return (None, exc) diff --git a/astrapy/exceptions.py b/astrapy/exceptions.py index 3e517979..c2078111 100644 --- a/astrapy/exceptions.py +++ b/astrapy/exceptions.py @@ -555,18 +555,17 @@ def __init__(self, overall_max_time_ms: Optional[int]) -> None: else: self.deadline_ms = None - def check_remaining_timeout(self) -> Union[TimeoutInfo, None]: + def remaining_timeout_ms(self) -> Union[int, None]: """ Ensure the deadline, if any, is not yet in the past. If it is, raise an appropriate timeout error. - It it is not, or there is no deadline, return a suitable TimeoutInfo - for use within the multi-call method. + If not, return either None (if no timeout) or the remaining milliseconds. + For use within the multi-call method. 
""" now_ms = int(time.time() * 1000) if self.deadline_ms is not None: if now_ms < self.deadline_ms: - remaining_ms = self.deadline_ms - now_ms - return base_timeout_info(max_time_ms=remaining_ms) + return self.deadline_ms - now_ms else: raise DataAPITimeoutException( text="Operation timed out.", @@ -576,3 +575,12 @@ def check_remaining_timeout(self) -> Union[TimeoutInfo, None]: ) else: return None + + def remaining_timeout_info(self) -> Union[TimeoutInfo, None]: + """ + Ensure the deadline, if any, is not yet in the past. + If it is, raise an appropriate timeout error. + It it is not, or there is no deadline, return a suitable TimeoutInfo + for use within the multi-call method. + """ + return base_timeout_info(max_time_ms=self.remaining_timeout_ms()) diff --git a/astrapy/operations.py b/astrapy/operations.py index 02922438..47f1a62f 100644 --- a/astrapy/operations.py +++ b/astrapy/operations.py @@ -81,7 +81,10 @@ class BaseOperation(ABC): @abstractmethod def execute( - self, collection: Collection, index_in_bulk_write: int + self, + collection: Collection, + index_in_bulk_write: int, + bulk_write_timeout_ms: Optional[int], ) -> BulkWriteResult: ... @@ -104,7 +107,10 @@ def __init__( self.document = document def execute( - self, collection: Collection, index_in_bulk_write: int + self, + collection: Collection, + index_in_bulk_write: int, + bulk_write_timeout_ms: Optional[int], ) -> BulkWriteResult: """ Execute this operation against a collection as part of a bulk write. 
@@ -114,7 +120,9 @@ def execute( insert_in_bulk_write: the index in the list of bulkoperations """ - op_result: InsertOneResult = collection.insert_one(document=self.document) + op_result: InsertOneResult = collection.insert_one( + document=self.document, max_time_ms=bulk_write_timeout_ms + ) return op_result.to_bulk_write_result(index_in_bulk_write=index_in_bulk_write) @@ -153,7 +161,10 @@ def __init__( self.concurrency = concurrency def execute( - self, collection: Collection, index_in_bulk_write: int + self, + collection: Collection, + index_in_bulk_write: int, + bulk_write_timeout_ms: Optional[int], ) -> BulkWriteResult: """ Execute this operation against a collection as part of a bulk write. @@ -168,6 +179,7 @@ def execute( ordered=self.ordered, chunk_size=self.chunk_size, concurrency=self.concurrency, + max_time_ms=bulk_write_timeout_ms, ) return op_result.to_bulk_write_result(index_in_bulk_write=index_in_bulk_write) @@ -204,7 +216,10 @@ def __init__( self.upsert = upsert def execute( - self, collection: Collection, index_in_bulk_write: int + self, + collection: Collection, + index_in_bulk_write: int, + bulk_write_timeout_ms: Optional[int], ) -> BulkWriteResult: """ Execute this operation against a collection as part of a bulk write. @@ -219,6 +234,7 @@ def execute( update=self.update, sort=self.sort, upsert=self.upsert, + max_time_ms=bulk_write_timeout_ms, ) return op_result.to_bulk_write_result(index_in_bulk_write=index_in_bulk_write) @@ -251,7 +267,10 @@ def __init__( self.upsert = upsert def execute( - self, collection: Collection, index_in_bulk_write: int + self, + collection: Collection, + index_in_bulk_write: int, + bulk_write_timeout_ms: Optional[int], ) -> BulkWriteResult: """ Execute this operation against a collection as part of a bulk write. 
@@ -265,6 +284,7 @@ def execute( filter=self.filter, update=self.update, upsert=self.upsert, + max_time_ms=bulk_write_timeout_ms, ) return op_result.to_bulk_write_result(index_in_bulk_write=index_in_bulk_write) @@ -301,7 +321,10 @@ def __init__( self.upsert = upsert def execute( - self, collection: Collection, index_in_bulk_write: int + self, + collection: Collection, + index_in_bulk_write: int, + bulk_write_timeout_ms: Optional[int], ) -> BulkWriteResult: """ Execute this operation against a collection as part of a bulk write. @@ -316,6 +339,7 @@ def execute( replacement=self.replacement, sort=self.sort, upsert=self.upsert, + max_time_ms=bulk_write_timeout_ms, ) return op_result.to_bulk_write_result(index_in_bulk_write=index_in_bulk_write) @@ -344,7 +368,10 @@ def __init__( self.sort = sort def execute( - self, collection: Collection, index_in_bulk_write: int + self, + collection: Collection, + index_in_bulk_write: int, + bulk_write_timeout_ms: Optional[int], ) -> BulkWriteResult: """ Execute this operation against a collection as part of a bulk write. @@ -355,7 +382,9 @@ def execute( """ op_result: DeleteResult = collection.delete_one( - filter=self.filter, sort=self.sort + filter=self.filter, + sort=self.sort, + max_time_ms=bulk_write_timeout_ms, ) return op_result.to_bulk_write_result(index_in_bulk_write=index_in_bulk_write) @@ -379,7 +408,10 @@ def __init__( self.filter = filter def execute( - self, collection: Collection, index_in_bulk_write: int + self, + collection: Collection, + index_in_bulk_write: int, + bulk_write_timeout_ms: Optional[int], ) -> BulkWriteResult: """ Execute this operation against a collection as part of a bulk write. 
@@ -389,7 +421,9 @@ def execute( insert_in_bulk_write: the index in the list of bulkoperations """ - op_result: DeleteResult = collection.delete_many(filter=self.filter) + op_result: DeleteResult = collection.delete_many( + filter=self.filter, max_time_ms=bulk_write_timeout_ms + ) return op_result.to_bulk_write_result(index_in_bulk_write=index_in_bulk_write) @@ -401,7 +435,10 @@ class AsyncBaseOperation(ABC): @abstractmethod async def execute( - self, collection: AsyncCollection, index_in_bulk_write: int + self, + collection: AsyncCollection, + index_in_bulk_write: int, + bulk_write_timeout_ms: Optional[int], ) -> BulkWriteResult: ... @@ -424,7 +461,10 @@ def __init__( self.document = document async def execute( - self, collection: AsyncCollection, index_in_bulk_write: int + self, + collection: AsyncCollection, + index_in_bulk_write: int, + bulk_write_timeout_ms: Optional[int], ) -> BulkWriteResult: """ Execute this operation against a collection as part of a bulk write. @@ -434,7 +474,9 @@ async def execute( insert_in_bulk_write: the index in the list of bulkoperations """ - op_result: InsertOneResult = await collection.insert_one(document=self.document) + op_result: InsertOneResult = await collection.insert_one( + document=self.document, max_time_ms=bulk_write_timeout_ms + ) return op_result.to_bulk_write_result(index_in_bulk_write=index_in_bulk_write) @@ -473,7 +515,10 @@ def __init__( self.concurrency = concurrency async def execute( - self, collection: AsyncCollection, index_in_bulk_write: int + self, + collection: AsyncCollection, + index_in_bulk_write: int, + bulk_write_timeout_ms: Optional[int], ) -> BulkWriteResult: """ Execute this operation against a collection as part of a bulk write. 
@@ -488,6 +533,7 @@ async def execute( ordered=self.ordered, chunk_size=self.chunk_size, concurrency=self.concurrency, + max_time_ms=bulk_write_timeout_ms, ) return op_result.to_bulk_write_result(index_in_bulk_write=index_in_bulk_write) @@ -524,7 +570,10 @@ def __init__( self.upsert = upsert async def execute( - self, collection: AsyncCollection, index_in_bulk_write: int + self, + collection: AsyncCollection, + index_in_bulk_write: int, + bulk_write_timeout_ms: Optional[int], ) -> BulkWriteResult: """ Execute this operation against a collection as part of a bulk write. @@ -539,6 +588,7 @@ async def execute( update=self.update, sort=self.sort, upsert=self.upsert, + max_time_ms=bulk_write_timeout_ms, ) return op_result.to_bulk_write_result(index_in_bulk_write=index_in_bulk_write) @@ -571,7 +621,10 @@ def __init__( self.upsert = upsert async def execute( - self, collection: AsyncCollection, index_in_bulk_write: int + self, + collection: AsyncCollection, + index_in_bulk_write: int, + bulk_write_timeout_ms: Optional[int], ) -> BulkWriteResult: """ Execute this operation against a collection as part of a bulk write. @@ -585,6 +638,7 @@ async def execute( filter=self.filter, update=self.update, upsert=self.upsert, + max_time_ms=bulk_write_timeout_ms, ) return op_result.to_bulk_write_result(index_in_bulk_write=index_in_bulk_write) @@ -621,7 +675,10 @@ def __init__( self.upsert = upsert async def execute( - self, collection: AsyncCollection, index_in_bulk_write: int + self, + collection: AsyncCollection, + index_in_bulk_write: int, + bulk_write_timeout_ms: Optional[int], ) -> BulkWriteResult: """ Execute this operation against a collection as part of a bulk write. 
@@ -636,6 +693,7 @@ async def execute( replacement=self.replacement, sort=self.sort, upsert=self.upsert, + max_time_ms=bulk_write_timeout_ms, ) return op_result.to_bulk_write_result(index_in_bulk_write=index_in_bulk_write) @@ -664,7 +722,10 @@ def __init__( self.sort = sort async def execute( - self, collection: AsyncCollection, index_in_bulk_write: int + self, + collection: AsyncCollection, + index_in_bulk_write: int, + bulk_write_timeout_ms: Optional[int], ) -> BulkWriteResult: """ Execute this operation against a collection as part of a bulk write. @@ -675,7 +736,7 @@ async def execute( """ op_result: DeleteResult = await collection.delete_one( - filter=self.filter, sort=self.sort + filter=self.filter, sort=self.sort, max_time_ms=bulk_write_timeout_ms ) return op_result.to_bulk_write_result(index_in_bulk_write=index_in_bulk_write) @@ -699,7 +760,10 @@ def __init__( self.filter = filter async def execute( - self, collection: AsyncCollection, index_in_bulk_write: int + self, + collection: AsyncCollection, + index_in_bulk_write: int, + bulk_write_timeout_ms: Optional[int], ) -> BulkWriteResult: """ Execute this operation against a collection as part of a bulk write. 
@@ -709,5 +773,7 @@ async def execute( insert_in_bulk_write: the index in the list of bulkoperations """ - op_result: DeleteResult = await collection.delete_many(filter=self.filter) + op_result: DeleteResult = await collection.delete_many( + filter=self.filter, max_time_ms=bulk_write_timeout_ms + ) return op_result.to_bulk_write_result(index_in_bulk_write=index_in_bulk_write) diff --git a/tests/idiomatic/integration/test_timeout_async.py b/tests/idiomatic/integration/test_timeout_async.py index 22561af5..08351f0b 100644 --- a/tests/idiomatic/integration/test_timeout_async.py +++ b/tests/idiomatic/integration/test_timeout_async.py @@ -17,6 +17,7 @@ from astrapy import AsyncCollection, AsyncDatabase from astrapy.exceptions import DataAPITimeoutException +from astrapy.operations import AsyncDeleteMany, AsyncInsertMany from astrapy.info import get_database_info @@ -84,13 +85,13 @@ async def test_cursor_overalltimeout_exceptions_async( acol = async_empty_collection await acol.insert_many([{"a": 1}] * 1000) - await acol.distinct("a", max_time_ms=5000) + await acol.distinct("a", max_time_ms=20000) with pytest.raises(DataAPITimeoutException): await acol.distinct("a", max_time_ms=1) cur1 = acol.find({}) cur2 = acol.find({}) - await cur1.distinct("a", max_time_ms=5000) + await cur1.distinct("a", max_time_ms=20000) with pytest.raises(DataAPITimeoutException): await cur2.distinct("a", max_time_ms=1) @@ -157,3 +158,39 @@ async def test_delete_many_timeout_exceptions_async( await async_collection.delete_many({"f": "delete_many2"}, max_time_ms=20000) with pytest.raises(DataAPITimeoutException): await async_collection.delete_many({"f": "delete_many3"}, max_time_ms=200) + + @pytest.mark.describe("test of bulk_write timeouts, async") + async def test_bulk_write_ordered_timeout_exceptions_async( + self, + async_empty_collection: AsyncCollection, + ) -> None: + im_a = AsyncInsertMany([{"seq": i, "group": "A"} for i in range(100)]) + im_b = AsyncInsertMany([{"seq": i, "group": "B"} for 
i in range(100)]) + dm = AsyncDeleteMany(filter={"group": "A"}) + + await async_empty_collection.bulk_write([im_a, im_b, dm], ordered=True) + await async_empty_collection.bulk_write( + [im_a, im_b, dm], ordered=True, max_time_ms=50000 + ) + with pytest.raises(DataAPITimeoutException): + await async_empty_collection.bulk_write( + [im_a, im_b, dm], ordered=True, max_time_ms=500 + ) + + @pytest.mark.describe("test of bulk_write timeouts, async") + async def test_bulk_write_unordered_timeout_exceptions_async( + self, + async_empty_collection: AsyncCollection, + ) -> None: + im_a = AsyncInsertMany([{"seq": i, "group": "A"} for i in range(100)]) + im_b = AsyncInsertMany([{"seq": i, "group": "B"} for i in range(100)]) + dm = AsyncDeleteMany(filter={"group": "A"}) + + await async_empty_collection.bulk_write([im_a, im_b, dm], ordered=False) + await async_empty_collection.bulk_write( + [im_a, im_b, dm], ordered=False, max_time_ms=50000 + ) + with pytest.raises(DataAPITimeoutException): + await async_empty_collection.bulk_write( + [im_a, im_b, dm], ordered=False, max_time_ms=500 + ) diff --git a/tests/idiomatic/integration/test_timeout_sync.py b/tests/idiomatic/integration/test_timeout_sync.py index c7cd8c5d..d494b7af 100644 --- a/tests/idiomatic/integration/test_timeout_sync.py +++ b/tests/idiomatic/integration/test_timeout_sync.py @@ -17,6 +17,7 @@ from astrapy import Collection, Database from astrapy.exceptions import DataAPITimeoutException +from astrapy.operations import DeleteMany, InsertMany from astrapy.info import get_database_info @@ -82,13 +83,13 @@ def test_cursor_overalltimeout_exceptions_sync( col = sync_empty_collection col.insert_many([{"a": 1}] * 1000) - col.distinct("a", max_time_ms=5000) + col.distinct("a", max_time_ms=20000) with pytest.raises(DataAPITimeoutException): col.distinct("a", max_time_ms=1) cur1 = col.find({}) cur2 = col.find({}) - cur1.distinct("a", max_time_ms=5000) + cur1.distinct("a", max_time_ms=20000) with 
pytest.raises(DataAPITimeoutException): cur2.distinct("a", max_time_ms=1) @@ -153,3 +154,39 @@ def test_delete_many_timeout_exceptions_sync( sync_collection.delete_many({"f": "delete_many2"}, max_time_ms=20000) with pytest.raises(DataAPITimeoutException): sync_collection.delete_many({"f": "delete_many3"}, max_time_ms=200) + + @pytest.mark.describe("test of bulk_write timeouts, sync") + def test_bulk_write_ordered_timeout_exceptions_sync( + self, + sync_empty_collection: Collection, + ) -> None: + im_a = InsertMany([{"seq": i, "group": "A"} for i in range(100)]) + im_b = InsertMany([{"seq": i, "group": "B"} for i in range(100)]) + dm = DeleteMany(filter={"group": "A"}) + + sync_empty_collection.bulk_write([im_a, im_b, dm], ordered=True) + sync_empty_collection.bulk_write( + [im_a, im_b, dm], ordered=True, max_time_ms=50000 + ) + with pytest.raises(DataAPITimeoutException): + sync_empty_collection.bulk_write( + [im_a, im_b, dm], ordered=True, max_time_ms=500 + ) + + @pytest.mark.describe("test of bulk_write timeouts, sync") + def test_bulk_write_unordered_timeout_exceptions_sync( + self, + sync_empty_collection: Collection, + ) -> None: + im_a = InsertMany([{"seq": i, "group": "A"} for i in range(100)]) + im_b = InsertMany([{"seq": i, "group": "B"} for i in range(100)]) + dm = DeleteMany(filter={"group": "A"}) + + sync_empty_collection.bulk_write([im_a, im_b, dm], ordered=False) + sync_empty_collection.bulk_write( + [im_a, im_b, dm], ordered=False, max_time_ms=50000 + ) + with pytest.raises(DataAPITimeoutException): + sync_empty_collection.bulk_write( + [im_a, im_b, dm], ordered=False, max_time_ms=500 + ) From 69c60b644ae122ab84e87b3db0e25b9ecaa1de3a Mon Sep 17 00:00:00 2001 From: Stefano Lottini Date: Mon, 18 Mar 2024 17:51:34 +0100 Subject: [PATCH 11/13] fix method name in unit test --- tests/idiomatic/unit/test_timeouts.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/idiomatic/unit/test_timeouts.py 
b/tests/idiomatic/unit/test_timeouts.py index 9998d217..5d82398d 100644 --- a/tests/idiomatic/unit/test_timeouts.py +++ b/tests/idiomatic/unit/test_timeouts.py @@ -21,18 +21,18 @@ @pytest.mark.describe("test MultiCallTimeoutManager") def test_multicalltimeoutmanager() -> None: mgr_n = MultiCallTimeoutManager(overall_max_time_ms=None) - assert mgr_n.check_remaining_timeout() is None + assert mgr_n.remaining_timeout_info() is None time.sleep(0.5) - assert mgr_n.check_remaining_timeout() is None + assert mgr_n.remaining_timeout_info() is None mgr_1 = MultiCallTimeoutManager(overall_max_time_ms=1000) - crt_1 = mgr_1.check_remaining_timeout() + crt_1 = mgr_1.remaining_timeout_info() assert crt_1 is not None assert crt_1["base"] > 0 time.sleep(0.6) - crt_2 = mgr_1.check_remaining_timeout() + crt_2 = mgr_1.remaining_timeout_info() assert crt_2 is not None assert crt_2["base"] > 0 time.sleep(0.6) with pytest.raises(DataAPITimeoutException): - mgr_1.check_remaining_timeout() + mgr_1.remaining_timeout_info() From 979d088b23a0c846076db1a021021cd9ecd05138 Mon Sep 17 00:00:00 2001 From: Stefano Lottini Date: Mon, 18 Mar 2024 18:14:46 +0100 Subject: [PATCH 12/13] enable update_many paginated tests in prod after deploy --- tests/idiomatic/integration/test_dml_async.py | 5 ----- tests/idiomatic/integration/test_dml_sync.py | 5 ----- 2 files changed, 10 deletions(-) diff --git a/tests/idiomatic/integration/test_dml_async.py b/tests/idiomatic/integration/test_dml_async.py index f9260818..fe6a857c 100644 --- a/tests/idiomatic/integration/test_dml_async.py +++ b/tests/idiomatic/integration/test_dml_async.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os import datetime from typing import Any, Dict, List @@ -879,10 +878,6 @@ async def test_collection_update_many_async( assert resp4.update_info["nModified"] == 0 assert "upserted" in resp4.update_info - @pytest.mark.skipif( - ".astra-dev." 
not in os.environ["ASTRA_DB_API_ENDPOINT"], - reason="paginated update_many is in DEV only at the moment", - ) @pytest.mark.describe("test of update_many, async") async def test_collection_paginated_update_many_async( self, diff --git a/tests/idiomatic/integration/test_dml_sync.py b/tests/idiomatic/integration/test_dml_sync.py index 695fbebe..960c98bc 100644 --- a/tests/idiomatic/integration/test_dml_sync.py +++ b/tests/idiomatic/integration/test_dml_sync.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os import datetime import pytest @@ -803,10 +802,6 @@ def test_collection_update_many_sync( assert resp4.update_info["nModified"] == 0 assert "upserted" in resp4.update_info - @pytest.mark.skipif( - ".astra-dev." not in os.environ["ASTRA_DB_API_ENDPOINT"], - reason="paginated update_many is in DEV only at the moment", - ) @pytest.mark.describe("test of update_many, sync") def test_collection_paginated_update_many_sync( self, From 73e057380d2aa2bc28d712d905f36d2e9443f452 Mon Sep 17 00:00:00 2001 From: Stefano Lottini Date: Mon, 18 Mar 2024 18:17:59 +0100 Subject: [PATCH 13/13] more stringent values to ensure timeout tests pass --- .../idiomatic/integration/test_timeout_async.py | 16 +++++++--------- tests/idiomatic/integration/test_timeout_sync.py | 12 ++++++------ 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/tests/idiomatic/integration/test_timeout_async.py b/tests/idiomatic/integration/test_timeout_async.py index 08351f0b..c989fac7 100644 --- a/tests/idiomatic/integration/test_timeout_async.py +++ b/tests/idiomatic/integration/test_timeout_async.py @@ -110,16 +110,14 @@ async def test_insert_many_timeout_exceptions_async( ) with pytest.raises(DataAPITimeoutException): - await async_collection.insert_many( - fifty_docs, ordered=True, max_time_ms=200 - ) + await async_collection.insert_many(fifty_docs, ordered=True, max_time_ms=2) with 
pytest.raises(DataAPITimeoutException): await async_collection.insert_many( - fifty_docs, ordered=False, concurrency=1, max_time_ms=200 + fifty_docs, ordered=False, concurrency=1, max_time_ms=2 ) with pytest.raises(DataAPITimeoutException): await async_collection.insert_many( - fifty_docs, ordered=False, concurrency=2, max_time_ms=200 + fifty_docs, ordered=False, concurrency=2, max_time_ms=2 ) @pytest.mark.describe("test of update_many timeouts, async") @@ -137,7 +135,7 @@ async def test_update_many_timeout_exceptions_async( with pytest.raises(DataAPITimeoutException): await async_collection.update_many( - {"f": "update_many"}, {"$inc": {"seq": 100}}, max_time_ms=200 + {"f": "update_many"}, {"$inc": {"seq": 100}}, max_time_ms=2 ) @pytest.mark.describe("test of delete_many timeouts, async") @@ -157,7 +155,7 @@ async def test_delete_many_timeout_exceptions_async( await async_collection.delete_many({"f": "delete_many1"}) await async_collection.delete_many({"f": "delete_many2"}, max_time_ms=20000) with pytest.raises(DataAPITimeoutException): - await async_collection.delete_many({"f": "delete_many3"}, max_time_ms=200) + await async_collection.delete_many({"f": "delete_many3"}, max_time_ms=2) @pytest.mark.describe("test of bulk_write timeouts, async") async def test_bulk_write_ordered_timeout_exceptions_async( @@ -174,7 +172,7 @@ async def test_bulk_write_ordered_timeout_exceptions_async( ) with pytest.raises(DataAPITimeoutException): await async_empty_collection.bulk_write( - [im_a, im_b, dm], ordered=True, max_time_ms=500 + [im_a, im_b, dm], ordered=True, max_time_ms=5 ) @pytest.mark.describe("test of bulk_write timeouts, async") @@ -192,5 +190,5 @@ async def test_bulk_write_unordered_timeout_exceptions_async( ) with pytest.raises(DataAPITimeoutException): await async_empty_collection.bulk_write( - [im_a, im_b, dm], ordered=False, max_time_ms=500 + [im_a, im_b, dm], ordered=False, max_time_ms=5 ) diff --git a/tests/idiomatic/integration/test_timeout_sync.py 
b/tests/idiomatic/integration/test_timeout_sync.py index d494b7af..d9bdbb56 100644 --- a/tests/idiomatic/integration/test_timeout_sync.py +++ b/tests/idiomatic/integration/test_timeout_sync.py @@ -108,14 +108,14 @@ def test_insert_many_timeout_exceptions_sync( ) with pytest.raises(DataAPITimeoutException): - sync_collection.insert_many(fifty_docs, ordered=True, max_time_ms=200) + sync_collection.insert_many(fifty_docs, ordered=True, max_time_ms=2) with pytest.raises(DataAPITimeoutException): sync_collection.insert_many( - fifty_docs, ordered=False, concurrency=1, max_time_ms=200 + fifty_docs, ordered=False, concurrency=1, max_time_ms=2 ) with pytest.raises(DataAPITimeoutException): sync_collection.insert_many( - fifty_docs, ordered=False, concurrency=2, max_time_ms=200 + fifty_docs, ordered=False, concurrency=2, max_time_ms=2 ) @pytest.mark.describe("test of update_many timeouts, sync") @@ -133,7 +133,7 @@ def test_update_many_timeout_exceptions_sync( with pytest.raises(DataAPITimeoutException): sync_collection.update_many( - {"f": "update_many"}, {"$inc": {"seq": 100}}, max_time_ms=200 + {"f": "update_many"}, {"$inc": {"seq": 100}}, max_time_ms=2 ) @pytest.mark.describe("test of delete_many timeouts, sync") @@ -153,7 +153,7 @@ def test_delete_many_timeout_exceptions_sync( sync_collection.delete_many({"f": "delete_many1"}) sync_collection.delete_many({"f": "delete_many2"}, max_time_ms=20000) with pytest.raises(DataAPITimeoutException): - sync_collection.delete_many({"f": "delete_many3"}, max_time_ms=200) + sync_collection.delete_many({"f": "delete_many3"}, max_time_ms=2) @pytest.mark.describe("test of bulk_write timeouts, sync") def test_bulk_write_ordered_timeout_exceptions_sync( @@ -188,5 +188,5 @@ def test_bulk_write_unordered_timeout_exceptions_sync( ) with pytest.raises(DataAPITimeoutException): sync_empty_collection.bulk_write( - [im_a, im_b, dm], ordered=False, max_time_ms=500 + [im_a, im_b, dm], ordered=False, max_time_ms=5 )