# vespa/application.py

# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

import sys
import asyncio
import traceback
import concurrent.futures
import warnings
from typing import Optional, Dict, Generator, List, IO, Iterable, Callable, Tuple, Union
from concurrent.futures import ThreadPoolExecutor, Future, as_completed
from queue import Queue, Empty
import threading
from requests import Session
from requests.models import Response
from requests.exceptions import ConnectionError, HTTPError, JSONDecodeError
from requests.adapters import HTTPAdapter
from urllib3.util import Retry
from tenacity import (
    retry,
    wait_exponential,
    wait_random_exponential,
    stop_after_attempt,
    retry_if_result,
    retry_if_exception,
    retry_if_exception_type,
    retry_any,
    RetryCallState,
)
from time import sleep
from urllib.parse import quote
import random
import time

from vespa.exceptions import VespaError
from vespa.io import VespaQueryResponse, VespaResponse, VespaVisitResponse
from vespa.package import ApplicationPackage
import httpx
import vespa
import gzip
from requests.models import PreparedRequest
from io import BytesIO
import logging

# Only let urllib3 log records of level ERROR or higher through.
logging.getLogger("urllib3").setLevel(logging.ERROR)

# NOTE(review): presumably the name of the environment variable that holds the
# Vespa Cloud data plane secret token - confirm against the code that reads it.
VESPA_CLOUD_SECRET_TOKEN: str = "VESPA_CLOUD_SECRET_TOKEN"


def raise_for_status(
    response: Response, raise_on_not_found: Optional[bool] = False
) -> None:
    """
    Raise an appropriate error if the response signals an HTTP failure.

    Nothing happens for successful responses. For failed responses the JSON body is
    inspected so that error details reported by Vespa are surfaced as a VespaError
    (chained to the underlying HTTPError).

    :param response: Response object from Vespa API.
    :param raise_on_not_found: If True, a 404 response is treated as an error. If False, a 404
        response with a JSON body is ignored. A 404 without a decodable JSON body is always raised.
    :raises HTTPError: If status_code is between 400 and 599 and the body carries no Vespa error details.
        The raised error keeps the failing response attached as ``.response``.
    :raises VespaError: If the response JSON contains an error message.
    """
    try:
        response.raise_for_status()
    except HTTPError as http_error:
        try:
            response_json = response.json()
        except JSONDecodeError:
            # No JSON payload to extract details from: surface the HTTP error as is.
            raise http_error
        if response.status_code == 404 and not raise_on_not_found:
            return
        errors = response_json.get("root", {}).get("errors", [])
        error_message = response_json.get("message", None)
        if errors:
            raise VespaError(errors) from http_error
        if error_message:
            raise VespaError(error_message) from http_error
        # Attach the response so that callers inspecting `.response` / `.request`
        # on the re-raised error do not get None.
        raise HTTPError(http_error, response=response) from http_error


class Vespa(object):
    def __init__(
        self,
        url: str,
        port: Optional[int] = None,
        deployment_message: Optional[List[str]] = None,
        cert: Optional[str] = None,
        key: Optional[str] = None,
        vespa_cloud_secret_token: Optional[str] = None,
        output_file: IO = sys.stdout,
        application_package: Optional[ApplicationPackage] = None,
    ) -> None:
        """
        Establish a connection with an existing Vespa application.

        Trailing slashes in `url` are ignored when the endpoint is built, so that paths appended
        to the endpoint never contain a double slash.

        :param url: Vespa endpoint URL.
        :param port: Vespa endpoint port.
        :param deployment_message: Message returned by Vespa engine after deployment. Used internally by deploy methods.
        :param cert: Path to data plane certificate and key file in case the 'key' parameter is none. If 'key' is not None, this
            should be the path of the certificate file. Typically generated by Vespa-cli with 'vespa auth cert'
        :param key: Path to the data plane key file. Typically generated by Vespa-cli with 'vespa auth cert'
        :param vespa_cloud_secret_token: Vespa Cloud data plane secret token. When given, it is sent as a
            Bearer token in the Authorization header and the auth method is "token", otherwise "mtls".
        :param output_file: Output file to write output messages.
        :param application_package: Application package definition used to deploy the application.

        >>> Vespa(url = "https://cord19.vespa.ai")  # doctest: +SKIP

        >>> Vespa(url = "http://localhost", port = 8080)
        Vespa(http://localhost, 8080)

        >>> Vespa(url="https://token-endpoint..z.vespa-app.cloud", vespa_cloud_secret_token="vespa_cloud_secret_token") # doctest: +SKIP

        >>> Vespa(url = "https://mtls-endpoint..z.vespa-app.cloud", cert = "/path/to/cert.pem", key = "/path/to/key.pem")  # doctest: +SKIP

        """
        self.output_file = output_file
        self.url = url
        self.port = port
        self.deployment_message = deployment_message
        self.cert = cert
        self.key = key
        self.vespa_cloud_secret_token = vespa_cloud_secret_token
        self._application_package = application_package
        self.pyvespa_version = vespa.__version__
        self.base_headers = {"User-Agent": f"pyvespa/{self.pyvespa_version}"}

        # Strip trailing slashes whether or not a port is given; otherwise
        # "https://host/" would yield "https://host//search/".
        base_url = str(url).rstrip("/")
        if port is None:
            self.end_point = base_url
        else:
            self.end_point = f"{base_url}:{port}"
        self.search_end_point = self.end_point + "/search/"

        if self.vespa_cloud_secret_token is not None:
            self.auth_method = "token"
            self.base_headers.update(
                {"Authorization": f"Bearer {self.vespa_cloud_secret_token}"}
            )
        else:
            self.auth_method = "mtls"

    def asyncio(
        self,
        connections: Optional[int] = 1,
        total_timeout: Optional[int] = None,
        timeout: Union[httpx.Timeout, int] = httpx.Timeout(5),
        **kwargs,
    ) -> "VespaAsync":
        """
        Create the asynchronous connection layer for this application.

        The returned object is meant to be used as an async context manager::

                async with app.asyncio() as async_app:
                    response = await async_app.query(body=body)

                # forwarding extra httpx.AsyncClient arguments
                limits = httpx.Limits(max_keepalive_connections=5, max_connections=5, keepalive_expiry=15)
                timeout = httpx.Timeout(connect=3, read=4, write=2, pool=5)
                async with app.asyncio(connections=5, timeout=timeout, limits=limits) as async_app:
                    response = await async_app.query(body=body)

        See :class:`VespaAsync` for more details on the parameters.

        :param connections: Number of maximum_keepalive_connections.
        :param total_timeout: Deprecated. Will be ignored. Use timeout instead.
        :param timeout: httpx.Timeout object. See https://www.python-httpx.org/advanced/timeouts/. Defaults to 5 seconds.
        :param kwargs: Additional arguments to be passed to the httpx.AsyncClient.
        :return: Instance of Vespa asynchronous layer.
        """
        async_layer = VespaAsync(
            app=self,
            connections=connections,
            total_timeout=total_timeout,
            timeout=timeout,
            **kwargs,
        )
        return async_layer

    def syncio(
        self,
        connections: Optional[int] = 8,
        compress: Union[str, bool] = "auto",
    ) -> "VespaSync":
        """
        Create the synchronous connection layer for this application.

        The returned object is meant to be used as a context manager::

            with app.syncio() as sync_app:
                response = sync_app.query(body=body)

        See :class:`VespaSync` for more details.

        :param connections: Number of allowed concurrent connections. Used both as the number of
            connection pools and as the maximum pool size.
        :param compress (Union[str, bool], optional): Whether to compress the request body. Defaults to "auto", which will compress if the body is larger than 1024 bytes.
        :return: Instance of Vespa synchronous layer.
        """
        sync_layer = VespaSync(
            app=self,
            pool_maxsize=connections,
            pool_connections=connections,
            compress=compress,
        )
        return sync_layer

    @staticmethod
    def _run_coroutine_new_event_loop(loop, coro):
        asyncio.set_event_loop(loop)
        return loop.run_until_complete(coro)

    @staticmethod
    def _check_for_running_loop_and_run_coroutine(coro):
        try:
            _ = asyncio.get_running_loop()
            new_loop = asyncio.new_event_loop()
            with concurrent.futures.ThreadPoolExecutor() as executor:
                future = executor.submit(
                    Vespa._run_coroutine_new_event_loop, new_loop, coro
                )
                return future.result()
        except RuntimeError:
            return asyncio.run(coro)

    def http(self, pool_maxsize: int = 10):
        """
        Create a synchronous connection layer sized by a single pool setting.

        :param pool_maxsize: Value used for both `pool_maxsize` and `pool_connections` of the VespaSync instance.
        :return: A VespaSync instance bound to this application.
        """
        sync_layer = VespaSync(
            app=self, pool_connections=pool_maxsize, pool_maxsize=pool_maxsize
        )
        return sync_layer

    def __repr__(self) -> str:
        if self.port:
            return "Vespa({}, {})".format(self.url, self.port)
        else:
            return "Vespa({})".format(self.url)

    def _infer_schema_name(self):
        if not self._application_package:
            raise ValueError(
                "Application Package not available. Not possible to infer schema name."
            )

        try:
            schema = self._application_package.schema
        except AssertionError:
            raise ValueError(
                "Application has more than one schema. Not possible to infer schema name."
            )

        if not schema:
            raise ValueError(
                "Application has no schema. Not possible to infer schema name."
            )

        return schema.name

    def wait_for_application_up(self, max_wait: int = 300) -> None:
        """
        Wait for application endpoint ready (/ApplicationStatus).

        :param max_wait: Seconds to wait for the application endpoint
        :raises RuntimeError: If not able to reach endpoint within :max_wait: param or the client fails to authenticate.
        :return:
        """
        for wait_sec in range(max_wait):
            sleep(1)
            try:
                response = self.get_application_status()
                if not response:
                    continue
                if response.status_code == 200:
                    print("Application is up!", file=self.output_file)
                    return
            except ConnectionError:
                pass

            if wait_sec % 5 == 0:
                print(
                    f"Waiting for application to come up, {wait_sec}/{max_wait} seconds.",
                    file=self.output_file,
                )
        else:
            raise RuntimeError(
                "Could not connect to endpoint {0} using any of the available auth methods within {1} seconds.".format(
                    self.end_point, max_wait
                )
            )

    def get_application_status(self) -> Optional[Response]:
        """
        Fetch the application status (/ApplicationStatus).

        Issues a GET request through the HTTP session of a temporary synchronous
        connection layer.

        :return: The response of the GET request.
        """
        status_url = "{}/ApplicationStatus".format(self.end_point)
        with self.syncio() as session:
            status = session.http_session.get(status_url)
        return status

    def get_model_endpoint(self, model_id: Optional[str] = None) -> Optional[Response]:
        """
        Get stateless model evaluation endpoints.

        Delegates to `VespaSync.get_model_endpoint` using a temporary single-connection session.

        :param model_id: Forwarded unchanged to `VespaSync.get_model_endpoint`.
        :return: Whatever `VespaSync.get_model_endpoint` returns.
        """
        with VespaSync(self, pool_maxsize=1, pool_connections=1) as session:
            endpoint_response = session.get_model_endpoint(model_id=model_id)
            return endpoint_response

    def query(
        self, body: Optional[Dict] = None, groupname: str = None, **kwargs
    ) -> VespaQueryResponse:
        """
        Send a single query request to the Vespa application.

        All request parameters are sent in 'body'. A temporary synchronous session with a
        single connection is opened for the request.

        :param body: Dict containing request parameters.
        :param groupname: The groupname used with streaming search.
        :param kwargs: Extra Vespa Query API parameters.
        :return: The response from the Vespa application.
        """
        # One query needs no more than one pooled connection.
        with VespaSync(self, pool_connections=1, pool_maxsize=1) as session:
            query_response = session.query(body=body, groupname=groupname, **kwargs)
            return query_response

    def feed_data_point(
        self,
        schema: str,
        data_id: str,
        fields: Dict,
        namespace: str = None,
        groupname: str = None,
        compress: Union[str, bool] = "auto",
        **kwargs,
    ) -> VespaResponse:
        """
        Feed one data point to a Vespa app.

        Every call opens a new VespaSync session, with the connection overhead that implies.
        To feed several data points, reuse one session instead::

            app = Vespa(url="localhost", port=8080)
            data_id = "1"
            fields = {
                    "field1": "value1",
                }
            with VespaSync(app) as sync_app:
                response = sync_app.feed_data_point(
                    schema="schema_name",
                    data_id=data_id,
                    fields=fields
                )
            print(response)

        :param schema: The schema that we are sending data to.
        :param data_id: Unique id associated with this data point.
        :param fields: Dict containing all the fields required by the `schema`.
        :param namespace: The namespace that we are sending data to. Falls back to `schema` when empty.
        :param groupname: The groupname that we are sending data
        :param compress (Union[str, bool], optional): Whether to compress the request body. Defaults to "auto", which will compress if the body is larger than 1024 bytes.
        :param kwargs: Forwarded unchanged to `VespaSync.feed_data_point`.
        :return: VespaResponse of the HTTP POST request.
        """
        target_namespace = namespace or schema
        # A single data point does not need more than one pooled connection.
        session = VespaSync(
            app=self, pool_maxsize=1, pool_connections=1, compress=compress
        )
        with session as sync_app:
            return sync_app.feed_data_point(
                schema=schema,
                data_id=data_id,
                fields=fields,
                namespace=target_namespace,
                groupname=groupname,
                **kwargs,
            )

    def feed_iterable(
        self,
        iter: Iterable[Dict],
        schema: Optional[str] = None,
        namespace: Optional[str] = None,
        callback: Optional[Callable[[VespaResponse, str], None]] = None,
        operation_type: Optional[str] = "feed",
        max_queue_size: int = 1000,
        max_workers: int = 8,
        max_connections: int = 16,
        compress: Union[str, bool] = "auto",
        **kwargs,
    ):
        """
        Feed data from an Iterable of Dict with the keys 'id' and 'fields' to be used in the :func:`feed_data_point`.

        Uses a queue to feed data in parallel with a thread pool. The result of each operation is forwarded
        to the user provided callback function that can process the returned `VespaResponse`.

        Example usage::

            app = Vespa(url="localhost", port=8080)
            data = [
                {"id": "1", "fields": {"field1": "value1"}},
                {"id": "2", "fields": {"field1": "value2"}},
            ]
            def callback(response, id):
                print(f"Response for id {id}: {response.status_code}")
            app.feed_iterable(data, schema="schema_name", callback=callback)

        :param iter: An iterable of Dict containing the keys 'id' and 'fields' to be used in the :func:`feed_data_point`. Note that this 'id' is only the last part of the full document id, that will be generated automatically by pyvespa.
        :param schema: The Vespa schema name that we are sending data to.
        :param namespace: The Vespa document id namespace. If no namespace is provided the schema is used.
        :param callback: A callback function to be called on each result. Signature `callback(response:VespaResponse, id:str)`
        :param operation_type: The operation to perform. Default to `feed`. Valid are `feed`, `update` or `delete`.
        :param max_queue_size: The maximum size of the blocking queue and max in-flight operations.
        :param max_workers: The maximum number of workers in the threadpool executor.
        :param max_connections: The maximum number of persisted connections to the Vespa endpoint.
        :param compress (Union[str, bool], optional): Whether to compress the request body. Defaults to "auto", which will compress if the body is larger than 1024 bytes.
        :param kwargs: Additional parameters are passed to the respective operation type specific :func:`_data_point`.
        """
        if operation_type not in ["feed", "update", "delete"]:
            raise ValueError(
                "Invalid operation type. Valid are `feed`, `update` or `delete`."
            )

        if namespace is None:
            namespace = schema
        if not schema:
            try:
                schema = self._infer_schema_name()
            except ValueError:
                raise ValueError(
                    "Not possible to infer schema name. Specify schema parameter."
                )

        def _consumer(
            queue: Queue,
            executor: ThreadPoolExecutor,
            sync_session: VespaSync,
            max_in_flight=2 * max_queue_size,
        ):
            in_flight = 0  # Single threaded consumer
            futures: List[Future] = []
            while True:
                try:
                    doc = queue.get(timeout=5)
                except Empty:
                    continue  # producer has not produced anything
                if doc is None:  # producer is done
                    queue.task_done()
                    break  # Break and wait for all futures to complete

                completed_futures = [future for future in futures if future.done()]
                for future in completed_futures:
                    futures.remove(future)
                    in_flight -= 1
                    _handle_result_callback(future, callback=callback)

                while in_flight >= max_in_flight:
                    # Check for completed tasks and reduce in-flight tasks
                    for future in futures:
                        if future.done():
                            futures.remove(future)
                            in_flight -= 1
                            _handle_result_callback(future, callback=callback)
                    sleep(0.01)  # wait a bit for more futures to complete

                # we can submit a new doc to Vespa
                future: Future = executor.submit(_submit, doc, sync_session)
                futures.append(future)
                in_flight += 1
                queue.task_done()  # signal that we have consumed the doc from queue

            # make sure callback is called for all pending operations before
            # exiting the consumer thread
            for future in futures:
                _handle_result_callback(future, callback)

        def _submit(
            doc: dict, sync_session: VespaSync
        ) -> Tuple[str, Union[VespaResponse, Exception]]:
            id = doc.get("id", None)
            if id is None:
                return id, VespaResponse(
                    status_code=499,
                    json={"id": id, "message": "Missing id in input dict"},
                    url="n/a",
                    operation_type=operation_type,
                )
            fields = doc.get("fields", None)
            if fields is None and operation_type != "delete":
                return id, VespaResponse(
                    status_code=499,
                    json={"id": id, "message": "Missing fields in input dict"},
                    url="n/a",
                    operation_type=operation_type,
                )
            groupname = doc.get("groupname", None)
            try:
                if operation_type == "feed":
                    response: VespaResponse = sync_session.feed_data_point(
                        schema=schema,
                        namespace=namespace,
                        groupname=groupname,
                        data_id=id,
                        fields=fields,
                        **kwargs,
                    )
                    return (id, response)
                elif operation_type == "update":
                    response: VespaResponse = sync_session.update_data(
                        schema=schema,
                        namespace=namespace,
                        groupname=groupname,
                        data_id=id,
                        fields=fields,
                        **kwargs,
                    )
                    return (id, response)
                elif operation_type == "delete":
                    response: VespaResponse = sync_session.delete_data(
                        schema=schema,
                        namespace=namespace,
                        data_id=id,
                        groupname=groupname,
                        **kwargs,
                    )
                    return (id, response)
            except Exception as e:
                return (id, e)

        def _handle_result_callback(
            future: Future, callback: Optional[Callable[[VespaResponse, str], None]]
        ):
            id, response = future.result()
            if isinstance(response, Exception):
                response = VespaResponse(
                    status_code=599,
                    json={
                        "Exception": str(response),
                        "id": id,
                        "message": "Exception during feed_data_point",
                    },
                    url="n/a",
                    operation_type=operation_type,
                )
            if callback is not None:
                try:
                    callback(response, id)
                except Exception as e:
                    print(f"Exception in user callback for id {id}", file=sys.stderr)
                    traceback.print_exception(
                        type(e), e, e.__traceback__, file=sys.stderr
                    )

        with VespaSync(
            app=self,
            pool_maxsize=max_connections,
            pool_connections=max_connections,
            compress=compress,
        ) as session:
            queue = Queue(maxsize=max_queue_size)
            with ThreadPoolExecutor(max_workers=max_workers) as executor:
                consumer_thread = threading.Thread(
                    target=_consumer, args=(queue, executor, session, max_queue_size)
                )
                consumer_thread.start()
                for doc in iter:
                    queue.put(doc, block=True)
                queue.put(None, block=True)
                queue.join()
                consumer_thread.join()

    def feed_async_iterable(
        self,
        iter: Iterable[Dict],
        schema: Optional[str] = None,
        namespace: Optional[str] = None,
        callback: Optional[Callable[[VespaResponse, str], None]] = None,
        operation_type: Optional[str] = "feed",
        max_queue_size: int = 1000,
        max_workers: int = 64,
        max_connections: int = 1,
        **kwargs,
    ):
        """
        Feed data asynchronously using httpx.AsyncClient with HTTP/2. Feed from an Iterable of Dict with the keys 'id' and 'fields' to be used in the :func:`feed_data_point`.
        The result of each operation is forwarded to the user provided callback function that can process the returned `VespaResponse`.
        Prefer using this method over :func:`feed_iterable` when the operation is I/O bound from the client side.

        Example usage::

                app = Vespa(url="localhost", port=8080)
                data = [
                    {"id": "1", "fields": {"field1": "value1"}},
                    {"id": "2", "fields": {"field1": "value2"}},
                ]
                async def callback(response, id):
                    print(f"Response for id {id}: {response.status_code}")
                app.feed_async_iterable(data, schema="schema_name", callback=callback)


        :param iter: An iterable of Dict containing the keys 'id' and 'fields' to be used in the :func:`feed_data_point`. Note that this 'id' is only the last part of the full document id, that will be generated automatically by pyvespa.
        :param schema: The Vespa schema name that we are sending data to.
        :param namespace: The Vespa document id namespace. If no namespace is provided the schema is used.
        :param callback: A callback function to be called on each result. Signature `callback(response:VespaResponse, id:str)`
        :param operation_type: The operation to perform. Default to `feed`. Valid are `feed`, `update` or `delete`.
        :param max_queue_size: The maximum number of tasks waiting to be processed. Useful to limit memory usage. Default is 1000.
        :param max_workers: Maximum number of concurrent requests to have in-flight, bound by an asyncio.Semaphore, that needs to be acquired by a submit task. Increase if the server is scaled to handle more requests.
        :param max_connections: The maximum number of connections passed to httpx.AsyncClient to the Vespa endpoint. As HTTP/2 is used, only one connection is needed.
        :param kwargs: Additional parameters are passed to the respective operation type specific :func:`_data_point`.
        """

        if operation_type not in ["feed", "update", "delete"]:
            raise ValueError(
                "Invalid operation type. Valid are `feed`, `update` or `delete`."
            )

        if namespace is None:
            namespace = schema
        if not schema:
            try:
                schema = self._infer_schema_name()
            except ValueError:
                raise ValueError(
                    "Not possible to infer schema name. Specify schema parameter."
                )

        async def handle_result(task: asyncio.Task, id: str):
            # Wrapper around the task to handle exceptions and call the user callback
            try:
                response = await task
            except Exception as e:
                response = VespaResponse(
                    status_code=599,
                    json={
                        "Exception": str(e),
                        "id": id,
                        "message": "Exception during feed_data_point",
                    },
                    url="n/a",
                    operation_type=operation_type,
                )
            if callback is not None:
                try:
                    callback(response, id)
                except Exception as e:
                    print(f"Exception in user callback for id {id}", file=sys.stderr)
                    traceback.print_exception(
                        type(e), e, e.__traceback__, file=sys.stderr
                    )

        # Wrapping in async function to be able to use asyncio.run, and avoid that the feed_async_iterable have to be async
        async def run():
            async with self.asyncio(connections=max_connections) as async_session:
                semaphore = asyncio.Semaphore(max_workers)
                tasks = []
                for doc in iter:
                    id = doc.get("id")
                    fields = doc.get("fields")
                    groupname = doc.get("groupname")

                    if id is None:
                        response = VespaResponse(
                            status_code=499,
                            json={"id": id, "message": "Missing id in input dict"},
                            url="n/a",
                            operation_type=operation_type,
                        )
                        if callback is not None:
                            callback(response, id)
                        continue
                    if fields is None and operation_type != "delete":
                        response = VespaResponse(
                            status_code=499,
                            json={"id": id, "message": "Missing fields in input dict"},
                            url="n/a",
                            operation_type=operation_type,
                        )
                        if callback is not None:
                            callback(response, id)
                        continue

                    async with semaphore:
                        if operation_type == "feed":
                            task = async_session.feed_data_point(
                                schema=schema,
                                namespace=namespace,
                                groupname=groupname,
                                data_id=id,
                                fields=fields,
                                **kwargs,
                            )
                        elif operation_type == "update":
                            task = async_session.update_data(
                                schema=schema,
                                namespace=namespace,
                                groupname=groupname,
                                data_id=id,
                                fields=fields,
                                **kwargs,
                            )
                        elif operation_type == "delete":
                            task = async_session.delete_data(
                                schema=schema,
                                namespace=namespace,
                                data_id=id,
                                groupname=groupname,
                                **kwargs,
                            )

                        tasks.append(handle_result(asyncio.create_task(task), id))

                        # Control the number of in-flight tasks
                        if len(tasks) >= max_queue_size:
                            await asyncio.gather(*tasks)
                            tasks = []

                if tasks:
                    await asyncio.gather(*tasks)

        asyncio.run(run())
        return

    async def query_many_async(
        self,
        queries: Iterable[Dict],
        num_connections: int = 1,
        max_concurrent: int = 100,
        client_kwargs: Dict = {},
        **query_kwargs,
    ) -> List[VespaQueryResponse]:
        """
        Execute many queries asynchronously using httpx.AsyncClient.
        Number of concurrent requests is controlled by the max_concurrent parameter.

        A query that fails with an HTTPError does not abort the batch: it is reported as a
        VespaQueryResponse carrying the error text, using status code 599 and url "n/a" when
        the error has no response or request attached.

        NOTE(review): retries (documented elsewhere as up to 3 attempts with exponential backoff)
        would have to be implemented by the async client's `query` method, which is not visible
        from this method - confirm.

        :param queries: Iterable of query bodies (dictionaries) to be sent.
        :param num_connections: Number of connections to be used in the asynchronous client (uses http2). Defaults to 1.
        :param max_concurrent: Maximum concurrent requests to be sent. Defaults to 100. Be careful with increasing too much.
        :param client_kwargs: Additional arguments to be passed to the httpx.AsyncClient. Only read, never modified.
        :param query_kwargs: Additional arguments to be passed to the query method.
        :return: List of VespaQueryResponse objects, in the order of `queries`.
        """
        results = []
        # Use the asynchronous client from VespaAsync (created via self.asyncio).
        async with self.asyncio(connections=num_connections, **client_kwargs) as client:
            sem = asyncio.Semaphore(max_concurrent)

            async def query_wrapper(query_body: Dict) -> VespaQueryResponse:
                async with sem:
                    try:
                        return await client.query(query_body, **query_kwargs)
                    except HTTPError as e:
                        # An HTTPError is not guaranteed to carry a response/request;
                        # `is not None` is used because a failed Response is falsy.
                        failed_response = getattr(e, "response", None)
                        failed_request = getattr(e, "request", None)
                        return VespaQueryResponse(
                            json=str(e),
                            status_code=(
                                failed_response.status_code
                                if failed_response is not None
                                else 599
                            ),
                            url=(
                                failed_request.url
                                if failed_request is not None
                                else "n/a"
                            ),
                            request_body=query_body,
                        )

            tasks = [query_wrapper(q) for q in queries]
            results = await asyncio.gather(*tasks)
        return results

    def query_many(
        self,
        queries: Iterable[Dict],
        num_connections: int = 1,
        max_concurrent: int = 100,
        client_kwargs: Dict = {},
        **query_kwargs,
    ) -> List[VespaQueryResponse]:
        """
        Execute many queries and block until all of them are done.

        Synchronous wrapper around :func:`query_many_async`: the coroutine is built with the
        given arguments and handed to `_check_for_running_loop_and_run_coroutine`, which runs
        it to completion. Number of concurrent requests is controlled by the max_concurrent parameter.

        :param queries: Iterable of query bodies (dictionaries) to be sent.
        :param num_connections: Number of connections to be used in the asynchronous client (uses http2). Defaults to 1.
        :param max_concurrent: Maximum concurrent requests to be sent. Defaults to 100. Be careful with increasing too much.
        :param client_kwargs: Additional arguments to be passed to the httpx.AsyncClient.
        :param query_kwargs: Additional arguments to be passed to the query method.
        :return: List of VespaQueryResponse objects.
        """
        pending_queries = self.query_many_async(
            queries=queries,
            num_connections=num_connections,
            max_concurrent=max_concurrent,
            client_kwargs=client_kwargs,
            **query_kwargs,
        )
        return self._check_for_running_loop_and_run_coroutine(pending_queries)

    def delete_data(
        self,
        schema: str,
        data_id: str,
        namespace: str = None,
        groupname: str = None,
        **kwargs,
    ) -> VespaResponse:
        """
        Remove a single document from a Vespa app.

        Opens a short-lived :class:`VespaSync` session (one pooled connection) and
        delegates to its ``delete_data`` method.

        Example usage::

            app = Vespa(url="localhost", port=8080)
            response = app.delete_data(schema="schema_name", data_id="1")
            print(response)

        :param schema: The schema that we are deleting data from.
        :param data_id: Unique id associated with this data point.
        :param namespace: The namespace that we are deleting data from. If no namespace is provided the schema is used.
        :param groupname: The groupname that we are deleting data from.
        :param kwargs: Additional arguments to be passed to the HTTP DELETE request https://docs.vespa.ai/en/reference/document-v1-api-reference.html#request-parameters
        :return: Response of the HTTP DELETE request.
        """
        document_ref = dict(
            schema=schema,
            data_id=data_id,
            namespace=namespace,
            groupname=groupname,
        )
        # One request only, so a single pooled connection is sufficient.
        with VespaSync(self, pool_connections=1, pool_maxsize=1) as session:
            return session.delete_data(**document_ref, **kwargs)

    def delete_all_docs(
        self,
        content_cluster_name: str,
        schema: str,
        namespace: str = None,
        slices: int = 1,
        **kwargs,
    ) -> None:
        """
        Delete all documents associated with the schema. This might block for a long time as
        it requires sending multiple delete requests to complete.

        Delegates to ``VespaSync.delete_all_docs`` using a session whose pool
        arguments are set to ``slices``.

        :param content_cluster_name: Name of content cluster to GET from, or visit.
        :param schema: The schema that we are deleting data from.
        :param namespace: The namespace that we are deleting data from. If no namespace is provided the schema is used.
        :param slices: Number of slices to use for parallel delete requests. Defaults to 1.
        :param kwargs: Additional arguments to be passed to the HTTP DELETE request https://docs.vespa.ai/en/reference/document-v1-api-reference.html#request-parameters
        :return: The value returned by ``VespaSync.delete_all_docs``; that method has no
            return statement, so this is ``None`` rather than an HTTP response object.
        """

        with VespaSync(self, pool_connections=slices, pool_maxsize=slices) as sync_app:
            # The result is still returned so the call shape stays the same
            # for existing callers.
            return sync_app.delete_all_docs(
                content_cluster_name=content_cluster_name,
                namespace=namespace,
                schema=schema,
                slices=slices,
                **kwargs,
            )

    def visit(
        self,
        content_cluster_name: str,
        schema: Optional[str] = None,
        namespace: Optional[str] = None,
        slices: int = 1,
        selection: str = "true",
        wanted_document_count: int = 500,
        slice_id: int = -1,
        **kwargs,
    ) -> Generator[Generator[VespaVisitResponse, None, None], None, None]:
        """
        Visit all documents associated with the schema and matching the selection.

        Yields one generator per slice; each of those yields the response for
        each page of that slice.

        The underlying :class:`VespaSync` session is opened when iteration starts
        and is kept open until this generator is exhausted or closed, so the
        requests issued while iterating run against a live session. Consume each
        slice inside the loop over this generator, as in the example below.

        Example usage::

            for slice in app.visit(schema="schema_name", slices=2):
                for response in slice:
                    print(response.json)

        :param content_cluster_name: Name of content cluster to GET from.
        :param schema: The schema that we are visiting data from.
        :param namespace: The namespace that we are visiting data from.
        :param slices: Number of slices to use for parallel GET.
        :param selection: Selection expression to filter documents.
        :param wanted_document_count: Best effort number of documents to retrieve for each request. May contain less if there are not enough documents left.
        :param slice_id: Slice id to use for the visit. If -1, all slices will be used.
        :param kwargs: Additional HTTP request parameters (https://docs.vespa.ai/en/reference/document-v1-api-reference.html#request-parameters)
        :return: A generator of slices, each containing a generator of responses.
        """

        with VespaSync(self, pool_connections=slices, pool_maxsize=slices) as sync_app:
            # `sync_app.visit` is itself a generator. Returning it from inside
            # the `with` block would close the session before the first request
            # is made; delegating with `yield from` keeps the session open for
            # as long as the caller is iterating.
            yield from sync_app.visit(
                content_cluster_name=content_cluster_name,
                namespace=namespace,
                schema=schema,
                slices=slices,
                selection=selection,
                wanted_document_count=wanted_document_count,
                slice_id=slice_id,
                **kwargs,
            )

    def get_data(
        self,
        schema: str,
        data_id: str,
        namespace: str = None,
        groupname: str = None,
        raise_on_not_found: Optional[bool] = False,
        **kwargs,
    ) -> VespaResponse:
        """
        Fetch a single document from a Vespa app.

        Opens a short-lived :class:`VespaSync` session (one pooled connection) and
        delegates to its ``get_data`` method.

        :param schema: The schema that we are getting data from.
        :param data_id: Unique id associated with this data point.
        :param namespace: The namespace that we are getting data from. If no namespace is provided the schema is used.
        :param groupname: The groupname that we are getting data from.
        :param raise_on_not_found: Raise an exception if the data_id is not found. Default is False.
        :param kwargs: Additional arguments to be passed to the HTTP GET request https://docs.vespa.ai/en/reference/document-v1-api-reference.html#request-parameters
        :return: Response of the HTTP GET request.
        """
        lookup = dict(
            schema=schema,
            data_id=data_id,
            namespace=namespace,
            groupname=groupname,
            raise_on_not_found=raise_on_not_found,
        )
        # One request only, so a single pooled connection is sufficient.
        with VespaSync(self, pool_connections=1, pool_maxsize=1) as session:
            return session.get_data(**lookup, **kwargs)

    def update_data(
        self,
        schema: str,
        data_id: str,
        fields: Dict,
        create: bool = False,
        namespace: str = None,
        groupname: str = None,
        compress: Union[str, bool] = "auto",
        auto_assign: bool = True,
        **kwargs,
    ) -> VespaResponse:
        """
        Update a data point in a Vespa app.

        Example usage::

            vespa = Vespa(url="localhost", port=8080)

            fields = {"mystringfield": "value1", "myintfield": 42}
            response = vespa.update_data(schema="schema_name", data_id="id1", fields=fields)
            # or, with partial update, setting auto_assign=False
            fields = {"myintfield": {"increment": 1}}
            response = vespa.update_data(schema="schema_name", data_id="id1", fields=fields, auto_assign=False)
            print(response.json)

        :param schema: The schema that we are updating data.
        :param data_id: Unique id associated with this data point.
        :param fields: Dict containing all the fields you want to update.
        :param create: If true, updates to non-existent documents will create an empty document to update
        :param namespace: The namespace that we are updating data. If no namespace is provided the schema is used.
        :param groupname: The groupname that we are updating data.
        :param compress: Whether to compress the request body. Defaults to "auto", which will compress if the body is larger than 1024 bytes.
        :param auto_assign: Assumes `fields`-parameter is an assignment operation. (https://docs.vespa.ai/en/reference/document-json-format.html#assign). If set to false, the fields parameter should be a dictionary including the update operation.
        :param kwargs: Additional arguments to be passed to the HTTP PUT request. https://docs.vespa.ai/en/reference/document-v1-api-reference.html#request-parameters
        :return: Response of the HTTP PUT request.
        """

        with VespaSync(
            self, pool_connections=1, pool_maxsize=1, compress=compress
        ) as sync_app:
            # `auto_assign` is forwarded by name. It used to reach the sync
            # method only implicitly through **kwargs; naming it in the
            # signature makes the documented parameter discoverable.
            return sync_app.update_data(
                schema=schema,
                data_id=data_id,
                fields=fields,
                create=create,
                auto_assign=auto_assign,
                namespace=namespace,
                groupname=groupname,
                **kwargs,
            )

    @property
    def application_package(self):
        """Get application package definition, if available."""
        if not self._application_package:
            raise ValueError("Application package not available.")
        else:
            return self._application_package

    def get_model_from_application_package(self, model_name: str):
        """Get model definition from application package, if available."""
        app_package = self.application_package
        model = app_package.get_model(model_id=model_name)
        return model

    def predict(self, x, model_id, function_name="output_0"):
        """
        Run a stateless model evaluation and return the parsed prediction.

        The model definition is taken from the application package; it encodes
        the input into URL-encoded tokens and parses the raw evaluation result.

        :param x: Input where the format depends on the task that the model is serving.
        :param model_id: The id of the model used to serve the prediction.
        :param function_name: The name of the output function to be evaluated.
        :return: Model prediction.
        """
        model = self.get_model_from_application_package(model_id)
        tokens = model.create_url_encoded_tokens(x=x)
        with VespaSync(self) as session:
            raw_prediction = session.predict(
                model_id=model_id,
                function_name=function_name,
                encoded_tokens=tokens,
            )
            return model.parse_vespa_prediction(raw_prediction)

    def get_document_v1_path(
        self,
        id: str,
        schema: Optional[str] = None,
        namespace: Optional[str] = None,
        group: Optional[str] = None,
        number: Optional[str] = None,
    ) -> str:
        """
        Build the /document/v1 path for a document.

        ``number`` takes precedence over ``group``; when neither is given the
        plain ``docid`` form is produced.

        :param id: The id of the document. It is converted to ``str`` and URL-quoted.
        :param schema: The schema of the document. If falsy, the schema name is inferred via ``_infer_schema_name``.
        :param namespace: The namespace of the document. Defaults to the schema name.
        :param group: The group of the document
        :param number: The number of the document. ``0`` is accepted as a valid number.
        :return: The path to the document v1 endpoint
        """
        # Make sure `id` is properly quoted, e.g. myid#123 -> myid%23123
        id = quote(str(id))
        if not schema:
            print("schema is not provided. Attempting to infer schema name.")
            schema = self._infer_schema_name()
        if not namespace:
            namespace = schema
        # Compare against None/"" rather than truthiness so that the number 0
        # is not mistaken for "no number given".
        if number is not None and number != "":
            return f"/document/v1/{namespace}/{schema}/number/{number}/{id}"
        if group:
            return f"/document/v1/{namespace}/{schema}/group/{group}/{id}"
        return f"/document/v1/{namespace}/{schema}/docid/{id}"


class CustomHTTPAdapter(HTTPAdapter):
    """
    ``requests`` transport adapter that gzip-compresses request bodies and
    retries requests answered with HTTP 429 (or interrupted by a
    ``ConnectionResetError``) using a randomized, growing wait.
    """

    def __init__(
        self,
        pool_connections=10,
        pool_maxsize=10,
        num_retries_429=10,
        compress: Union[str, bool] = "auto",
        compress_larger_than: int = 1024,
        *args,
        **kwargs,
    ):
        """
        :param pool_connections: Forwarded to ``HTTPAdapter`` as ``pool_connections``.
        :param pool_maxsize: Forwarded to ``HTTPAdapter`` as ``pool_maxsize``.
        :param num_retries_429: Number of retries performed by :meth:`send` after the first attempt.
        :param compress: ``True`` to always compress POST/PUT bodies, ``False`` to never compress, ``"auto"`` to compress bodies larger than ``compress_larger_than`` bytes.
        :param compress_larger_than: Size threshold in bytes used when ``compress`` is ``"auto"``.
        :param args: Further positional arguments for ``HTTPAdapter``.
        :param kwargs: Further keyword arguments for ``HTTPAdapter`` (e.g. ``max_retries``, ``pool_block``).
        :raises ValueError: If ``compress`` is not ``"auto"``, ``True`` or ``False``.
        """
        if compress not in ["auto", True, False]:
            raise ValueError(
                f"compress must be 'auto', True, or False. Got {compress} instead."
            )
        # The pool sizes are named parameters of this __init__, so they never
        # end up in *args/**kwargs. They must be forwarded explicitly or the
        # base class silently falls back to its own defaults.
        super().__init__(pool_connections, pool_maxsize, *args, **kwargs)
        self.num_retries_429 = num_retries_429
        self.compress = compress
        self.compress_larger_than = compress_larger_than
        # NOTE(review): this Retry object is only stored; nothing in this class
        # passes it to the base adapter. Kept as an attribute for compatibility.
        self.retry_strategy = Retry(
            total=10,
            backoff_factor=1,
            raise_on_status=False,
            status_forcelist=[429, 503],
            allowed_methods=["POST", "GET", "DELETE", "PUT"],
        )

    def send(self, request: PreparedRequest, **kwargs) -> Response:
        """
        Send ``request``, retrying on HTTP 429 and on ``ConnectionResetError``.

        :param request: The prepared request; its body may be compressed in place before sending.
        :param kwargs: Passed through to ``HTTPAdapter.send``.
        :return: The first non-429 response, or the last 429 response once all attempts are used.
        :raises ConnectionResetError: If the final attempt fails with that error.
        """
        # Compress once, before the retry loop, so retries reuse the same body.
        self._maybe_compress_request(request)

        # Guard against a negative retry count: always make at least one attempt.
        last_attempt = max(self.num_retries_429, 0)
        response = None
        for attempt in range(last_attempt + 1):
            try:
                response = super().send(request, **kwargs)
            except ConnectionResetError:
                print(f"ConnectionResetError on attempt {attempt}", file=sys.stderr)
                if attempt >= last_attempt:
                    raise
                self._wait_with_backoff(attempt)
                continue

            # Hand back anything that is not a 429, and also the final 429:
            # there is no point in sleeping when no further attempt follows.
            if response.status_code != 429 or attempt >= last_attempt:
                return response
            self._wait_with_backoff(attempt)

        return response

    def _maybe_compress_request(self, request: PreparedRequest):
        """
        Gzip the body of a POST/PUT request in place when compression applies.

        Sets ``Content-Encoding: gzip`` and updates ``Content-Length`` when the
        body is replaced. Requests without a body, with a body that is neither
        ``str`` nor ``bytes``, or that already carry a ``Content-Encoding``
        header are left untouched.
        """
        if self.compress not in [True, "auto"]:
            return
        if request.method not in ["POST", "PUT"] or not request.body:
            return

        body = request.body
        if not isinstance(body, (str, bytes)):
            # File-like objects and generators cannot be measured or
            # compressed here; send them unchanged.
            return
        if "Content-Encoding" in request.headers:
            # The caller already encoded the body; compressing again would
            # make it unreadable for the server.
            return

        raw = body.encode("utf-8") if isinstance(body, str) else body
        if self.compress is True or (
            self.compress == "auto" and len(raw) > self.compress_larger_than
        ):
            compressed_body = self._gzip_compress(raw)
            request.body = compressed_body
            request.headers["Content-Encoding"] = "gzip"
            request.headers["Content-Length"] = str(len(compressed_body))

    @staticmethod
    def _gzip_compress(data: Union[str, bytes]) -> bytes:
        """Return ``data`` gzip-compressed; ``str`` input is UTF-8 encoded first."""
        if isinstance(data, str):
            data = data.encode("utf-8")

        buf = BytesIO()
        with gzip.GzipFile(fileobj=buf, mode="wb") as f:
            f.write(data)
        return buf.getvalue()

    @staticmethod
    def _wait_with_backoff(attempt):
        """Sleep for ``0.1 * 1.618**attempt`` seconds plus up to one second of random jitter."""
        wait_time = 0.1 * 1.618**attempt + random.uniform(0, 1)
        time.sleep(wait_time)


class VespaSync(object):
    def __init__(
        self,
        app: Vespa,
        pool_maxsize: int = 10,
        pool_connections: int = 10,
        compress: Union[str, bool] = "auto",
    ) -> None:
        """
        Class to handle synchronous requests to Vespa.
        This class is intended to be used as a context manager.

        Example usage::

                with VespaSync(app) as sync_app:
                    response = sync_app.query(body=body)
                print(response)

        Can also be accessed directly through :func:`Vespa.syncio` ::

                app = Vespa(url="localhost", port=8080)
                with app.syncio() as sync_app:
                    response = sync_app.query(body=body)

        See also :func:`Vespa.feed_iterable` for a convenient way to feed data synchronously.

        Args:
            app (Vespa): Vespa app object.
            pool_maxsize (int, optional): The maximum number of connections to save in the pool. Defaults to 10.
            pool_connections (int, optional): The number of urllib3 connection pools to cache. Defaults to 10.
            compress (Union[str, bool], optional): Whether to compress the request body. Defaults to "auto", which will compress if the body is larger than 1024 bytes.

        Raises:
            ValueError: If ``compress`` is not "auto", True or False.
        """
        if compress not in ["auto", True, False]:
            raise ValueError(
                f"compress must be 'auto', True, or False. Got {compress} instead."
            )
        self.app = app
        # A (cert, key) pair when a separate key is configured, otherwise the
        # certificate value on its own.
        if self.app.key:
            self.cert = (self.app.cert, self.app.key)
        else:
            self.cert = self.app.cert
        self.headers = self.app.base_headers.copy()
        if self.app.auth_method == "token" and self.app.vespa_cloud_secret_token:
            # Bearer and user-agent
            self.headers.update(
                {"Authorization": f"Bearer {self.app.vespa_cloud_secret_token}"}
            )
        self.compress = compress
        # The session is created lazily by _open_http_session.
        self.http_session = None
        self.adapter = CustomHTTPAdapter(
            pool_maxsize=pool_maxsize,
            pool_connections=pool_connections,
            max_retries=10,
            num_retries_429=10,
            pool_block=True,
            compress=compress,
        )

    def __enter__(self):
        self._open_http_session()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self._close_http_session()

    def _open_http_session(self):
        """Create the HTTP session if none exists yet and return the session."""
        if self.http_session is not None:
            return self.http_session

        self.http_session = Session()
        self.http_session.headers.update(self.headers)
        self.http_session.mount("https://", self.adapter)
        self.http_session.mount("http://", self.adapter)
        # With token auth the Authorization header (already part of
        # self.headers) is used; otherwise fall back to the client certificate.
        if not (self.app.auth_method == "token" and self.app.vespa_cloud_secret_token):
            self.http_session.cert = self.cert

        return self.http_session

    def _close_http_session(self):
        """Close the HTTP session if one was opened.

        The session attribute is intentionally left in place after closing.
        """
        if self.http_session is None:
            return
        self.http_session.close()

    def _get_json_or_status(self, end_point: str) -> Optional[dict]:
        """GET ``end_point`` and summarize the outcome.

        :return: The decoded JSON body on HTTP 200, a dict with ``status_code`` and
            ``message`` for any other status, or None if a ConnectionError occurred.
        """
        try:
            response = self.http_session.get(end_point)
        except ConnectionError:
            return None
        if response.status_code == 200:
            return response.json()
        return {"status_code": response.status_code, "message": response.reason}

    def get_model_endpoint(self, model_id: Optional[str] = None) -> Optional[dict]:
        """Get model evaluation endpoints.

        :param model_id: Optional model id appended to the model-evaluation path.
        :return: The decoded JSON body on HTTP 200, a dict with ``status_code`` and
            ``message`` for any other status, or None if a ConnectionError occurred.
        """
        end_point = "{}/model-evaluation/v1/".format(self.app.end_point)
        if model_id:
            end_point = end_point + model_id
        return self._get_json_or_status(end_point)

    def predict(self, model_id, function_name, encoded_tokens):
        """
        Obtain a stateless model evaluation.

        :param model_id: The id of the model used to serve the prediction.
        :param function_name: The name of the output function to be evaluated.
        :param encoded_tokens: URL-encoded input to the model
        :return: The decoded JSON body on HTTP 200, a dict with ``status_code`` and
            ``message`` for any other status, or None if a ConnectionError occurred.
        """
        end_point = "{}/model-evaluation/v1/{}/{}/eval?{}".format(
            self.app.end_point, model_id, function_name, encoded_tokens
        )
        return self._get_json_or_status(end_point)

    def feed_data_point(
        self,
        schema: str,
        data_id: str,
        fields: Dict,
        namespace: str = None,
        groupname: str = None,
        **kwargs,
    ) -> VespaResponse:
        """
        Feed a data point to a Vespa app.

        :param schema: The schema that we are sending data to.
        :param data_id: Unique id associated with this data point.
        :param fields: Dict containing all the fields required by the `schema`.
        :param namespace: The namespace that we are sending data to. If no namespace is provided the schema is used.
        :param groupname: The group that we are sending data to.
        :param kwargs: Additional HTTP request parameters (https://docs.vespa.ai/en/reference/document-v1-api-reference.html#request-parameters)
        :return: Response of the HTTP POST request.
        :raises HTTPError: if one occurred
        """

        path = self.app.get_document_v1_path(
            id=data_id, schema=schema, namespace=namespace, group=groupname
        )
        end_point = "{}{}".format(self.app.end_point, path)
        vespa_format = {"fields": fields}
        response = self.http_session.post(end_point, json=vespa_format, params=kwargs)
        raise_for_status(response)
        return VespaResponse(
            json=response.json(),
            status_code=response.status_code,
            url=str(response.url),
            operation_type="feed",
        )

    def query(
        self, body: Optional[Dict] = None, groupname: str = None, **kwargs
    ) -> VespaQueryResponse:
        """
        Send a query request to the Vespa application.

        Send 'body' containing all the request parameters.

        :param body: Dict containing all the request parameters.
        :param groupname: The groupname used in streaming search; sent as the ``streaming.groupname`` request parameter.
        :param kwargs: Additional Valid Vespa HTTP Query Api parameters (https://docs.vespa.ai/en/reference/query-api-reference.html)
        :return: The result from the Vespa application.
        :raises HTTPError: if one occurred
        """

        if groupname:
            kwargs["streaming.groupname"] = groupname
        response = self.http_session.post(
            self.app.search_end_point, json=body, params=kwargs
        )
        raise_for_status(response)
        return VespaQueryResponse(
            json=response.json(),
            status_code=response.status_code,
            url=str(response.url),
        )

    def delete_data(
        self,
        schema: str,
        data_id: str,
        namespace: str = None,
        groupname: str = None,
        **kwargs,
    ) -> VespaResponse:
        """
        Delete a data point from a Vespa app.

        :param schema: The schema that we are deleting data from.
        :param data_id: Unique id associated with this data point.
        :param namespace: The namespace that we are deleting data from.
        :param groupname: The groupname that we are deleting data from.
        :param kwargs: Additional HTTP request parameters (https://docs.vespa.ai/en/reference/document-v1-api-reference.html#request-parameters)
        :return: Response of the HTTP DELETE request.
        :raises HTTPError: if one occurred
        """

        path = self.app.get_document_v1_path(
            id=data_id, schema=schema, namespace=namespace, group=groupname
        )
        end_point = "{}{}".format(self.app.end_point, path)
        response = self.http_session.delete(end_point, params=kwargs)
        raise_for_status(response)
        return VespaResponse(
            json=response.json(),
            status_code=response.status_code,
            url=str(response.url),
            operation_type="delete",
        )

    def delete_all_docs(
        self,
        content_cluster_name: str,
        schema: str,
        namespace: str = None,
        slices: int = 1,
        **kwargs,
    ) -> None:
        """
        Delete all documents associated with the schema.

        One worker thread per slice repeatedly issues DELETE requests, following
        the ``continuation`` token of each response until none is returned.

        :param content_cluster_name: Name of content cluster to GET from, or visit.
        :param schema: The schema that we are deleting data from.
        :param namespace: The namespace that we are deleting data from. If no namespace is provided the schema is used.
        :param slices: Number of slices to use for parallel delete.
        :param kwargs: Additional HTTP request parameters (https://docs.vespa.ai/en/reference/document-v1-api-reference.html#request-parameters)
        :return: None.
        :raises Exception: If more than 10% of the requests of a slice failed.
        """
        if not namespace:
            namespace = schema

        def delete_slice(slice_id):
            end_point = "{}/document/v1/{}/{}/docid/?cluster={}&selection=true&slices={}&sliceId={}".format(
                self.app.end_point,
                namespace,
                schema,
                content_cluster_name,
                slices,
                slice_id,
            )
            request_endpoint = end_point
            count = 0
            errors = 0
            while True:
                try:
                    count += 1
                    response = self.http_session.delete(request_endpoint, params=kwargs)
                    result = response.json()
                    if "continuation" in result:
                        request_endpoint = "{}&continuation={}".format(
                            end_point, result["continuation"]
                        )
                    else:
                        break
                except Exception as e:
                    errors += 1
                    error_rate = errors / count
                    if error_rate > 0.1:
                        raise Exception(
                            "Too many errors for slice delete requests"
                        ) from e
                    # Brief pause before retrying the same request.
                    sleep(1)

        with ThreadPoolExecutor(max_workers=slices) as executor:
            # Executor.map only re-raises a worker's exception when its result
            # is retrieved, so the iterator has to be consumed; otherwise a
            # failed slice would go unnoticed.
            list(executor.map(delete_slice, range(slices)))

    def visit(
        self,
        content_cluster_name: str,
        schema: Optional[str] = None,
        namespace: Optional[str] = None,
        slices: int = 1,
        selection: str = "true",
        wanted_document_count: int = 500,
        slice_id: int = -1,
        **kwargs,
    ) -> Generator[Generator[VespaVisitResponse, None, None], None, None]:
        """
        Visit all documents associated with the schema and matching the selection.

        Yields one generator per slice; each of those yields the response for
        each page of that slice. The page requests are made lazily, when the
        caller iterates a slice generator.

        :param content_cluster_name: Name of content cluster to GET from.
        :param schema: The schema that we are visiting data from.
        :param namespace: The namespace that we are visiting data from. If no namespace is provided the schema is used.
        :param slices: Number of slices to split the visit into.
        :param wanted_document_count: Best effort number of documents to retrieve for each request. May contain less if there are not enough documents left.
        :param selection: Selection expression to use. Defaults to "true". See https://docs.vespa.ai/en/reference/document-select-language.html
        :param slice_id: Slice id to use. Defaults to -1, which means all slices.
        :param kwargs: Additional HTTP request parameters (https://docs.vespa.ai/en/reference/document-v1-api-reference.html#request-parameters)
        :return: A generator of slices, each containing a generator of responses.
        :raises ValueError: If ``slice_id`` is neither -1 nor in range [0, slices).
        """
        if not namespace:
            namespace = schema

        if schema:
            target = "{}/{}/docid/".format(
                namespace,
                schema,
            )
        else:
            target = ""

        end_point = "{}/document/v1/{}".format(
            self.app.end_point,
            target,
        )
        # Validate that slice_id is either -1 (all slices) or in range [0, slices)
        if slice_id < -1 or (slice_id >= 0 and slice_id >= slices):
            raise ValueError(
                f"slice_id must be in range [0, {slices - 1}] or -1. Got {slice_id} instead."
            )

        # Up to 3 attempts per page when the request fails with an HTTPError.
        @retry(retry=retry_if_exception_type(HTTPError), stop=stop_after_attempt(3))
        def visit_request(end_point: str, params: Dict[str, str]):
            r = self.http_session.get(end_point, params=params)
            r.raise_for_status()
            return VespaVisitResponse(
                json=r.json(), status_code=r.status_code, url=str(r.url)
            )

        def visit_slice(slice_id):
            params = {
                "cluster": content_cluster_name,
                "selection": selection,
                "wantedDocumentCount": wanted_document_count,
                "slices": slices,
                "sliceId": slice_id,
                **kwargs,
            }

            # Keep requesting pages until a response carries no continuation.
            while True:
                result = visit_request(end_point, params=params)
                yield result
                if result.continuation:
                    params["continuation"] = result.continuation
                else:
                    break

        if slice_id == -1:
            # NOTE: visit_slice is a generator function, so each submitted task
            # only creates the generator object; the requests themselves run
            # when the caller iterates the yielded generators.
            with ThreadPoolExecutor(max_workers=slices) as executor:
                futures = [
                    executor.submit(visit_slice, slice) for slice in range(slices)
                ]
                for future in as_completed(futures):
                    yield future.result()
        else:
            yield visit_slice(slice_id)

    def get_data(
        self,
        schema: str,
        data_id: str,
        namespace: str = None,
        groupname: str = None,
        raise_on_not_found: Optional[bool] = False,
        **kwargs,
    ) -> VespaResponse:
        """
        Get a data point from a Vespa app.

        :param schema: The schema that we are getting data from.
        :param data_id: Unique id associated with this data point.
        :param namespace: The namespace that we are getting data from.
        :param groupname: The groupname used to get data
        :param raise_on_not_found: Raise an exception if the document is not found.
        :param kwargs: Additional HTTP request parameters (https://docs.vespa.ai/en/reference/document-v1-api-reference.html#request-parameters)
        :return: Response of the HTTP GET request.
        :raises HTTPError: if one occurred
        """
        path = self.app.get_document_v1_path(
            id=data_id, schema=schema, namespace=namespace, group=groupname
        )
        end_point = "{}{}".format(self.app.end_point, path)

        response = self.http_session.get(end_point, params=kwargs)
        raise_for_status(response, raise_on_not_found=raise_on_not_found)
        return VespaResponse(
            json=response.json(),
            status_code=response.status_code,
            url=str(response.url),
            operation_type="get",
        )

    def update_data(
        self,
        schema: str,
        data_id: str,
        fields: Dict,
        create: bool = False,
        auto_assign: bool = True,
        namespace: str = None,
        groupname: str = None,
        **kwargs,
    ) -> VespaResponse:
        """
        Update a data point in a Vespa app.

        :param schema: The schema that we are updating data.
        :param data_id: Unique id associated with this data point.
        :param fields: Dict containing all the fields you want to update.
        :param create: If true, updates to non-existent documents will create an empty document to update
        :param auto_assign: Assumes `fields`-parameter is an assignment operation. (https://docs.vespa.ai/en/reference/document-json-format.html#assign). If set to false, the fields parameter should be a dictionary including the update operation.
        :param namespace: The namespace that we are updating data.
        :param groupname: The groupname used to update data
        :param kwargs: Additional HTTP request parameters (https://docs.vespa.ai/en/reference/document-v1-api-reference.html#request-parameters)
        :return: Response of the HTTP PUT request.
        :raises HTTPError: if one occurred
        """

        path = self.app.get_document_v1_path(
            id=data_id, schema=schema, namespace=namespace, group=groupname
        )
        end_point = "{}{}?create={}".format(
            self.app.end_point, path, str(create).lower()
        )
        if auto_assign:
            # Wrap every value in an explicit "assign" operation.
            vespa_format = {"fields": {k: {"assign": v} for k, v in fields.items()}}
        else:
            # Can not send 'id' in fields for partial update
            vespa_format = {"fields": {k: v for k, v in fields.items() if k != "id"}}
        response = self.http_session.put(end_point, json=vespa_format, params=kwargs)
        raise_for_status(response)
        return VespaResponse(
            json=response.json(),
            status_code=response.status_code,
            url=str(response.url),
            operation_type="update",
        )


class VespaAsync(object):
    def __init__(
        self,
        app: Vespa,
        connections: Optional[int] = 1,
        total_timeout: Optional[int] = None,
        timeout: Union[httpx.Timeout, int, float] = httpx.Timeout(5),
        **kwargs,
    ) -> None:
        """
        Class to handle asynchronous HTTP connections to Vespa.

        Uses `httpx` as the async HTTP client, and HTTP/2 by default.
        This class is intended to be used as a context manager.

        **Basic usage**::

            async with VespaAsync(app) as async_app:
                response = await async_app.query(
                    body={"yql": "select * from sources * where title contains 'music';"}
                )

        **Passing custom timeout and limits**::

            import httpx

            timeout = httpx.Timeout(10.0, connect=5.0)
            limits = httpx.Limits(max_connections=10, max_keepalive_connections=5)

            async with VespaAsync(app, timeout=timeout, limits=limits) as async_app:
                response = await async_app.query(
                    body={"yql": "select * from sources * where title contains 'music';"}
                )

        **Using additional kwargs (e.g., proxies)**::

            proxies = "http://localhost:8080"

            async with VespaAsync(app, proxies=proxies) as async_app:
                response = await async_app.query(
                    body={"yql": "select * from sources * where title contains 'music';"}
                )

        **Accessing via :func:`Vespa.asyncio`**::

            app = Vespa(url="localhost", port=8080)
            async with app.asyncio(timeout=timeout, limits=limits) as async_app:
                response = await async_app.query(
                    body={"yql": "select * from sources * where title contains 'music';"}
                )

        See also :func:`Vespa.feed_async_iterable` for a convenient interface to async data feeding.

        Args:
            app (Vespa): Vespa application object.
            connections (Optional[int], optional): Number of connections. Defaults to 1 as HTTP/2 is multiplexed.
                Used as `max_keepalive_connections` of the default `httpx.Limits` stored in `self.limits`
                when no `limits` keyword argument is given.
            total_timeout (int, optional): **Deprecated**. Will be ignored and removed in future versions.
                Use `timeout` to pass an `httpx.Timeout` object instead.
            timeout (httpx.Timeout | int | float, optional): Timeout settings for the `httpx.AsyncClient`.
                A plain number is wrapped in `httpx.Timeout`. Defaults to `httpx.Timeout(5)`.
            **kwargs: Additional arguments to be passed to the `httpx.AsyncClient`. See
                [HTTPX AsyncClient documentation](https://www.python-httpx.org/api/#asyncclient) for more details.

        Note:
            - Passing `timeout` allows you to configure timeouts for connect, read, write, and overall request time.
            - The `limits` parameter can be used to control connection pooling behavior, such as the maximum number of concurrent connections.
            - See https://www.python-httpx.org/ for more information on `httpx` and its features.
        """
        self.app = app
        # The client is created lazily by `_open_httpx_client`.
        self.httpx_client = None
        self.connections = connections
        self.total_timeout = total_timeout
        if self.total_timeout is not None:
            # stacklevel=2 attributes the warning to the code constructing VespaAsync
            # instead of to this module.
            warnings.warn(
                "total_timeout is deprecated, will be ignored and will be removed in future versions. Use timeout to pass a httpx.Timeout object instead.",
                category=DeprecationWarning,
                stacklevel=2,
            )
        # Normalise plain numbers (ints and floats alike) so `self.timeout` is
        # consistently an `httpx.Timeout` whenever a number was supplied.
        if isinstance(timeout, (int, float)):
            timeout = httpx.Timeout(timeout)
        self.timeout = timeout
        self.kwargs = kwargs
        # Copy so that the Authorization header added below does not leak into the app's base headers.
        self.headers = self.app.base_headers.copy()
        self.limits = kwargs.get(
            "limits", httpx.Limits(max_keepalive_connections=self.connections)
        )
        # Warn if limits.keepalive_expiry is higher than 30 seconds
        if self.limits.keepalive_expiry and self.limits.keepalive_expiry > 30:
            warnings.warn(
                "Keepalive expiry is set to more than 30 seconds. Vespa server resets idle connections, so this may cause ConnectionResetError.",
                category=UserWarning,
                stacklevel=2,
            )
        if self.app.auth_method == "token" and self.app.vespa_cloud_secret_token:
            # Token authentication: add the secret token as a Bearer credential.
            self.headers.update(
                {"Authorization": f"Bearer {self.app.vespa_cloud_secret_token}"}
            )

    async def __aenter__(self):
        self._open_httpx_client()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        await self._close_httpx_client()

    def _open_httpx_client(self):
        """
        Create the `httpx.AsyncClient` for this instance unless an open one already exists.

        The client is built with the stored timeout and headers, HTTP/2 only
        (`http2=True`, `http1=False`), the stored connection limits and any extra
        keyword arguments given to the constructor. Keyword arguments supplied by
        the user (including `limits`) take precedence over the stored limits.

        :return: The open `httpx.AsyncClient` held in `self.httpx_client`.
        """
        existing = self.httpx_client
        # A client that was closed by an earlier `async with` block cannot send
        # requests any more, so only reuse a client that is still open.
        if existing is not None and not getattr(existing, "is_closed", False):
            return existing

        if self.app.cert is not None:
            # Client certificate (and key) configured on the app.
            sslcontext = httpx.create_ssl_context(cert=(self.app.cert, self.app.key))
        else:
            # NOTE(review): `verify=False` turns off TLS certificate verification in
            # httpx. Confirm this is intended for connections without a client
            # certificate (e.g. token authentication).
            sslcontext = False
        # Pass the limits derived from `connections` so that setting takes effect;
        # entries in `self.kwargs` override the default.
        client_kwargs = {"limits": self.limits, **self.kwargs}
        self.httpx_client = httpx.AsyncClient(
            timeout=self.timeout,
            headers=self.headers,
            verify=sslcontext,
            http2=True,  # HTTP/2 by default
            http1=False,
            **client_kwargs,
        )
        return self.httpx_client

    async def _close_httpx_client(self):
        if self.httpx_client is None:
            return
        await self.httpx_client.aclose()

    async def _wait(f, args, **kwargs):
        tasks = [asyncio.create_task(f(*arg, **kwargs)) for arg in args]
        await asyncio.wait(tasks, return_when=asyncio.ALL_COMPLETED)
        return [result for result in map(lambda task: task.result(), tasks)]

    def callback_docv1(state: RetryCallState) -> VespaResponse:
        if state.outcome.failed:
            raise state.outcome.exception()
        return state.outcome.result()

    @retry(
        stop=stop_after_attempt(5),
        wait=wait_random_exponential(multiplier=1.5, max=60),
    )
    async def query(
        self, body: Optional[Dict] = None, groupname: str = None, **kwargs
    ) -> VespaQueryResponse:
        """
        Send a query request to the Vespa application.

        Issues an HTTP POST to `self.app.search_end_point` with `body` as the JSON
        payload and `kwargs` as URL parameters. The call is retried, for up to 5
        attempts with a randomised exponential wait between attempts.

        :param body: Dict containing the request body, sent as JSON.
        :param groupname: If given, sent as the `streaming.groupname` request parameter.
        :param kwargs: Additional request parameters added to the URL.
        :return: The parsed JSON, status code and URL of the response.
        """
        request_params = kwargs
        if groupname:
            request_params["streaming.groupname"] = groupname
        response = await self.httpx_client.post(
            self.app.search_end_point, json=body, params=request_params
        )
        return VespaQueryResponse(
            json=response.json(),
            status_code=response.status_code,
            url=str(response.url),
        )

    @retry(
        wait=wait_exponential(multiplier=1),
        retry=retry_any(
            retry_if_exception(lambda exc: True),
            retry_if_result(lambda res: res.get_status_code() == 503),
        ),
        stop=stop_after_attempt(3),
        retry_error_callback=callback_docv1,
    )
    @retry(
        wait=wait_random_exponential(multiplier=1, max=3),
        retry=retry_if_result(lambda res: res.get_status_code() == 429),
    )
    async def feed_data_point(
        self,
        schema: str,
        data_id: str,
        fields: Dict,
        namespace: Optional[str] = None,
        groupname: Optional[str] = None,
        semaphore: Optional[asyncio.Semaphore] = None,
        **kwargs,
    ) -> VespaResponse:
        """
        Feed a data point to a Vespa app.

        Sends an HTTP POST with `{"fields": fields}` as JSON body to the
        document path returned by `self.app.get_document_v1_path`.

        Retry behaviour comes from the two decorators. The inner one retries
        while the response status is 429 and configures no attempt limit. The
        outer one makes up to 3 attempts when an exception is raised or the
        status is 503, then `callback_docv1` returns the last response or
        re-raises the last exception.

        :param schema: The schema that we are sending data to.
        :param data_id: Unique id associated with this data point.
        :param fields: Dict containing all the fields required by the `schema`.
        :param namespace: The namespace that we are sending data to.
        :param groupname: The groupname that we are sending data to.
        :param semaphore: If given, it is held for the duration of the HTTP request.
        :param kwargs: Additional HTTP request parameters.
        :return: Response of the HTTP POST request.
        """
        document_path = self.app.get_document_v1_path(
            id=data_id, schema=schema, namespace=namespace, group=groupname
        )
        url = f"{self.app.end_point}{document_path}"
        payload = {"fields": fields}

        async def send():
            # Single place that performs the request, with or without the semaphore.
            return await self.httpx_client.post(url, json=payload, params=kwargs)

        if semaphore:
            async with semaphore:
                reply = await send()
        else:
            reply = await send()
        return VespaResponse(
            json=reply.json(),
            status_code=reply.status_code,
            url=str(reply.url),
            operation_type="feed",
        )

    @retry(
        wait=wait_exponential(multiplier=1),
        retry=retry_any(
            retry_if_exception(lambda exc: True),
            retry_if_result(lambda res: res.get_status_code() == 503),
        ),
        stop=stop_after_attempt(3),
        retry_error_callback=callback_docv1,
    )
    @retry(
        wait=wait_exponential(multiplier=1, max=10),
        retry=retry_if_result(lambda res: res.get_status_code() == 429),
    )
    async def delete_data(
        self,
        schema: str,
        data_id: str,
        namespace: str = None,
        groupname: str = None,
        semaphore: asyncio.Semaphore = None,
        **kwargs,
    ) -> VespaResponse:
        """
        Delete a data point from a Vespa app.

        Sends an HTTP DELETE to the document path returned by
        `self.app.get_document_v1_path`.

        Retry behaviour comes from the two decorators. The inner one retries
        while the response status is 429 and configures no attempt limit. The
        outer one makes up to 3 attempts when an exception is raised or the
        status is 503, then `callback_docv1` returns the last response or
        re-raises the last exception.

        :param schema: The schema that we are deleting data from.
        :param data_id: Unique id associated with this data point.
        :param namespace: The namespace that we are deleting data from.
        :param groupname: The groupname that we are deleting data from.
        :param semaphore: If given, it is held for the duration of the HTTP request.
        :param kwargs: Additional HTTP request parameters.
        :return: Response of the HTTP DELETE request.
        """
        document_path = self.app.get_document_v1_path(
            id=data_id, schema=schema, namespace=namespace, group=groupname
        )
        url = f"{self.app.end_point}{document_path}"

        async def send():
            # Single place that performs the request, with or without the semaphore.
            return await self.httpx_client.delete(url, params=kwargs)

        if semaphore:
            async with semaphore:
                reply = await send()
        else:
            reply = await send()
        return VespaResponse(
            json=reply.json(),
            status_code=reply.status_code,
            url=str(reply.url),
            operation_type="delete",
        )

    @retry(
        wait=wait_exponential(multiplier=1),
        retry=retry_any(
            retry_if_exception(lambda exc: True),
            retry_if_result(lambda res: res.get_status_code() == 503),
        ),
        stop=stop_after_attempt(3),
        retry_error_callback=callback_docv1,
    )
    @retry(
        wait=wait_exponential(multiplier=1, max=10),
        retry=retry_if_result(lambda res: res.get_status_code() == 429),
    )
    async def get_data(
        self,
        schema: str,
        data_id: str,
        namespace: str = None,
        groupname: str = None,
        semaphore: asyncio.Semaphore = None,
        **kwargs,
    ) -> VespaResponse:
        """
        Get a data point from a Vespa app.

        Sends an HTTP GET to the document path returned by
        `self.app.get_document_v1_path`.

        Retry behaviour comes from the two decorators. The inner one retries
        while the response status is 429 and configures no attempt limit. The
        outer one makes up to 3 attempts when an exception is raised or the
        status is 503, then `callback_docv1` returns the last response or
        re-raises the last exception.

        :param schema: The schema that we are getting data from.
        :param data_id: Unique id associated with this data point.
        :param namespace: The namespace that we are getting data from.
        :param groupname: The groupname that we are getting data from.
        :param semaphore: If given, it is held for the duration of the HTTP request.
        :param kwargs: Additional HTTP request parameters.
        :return: Response of the HTTP GET request.
        """
        document_path = self.app.get_document_v1_path(
            id=data_id, schema=schema, namespace=namespace, group=groupname
        )
        url = f"{self.app.end_point}{document_path}"

        async def send():
            # Single place that performs the request, with or without the semaphore.
            return await self.httpx_client.get(url, params=kwargs)

        if semaphore:
            async with semaphore:
                reply = await send()
        else:
            reply = await send()
        return VespaResponse(
            json=reply.json(),
            status_code=reply.status_code,
            url=str(reply.url),
            operation_type="get",
        )

    @retry(
        wait=wait_exponential(multiplier=1),
        retry=retry_any(
            retry_if_exception(lambda x: True),
            retry_if_result(lambda x: x.get_status_code() == 503),
        ),
        stop=stop_after_attempt(3),
        retry_error_callback=callback_docv1,
    )
    @retry(
        wait=wait_exponential(multiplier=1, max=10),
        retry=retry_if_result(lambda x: x.get_status_code() == 429),
    )
    async def update_data(
        self,
        schema: str,
        data_id: str,
        fields: Dict,
        create: bool = False,
        auto_assign: bool = True,
        namespace: str = None,
        groupname: str = None,
        semaphore: asyncio.Semaphore = None,
        **kwargs,
    ) -> VespaResponse:
        """
        Update a data point in a Vespa app.

        Sends an HTTP PUT to the document path returned by
        `self.app.get_document_v1_path`, with `create` and `kwargs` as request
        parameters.

        Retry behaviour comes from the two decorators. The inner one retries
        while the response status is 429 and configures no attempt limit. The
        outer one makes up to 3 attempts when an exception is raised or the
        status is 503, then `callback_docv1` returns the last response or
        re-raises the last exception.

        :param schema: The schema that we are updating data.
        :param data_id: Unique id associated with this data point.
        :param fields: Dict containing all the fields you want to update.
        :param create: If true, updates to non-existent documents will create an empty document to update.
        :param auto_assign: If true, every entry of `fields` is wrapped in an `assign` operation. If false,
            `fields` must already contain the update operations; an `id` key is dropped in that case.
        :param namespace: The namespace that we are updating data.
        :param groupname: The groupname used to update data.
        :param semaphore: If given, it is held for the duration of the HTTP request.
        :param kwargs: Additional HTTP request parameters.
        :return: Response of the HTTP PUT request.
        """
        path = self.app.get_document_v1_path(
            id=data_id, schema=schema, namespace=namespace, group=groupname
        )
        end_point = "{}{}".format(self.app.end_point, path)
        # Send `create` through `params` instead of embedding it in the URL: httpx may
        # replace a query string already present in the URL when `params` is passed,
        # which would silently drop `create`. An explicit `create` in kwargs still wins.
        params = {"create": str(create).lower(), **kwargs}
        if auto_assign:
            vespa_format = {"fields": {k: {"assign": v} for k, v in fields.items()}}
        else:
            # Can not send 'id' in fields for partial update
            vespa_format = {"fields": {k: v for k, v in fields.items() if k != "id"}}

        async def send():
            # Single place that performs the request, with or without the semaphore.
            return await self.httpx_client.put(
                end_point, json=vespa_format, params=params
            )

        if semaphore:
            async with semaphore:
                response = await send()
        else:
            response = await send()
        return VespaResponse(
            json=response.json(),
            status_code=response.status_code,
            url=str(response.url),
            operation_type="update",
        )