diff --git a/.github/workflows/python-prerelease.yml b/.github/workflows/python-prerelease.yml index de124871d9..4e10594920 100644 --- a/.github/workflows/python-prerelease.yml +++ b/.github/workflows/python-prerelease.yml @@ -6,7 +6,7 @@ on: - "v*.rc*" jobs: - test_on_transformers: + trigger_rc_testing: runs-on: ubuntu-latest strategy: @@ -32,6 +32,15 @@ jobs: git config user.name "Hugging Face Bot (RC Testing)" git config user.email "bot@huggingface.co" + - name: Wait for prerelease to be out on PyPI + run: | + VERSION=${{ steps.get-version.outputs.VERSION }} + echo "Waiting for huggingface-hub==${VERSION} to be available on PyPI" + while ! pip install huggingface-hub==${VERSION}; do + echo "huggingface-hub==${VERSION} not available yet, retrying in 15s" + sleep 15 + done + + - name: Create test branch and update dependencies id: create-pr run: | @@ -43,21 +52,25 @@ jobs: git checkout -b $BRANCH_NAME # Update dependencies using sed - sed -i -E 's/"huggingface-hub>=0.*"/"huggingface-hub==${VERSION}"/' setup.py + sed -i -E "s/\"huggingface-hub>=0.*\"/\"huggingface-hub==${VERSION}\"/" setup.py git add setup.py # Only if the target repo is transformers if [ "${{ matrix.target-repo }}" = "transformers" ]; then - sed -i -E 's/"huggingface-hub>=0.*"/"huggingface-hub==${VERSION}"/' src/transformers/dependency_versions_table.py + sed -i -E "s/\"huggingface-hub>=0.*\"/\"huggingface-hub==${VERSION}\"/" src/transformers/dependency_versions_table.py git add src/transformers/dependency_versions_table.py fi # Only if the target repo is diffusers if [ "${{ matrix.target-repo }}" = "diffusers" ]; then - sed -i -E 's/"huggingface-hub":.*/"huggingface-hub": "huggingface-hub==${VERSION}",/' src/diffusers/dependency_versions_table.py + sed -i -E "s/\"huggingface-hub\":.*/\"huggingface-hub\": \"huggingface-hub==${VERSION}\",/" src/diffusers/dependency_versions_table.py git add src/diffusers/dependency_versions_table.py fi + # Any line with `uv pip install` in the 
`.github/` folder must be updated with `--prerelease=allow` flag + find .github/workflows/ -type f -exec sed -i 's/uv pip install /uv pip install --prerelease=allow /g' {} + + git add .github/workflows/ + # Commit and push changes git --no-pager diff --staged git commit -m "Test hfh ${VERSION}" @@ -67,4 +80,4 @@ jobs: run: | VERSION=${{ steps.get-version.outputs.VERSION }} echo "https://github.com/huggingface/${{ matrix.target-repo }}/actions" - echo "https://github.com/huggingface/${{ matrix.target-repo }}/compare/main...ci-test-huggingface-hub-${VERSION}" + echo "https://github.com/huggingface/${{ matrix.target-repo }}/compare/main...ci-test-huggingface-hub-${VERSION}" \ No newline at end of file diff --git a/setup.py b/setup.py index 9de62223c9..d0a0e3ce49 100644 --- a/setup.py +++ b/setup.py @@ -54,6 +54,7 @@ def get_version() -> str: "tensorflow", "keras<3.0", ] + extras["hf_xet"] = ["hf_xet"] extras["testing"] = ( diff --git a/src/huggingface_hub/_inference_endpoints.py b/src/huggingface_hub/_inference_endpoints.py index ad5c34ad31..37733fef1b 100644 --- a/src/huggingface_hub/_inference_endpoints.py +++ b/src/huggingface_hub/_inference_endpoints.py @@ -207,16 +207,21 @@ def wait(self, timeout: Optional[int] = None, refresh_every: int = 5) -> "Infere start = time.time() while True: - if self.url is not None: - # Means the URL is provisioned => check if the endpoint is reachable - response = get_session().get(self.url, headers=self._api._build_hf_headers(token=self._token)) - if response.status_code == 200: - logger.info("Inference Endpoint is ready to be used.") - return self if self.status == InferenceEndpointStatus.FAILED: raise InferenceEndpointError( f"Inference Endpoint {self.name} failed to deploy. Please check the logs for more information." ) + if self.status == InferenceEndpointStatus.UPDATE_FAILED: + raise InferenceEndpointError( + f"Inference Endpoint {self.name} failed to update. Please check the logs for more information." 
+ ) + if self.status == InferenceEndpointStatus.RUNNING and self.url is not None: + # Verify the endpoint is actually reachable + response = get_session().get(self.url, headers=self._api._build_hf_headers(token=self._token)) + if response.status_code == 200: + logger.info("Inference Endpoint is ready to be used.") + return self + if timeout is not None: if time.time() - start > timeout: raise InferenceEndpointTimeoutError("Timeout while waiting for Inference Endpoint to be deployed.") diff --git a/src/huggingface_hub/_upload_large_folder.py b/src/huggingface_hub/_upload_large_folder.py index a736e7562a..c925a31ff5 100644 --- a/src/huggingface_hub/_upload_large_folder.py +++ b/src/huggingface_hub/_upload_large_folder.py @@ -25,6 +25,7 @@ from pathlib import Path from threading import Lock from typing import TYPE_CHECKING, List, Optional, Tuple, Union +from urllib.parse import quote from . import constants from ._commit_api import CommitOperationAdd, UploadInfo, _fetch_upload_modes @@ -497,7 +498,7 @@ def _get_upload_mode(items: List[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_t repo_type=repo_type, repo_id=repo_id, headers=api._build_hf_headers(), - revision=revision, + revision=quote(revision, safe=""), ) for item, addition in zip(items, additions): paths, metadata = item diff --git a/tests/test_inference_endpoints.py b/tests/test_inference_endpoints.py index 6b44466338..265b700c0f 100644 --- a/tests/test_inference_endpoints.py +++ b/tests/test_inference_endpoints.py @@ -1,5 +1,6 @@ from datetime import datetime, timezone -from unittest.mock import Mock, patch +from itertools import chain, repeat +from unittest.mock import MagicMock, Mock, patch import pytest @@ -109,6 +110,39 @@ "targetReplica": 1, }, } +# added for test_wait_update function +MOCK_UPDATE = { + "name": "my-endpoint-name", + "type": "protected", + "accountId": None, + "provider": {"vendor": "aws", "region": "us-east-1"}, + "compute": { + "accelerator": "cpu", + "instanceType": "intel-icl", + 
"instanceSize": "x2", + "scaling": {"minReplica": 0, "maxReplica": 1}, + }, + "model": { + "repository": "gpt2", + "revision": "11c5a3d5811f50298f278a704980280950aedb10", + "task": "text-generation", + "framework": "pytorch", + "image": {"huggingface": {}}, + "secret": {"token": "my-token"}, + }, + "status": { + "createdAt": "2023-10-26T12:41:53.263078506Z", + "createdBy": {"id": "6273f303f6d63a28483fde12", "name": "Wauplin"}, + "updatedAt": "2023-10-26T12:41:53.263079138Z", + "updatedBy": {"id": "6273f303f6d63a28483fde12", "name": "Wauplin"}, + "private": None, + "state": "updating", + "url": "https://vksrvs8pc1xnifhq.us-east-1.aws.endpoints.huggingface.cloud", + "message": "Endpoint waiting for the update", + "readyReplica": 0, + "targetReplica": 1, + }, +} def test_from_raw_initialization(): @@ -189,7 +223,7 @@ def test_fetch(mock_get: Mock): @patch("huggingface_hub._inference_endpoints.get_session") @patch("huggingface_hub.hf_api.HfApi.get_inference_endpoint") def test_wait_until_running(mock_get: Mock, mock_session: Mock): - """Test waits waits until the endpoint is ready.""" + """Test waits until the endpoint is ready.""" endpoint = InferenceEndpoint.from_raw(MOCK_INITIALIZING, namespace="foo") mock_get.side_effect = [ @@ -244,6 +278,27 @@ def test_wait_failed(mock_get: Mock): endpoint.wait(refresh_every=0.001) +@patch("huggingface_hub.hf_api.HfApi.get_inference_endpoint") +@patch("huggingface_hub._inference_endpoints.get_session") +def test_wait_update(mock_get_session, mock_get_inference_endpoint): + """Test that wait() returns when the endpoint transitions to running.""" + endpoint = InferenceEndpoint.from_raw(MOCK_INITIALIZING, namespace="foo") + # Create an iterator that yields three MOCK_UPDATE responses, and then infinitely yields MOCK_RUNNING responses. 
+ responses = chain( + [InferenceEndpoint.from_raw(MOCK_UPDATE, namespace="foo")] * 3, + repeat(InferenceEndpoint.from_raw(MOCK_RUNNING, namespace="foo")), + ) + mock_get_inference_endpoint.side_effect = lambda *args, **kwargs: next(responses) + + # Patch the get_session().get() call to always return a fake response with status_code 200. + fake_response = MagicMock() + fake_response.status_code = 200 + mock_get_session.return_value.get.return_value = fake_response + + endpoint.wait(refresh_every=0.05) + assert endpoint.status == "running" + + @patch("huggingface_hub.hf_api.HfApi.pause_inference_endpoint") def test_pause(mock: Mock): """Test `pause` calls the correct alias."""