diff --git a/.github/workflows/doc-build.yml b/.github/workflows/doc-build.yml index 9752960..d7b6d51 100644 --- a/.github/workflows/doc-build.yml +++ b/.github/workflows/doc-build.yml @@ -1,22 +1,22 @@ -# name: Build Documentation -# -# on: -# push: -# branches: -# - main -# - doc-builder* -# paths: -# - docs/** -# - .github/workflows/doc-build.yml -# -# jobs: -# build: -# uses: huggingface/doc-builder/.github/workflows/build_main_documentation.yml@main -# with: -# commit_sha: ${{ github.sha }} -# package: hugs-docs -# package_name: hugs -# additional_args: --not_python_module -# secrets: -# token: ${{ secrets.HUGGINGFACE_PUSH }} -# hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }} +name: Build Documentation + +on: + push: + branches: + - main + - doc-builder* + paths: + - docs/** + - .github/workflows/doc-build.yml + +jobs: + build: + uses: huggingface/doc-builder/.github/workflows/build_main_documentation.yml@main + with: + commit_sha: ${{ github.sha }} + package: hugs-docs + package_name: hugs + additional_args: --not_python_module + secrets: + token: ${{ secrets.HUGGINGFACE_PUSH }} + hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }} diff --git a/.github/workflows/doc-pr-build.yml b/.github/workflows/doc-pr-build.yml index 1bbbab3..619f824 100644 --- a/.github/workflows/doc-pr-build.yml +++ b/.github/workflows/doc-pr-build.yml @@ -1,21 +1,21 @@ -# name: Build PR Documentation -# -# on: -# pull_request: -# paths: -# - docs/** -# - .github/workflows/doc-pr-build.yml -# -# concurrency: -# group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} -# cancel-in-progress: true -# -# jobs: -# build: -# uses: huggingface/doc-builder/.github/workflows/build_pr_documentation.yml@main -# with: -# commit_sha: ${{ github.event.pull_request.head.sha }} -# pr_number: ${{ github.event.number }} -# package: hugs-docs -# package_name: hugs -# additional_args: --not_python_module +name: Build PR Documentation + +on: + pull_request: + paths: + - docs/** + - 
.github/workflows/doc-pr-build.yml + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + +jobs: + build: + uses: huggingface/doc-builder/.github/workflows/build_pr_documentation.yml@main + with: + commit_sha: ${{ github.event.pull_request.head.sha }} + pr_number: ${{ github.event.number }} + package: hugs-docs + package_name: hugs + additional_args: --not_python_module diff --git a/.github/workflows/doc-pr-upload.yml b/.github/workflows/doc-pr-upload.yml index 8856331..a00dc72 100644 --- a/.github/workflows/doc-pr-upload.yml +++ b/.github/workflows/doc-pr-upload.yml @@ -1,16 +1,16 @@ -# name: Upload PR Documentation -# -# on: -# workflow_run: -# workflows: ["Build PR Documentation"] -# types: -# - completed -# -# jobs: -# build: -# uses: huggingface/doc-builder/.github/workflows/upload_pr_documentation.yml@main -# with: -# package_name: hugs -# secrets: -# hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }} -# comment_bot_token: ${{ secrets.COMMENT_BOT_TOKEN }} +name: Upload PR Documentation + +on: + workflow_run: + workflows: ["Build PR Documentation"] + types: + - completed + +jobs: + build: + uses: huggingface/doc-builder/.github/workflows/upload_pr_documentation.yml@main + with: + package_name: hugs + secrets: + hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }} + comment_bot_token: ${{ secrets.COMMENT_BOT_TOKEN }} diff --git a/docs/source/_toctree.yml b/docs/source/_toctree.yml index f96e63e..529d752 100644 --- a/docs/source/_toctree.yml +++ b/docs/source/_toctree.yml @@ -19,12 +19,12 @@ title: HUGS with Kubernetes - local: how-to/cloud/aws title: HUGS on AWS + - local: how-to/cloud/digital-ocean + title: HUGS on DigitalOcean - local: how-to/cloud/gcp - title: HUGS on Google Cloud + title: (Soon) HUGS on Google Cloud - local: how-to/cloud/azure title: (Soon) HUGS on Azure - - local: how-to/digitalocean - title: HUGS on DigitalOcean title: How to run HUGS - sections: - local: guides/inference diff --git 
a/docs/source/faq.mdx b/docs/source/faq.mdx index 734926c..0749742 100644 --- a/docs/source/faq.mdx +++ b/docs/source/faq.mdx @@ -2,47 +2,48 @@ ## What is HUGS? -HUGS (Hugging Face Generative AI Services) are optimized, zero-configuration inference microservices designed to simplify and accelerate the development of AI applications with open models. For more details, see our [Introduction to HUGS](./index.mdx). +HUGS (Hugging Face Generative AI Services) are optimized, zero-configuration inference microservices designed to simplify and accelerate the development of AI applications with open models. For more details, see our [Introduction to HUGS](./index). ## Which models are supported by HUGS? -HUGS supports a wide range of open AI models, including LLMs, Multimodal Models, and Embedding Models. For a complete list of supported models, check our [Supported Models](./models.mdx) page. +HUGS supports a wide range of open AI models, including LLMs, Multimodal Models, and Embedding Models. For a complete list of supported models, check our [Supported Models](./models) page. ## What hardware is compatible with HUGS? -HUGS is optimized for various hardware accelerators, including NVIDIA GPUs, AMD GPUs, AWS Inferentia, and Google TPUs. For more information, visit our [Supported Hardware](./hardware.mdx) page. +HUGS is optimized for various hardware accelerators, including NVIDIA GPUs, AMD GPUs, AWS Inferentia, and Google TPUs. For more information, visit our [Supported Hardware](./hardware) page. ## How do I deploy HUGS? -You can deploy HUGS through various methods, including Docker and Kubernetes. For step-by-step deployment instructions, refer to our [Deployment Guide](./how-to/docker.mdx). +You can deploy HUGS through various methods, including Docker and Kubernetes. For step-by-step deployment instructions, refer to our [Deployment Guide](./how-to/docker). ## Is HUGS available on cloud platforms? Yes, HUGS is available on major cloud platforms. 
For specific instructions, check our guides for: -- [AWS](./how-to/cloud/aws.mdx) -- [Google Cloud](./how-to/cloud/gcp.mdx) -- [Azure](./how-to/cloud/azure.mdx) (coming soon) +- [AWS](./how-to/cloud/aws) +- [DigitalOcean](./how-to/cloud/digital-ocean) +- [Google Cloud](./how-to/cloud/gcp) (coming soon) +- [Microsoft Azure](./how-to/cloud/azure) (coming soon) ## How does HUGS pricing work? -HUGS offers a on-demand pricing based on the uptime of each container. For detailed pricing information, visit our [Pricing](./pricing.mdx) page. +HUGS offers on-demand pricing based on the uptime of each container. For detailed pricing information, visit our [Pricing](./pricing) page. ## How do I run inference using HUGS? -To learn how to run inference with HUGS, check our [Inference Guide](./guides/inference.mdx). +To learn how to run inference with HUGS, check our [Inference Guide](./guides/inference). ## What are the key features of HUGS? -HUGS offers several key features, including optimized hardware inference engines, zero-configuration deployment, and industry-standardized APIs. For a complete list of features, see our [Introduction to HUGS](./index.mdx#key-features). +HUGS offers several key features, including optimized hardware inference engines, zero-configuration deployment, and industry-standardized APIs. For a complete list of features, see our [Introduction to HUGS](./index#key-features). ## How does HUGS ensure security and compliance? -HUGS allows deployment within your own infrastructure for enhanced security and data control. It also includes necessary licenses and terms of services to minimize compliance risks. For more information, refer to our [Security and Compliance](./index.mdx#key-features) section. +HUGS allows deployment within your own infrastructure for enhanced security and data control. It also includes necessary licenses and terms of service to minimize compliance risks.
For more information, refer to our [Security and Compliance](./index#key-features) section. ## Where can I get help or support for HUGS? -If you need assistance or have questions about HUGS, check our [Help & Support](./help.mdx) page for community forums and contact information. +If you need assistance or have questions about HUGS, check our [Help & Support](./help) page for community forums and contact information. ## Can I use HUGS with my existing AI applications? diff --git a/docs/source/guides/migrate.mdx b/docs/source/guides/migrate.mdx new file mode 100644 index 0000000..bce662d --- /dev/null +++ b/docs/source/guides/migrate.mdx @@ -0,0 +1,3 @@ +# Migrate from OpenAI to HUGS + +Coming soon! diff --git a/docs/source/how-to/cloud/aws.mdx b/docs/source/how-to/cloud/aws.mdx index a7016f4..55d3bb3 100644 --- a/docs/source/how-to/cloud/aws.mdx +++ b/docs/source/how-to/cloud/aws.mdx @@ -14,15 +14,15 @@ With HUGS, developers can easily find, subscribe to, and deploy Hugging Face mod ![HUGS on AWS Marketplace](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/hugs/aws/hugs-marketplace-listing.png) -2. Subscribe to the product in AWS Marketplace by following the instructions on the page. At the time of writing (September 2024), the steps are to: +2. Subscribe to the product in AWS Marketplace by following the instructions on the page. At the time of writing (October 2024), the steps are to: 1. Click `Continue to Subscribe`, then go to the next page. 2. Click `Continue to Configuration`, then go to the next page. - 3. Select the fulfillment option & software version (HUGS Version and model you want to use) from the list. + 3. Select the fulfillment option e.g. `open LLMs for NVIDIA GPUS`, and the software version e.g. `0.1.0`. ![HUGS Configuration on AWS Marketplace](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/hugs/aws/hugs-configuration.png) -3. Then click `Continue to Launch`. 
You successfully subscribe to HUGS. You can now follow the steps below to deploy your preferred HUGS container and model using AWS EKS. +3. Then click `Continue to Launch`. You have successfully subscribed to HUGS. You can now follow the steps below to deploy your preferred HUGS container and model using AWS EKS, with the provided container URIs. @@ -198,6 +198,7 @@ helm install $DEPLOYMENT_NAME hugs/hugs \ --set image.registry="XXXXXXXXXXXX.dkr.ecr.us-east-1.amazonaws.com" \ --set image.repository="hugging-face" \ --set image.name="nvidia-meta-llama-meta-llama-3.1-8b-instruct" \ + --set image.tag="0.1.0" \ --set serviceAccountName=$SERVICE_ACCOUNT_NAME \ --set nodeSelector."eks\.amazonaws\.com/nodegroup"=$NODE_GROUP_NAME ``` @@ -232,7 +233,7 @@ In the inference examples in the guide below, the host is assumed to be `localho -Refer to [Run Inference on HUGS](../../guides/inference.mdx) to see how to run inference on HUGS. +Refer to [Run Inference on HUGS](../../guides/inference) to see how to run inference on HUGS. ### Uninstall HUGS diff --git a/docs/source/how-to/cloud/digital-ocean.mdx b/docs/source/how-to/cloud/digital-ocean.mdx index 136df07..4a169d0 100644 --- a/docs/source/how-to/cloud/digital-ocean.mdx +++ b/docs/source/how-to/cloud/digital-ocean.mdx @@ -1,3 +1,112 @@ -# HUGS on Digital Ocean +# HUGS on DigitalOcean -TODO +The Hugging Face Generative AI Services, also known as HUGS, can be deployed in DigitalOcean (DO) via the GPU Droplets as 1-Click Models. + +This collaboration brings Hugging Face's extensive library of pre-trained models and their Text Generation Inference (TGI) solution to DigitalOcean customers, enabling seamless integration of state-of-the-art Large Language Models (LLMs) within the GPU Droplets of DigitalOcean.
+ +HUGS provides access to a hand-picked and manually benchmarked collection of the most performant and latest open LLMs hosted in the Hugging Face Hub to TGI-optimized container applications, allowing users to deploy LLMs with a 1-Click deployment on DigitalOcean GPU Droplets. + +With HUGS, developers can easily find, subscribe to, and deploy Hugging Face models using DigitalOcean's infrastructure, leveraging the power of NVIDIA GPUs on optimized, zero-configuration TGI containers. + +**More How to:** + +* [Deploying Hugging Face Generative AI Services on DigitalOcean GPU Droplet and Integrating with Open WebUI](https://www.digitalocean.com/community/tutorials/deploy-hugs-on-gpu-droplets-open-webui) + + +## 1-Click Deploy of HUGS in DO GPU Droplets + +1. Create a DigitalOcean account with a valid payment method, if you don't have one already, and make sure that you have enough quota to spin up GPU Droplets. + +2. Go to [DigitalOcean GPU Droplets](https://www.digitalocean.com/products/gpu-droplets) and create a new one. + +![Create GPU Droplet on DigitalOcean](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/hugs/digital-ocean/create-gpu-droplet.png) + +3. Choose a data-center region (New York i.e. NYC2, or Toronto i.e. TOR1, available at the time of writing this). + +4. Choose the 1-Click Models when choosing an image, and select any of the available Hugging Face images that correspond to popular LLMs hosted on the Hugging Face Hub. + +![Choose 1-Click Models on DigitalOcean](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/hugs/digital-ocean/one-click-models.png) + +5. Configure the remaining options, and click on "Create GPU Droplet" when done. + +### HUGS Inference on DO GPU Droplets + +Once the HUGS LLM has been deployed in a DO GPU Droplet, you can either connect to it via the public IP exposed by the instance, or just connect to it via the Web Console. 
+ +![HUGS on DigitalOcean GPU Droplet](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/hugs/digital-ocean/hugs-gpu-droplet.png) + +When connected to the HUGS Droplet, the initial SSH message will display a Bearer Token, which is required to send requests to the public IP of the deployed HUGS Droplet. + +Then you can send requests to the Messages API via either `localhost` if connected within the HUGS Droplet, or via its public IP. + + + +In the inference examples in the guide below, the host is assumed to be `localhost`, which is the case when deploying HUGS via GPU Droplet and connecting to the running instance via SSH. If you prefer to use the public IP instead, then you should update that in the examples provided below. + + + +Refer to [Run Inference on HUGS](../../guides/inference) to see how to run inference on HUGS, but note that in this case you will need to use the Bearer Token provided, so find below the updated examples as in the guide, but using the Bearer Token to send the requests to the Messages API of the deployed HUGS Droplet (assuming that the Bearer Token is stored in the environment variable `BEARER_TOKEN`). + +#### cURL + +Using `cURL` is pretty straightforward to [install](https://curl.se/docs/install.html) and use. + +```bash +curl http://localhost:8080/v1/chat/completions \ + -X POST \ + -d '{"messages":[{"role":"user","content":"What is Deep Learning?"}],"temperature":0.7,"top_p":0.95,"max_tokens":128}' \ + -H 'Content-Type: application/json' \ + -H "Authorization: Bearer $BEARER_TOKEN" +``` + +#### Python + +As already mentioned, you can either use the `huggingface_hub.InferenceClient` from the `huggingface_hub` Python SDK (recommended), the `openai` Python SDK, or any SDK with an OpenAI-compatible interface that can consume the Messages API.
+ +##### `huggingface_hub` + +You can install it via pip as `pip install --upgrade --quiet huggingface_hub`, and then run the following snippet to mimic the `cURL` commands above i.e. sending requests to the Messages API: + +```python +import os +from huggingface_hub import InferenceClient + +client = InferenceClient(base_url="http://localhost:8080", api_key=os.getenv("BEARER_TOKEN")) + +chat_completion = client.chat.completions.create( + messages=[ + {"role":"user","content":"What is Deep Learning?"}, + ], + temperature=0.7, + top_p=0.95, + max_tokens=128, +) +``` + +Read more about the [`huggingface_hub.InferenceClient.chat_completion` method](https://huggingface.co/docs/huggingface_hub/en/package_reference/inference_client#huggingface_hub.InferenceClient.chat_completion). + +##### `openai` + +Alternatively, you can also use the Messages API via `openai`; you can install it via pip as `pip install --upgrade openai`, and then run: + +```python +import os +from openai import OpenAI + +client = OpenAI(base_url="http://localhost:8080/v1/", api_key=os.getenv("BEARER_TOKEN")) + +chat_completion = client.chat.completions.create( + model="tgi", + messages=[ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "What is Deep Learning?"}, + ], + temperature=0.7, + top_p=0.95, + max_tokens=128, +) +``` + +### Delete created DO GPU Droplet + +Finally, once you are done using the deployed LLM via the GPU Droplet, you can safely delete it via the "Actions" option within the deployed LLM, to avoid incurring unnecessary costs. diff --git a/docs/source/how-to/cloud/gcp.mdx b/docs/source/how-to/cloud/gcp.mdx index 8d68a47..3aadc19 100644 --- a/docs/source/how-to/cloud/gcp.mdx +++ b/docs/source/how-to/cloud/gcp.mdx @@ -130,4 +130,4 @@ Your GKE cluster with GPU support is now ready for HUGS deployment.
You can proc For more detailed information on creating and managing GKE clusters, refer to the [official Google Kubernetes Engine documentation](https://cloud.google.com/kubernetes-engine/docs) or [run GPUs in GKE Standard node pools](https://cloud.google.com/kubernetes-engine/docs/how-to/gpus). - + \ No newline at end of file diff --git a/docs/source/how-to/kubernetes.mdx b/docs/source/how-to/kubernetes.mdx index 0260a6a..c56b3c8 100644 --- a/docs/source/how-to/kubernetes.mdx +++ b/docs/source/how-to/kubernetes.mdx @@ -83,7 +83,7 @@ helm upgrade --install \ ## Running Inference -Once HUGS is deployed, you can run inference using the provided API. For detailed instructions, refer to [the inference guide](../guides/inference.mdx). +Once HUGS is deployed, you can run inference using the provided API. For detailed instructions, refer to [the inference guide](../guides/inference). ## Troubleshooting diff --git a/docs/source/index.mdx b/docs/source/index.mdx index 924b8e7..95f494c 100644 --- a/docs/source/index.mdx +++ b/docs/source/index.mdx @@ -1,5 +1,7 @@ # Hugging Face Generative AI Services (HUGS) +![HUGS Banner](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/hugs/hugs-banner.png) + > Optimized, zero-configuration inference microservices for open AI models Hugging Face Generative AI Services (HUGS) are optimized, zero-configuration inference microservices designed to simplify and accelerate the development of AI applications with open models. Built on open-source Hugging Face technologies such as Text Generation Inference or Transformers. HUGS provides the best solution for efficiently building Generative AI Applications with open models and are optimized for a variety of hardware accelerators, including NVIDIA GPUs, AMD GPUs, AWS Inferentia (soon), and Google TPUs (soon). 
@@ -27,9 +29,9 @@ HUGS make it easy to keep your applications at the cutting edge of Generative AI Compatible with a wide range of popular open AI models, including: -- LLMs: Llama, Gemma, Mistral, Mixtral, Qwen, Deepseek, T5, Yi, Phi, Command R -- Multimodal Models: Idefics, Llava -- Embedding Models: BGE, GTE, Mixbread, Arctic, Jina, Nomic +- LLMs: Llama, Gemma, Mistral, Mixtral, Qwen, Deepseek (soon), T5 (soon), Yi (soon), Phi (soon), Command R (soon) +- (Soon) Multimodal Models: Idefics, Llava +- (Soon) Embedding Models: BGE, GTE, Mixbread, Arctic, Jina, Nomic ## Getting Started @@ -38,13 +40,14 @@ To start using HUGS, you have several options. You can access HUGS as part of yo For detailed instructions on deployment and usage: - [Hugging Face Enterprise](https://huggingface.co/enterprise) -- [Amazon Web Services (AWS)](./guides/aws.mdx) -- [Google Cloud Platform (GCP)](./guides/gcp.mdx) -- [DigitalOcean](./guides/digitalocean.mdx) +- [Amazon Web Services (AWS)](./how-to/cloud/aws) +- [DigitalOcean](./how-to/cloud/digital-ocean) +- [Google Cloud Platform (GCP)](./how-to/cloud/gcp) (coming soon) +- [Microsoft Azure](./how-to/cloud/azure) (coming soon) ## More Resources -- [Community Forum](link-to-community-forum) -- [Enterprise Support](link-to-enterprise-support) +- [Community Forum](https://discuss.huggingface.co/) +- [Enterprise Support](https://huggingface.co/contact/sales?from=hugs) Experience the power of open models with the simplicity of HUGS. Start building your AI applications faster and more efficiently today! 
diff --git a/docs/source/models.mdx b/docs/source/models.mdx index 65ff311..deab5a0 100644 --- a/docs/source/models.mdx +++ b/docs/source/models.mdx @@ -6,9 +6,9 @@ HUGS supports a wide range of open AI models, including LLMs, Multimodal Models, | Model | 1x NVIDIA A10G | 2x NVIDIA A10G | 4x NVIDIA A10G | 8x NVIDIA A10G | 1x NVIDIA L4 | 2x NVIDIA L4 | 4x NVIDIA L4 | 8x NVIDIA L4 | 1x NVIDIA L40S | 2x NVIDIA L40S | 4x NVIDIA L40S | 8x NVIDIA L40S | 1x NVIDIA A100 80GB | 2x NVIDIA A100 80GB | 4x NVIDIA A100 80GB | 8x NVIDIA A100 80GB | 1x NVIDIA H100 | 2x NVIDIA H100 | 4x NVIDIA H100 | 8x NVIDIA H100 | 8x AMD Instinct MI300X | | ----------------------------------------------------------------------------------------------------------------- | -------------- | -------------- | -------------- | -------------- | ------------ | ------------ | ------------ | ------------ | -------------- | -------------- | -------------- | -------------- | ------------------- | ------------------- | ------------------- | ------------------- | -------------- | -------------- | -------------- | -------------- | ---------------------- | -| [meta-llama/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| [meta-llama/Meta-Llama-3.1-70B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3.1-70B-Instruct) | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | ✅ | -| [meta-llama/Meta-Llama-3.1-405B-Instruct-FP8](https://huggingface.co/meta-llama/Meta-Llama-3.1-405B-Instruct-FP8) | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | +| [meta-llama/Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| [meta-llama/Llama-3.1-70B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct) | ❌ | 
❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | ✅ | +| [meta-llama/Llama-3.1-405B-Instruct-FP8](https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct-FP8) | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | | [NousResearch/Hermes-3-Llama-3.1-8B](https://huggingface.co/NousResearch/Hermes-3-Llama-3.1-8B) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | [NousResearch/Hermes-3-Llama-3.1-70B](https://huggingface.co/NousResearch/Hermes-3-Llama-3.1-70B) | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | ✅ | | [NousResearch/Hermes-3-Llama-3.1-405B-FP8](https://huggingface.co/NousResearch/Hermes-3-Llama-3.1-405B-FP8) | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | diff --git a/docs/source/pricing.mdx b/docs/source/pricing.mdx index 48cc15e..72bc959 100644 --- a/docs/source/pricing.mdx +++ b/docs/source/pricing.mdx @@ -1,19 +1,20 @@ # Pricing -HUGS (Hugging Face Generative AI Services) offers a on-demand pricing based on the uptime of each container, except for Digital Ocean. +HUGS (Hugging Face Generative AI Services) offers a on-demand pricing based on the uptime of each container, except for DigitalOcean. ## Cloud Marketplace Pricing For deployments on major cloud platforms, HUGS is available through their respective marketplaces: - **AWS Marketplace**: $1 per hour per container -- **Google Cloud Platform (GCP) Marketplace**: $1 per hour per container +- (Soon) **Google Cloud Platform (GCP) Marketplace**: $1 per hour per container +- (Soon) **Microsoft Azure** This pricing model is based on the uptime of each container, allowing you to scale your usage according to your needs. -## Digital Ocean +## DigitalOcean -HUGS is available on Digital Ocean free of charge. You only pay for the compute resources used to run the containers. +HUGS is available on DigitalOcean free of charge. 
You only pay for the compute resources used to run the containers. ## Hugging Face Enterprise Customers