From ca01082b4925aa6acc761f0a17e75aa2e5d07130 Mon Sep 17 00:00:00 2001
From: Chris Newell
Date: Tue, 17 Dec 2024 16:55:31 -0800
Subject: [PATCH] PR feedback

---
 README.md                     | 12 +++++-------
 scripts/olmo2_modal_openai.py | 10 +++++-----
 2 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/README.md b/README.md
index 00c95d42a..59da84fad 100644
--- a/README.md
+++ b/README.md
@@ -156,19 +156,17 @@ Additional tools for evaluating OLMo models are available at the [OLMo Eval](htt
 
 ## Modal.com Hosting
 
-An example script is provided for hosting an OLMo 2 model on Modal.com using a the OpenAI API in ./scripts/olmo2_modal_openai.py.
+An example script is provided for hosting an OLMo 2 model on Modal.com using the OpenAI API in ./scripts/olmo2_modal_openai.py.
 To run that:
-  1. Follow the instructions under Getting Started in [the Modal.com Guide](https://modal.com/docs/guide) to install
+
+1. Follow the instructions under Getting Started in [the Modal.com Guide](https://modal.com/docs/guide) to install
    the Modal library and command line tools.
-  2. 
-  3. Follow the instructions under [Secrets](https://modal.com/docs/guide/secrets) in the Modal.com Guide to create a Modal secret named "example-secret-token"
+2. Follow the instructions under [Secrets](https://modal.com/docs/guide/secrets) in the Modal.com Guide to create a Modal secret named "example-secret-token"
    that defines a value for the variable MODAL_TOKEN for your server.
-  4. 
-  5. Then run
+3. Then run
 ```bash
 modal deploy ./scripts/olmo2_modal_openai.py
 ```
-  6. 
 
 You can check your endpoint using curl similar to the following:
 ```bash
diff --git a/scripts/olmo2_modal_openai.py b/scripts/olmo2_modal_openai.py
index ab21e31ad..25255e793 100644
--- a/scripts/olmo2_modal_openai.py
+++ b/scripts/olmo2_modal_openai.py
@@ -18,8 +18,8 @@
 APP_NAME = "OLMo-2-1124-13B-Instruct-openai"
 APP_LABEL = APP_NAME.lower()
 
-MINUTES = 60  # seconds
-HOURS = 60 * MINUTES
+ONE_MINUTE = 60  # seconds
+ONE_HOUR = 60 * ONE_MINUTE
 
 # ## Download the model weights
@@ -78,7 +78,7 @@ def download_model_to_image(model_dir, model_name, model_revision):
     .env({"HF_HUB_ENABLE_HF_TRANSFER": "1"})
     .run_function(
         download_model_to_image,
-        timeout=60 * MINUTES,  # typically much faster but set high to be conservative
+        timeout=60 * ONE_MINUTE,  # typically much faster but set high to be conservative
         kwargs={
             "model_dir": MODEL_DIR,
             "model_name": MODEL_NAME,
@@ -107,8 +107,8 @@ def download_model_to_image(model_dir, model_name, model_revision):
     image=vllm_image,
     gpu=GPU_CONFIG,
     keep_warm=0,  # Spin down entirely when idle
-    container_idle_timeout=5 * MINUTES,
-    timeout=24 * HOURS,
+    container_idle_timeout=5 * ONE_MINUTE,
+    timeout=24 * ONE_HOUR,
     allow_concurrent_inputs=1000,
     secrets=[modal.Secret.from_name("example-secret-token")],  # contains MODAL_TOKEN used below
 )
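
The comment on the final hunk notes that the "example-secret-token" secret supplies MODAL_TOKEN to the server. As an illustration only, a bearer-token guard of the kind such OpenAI-compatible Modal servers commonly use could look like the sketch below; the helper name `verify_token` and the FastAPI wiring are assumptions, not code from this patch, and the script's actual check may differ.

```python
# Sketch only: one common way an OpenAI-compatible server on Modal validates requests
# against the MODAL_TOKEN value injected by the "example-secret-token" secret.
# The helper name and FastAPI wiring are assumptions, not code from this patch.
import os

from fastapi import HTTPException, Security
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer

bearer_scheme = HTTPBearer()


async def verify_token(
    credentials: HTTPAuthorizationCredentials = Security(bearer_scheme),
) -> None:
    # Compare the caller's bearer token with the value provided by the Modal secret.
    if credentials.credentials != os.environ["MODAL_TOKEN"]:
        raise HTTPException(status_code=401, detail="Invalid or missing token")
```

A dependency like this would typically be attached to the served routes so that every request must present the secret value as a bearer token.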
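
The README hunk ends just before its curl example, which falls outside the diff context. For reference, an equivalent check of the deployed endpoint can be written with the `openai` Python client; the endpoint URL, model id, and token below are placeholders, not values from this patch.

```python
# Sketch only: query the deployed OpenAI-compatible endpoint with the `openai` client.
# Replace base_url with the URL printed by `modal deploy`, and api_key with the
# MODAL_TOKEN value stored in the "example-secret-token" secret.
from openai import OpenAI

client = OpenAI(
    base_url="https://<workspace>--<app-label>-serve.modal.run/v1",  # placeholder URL
    api_key="<MODAL_TOKEN value>",  # placeholder token
)

response = client.chat.completions.create(
    model="allenai/OLMo-2-1124-13B-Instruct",  # assumed model id served by vLLM
    messages=[{"role": "user", "content": "Say hello from OLMo 2."}],
    max_tokens=64,
)
print(response.choices[0].message.content)
```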