diff --git a/mcts/__init__.py b/mcts/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/mcts/train_policy_sft.py b/mcts/train_policy_sft.py
new file mode 100644
index 0000000..b1e23d2
--- /dev/null
+++ b/mcts/train_policy_sft.py
@@ -0,0 +1,132 @@
+from unsloth import FastLanguageModel
+import torch
+
+from trl import SFTTrainer
+from transformers import TrainingArguments
+from unsloth import is_bfloat16_supported
+from unsloth import UnslothTrainer, UnslothTrainingArguments
+
+from datasets import load_dataset
+
+
+# DUPLICATED CODE FOR MODAL
+# ---------------------
+import re
+SEED = 42
+
+def split_and_clean_steps(text):
+    # Use regex to split the text into steps
+    steps = re.split(r'(?=##\s*Step\s+\d+:)', text)
+
+    # Remove any leading/trailing whitespace, empty steps, and the "## Step n:" prefix
+    cleaned_steps = []
+    for step in steps:
+        # Strip whitespace and check if step is not empty
+        step = step.strip()
+        if step:
+            # Remove the "## Step n:" prefix
+            step = re.sub(r'^##\s*Step\s+\d+:\s*', '', step)
+            cleaned_steps.append(step)
+
+    return cleaned_steps
+
+def quality_filter(example):
+    response_quality = example['score'] >= 0.32 # arbitrary af
+    # TODO: check correctness of chain
+    # math_and_reasoning = example['primary_tag'] in ['Math', 'Reasoning']
+    instruction_quality = example['quality'] in ['excellent', 'good']
+    response_format = "## Step 1: " in example['response']
+    return response_quality and instruction_quality and response_format
+# ---------------------
+
+
+def train_sft():
+    max_seq_length = 8192 # Choose any! We auto support RoPE Scaling internally!
+    dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
+    load_in_4bit = False # Use 4bit quantization to reduce memory usage. Can be False.
+
+    model, tokenizer = FastLanguageModel.from_pretrained(
+        model_name = "unsloth/gemma-2-2b",
+        max_seq_length = max_seq_length,
+        dtype = dtype,
+        load_in_4bit = load_in_4bit,
+        # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
+    )
+
+    model = FastLanguageModel.get_peft_model(
+        model,
+        r = 128, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
+        target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
+                          "gate_proj", "up_proj", "down_proj",
+                          "embed_tokens", "lm_head",], # Add for continual pretraining
+        lora_alpha = 32,
+        lora_dropout = 0, # Supports any, but = 0 is optimized
+        bias = "none",    # Supports any, but = "none" is optimized
+        # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
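+        # (Gradient checkpointing recomputes activations during the backward
+        # pass instead of caching them, trading compute for memory on these
+        # long 8k-token sequences.)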
+        use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
+        random_state = 3407,
+        use_rslora = True, # We support rank stabilized LoRA
+        loftq_config = None, # And LoftQ
+    )
+
+
+    # dataset
+    ds = load_dataset("argilla/magpie-ultra-v0.1")
+    filtered_ds = ds.filter(quality_filter)
+    split_ds = filtered_ds['train'].train_test_split(test_size=0.1, seed=SEED)
+    train_ds = split_ds['train']
+
+    EOS_TOKEN = tokenizer.eos_token
+    def formatting_prompts_func(examples):
+        texts = []
+        for instruction, response in zip(examples['instruction'], examples['response']):
+            clean_steps = split_and_clean_steps(response)
+            all_steps = "\n\n".join(clean_steps)
+
+            prompt = f"{instruction}\n\n{all_steps}{EOS_TOKEN}"
+            texts.append(prompt)
+
+        return {"text": texts}
+    formatted_dataset = train_ds.map(formatting_prompts_func, batched = True,)
+
+
+    trainer = UnslothTrainer(
+        model = model,
+        tokenizer = tokenizer,
+        train_dataset = formatted_dataset,
+        dataset_text_field = "text",
+        max_seq_length = max_seq_length,
+        dataset_num_proc = 8,
+        packing = True,
+
+        args = UnslothTrainingArguments(
+            per_device_train_batch_size = 2,
+            gradient_accumulation_steps = 8,
+
+            warmup_ratio = 0.1,
+            num_train_epochs = 1,
+
+            learning_rate = 4e-4,
+            embedding_learning_rate = 4e-5,
+
+            fp16 = not is_bfloat16_supported(),
+            bf16 = is_bfloat16_supported(),
+            logging_steps = 1,
+            optim = "adamw_torch_fused",
+            weight_decay = 0.01,
+            lr_scheduler_type = "cosine",
+            seed = 3407,
+            output_dir = "outputs",
+        ),
+    )
+
+    #@title Show current memory stats
+    gpu_stats = torch.cuda.get_device_properties(0)
+    start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
+    max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
+    print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
+    print(f"{start_gpu_memory} GB of memory reserved.")
+
+    trainer_stats = trainer.train()
+
+    model.push_to_hub_merged("rawsh/mirrorgemma-2-2b-SFT", tokenizer, save_method = "merged_16bit")
\ No newline at end of file
diff --git a/mcts/train_reward.py b/mcts/train_reward.py
index 76b1515..997e672 100644
--- a/mcts/train_reward.py
+++ b/mcts/train_reward.py
@@ -42,13 +42,15 @@ class ScriptArguments:
             "help": "Path to deepspeed config if using deepspeed. You may need this if the model that you want to train doesn't fit on a single GPU."
         },
     )
-    per_device_train_batch_size: Optional[int] = field(default=1)
-    per_device_eval_batch_size: Optional[int] = field(default=1)
+    per_device_train_batch_size: Optional[int] = field(default=4)
+    per_device_eval_batch_size: Optional[int] = field(default=4)
     gradient_accumulation_steps: Optional[int] = field(default=32)
     learning_rate: Optional[float] = field(default=1e-5)
     weight_decay: Optional[float] = field(default=0.001)
     model_name: Optional[str] = field(
-        default="google/gemma-2b-it", #"mistralai/Mistral-7B-Instruct-v0.2",
+        # default="google/gemma-2-9b",
+        default="google/gemma-2-2b",
+        # default="Qwen/Qwen2.5-1.5B",
         metadata={
             "help": "The model that you want to train from the Hugging Face hub. E.g. gpt2, gpt2-xl, bert, etc."
         },
@@ -64,15 +66,16 @@ class ScriptArguments:
         metadata={"help": "The number of training epochs for the reward model."},
     )
     train_set_path: Optional[str] = field(
-        default="hendrydong/preference_700K",
+        default="rawsh/magpie-ultra-v0.1-PRM-data-base",
         metadata={"help": "The dir of the subset of the training data to use"},
     )
     eval_set_path: Optional[str] = field(
-        default="hendrydong/preference_700K",
+        default="rawsh/magpie-ultra-v0.1-PRM-data-base",
        metadata={"help": "The dir of the subset of the eval data to use"},
     )
     output_path: Optional[str] = field(
-        default="./bt_models/gemma2b_rm",
+        default="./mirrorgemma-2-2b-prm-base",
+        # default="./gemma-2-9b",
         metadata={"help": "The dir for output model"},
     )
     gradient_checkpointing: Optional[bool] = field(
@@ -81,15 +84,16 @@ class ScriptArguments:
     )
     optim: Optional[str] = field(
         # default="adamw_hf",
-        default="paged_adamw_32bit",
-        # default="adamw_torch_fused",
+        # default="paged_adamw_32bit",
+        default="adamw_torch_fused",
+        # default="adamw_bnb_8bit",
         metadata={"help": "The optimizer to use."},
     )
     lr_scheduler_type: Optional[str] = field(
         default="cosine",
         metadata={"help": "The lr scheduler"},
     )
-    max_length: Optional[int] = field(default=4096)
+    max_length: Optional[int] = field(default=8192)
     save_every_steps: Optional[int] = field(
         default=999999,
@@ -100,102 +104,33 @@
         metadata={"help": "Eval the model every x steps"},
     )
 
-parser = HfArgumentParser(ScriptArguments)
-script_args = parser.parse_args_into_dataclasses()[0]
-
-# Load the value-head model and tokenizer.
-tokenizer_name = script_args.model_name
-tokenizer = AutoTokenizer.from_pretrained(tokenizer_name, use_auth_token=True)
+def build_dataset(tokenizer, train_path, eval_path):
 
-# Adjusted according to the base model
-# Need to do this for the models that don't have an official pad token.
-tokenizer.truncation_side = "left"
-tokenizer.model_max_length = script_args.max_length
+    def tokenize(sample):
+        question = sample['question']
+        steps = sample['steps']
+        final_step_reward = sample['final_step_reward']
 
-# Get the dataset
-train_path = script_args.train_set_path
-eval_path = script_args.eval_set_path
-output_name = script_args.output_path
+        formatted_steps = "\n\n".join(steps)
+        full_text = f"{question}\n\n{formatted_steps}"
 
-def build_dataset(tokenizer, train_path, eval_path):
+        tokenized = tokenizer(full_text, truncation=True, max_length=tokenizer.model_max_length)
 
-    def tokenize(sample):
-
-        sample['positive'] = tokenizer.apply_chat_template(
-            sample['chosen'], tokenize=False, add_generation_prompt=False).replace(tokenizer.bos_token, "")
-        sample['negative'] = tokenizer.apply_chat_template(
-            sample['rejected'], tokenize=False, add_generation_prompt=False).replace(tokenizer.bos_token, "")
-
-        tokenized_pos = tokenizer(sample['positive'], truncation=True)
-        tokenized_neg = tokenizer(sample['negative'], truncation=True)
-        sample["input_ids_j"] = tokenized_pos["input_ids"]
-        sample["attention_mask_j"] = tokenized_pos["attention_mask"]
-        sample["input_ids_k"] = tokenized_neg["input_ids"]
-        sample["attention_mask_k"] = tokenized_neg["attention_mask"]
+        sample["input_ids"] = tokenized["input_ids"]
+        sample["attention_mask"] = tokenized["attention_mask"]
+        sample["reward"] = final_step_reward
         return sample
 
     ds = load_dataset(train_path, split="train").shuffle(seed=42)
-    #ds = ds.select(range(2000))
-    ds = ds.map(tokenize, num_proc=8)
-
-    eval_dataset = None
+    ds = ds.map(tokenize, num_proc=24)
     train_dataset = ds
 
-    eval_dataset = load_dataset(eval_path, split="train").shuffle(seed=42).select(range(500))
-    #eval_dataset = ds.select(range(500))
+    # eval_dataset = load_dataset(eval_path, split="train").shuffle(seed=42).select(range(500))
+    eval_dataset = load_dataset(eval_path, split="train").shuffle(seed=42).select(range(10000))
+    # TODO: FIX
     return train_dataset, eval_dataset
 
-train_dataset, eval_dataset = build_dataset(tokenizer, train_path, eval_path)
-print("Training set: ", len(train_dataset), " Eval set: ", len(eval_dataset))
-
-# Define the trainer
-training_args = TrainingArguments(
-    output_dir=output_name,
-    learning_rate=script_args.learning_rate,
-    per_device_train_batch_size=script_args.per_device_train_batch_size,
-    per_device_eval_batch_size=script_args.per_device_eval_batch_size,
-    num_train_epochs=script_args.num_train_epochs,
-    weight_decay=script_args.weight_decay,
-    evaluation_strategy="steps",
-    eval_steps=script_args.eval_every_steps,
-    save_strategy="steps",
-    save_steps=script_args.save_every_steps,
-    gradient_accumulation_steps=script_args.gradient_accumulation_steps,
-    gradient_checkpointing=script_args.gradient_checkpointing,
-    deepspeed=script_args.deepspeed,
-    local_rank=script_args.local_rank,
-    remove_unused_columns=False,
-    label_names=[],
-    bf16=script_args.bf16,
-    logging_strategy="steps",
-    logging_steps=10,
-    optim=script_args.optim,
-    lr_scheduler_type=script_args.lr_scheduler_type,
-    warmup_ratio=0.03,
-    report_to='wandb'
-)
-
-# enable if you want to train with lora
-# peft_config = LoraConfig(
-#     task_type=TaskType.SEQ_CLS,
-#     inference_mode=False,
-#     r=8,
-#     lora_alpha=32,
-#     lora_dropout=0.1,
-# )
-
-model = AutoModelForSequenceClassification.from_pretrained(
-    script_args.model_name, num_labels=1, torch_dtype=torch.bfloat16, use_flash_attention_2=True,
-)
-# model = get_peft_model(model, peft_config)
-# model.print_trainable_parameters()
-
-model.config.use_cache = not script_args.gradient_checkpointing
-num_proc = 24 # Can adjust to be higher if you have more processors.
-original_columns = train_dataset.column_names
-
-
-# We need to define a special data collator that batches the data in our j vs k format.
+# Data collator that pads each tokenized sample and attaches its scalar reward.
 @dataclass
 class RewardDataCollatorWithPadding:
@@ -206,20 +141,10 @@
     return_tensors: str = "pt"
 
     def __call__(self, features: List[Dict[str, Any]]) -> Dict[str, Any]:
-        merged_features = []
-        for feature in features:
-            merged_features.append(
-                {
-                    "input_ids": feature["input_ids_j"],
-                    "attention_mask": feature["attention_mask_j"],
-                }
-            )
-            merged_features.append(
-                {
-                    "input_ids": feature["input_ids_k"],
-                    "attention_mask": feature["attention_mask_k"],
-                }
-            )
+        merged_features = [{
+            "input_ids": feature["input_ids"],
+            "attention_mask": feature["attention_mask"],
+        } for feature in features]
         batch = self.tokenizer.pad(
             merged_features,
             padding=self.padding,
@@ -228,6 +153,7 @@
             return_tensors=self.return_tensors,
         )
         batch = {
+            "rewards": torch.tensor([feature["reward"] for feature in features], dtype=torch.float),
             "input_ids": batch["input_ids"],
             "attention_mask": batch["attention_mask"],
             "return_loss": True,
@@ -237,47 +163,116 @@
 
 # Define the trainer
 def compute_metrics(eval_pred):
-    result = {}
-    pos_predictions_scores = eval_pred.predictions[0]
-    neg_predictions_scores = eval_pred.predictions[1]
-    # We assume that the first sample is preferred by default in groundtruth
-    result['accuracy'] = np.sum(
-        pos_predictions_scores > neg_predictions_scores) / len(pos_predictions_scores)
-    return result
+    predictions = eval_pred.predictions
+    labels = eval_pred.label_ids
+    mse = np.mean((predictions - labels) ** 2)
+    return {"mse": mse}
 
 class RewardTrainer(Trainer):
     def compute_loss(self, model, inputs, return_outputs=False):
         rewards = model(
             input_ids=inputs["input_ids"], attention_mask=inputs["attention_mask"]
-        )[0]
-        bsz = rewards.size(0)
-        jidx = torch.arange(0, bsz, 2)
-        kidx = jidx + 1
-        rewards_j = rewards[jidx]
-        rewards_k = rewards[kidx]
-        loss = -nn.functional.logsigmoid(rewards_j - rewards_k).mean()
+        )[0].squeeze()
+        loss = nn.functional.mse_loss(rewards, inputs["rewards"])
+
         if return_outputs:
-            return loss, {"rewards_j": rewards_j, "rewards_k": rewards_k}
+            return loss, {"rewards": rewards}
         return loss
 
-# Train the model, woohoo.
-trainer = RewardTrainer(
-    model=model,
-    args=training_args,
-    train_dataset=train_dataset,
-    eval_dataset=eval_dataset,
-    compute_metrics=compute_metrics,
-    data_collator=RewardDataCollatorWithPadding(
-        tokenizer=tokenizer, max_length=script_args.max_length),
-)
+def train_reward_model():
+    # parser = HfArgumentParser(ScriptArguments)
+    # script_args = parser.parse_args_into_dataclasses()[0]
+
+    # hardcode args
+    script_args = ScriptArguments()
+
+    # Load the value-head model and tokenizer.
+    tokenizer_name = script_args.model_name
+    tokenizer = AutoTokenizer.from_pretrained(tokenizer_name, use_auth_token=True)
+
+    # Adjusted according to the base model
+    # Need to do this for the models that don't have an official pad token.
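+    # (Left-side truncation drops the oldest tokens first, so when
+    # question + steps overflows max_length, the final steps, whose
+    # reward is the regression target, are the part that survives.)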
+    tokenizer.truncation_side = "left"
+    tokenizer.model_max_length = script_args.max_length
+
+    # Get the dataset
+    train_path = script_args.train_set_path
+    eval_path = script_args.eval_set_path
+    output_name = script_args.output_path
+
+    train_dataset, eval_dataset = build_dataset(tokenizer, train_path, eval_path)
+    print("Training set: ", len(train_dataset), " Eval set: ", len(eval_dataset))
+
+    # Define the trainer
+    training_args = TrainingArguments(
+        output_dir=output_name,
+        learning_rate=script_args.learning_rate,
+        per_device_train_batch_size=script_args.per_device_train_batch_size,
+        per_device_eval_batch_size=script_args.per_device_eval_batch_size,
+        num_train_epochs=script_args.num_train_epochs,
+        weight_decay=script_args.weight_decay,
+        evaluation_strategy="steps",
+        eval_steps=script_args.eval_every_steps,
+        save_strategy="steps",
+        save_steps=script_args.save_every_steps,
+        gradient_accumulation_steps=script_args.gradient_accumulation_steps,
+        gradient_checkpointing=script_args.gradient_checkpointing,
+        deepspeed=script_args.deepspeed,
+        local_rank=script_args.local_rank,
+        remove_unused_columns=False,
+        label_names=[],
+        bf16=script_args.bf16,
+        logging_strategy="steps",
+        logging_steps=10,
+        optim=script_args.optim,
+        lr_scheduler_type=script_args.lr_scheduler_type,
+        warmup_ratio=0.03,
+        report_to='wandb',
+        # compile
+        torch_compile=True
+    )
+
+    # enable if you want to train with lora
+    # peft_config = LoraConfig(
+    #     task_type=TaskType.SEQ_CLS,
+    #     inference_mode=False,
+    #     r=8,
+    #     lora_alpha=32,
+    #     lora_dropout=0.1,
+    # )
+
+    model = AutoModelForSequenceClassification.from_pretrained(
+        script_args.model_name, num_labels=1, torch_dtype=torch.bfloat16, use_flash_attention_2=True,
+    )
+    # model = get_peft_model(model, peft_config)
+    # model.print_trainable_parameters()
+
+    model.config.use_cache = not script_args.gradient_checkpointing
+    num_proc = 24 # Can adjust to be higher if you have more processors.
+    original_columns = train_dataset.column_names
+
+
+    # Train the model, woohoo.
+    trainer = RewardTrainer(
+        model=model,
+        args=training_args,
+        train_dataset=train_dataset,
+        eval_dataset=eval_dataset,
+        compute_metrics=compute_metrics,
+        data_collator=RewardDataCollatorWithPadding(
+            tokenizer=tokenizer, max_length=script_args.max_length),
+    )
 
-trainer.train()
+    trainer.train()
 
+    print("Saving last checkpoint of the model")
+    #model.save_pretrained(output_name + "/last_checkpoint")
+    trainer.save_model(output_name + "/last_checkpoint")
+    tokenizer.save_pretrained(output_name + "/last_checkpoint")
 
-print("Saving last checkpoint of the model")
-#model.save_pretrained(output_name + "/last_checkpoint")
-trainer.save_model(output_name + "/last_checkpoint")
-tokenizer.save_pretrained(output_name + "/last_checkpoint")
\ No newline at end of file
+
+    # push to hub
+    # TODO: modal secret
+    trainer.push_to_hub("rawsh/mirrorgemma-2-2b-PRM-base")
\ No newline at end of file
diff --git a/modal_reward.py b/modal_orm_reward.py
similarity index 95%
rename from modal_reward.py
rename to modal_orm_reward.py
index e67f516..88b856a 100644
--- a/modal_reward.py
+++ b/modal_orm_reward.py
@@ -6,7 +6,7 @@
     .pip_install("transformers")
     .pip_install("accelerate")
 )
-app = modal.App("dankreward", image=image)
+app = modal.App("mirrorgemma-prm", image=image)
 
 with image.imports():
@@ -27,6 +27,7 @@
 )
 class Embedder:
     model_id = "RLHFlow/ArmoRM-Llama3-8B-v0.1"
+    # model_id = "rawsh/mirrorgemma-2-2b-prm-base"
     device = "cuda"
 
     @modal.build()
@@ -85,14 +86,17 @@ def setup(self):
         print(f"[setup] loading tokenizer took {elapsed} seconds")
 
     @modal.web_endpoint(method="POST", docs=True)
-    def score_output(self, messages: List[Dict[str, str]]):
+    def score_output(self, prompt: str):
         print("score_output")
+        # wrap the raw prompt as a single-turn chat so apply_chat_template still works
+        messages = [{"role": "user", "content": prompt}]
         input_ids = self.tokenizer.apply_chat_template(
             messages,
             return_tensors="pt",
             padding=True,
             truncation=True,
-            max_length=4096,
+            # max_length=4096,
+            max_length=8192,
         ).to("cuda")
         with torch.no_grad():
             output = self.model(input_ids)
diff --git a/modal_prm_reward.py b/modal_prm_reward.py
new file mode 100644
index 0000000..44bfb8e
--- /dev/null
+++ b/modal_prm_reward.py
@@ -0,0 +1,82 @@
+import modal
+
+image = (
+    modal.Image.debian_slim()
+    .pip_install("torch")
+    .pip_install("transformers")
+    .pip_install("accelerate")
+)
+app = modal.App("mirrorgemma-prm", image=image)
+
+
+with image.imports():
+    from typing import List, Dict, Tuple
+    import asyncio
+    import torch
+    from time import perf_counter as pc
+    import copy
+    # from transformers import AutoModelForSequenceClassification, AutoTokenizer
+    from transformers import pipeline
+    import os
+    # from lib import extract_tensors, test
+    # print(test())
+
+@app.cls(
+    gpu=modal.gpu.A10G(),
+    container_idle_timeout=30,
+    # volumes={"/data": modal.Volume.from_name("my-test-volume")}
+)
+class Embedder:
+    # model_id = "RLHFlow/ArmoRM-Llama3-8B-v0.1"
+    model_id = "rawsh/mirrorgemma-2-2b-prm-base"
+    device = "cuda"
+
+    @modal.build()
+    def build(self):
+        # cache
+        print("build")
+        dtype = torch.bfloat16
+        with torch.device("cuda"):
+            print("[build] loading model")
+            start = pc()
+            classifier = pipeline("sentiment-analysis", model=self.model_id,
+                trust_remote_code=True, torch_dtype=dtype)
+            elapsed = pc() - start
+            print(f"[build] loading model took {elapsed} seconds")
+
+    # @modal.enter(snap=False)
+    @modal.enter()
+    def setup(self):
+        # Start the model to a GPU before doing any work.
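+        # (@modal.build above runs at image build time, so the pipeline call
+        # there bakes the downloaded weights into the image; this @modal.enter
+        # hook reloads them from that cache on each container cold start.)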
+ print("setup") + os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True" + + # faster model loading + dtype = torch.bfloat16 + with torch.device("cuda"): + print("[setup] loading model") + start = pc() + self.pipeline = pipeline("sentiment-analysis", model=self.model_id, + trust_remote_code=True, torch_dtype=dtype) + elapsed = pc() - start + print(f"[setup] loading model took {elapsed} seconds") + + @modal.web_endpoint(method="POST", docs=True) + def score_output(self, prompt: str): + print("score_output") + return self.pipeline(prompt) + + +# @app.local_entrypoint() +# async def main(): +# # score the messages +# prompt = 'What are some synonyms for the word "beautiful"?' +# response1 = 'Nicely, Beautifully, Handsome, Stunning, Wonderful, Gorgeous, Pretty, Stunning, Elegant' +# response2 = 'bad' +# messages1 = [{"role": "user", "content": prompt}, {"role": "assistant", "content": response1}] +# messages2 = [{"role": "user", "content": prompt}, {"role": "assistant", "content": response2}] +# m1 = Embedder().score_output(messages1) +# m2 = Embedder().score_output(messages2) +# res = await asyncio.gather(*[m1,m2]) +# print(response1, res[0]) +# print(response2, res[1]) \ No newline at end of file diff --git a/modal_train_policy_sft.py b/modal_train_policy_sft.py new file mode 100644 index 0000000..c79f8e8 --- /dev/null +++ b/modal_train_policy_sft.py @@ -0,0 +1,49 @@ +import modal + +cuda_version = "12.4.0" # should be no greater than host CUDA version +flavor = "devel" # includes full CUDA toolkit +operating_sys = "ubuntu22.04" +tag = f"{cuda_version}-{flavor}-{operating_sys}" + +image = ( + # modal.Image.debian_slim() + modal.Image.from_registry(f"nvidia/cuda:{tag}", add_python="3.11") + .apt_install("git") + .pip_install("torch") + .pip_install("packaging") + .pip_install("wheel") + .run_commands("pip install flash-attn --no-build-isolation") + .pip_install("transformers") + .pip_install("accelerate") + .pip_install("numpy") + .pip_install("datasets") + .pip_install("wandb") + .pip_install("bitsandbytes") + .pip_install("unsloth") +) +app = modal.App("train_policy_sft", image=image) + +with image.imports(): + from mcts.train_policy_sft import train_sft + +MINUTES = 60 # seconds +HOURS = 60 * MINUTES + +@app.function( + cpu=2.0, + # gpu=modal.gpu.A10G(), + gpu=modal.gpu.H100(), + # gpu=modal.gpu.A100(size="40GB"), + timeout=20 * HOURS, + secrets=[ + modal.Secret.from_name("hf-token"), + modal.Secret.from_name("wandb-token") + ] +) +def train_policy_model_sft_upload_to_hf(): + train_sft() + +@app.local_entrypoint() +def main(): + # run the function remotely on Modal + train_policy_model_sft_upload_to_hf.remote() \ No newline at end of file diff --git a/modal_train_prm.py b/modal_train_prm.py new file mode 100644 index 0000000..ee481de --- /dev/null +++ b/modal_train_prm.py @@ -0,0 +1,48 @@ +import modal + +cuda_version = "12.4.0" # should be no greater than host CUDA version +flavor = "devel" # includes full CUDA toolkit +operating_sys = "ubuntu22.04" +tag = f"{cuda_version}-{flavor}-{operating_sys}" + +image = ( + # modal.Image.debian_slim() + modal.Image.from_registry(f"nvidia/cuda:{tag}", add_python="3.11") + .apt_install("git") + .pip_install("torch") + .pip_install("packaging") + .pip_install("wheel") + .run_commands("pip install flash-attn --no-build-isolation") + .pip_install("transformers") + .pip_install("accelerate") + .pip_install("numpy") + .pip_install("datasets") + .pip_install("wandb") + .pip_install("bitsandbytes") +) +app = modal.App("train_prm", image=image) + 
+with image.imports():
+    from mcts.train_reward import train_reward_model
+
+MINUTES = 60 # seconds
+HOURS = 60 * MINUTES
+
+@app.function(
+    cpu=2.0,
+    # gpu=modal.gpu.A10G(),
+    gpu=modal.gpu.H100(),
+    # gpu=modal.gpu.A100(count=4, size="40GB"),
+    timeout=20 * HOURS,
+    secrets=[
+        modal.Secret.from_name("hf-token"),
+        modal.Secret.from_name("wandb-token")
+    ]
+)
+def train_reward_model_upload_to_hf():
+    train_reward_model()
+
+@app.local_entrypoint()
+def main():
+    # run the function remotely on Modal
+    train_reward_model_upload_to_hf.remote()
\ No newline at end of file
diff --git a/modal_vllm.py b/modal_vllm.py
index ab7bf74..1d01141 100644
--- a/modal_vllm.py
+++ b/modal_vllm.py
@@ -15,9 +15,13 @@ def download_model_to_image(model_dir, model_name, model_revision):
     )
     move_cache()
 
-MODEL_DIR = "/qwen"
-MODEL_NAME = "Qwen/Qwen2.5-0.5B-Instruct"
-MODEL_REVISION = "a8b602d9dafd3a75d382e62757d83d89fca3be54"
+# MODEL_DIR = "/qwen"
+# MODEL_NAME = "Qwen/Qwen2.5-0.5B-Instruct"
+# MODEL_REVISION = "a8b602d9dafd3a75d382e62757d83d89fca3be54"
+
+MODEL_DIR = "/gemma"
+MODEL_NAME = "rawsh/mirrorgemma-2-2b-SFT"
+MODEL_REVISION = "6c27fa6de9b04f9d4fe4b8889ef53404f679bcf6"
 
 vllm_image = (
     modal.Image.debian_slim(python_version="3.10")
@@ -33,15 +37,17 @@ def download_model_to_image(model_dir, model_name, model_revision):
     .run_function(
         download_model_to_image,
         timeout=60 * 20,
+        secrets=[modal.Secret.from_name("hf-token")],
         kwargs={
             "model_dir": MODEL_DIR,
             "model_name": MODEL_NAME,
             "model_revision": MODEL_REVISION,
         },
     )
+    .env({"VLLM_ALLOW_LONG_MAX_MODEL_LEN": "1"})
 )
 
-app = modal.App("vllm-qwen")
+app = modal.App("vllm-gemma")
 
 N_GPU = 1 # tip: for best results, first upgrade to more powerful GPUs, and only then increase GPU count
@@ -57,8 +63,11 @@ def download_model_to_image(model_dir, model_name, model_revision):
     gpu=modal.gpu.A10G(count=N_GPU),
     container_idle_timeout=1 * MINUTES,
     timeout=20 * MINUTES,
-    allow_concurrent_inputs=1,
-    secrets=[modal.Secret.from_name("vllm-token")]
+    allow_concurrent_inputs=100,
+    secrets=[
+        modal.Secret.from_name("vllm-token"),
+        # modal.Secret.from_name("hf-token"),
+    ]
     # volumes={MODELS_DIR: volume},
 )
 @modal.asgi_app()
diff --git a/poetry.lock b/poetry.lock
index 24d31cf..7425415 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -137,6 +137,20 @@ files = [
 [package.dependencies]
 frozenlist = ">=1.1.0"
 
+[[package]]
+name = "aiostream"
+version = "0.5.2"
+description = "Generator-based operators for asynchronous iteration"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "aiostream-0.5.2-py3-none-any.whl", hash = "sha256:054660370be9d37f6fe3ece3851009240416bd082e469fd90cc8673d3818cf71"},
+    {file = "aiostream-0.5.2.tar.gz", hash = "sha256:b71b519a2d66c38f0872403ab86417955b77352f08d9ad02ad46fc3926b389f4"},
+]
+
+[package.dependencies]
+typing-extensions = "*"
+
 [[package]]
 name = "alembic"
 version = "1.13.2"
@@ -340,6 +354,20 @@ files = [
     {file = "charset_normalizer-3.3.2-py3-none-any.whl", hash = "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc"},
 ]
 
+[[package]]
+name = "click"
+version = "8.1.7"
+description = "Composable command line interface toolkit"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28"},
+    {file = "click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de"},
+]
+
+[package.dependencies]
+colorama = {version = "*", markers = "platform_system == 
\"Windows\""} + [[package]] name = "colorama" version = "0.4.6" @@ -505,6 +533,26 @@ files = [ [package.extras] test = ["pytest (>=6)"] +[[package]] +name = "fastapi" +version = "0.115.0" +description = "FastAPI framework, high performance, easy to learn, fast to code, ready for production" +optional = false +python-versions = ">=3.8" +files = [ + {file = "fastapi-0.115.0-py3-none-any.whl", hash = "sha256:17ea427674467486e997206a5ab25760f6b09e069f099b96f5b55a32fb6f1631"}, + {file = "fastapi-0.115.0.tar.gz", hash = "sha256:f93b4ca3529a8ebc6fc3fcf710e5efa8de3df9b41570958abf1d97d843138004"}, +] + +[package.dependencies] +pydantic = ">=1.7.4,<1.8 || >1.8,<1.8.1 || >1.8.1,<2.0.0 || >2.0.0,<2.0.1 || >2.0.1,<2.1.0 || >2.1.0,<3.0.0" +starlette = ">=0.37.2,<0.39.0" +typing-extensions = ">=4.8.0" + +[package.extras] +all = ["email-validator (>=2.0.0)", "fastapi-cli[standard] (>=0.0.5)", "httpx (>=0.23.0)", "itsdangerous (>=1.1.0)", "jinja2 (>=2.11.2)", "orjson (>=3.2.1)", "pydantic-extra-types (>=2.0.0)", "pydantic-settings (>=2.0.0)", "python-multipart (>=0.0.7)", "pyyaml (>=5.3.1)", "ujson (>=4.0.1,!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0)", "uvicorn[standard] (>=0.12.0)"] +standard = ["email-validator (>=2.0.0)", "fastapi-cli[standard] (>=0.0.5)", "httpx (>=0.23.0)", "jinja2 (>=2.11.2)", "python-multipart (>=0.0.7)", "uvicorn[standard] (>=0.12.0)"] + [[package]] name = "filelock" version = "3.16.0" @@ -728,6 +776,23 @@ files = [ docs = ["Sphinx", "furo"] test = ["objgraph", "psutil"] +[[package]] +name = "grpclib" +version = "0.4.7" +description = "Pure-Python gRPC implementation for asyncio" +optional = false +python-versions = ">=3.7" +files = [ + {file = "grpclib-0.4.7.tar.gz", hash = "sha256:2988ef57c02b22b7a2e8e961792c41ccf97efc2ace91ae7a5b0de03c363823c3"}, +] + +[package.dependencies] +h2 = ">=3.1.0,<5" +multidict = "*" + +[package.extras] +protobuf = ["protobuf (>=3.20.0)"] + [[package]] name = "h11" version = "0.14.0" @@ -739,6 +804,32 @@ files = [ {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"}, ] +[[package]] +name = "h2" +version = "4.1.0" +description = "HTTP/2 State-Machine based protocol implementation" +optional = false +python-versions = ">=3.6.1" +files = [ + {file = "h2-4.1.0-py3-none-any.whl", hash = "sha256:03a46bcf682256c95b5fd9e9a99c1323584c3eec6440d379b9903d709476bc6d"}, + {file = "h2-4.1.0.tar.gz", hash = "sha256:a83aca08fbe7aacb79fec788c9c0bac936343560ed9ec18b82a13a12c28d2abb"}, +] + +[package.dependencies] +hpack = ">=4.0,<5" +hyperframe = ">=6.0,<7" + +[[package]] +name = "hpack" +version = "4.0.0" +description = "Pure-Python HPACK header compression" +optional = false +python-versions = ">=3.6.1" +files = [ + {file = "hpack-4.0.0-py3-none-any.whl", hash = "sha256:84a076fad3dc9a9f8063ccb8041ef100867b1878b25ef0ee63847a5d53818a6c"}, + {file = "hpack-4.0.0.tar.gz", hash = "sha256:fc41de0c63e687ebffde81187a948221294896f6bdc0ae2312708df339430095"}, +] + [[package]] name = "httpcore" version = "1.0.5" @@ -819,6 +910,17 @@ testing = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "fastapi", "gr torch = ["safetensors[torch]", "torch"] typing = ["types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)"] +[[package]] +name = "hyperframe" +version = "6.0.1" +description = "HTTP/2 framing layer for Python" +optional = false +python-versions = ">=3.6.1" +files = [ + {file = "hyperframe-6.0.1-py3-none-any.whl", hash = 
"sha256:0ec6bafd80d8ad2195c4f03aacba3a8265e57bc4cff261e802bf39970ed02a15"}, + {file = "hyperframe-6.0.1.tar.gz", hash = "sha256:ae510046231dc8e9ecb1a6586f63d2347bf4c8905914aa84ba585ae85f28a914"}, +] + [[package]] name = "idna" version = "3.9" @@ -833,6 +935,23 @@ files = [ [package.extras] all = ["flake8 (>=7.1.1)", "mypy (>=1.11.2)", "pytest (>=8.3.2)", "ruff (>=0.6.2)"] +[[package]] +name = "jinja2" +version = "3.1.4" +description = "A very fast and expressive template engine." +optional = false +python-versions = ">=3.7" +files = [ + {file = "jinja2-3.1.4-py3-none-any.whl", hash = "sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d"}, + {file = "jinja2-3.1.4.tar.gz", hash = "sha256:4a3aee7acbbe7303aede8e9648d13b8bf88a429282aa6122a993f0ac800cb369"}, +] + +[package.dependencies] +MarkupSafe = ">=2.0" + +[package.extras] +i18n = ["Babel (>=2.7)"] + [[package]] name = "jiter" version = "0.5.0" @@ -943,6 +1062,30 @@ babel = ["Babel"] lingua = ["lingua"] testing = ["pytest"] +[[package]] +name = "markdown-it-py" +version = "3.0.0" +description = "Python port of markdown-it. Markdown parsing, done right!" +optional = false +python-versions = ">=3.8" +files = [ + {file = "markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb"}, + {file = "markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1"}, +] + +[package.dependencies] +mdurl = ">=0.1,<1.0" + +[package.extras] +benchmarking = ["psutil", "pytest", "pytest-benchmark"] +code-style = ["pre-commit (>=3.0,<4.0)"] +compare = ["commonmark (>=0.9,<1.0)", "markdown (>=3.4,<4.0)", "mistletoe (>=1.0,<2.0)", "mistune (>=2.0,<3.0)", "panflute (>=2.3,<3.0)"] +linkify = ["linkify-it-py (>=1,<3)"] +plugins = ["mdit-py-plugins"] +profiling = ["gprof2dot"] +rtd = ["jupyter_sphinx", "mdit-py-plugins", "myst-parser", "pyyaml", "sphinx", "sphinx-copybutton", "sphinx-design", "sphinx_book_theme"] +testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"] + [[package]] name = "markupsafe" version = "2.1.5" @@ -1012,6 +1155,61 @@ files = [ {file = "MarkupSafe-2.1.5.tar.gz", hash = "sha256:d283d37a890ba4c1ae73ffadf8046435c76e7bc2247bbb63c00bd1a709c6544b"}, ] +[[package]] +name = "mdurl" +version = "0.1.2" +description = "Markdown URL utilities" +optional = false +python-versions = ">=3.7" +files = [ + {file = "mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8"}, + {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"}, +] + +[[package]] +name = "modal" +version = "0.64.131" +description = "Python client library for Modal" +optional = false +python-versions = ">=3.8" +files = [ + {file = "modal-0.64.131-py3-none-any.whl", hash = "sha256:93cf2272a4f716627ad79a7e66dbc562e2f051270b37a333ac9f472369f3b75b"}, +] + +[package.dependencies] +aiohttp = "*" +aiostream = ">=0.5.2,<0.6.0" +certifi = "*" +click = ">=8.1.0" +fastapi = "*" +grpclib = "0.4.7" +protobuf = ">=3.19,<4.24.0 || >4.24.0,<5.0" +rich = ">=12.0.0" +synchronicity = ">=0.7.6,<0.8.0" +toml = "*" +typer = ">=0.9" +types-certifi = "*" +types-toml = "*" +typing-extensions = ">=4.6,<5.0" +watchfiles = "*" + +[[package]] +name = "mpmath" +version = "1.3.0" +description = "Python library for arbitrary-precision floating-point arithmetic" +optional = false +python-versions = "*" +files = [ + {file = "mpmath-1.3.0-py3-none-any.whl", hash = 
"sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c"}, + {file = "mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f"}, +] + +[package.extras] +develop = ["codecov", "pycodestyle", "pytest (>=4.6)", "pytest-cov", "wheel"] +docs = ["sphinx"] +gmpy = ["gmpy2 (>=2.1.0a4)"] +tests = ["pytest (>=4.6)"] + [[package]] name = "multidict" version = "6.1.0" @@ -1140,6 +1338,24 @@ files = [ [package.dependencies] dill = ">=0.3.8" +[[package]] +name = "networkx" +version = "3.3" +description = "Python package for creating and manipulating graphs and networks" +optional = false +python-versions = ">=3.10" +files = [ + {file = "networkx-3.3-py3-none-any.whl", hash = "sha256:28575580c6ebdaf4505b22c6256a2b9de86b316dc63ba9e93abde3d78dfdbcf2"}, + {file = "networkx-3.3.tar.gz", hash = "sha256:0c127d8b2f4865f59ae9cb8aafcd60b5c70f3241ebd66f7defad7c4ab90126c9"}, +] + +[package.extras] +default = ["matplotlib (>=3.6)", "numpy (>=1.23)", "pandas (>=1.4)", "scipy (>=1.9,!=1.11.0,!=1.11.1)"] +developer = ["changelist (==0.5)", "mypy (>=1.1)", "pre-commit (>=3.2)", "rtoml"] +doc = ["myst-nb (>=1.0)", "numpydoc (>=1.7)", "pillow (>=9.4)", "pydata-sphinx-theme (>=0.14)", "sphinx (>=7)", "sphinx-gallery (>=0.14)", "texext (>=0.6.7)"] +extra = ["lxml (>=4.6)", "pydot (>=2.0)", "pygraphviz (>=1.12)", "sympy (>=1.10)"] +test = ["pytest (>=7.2)", "pytest-cov (>=4.0)"] + [[package]] name = "numpy" version = "2.1.1" @@ -1202,6 +1418,150 @@ files = [ {file = "numpy-2.1.1.tar.gz", hash = "sha256:d0cf7d55b1051387807405b3898efafa862997b4cba8aa5dbe657be794afeafd"}, ] +[[package]] +name = "nvidia-cublas-cu12" +version = "12.1.3.1" +description = "CUBLAS native runtime libraries" +optional = false +python-versions = ">=3" +files = [ + {file = "nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl", hash = "sha256:ee53ccca76a6fc08fb9701aa95b6ceb242cdaab118c3bb152af4e579af792728"}, + {file = "nvidia_cublas_cu12-12.1.3.1-py3-none-win_amd64.whl", hash = "sha256:2b964d60e8cf11b5e1073d179d85fa340c120e99b3067558f3cf98dd69d02906"}, +] + +[[package]] +name = "nvidia-cuda-cupti-cu12" +version = "12.1.105" +description = "CUDA profiling tools runtime libs." 
+optional = false +python-versions = ">=3" +files = [ + {file = "nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:e54fde3983165c624cb79254ae9818a456eb6e87a7fd4d56a2352c24ee542d7e"}, + {file = "nvidia_cuda_cupti_cu12-12.1.105-py3-none-win_amd64.whl", hash = "sha256:bea8236d13a0ac7190bd2919c3e8e6ce1e402104276e6f9694479e48bb0eb2a4"}, +] + +[[package]] +name = "nvidia-cuda-nvrtc-cu12" +version = "12.1.105" +description = "NVRTC native runtime libraries" +optional = false +python-versions = ">=3" +files = [ + {file = "nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:339b385f50c309763ca65456ec75e17bbefcbbf2893f462cb8b90584cd27a1c2"}, + {file = "nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-win_amd64.whl", hash = "sha256:0a98a522d9ff138b96c010a65e145dc1b4850e9ecb75a0172371793752fd46ed"}, +] + +[[package]] +name = "nvidia-cuda-runtime-cu12" +version = "12.1.105" +description = "CUDA Runtime native Libraries" +optional = false +python-versions = ">=3" +files = [ + {file = "nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:6e258468ddf5796e25f1dc591a31029fa317d97a0a94ed93468fc86301d61e40"}, + {file = "nvidia_cuda_runtime_cu12-12.1.105-py3-none-win_amd64.whl", hash = "sha256:dfb46ef84d73fababab44cf03e3b83f80700d27ca300e537f85f636fac474344"}, +] + +[[package]] +name = "nvidia-cudnn-cu12" +version = "9.1.0.70" +description = "cuDNN runtime libraries" +optional = false +python-versions = ">=3" +files = [ + {file = "nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl", hash = "sha256:165764f44ef8c61fcdfdfdbe769d687e06374059fbb388b6c89ecb0e28793a6f"}, + {file = "nvidia_cudnn_cu12-9.1.0.70-py3-none-win_amd64.whl", hash = "sha256:6278562929433d68365a07a4a1546c237ba2849852c0d4b2262a486e805b977a"}, +] + +[package.dependencies] +nvidia-cublas-cu12 = "*" + +[[package]] +name = "nvidia-cufft-cu12" +version = "11.0.2.54" +description = "CUFFT native runtime libraries" +optional = false +python-versions = ">=3" +files = [ + {file = "nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl", hash = "sha256:794e3948a1aa71fd817c3775866943936774d1c14e7628c74f6f7417224cdf56"}, + {file = "nvidia_cufft_cu12-11.0.2.54-py3-none-win_amd64.whl", hash = "sha256:d9ac353f78ff89951da4af698f80870b1534ed69993f10a4cf1d96f21357e253"}, +] + +[[package]] +name = "nvidia-curand-cu12" +version = "10.3.2.106" +description = "CURAND native runtime libraries" +optional = false +python-versions = ">=3" +files = [ + {file = "nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl", hash = "sha256:9d264c5036dde4e64f1de8c50ae753237c12e0b1348738169cd0f8a536c0e1e0"}, + {file = "nvidia_curand_cu12-10.3.2.106-py3-none-win_amd64.whl", hash = "sha256:75b6b0c574c0037839121317e17fd01f8a69fd2ef8e25853d826fec30bdba74a"}, +] + +[[package]] +name = "nvidia-cusolver-cu12" +version = "11.4.5.107" +description = "CUDA solver native runtime libraries" +optional = false +python-versions = ">=3" +files = [ + {file = "nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl", hash = "sha256:8a7ec542f0412294b15072fa7dab71d31334014a69f953004ea7a118206fe0dd"}, + {file = "nvidia_cusolver_cu12-11.4.5.107-py3-none-win_amd64.whl", hash = "sha256:74e0c3a24c78612192a74fcd90dd117f1cf21dea4822e66d89e8ea80e3cd2da5"}, +] + +[package.dependencies] +nvidia-cublas-cu12 = "*" +nvidia-cusparse-cu12 = "*" +nvidia-nvjitlink-cu12 = "*" + +[[package]] +name = "nvidia-cusparse-cu12" +version = "12.1.0.106" +description = "CUSPARSE native runtime libraries" +optional = false 
+python-versions = ">=3" +files = [ + {file = "nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl", hash = "sha256:f3b50f42cf363f86ab21f720998517a659a48131e8d538dc02f8768237bd884c"}, + {file = "nvidia_cusparse_cu12-12.1.0.106-py3-none-win_amd64.whl", hash = "sha256:b798237e81b9719373e8fae8d4f091b70a0cf09d9d85c95a557e11df2d8e9a5a"}, +] + +[package.dependencies] +nvidia-nvjitlink-cu12 = "*" + +[[package]] +name = "nvidia-nccl-cu12" +version = "2.20.5" +description = "NVIDIA Collective Communication Library (NCCL) Runtime" +optional = false +python-versions = ">=3" +files = [ + {file = "nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_aarch64.whl", hash = "sha256:1fc150d5c3250b170b29410ba682384b14581db722b2531b0d8d33c595f33d01"}, + {file = "nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_x86_64.whl", hash = "sha256:057f6bf9685f75215d0c53bf3ac4a10b3e6578351de307abad9e18a99182af56"}, +] + +[[package]] +name = "nvidia-nvjitlink-cu12" +version = "12.6.68" +description = "Nvidia JIT LTO Library" +optional = false +python-versions = ">=3" +files = [ + {file = "nvidia_nvjitlink_cu12-12.6.68-py3-none-manylinux2014_aarch64.whl", hash = "sha256:b3fd0779845f68b92063ab1393abab1ed0a23412fc520df79a8190d098b5cd6b"}, + {file = "nvidia_nvjitlink_cu12-12.6.68-py3-none-manylinux2014_x86_64.whl", hash = "sha256:125a6c2a44e96386dda634e13d944e60b07a0402d391a070e8fb4104b34ea1ab"}, + {file = "nvidia_nvjitlink_cu12-12.6.68-py3-none-win_amd64.whl", hash = "sha256:a55744c98d70317c5e23db14866a8cc2b733f7324509e941fc96276f9f37801d"}, +] + +[[package]] +name = "nvidia-nvtx-cu12" +version = "12.1.105" +description = "NVIDIA Tools Extension" +optional = false +python-versions = ">=3" +files = [ + {file = "nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:dc21cf308ca5691e7c04d962e213f8a4aa9bbfa23d95412f452254c2caeb09e5"}, + {file = "nvidia_nvtx_cu12-12.1.105-py3-none-win_amd64.whl", hash = "sha256:65f4d98982b31b60026e0e6de73fbdfc09d08a96f4656dd3665ca616a11e1e82"}, +] + [[package]] name = "openai" version = "1.45.0" @@ -1304,9 +1664,9 @@ files = [ [package.dependencies] numpy = [ + {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, {version = ">=1.22.4", markers = "python_version < \"3.11\""}, {version = ">=1.23.2", markers = "python_version == \"3.11\""}, - {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, ] python-dateutil = ">=2.8.2" pytz = ">=2020.1" @@ -1337,6 +1697,26 @@ sql-other = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-d test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)"] xml = ["lxml (>=4.9.2)"] +[[package]] +name = "protobuf" +version = "4.25.5" +description = "" +optional = false +python-versions = ">=3.8" +files = [ + {file = "protobuf-4.25.5-cp310-abi3-win32.whl", hash = "sha256:5e61fd921603f58d2f5acb2806a929b4675f8874ff5f330b7d6f7e2e784bbcd8"}, + {file = "protobuf-4.25.5-cp310-abi3-win_amd64.whl", hash = "sha256:4be0571adcbe712b282a330c6e89eae24281344429ae95c6d85e79e84780f5ea"}, + {file = "protobuf-4.25.5-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:b2fde3d805354df675ea4c7c6338c1aecd254dfc9925e88c6d31a2bcb97eb173"}, + {file = "protobuf-4.25.5-cp37-abi3-manylinux2014_aarch64.whl", hash = "sha256:919ad92d9b0310070f8356c24b855c98df2b8bd207ebc1c0c6fcc9ab1e007f3d"}, + {file = "protobuf-4.25.5-cp37-abi3-manylinux2014_x86_64.whl", hash = "sha256:fe14e16c22be926d3abfcb500e60cab068baf10b542b8c858fa27e098123e331"}, + {file = "protobuf-4.25.5-cp38-cp38-win32.whl", hash = 
"sha256:98d8d8aa50de6a2747efd9cceba361c9034050ecce3e09136f90de37ddba66e1"}, + {file = "protobuf-4.25.5-cp38-cp38-win_amd64.whl", hash = "sha256:b0234dd5a03049e4ddd94b93400b67803c823cfc405689688f59b34e0742381a"}, + {file = "protobuf-4.25.5-cp39-cp39-win32.whl", hash = "sha256:abe32aad8561aa7cc94fc7ba4fdef646e576983edb94a73381b03c53728a626f"}, + {file = "protobuf-4.25.5-cp39-cp39-win_amd64.whl", hash = "sha256:7a183f592dc80aa7c8da7ad9e55091c4ffc9497b3054452d629bb85fa27c2a45"}, + {file = "protobuf-4.25.5-py3-none-any.whl", hash = "sha256:0aebecb809cae990f8129ada5ca273d9d670b76d9bfc9b1809f0a9c02b7dbf41"}, + {file = "protobuf-4.25.5.tar.gz", hash = "sha256:7f8249476b4a9473645db7f8ab42b02fe1488cbe5fb72fddd445e0665afd8584"}, +] + [[package]] name = "pyarrow" version = "17.0.0" @@ -1403,8 +1783,8 @@ files = [ annotated-types = ">=0.6.0" pydantic-core = "2.23.3" typing-extensions = [ - {version = ">=4.6.1", markers = "python_version < \"3.13\""}, {version = ">=4.12.2", markers = "python_version >= \"3.13\""}, + {version = ">=4.6.1", markers = "python_version < \"3.13\""}, ] [package.extras] @@ -1512,6 +1892,20 @@ files = [ [package.dependencies] typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0" +[[package]] +name = "pygments" +version = "2.18.0" +description = "Pygments is a syntax highlighting package written in Python." +optional = false +python-versions = ">=3.8" +files = [ + {file = "pygments-2.18.0-py3-none-any.whl", hash = "sha256:b8e6aca0523f3ab76fee51799c488e38782ac06eafcf95e7ba832985c8e7b13a"}, + {file = "pygments-2.18.0.tar.gz", hash = "sha256:786ff802f32e91311bff3889f6e9a86e81505fe99f2735bb6d60ae0c5004f199"}, +] + +[package.extras] +windows-terminal = ["colorama (>=0.4.6)"] + [[package]] name = "python-dateutil" version = "2.9.0.post0" @@ -1723,6 +2117,73 @@ urllib3 = ">=1.21.1,<3" socks = ["PySocks (>=1.5.6,!=1.5.7)"] use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] +[[package]] +name = "rich" +version = "13.8.1" +description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal" +optional = false +python-versions = ">=3.7.0" +files = [ + {file = "rich-13.8.1-py3-none-any.whl", hash = "sha256:1760a3c0848469b97b558fc61c85233e3dafb69c7a071b4d60c38099d3cd4c06"}, + {file = "rich-13.8.1.tar.gz", hash = "sha256:8260cda28e3db6bf04d2d1ef4dbc03ba80a824c88b0e7668a0f23126a424844a"}, +] + +[package.dependencies] +markdown-it-py = ">=2.2.0" +pygments = ">=2.13.0,<3.0.0" + +[package.extras] +jupyter = ["ipywidgets (>=7.5.1,<9)"] + +[[package]] +name = "setuptools" +version = "75.1.0" +description = "Easily download, build, install, upgrade, and uninstall Python packages" +optional = false +python-versions = ">=3.8" +files = [ + {file = "setuptools-75.1.0-py3-none-any.whl", hash = "sha256:35ab7fd3bcd95e6b7fd704e4a1539513edad446c097797f2985e0e4b960772f2"}, + {file = "setuptools-75.1.0.tar.gz", hash = "sha256:d59a21b17a275fb872a9c3dae73963160ae079f1049ed956880cd7c09b120538"}, +] + +[package.extras] +check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)", "ruff (>=0.5.2)"] +core = ["importlib-metadata (>=6)", "importlib-resources (>=5.10.2)", "jaraco.collections", "jaraco.functools", "jaraco.text (>=3.7)", "more-itertools", "more-itertools (>=8.8)", "packaging", "packaging (>=24)", "platformdirs (>=2.6.2)", "tomli (>=2.0.1)", "wheel (>=0.43.0)"] +cover = ["pytest-cov"] +doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", 
"sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier", "towncrier (<24.7)"] +enabler = ["pytest-enabler (>=2.2)"] +test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-home (>=0.5)", "pytest-perf", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"] +type = ["importlib-metadata (>=7.0.2)", "jaraco.develop (>=7.21)", "mypy (==1.11.*)", "pytest-mypy"] + +[[package]] +name = "shellingham" +version = "1.5.4" +description = "Tool to Detect Surrounding Shell" +optional = false +python-versions = ">=3.7" +files = [ + {file = "shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686"}, + {file = "shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de"}, +] + +[[package]] +name = "sigtools" +version = "4.0.1" +description = "Utilities for working with inspect.Signature objects." +optional = false +python-versions = ">=3.6" +files = [ + {file = "sigtools-4.0.1-py2.py3-none-any.whl", hash = "sha256:d216b4cf920bbab0fce636ddc429ed8463a5b533d9e1492acb45a2a1bc36ac6c"}, + {file = "sigtools-4.0.1.tar.gz", hash = "sha256:4b8e135a9cd4d2ea00da670c093372d74e672ba3abb87f4c98d8e73dea54445c"}, +] + +[package.dependencies] +attrs = "*" + +[package.extras] +test = ["coverage", "mock", "repeated-test (>=2.2.1)", "sphinx"] +tests = ["coverage", "mock", "repeated-test (>=2.2.1)", "sphinx"] + [[package]] name = "six" version = "1.16.0" @@ -1832,6 +2293,23 @@ postgresql-psycopgbinary = ["psycopg[binary] (>=3.0.7)"] pymysql = ["pymysql"] sqlcipher = ["sqlcipher3_binary"] +[[package]] +name = "starlette" +version = "0.38.6" +description = "The little ASGI library that shines." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "starlette-0.38.6-py3-none-any.whl", hash = "sha256:4517a1409e2e73ee4951214ba012052b9e16f60e90d73cfb06192c19203bbb05"}, + {file = "starlette-0.38.6.tar.gz", hash = "sha256:863a1588f5574e70a821dadefb41e4881ea451a47a3cd1b4df359d4ffefe5ead"}, +] + +[package.dependencies] +anyio = ">=3.4.0,<5" + +[package.extras] +full = ["httpx (>=0.22.0)", "itsdangerous", "jinja2", "python-multipart (>=0.0.7)", "pyyaml"] + [[package]] name = "structlog" version = "24.4.0" @@ -1849,6 +2327,102 @@ docs = ["cogapp", "furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphi tests = ["freezegun (>=0.2.8)", "pretend", "pytest (>=6.0)", "pytest-asyncio (>=0.17)", "simplejson"] typing = ["mypy (>=1.4)", "rich", "twisted"] +[[package]] +name = "sympy" +version = "1.13.3" +description = "Computer algebra system (CAS) in Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "sympy-1.13.3-py3-none-any.whl", hash = "sha256:54612cf55a62755ee71824ce692986f23c88ffa77207b30c1368eda4a7060f73"}, + {file = "sympy-1.13.3.tar.gz", hash = "sha256:b27fd2c6530e0ab39e275fc9b683895367e51d5da91baa8d3d64db2565fec4d9"}, +] + +[package.dependencies] +mpmath = ">=1.1.0,<1.4" + +[package.extras] +dev = ["hypothesis (>=6.70.0)", "pytest (>=7.1.0)"] + +[[package]] +name = "synchronicity" +version = "0.7.6" +description = "Export blocking and async library versions from a single async implementation" +optional = false +python-versions = ">=3.8" +files = [ + {file = "synchronicity-0.7.6-py3-none-any.whl", hash = "sha256:c5bb60a0f39c5a161b5013d183cfe6229dc4ee6e3a09714f37da2905635cf983"}, +] + +[package.dependencies] +sigtools = "4.0.1" +typing-extensions = ">=4.6" + +[[package]] +name = "toml" +version = "0.10.2" +description = "Python Library for Tom's Obvious, Minimal Language" +optional = false +python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" +files = [ + {file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"}, + {file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"}, +] + +[[package]] +name = "torch" +version = "2.4.1" +description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration" +optional = false +python-versions = ">=3.8.0" +files = [ + {file = "torch-2.4.1-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:362f82e23a4cd46341daabb76fba08f04cd646df9bfaf5da50af97cb60ca4971"}, + {file = "torch-2.4.1-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:e8ac1985c3ff0f60d85b991954cfc2cc25f79c84545aead422763148ed2759e3"}, + {file = "torch-2.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:91e326e2ccfb1496e3bee58f70ef605aeb27bd26be07ba64f37dcaac3d070ada"}, + {file = "torch-2.4.1-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:d36a8ef100f5bff3e9c3cea934b9e0d7ea277cb8210c7152d34a9a6c5830eadd"}, + {file = "torch-2.4.1-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:0b5f88afdfa05a335d80351e3cea57d38e578c8689f751d35e0ff36bce872113"}, + {file = "torch-2.4.1-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:ef503165f2341942bfdf2bd520152f19540d0c0e34961232f134dc59ad435be8"}, + {file = "torch-2.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:092e7c2280c860eff762ac08c4bdcd53d701677851670695e0c22d6d345b269c"}, + {file = "torch-2.4.1-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:ddddbd8b066e743934a4200b3d54267a46db02106876d21cf31f7da7a96f98ea"}, + {file = 
"torch-2.4.1-cp312-cp312-manylinux1_x86_64.whl", hash = "sha256:fdc4fe11db3eb93c1115d3e973a27ac7c1a8318af8934ffa36b0370efe28e042"}, + {file = "torch-2.4.1-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:18835374f599207a9e82c262153c20ddf42ea49bc76b6eadad8e5f49729f6e4d"}, + {file = "torch-2.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:ebea70ff30544fc021d441ce6b219a88b67524f01170b1c538d7d3ebb5e7f56c"}, + {file = "torch-2.4.1-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:72b484d5b6cec1a735bf3fa5a1c4883d01748698c5e9cfdbeb4ffab7c7987e0d"}, + {file = "torch-2.4.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:c99e1db4bf0c5347107845d715b4aa1097e601bdc36343d758963055e9599d93"}, + {file = "torch-2.4.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:b57f07e92858db78c5b72857b4f0b33a65b00dc5d68e7948a8494b0314efb880"}, + {file = "torch-2.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:f18197f3f7c15cde2115892b64f17c80dbf01ed72b008020e7da339902742cf6"}, + {file = "torch-2.4.1-cp38-none-macosx_11_0_arm64.whl", hash = "sha256:5fc1d4d7ed265ef853579caf272686d1ed87cebdcd04f2a498f800ffc53dab71"}, + {file = "torch-2.4.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:40f6d3fe3bae74efcf08cb7f8295eaddd8a838ce89e9d26929d4edd6d5e4329d"}, + {file = "torch-2.4.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:c9299c16c9743001ecef515536ac45900247f4338ecdf70746f2461f9e4831db"}, + {file = "torch-2.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:6bce130f2cd2d52ba4e2c6ada461808de7e5eccbac692525337cfb4c19421846"}, + {file = "torch-2.4.1-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:a38de2803ee6050309aac032676536c3d3b6a9804248537e38e098d0e14817ec"}, +] + +[package.dependencies] +filelock = "*" +fsspec = "*" +jinja2 = "*" +networkx = "*" +nvidia-cublas-cu12 = {version = "12.1.3.1", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-cuda-cupti-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-cuda-nvrtc-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-cuda-runtime-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-cudnn-cu12 = {version = "9.1.0.70", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-cufft-cu12 = {version = "11.0.2.54", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-curand-cu12 = {version = "10.3.2.106", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-cusolver-cu12 = {version = "11.4.5.107", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-cusparse-cu12 = {version = "12.1.0.106", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-nccl-cu12 = {version = "2.20.5", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-nvtx-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +setuptools = "*" +sympy = "*" +triton = {version = "3.0.0", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version < \"3.13\""} +typing-extensions = ">=4.8.0" + +[package.extras] +opt-einsum = ["opt-einsum (>=3.3)"] +optree = ["optree (>=0.11.0)"] + [[package]] name = "tqdm" version = "4.66.5" @@ -1869,6 +2443,67 @@ notebook = ["ipywidgets (>=6)"] slack = ["slack-sdk"] telegram = ["requests"] 
+[[package]] +name = "triton" +version = "3.0.0" +description = "A language and compiler for custom Deep Learning operations" +optional = false +python-versions = "*" +files = [ + {file = "triton-3.0.0-1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e1efef76935b2febc365bfadf74bcb65a6f959a9872e5bddf44cc9e0adce1e1a"}, + {file = "triton-3.0.0-1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5ce8520437c602fb633f1324cc3871c47bee3b67acf9756c1a66309b60e3216c"}, + {file = "triton-3.0.0-1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:34e509deb77f1c067d8640725ef00c5cbfcb2052a1a3cb6a6d343841f92624eb"}, + {file = "triton-3.0.0-1-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bcbf3b1c48af6a28011a5c40a5b3b9b5330530c3827716b5fbf6d7adcc1e53e9"}, + {file = "triton-3.0.0-1-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6e5727202f7078c56f91ff13ad0c1abab14a0e7f2c87e91b12b6f64f3e8ae609"}, +] + +[package.dependencies] +filelock = "*" + +[package.extras] +build = ["cmake (>=3.20)", "lit"] +tests = ["autopep8", "flake8", "isort", "llnl-hatchet", "numpy", "pytest", "scipy (>=1.7.1)"] +tutorials = ["matplotlib", "pandas", "tabulate"] + +[[package]] +name = "typer" +version = "0.12.5" +description = "Typer, build great CLIs. Easy to code. Based on Python type hints." +optional = false +python-versions = ">=3.7" +files = [ + {file = "typer-0.12.5-py3-none-any.whl", hash = "sha256:62fe4e471711b147e3365034133904df3e235698399bc4de2b36c8579298d52b"}, + {file = "typer-0.12.5.tar.gz", hash = "sha256:f592f089bedcc8ec1b974125d64851029c3b1af145f04aca64d69410f0c9b722"}, +] + +[package.dependencies] +click = ">=8.0.0" +rich = ">=10.11.0" +shellingham = ">=1.3.0" +typing-extensions = ">=3.7.4.3" + +[[package]] +name = "types-certifi" +version = "2021.10.8.3" +description = "Typing stubs for certifi" +optional = false +python-versions = "*" +files = [ + {file = "types-certifi-2021.10.8.3.tar.gz", hash = "sha256:72cf7798d165bc0b76e1c10dd1ea3097c7063c42c21d664523b928e88b554a4f"}, + {file = "types_certifi-2021.10.8.3-py3-none-any.whl", hash = "sha256:b2d1e325e69f71f7c78e5943d410e650b4707bb0ef32e4ddf3da37f54176e88a"}, +] + +[[package]] +name = "types-toml" +version = "0.10.8.20240310" +description = "Typing stubs for toml" +optional = false +python-versions = ">=3.8" +files = [ + {file = "types-toml-0.10.8.20240310.tar.gz", hash = "sha256:3d41501302972436a6b8b239c850b26689657e25281b48ff0ec06345b8830331"}, + {file = "types_toml-0.10.8.20240310-py3-none-any.whl", hash = "sha256:627b47775d25fa29977d9c70dc0cbab3f314f32c8d8d0c012f2ef5de7aaec05d"}, +] + [[package]] name = "typing-extensions" version = "4.12.2" @@ -1995,6 +2630,101 @@ h2 = ["h2 (>=4,<5)"] socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] zstd = ["zstandard (>=0.18.0)"] +[[package]] +name = "watchfiles" +version = "0.24.0" +description = "Simple, modern and high performance file watching and code reload in python." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "watchfiles-0.24.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:083dc77dbdeef09fa44bb0f4d1df571d2e12d8a8f985dccde71ac3ac9ac067a0"}, + {file = "watchfiles-0.24.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e94e98c7cb94cfa6e071d401ea3342767f28eb5a06a58fafdc0d2a4974f4f35c"}, + {file = "watchfiles-0.24.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:82ae557a8c037c42a6ef26c494d0631cacca040934b101d001100ed93d43f361"}, + {file = "watchfiles-0.24.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:acbfa31e315a8f14fe33e3542cbcafc55703b8f5dcbb7c1eecd30f141df50db3"}, + {file = "watchfiles-0.24.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b74fdffce9dfcf2dc296dec8743e5b0332d15df19ae464f0e249aa871fc1c571"}, + {file = "watchfiles-0.24.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:449f43f49c8ddca87c6b3980c9284cab6bd1f5c9d9a2b00012adaaccd5e7decd"}, + {file = "watchfiles-0.24.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4abf4ad269856618f82dee296ac66b0cd1d71450fc3c98532d93798e73399b7a"}, + {file = "watchfiles-0.24.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9f895d785eb6164678ff4bb5cc60c5996b3ee6df3edb28dcdeba86a13ea0465e"}, + {file = "watchfiles-0.24.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:7ae3e208b31be8ce7f4c2c0034f33406dd24fbce3467f77223d10cd86778471c"}, + {file = "watchfiles-0.24.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:2efec17819b0046dde35d13fb8ac7a3ad877af41ae4640f4109d9154ed30a188"}, + {file = "watchfiles-0.24.0-cp310-none-win32.whl", hash = "sha256:6bdcfa3cd6fdbdd1a068a52820f46a815401cbc2cb187dd006cb076675e7b735"}, + {file = "watchfiles-0.24.0-cp310-none-win_amd64.whl", hash = "sha256:54ca90a9ae6597ae6dc00e7ed0a040ef723f84ec517d3e7ce13e63e4bc82fa04"}, + {file = "watchfiles-0.24.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:bdcd5538e27f188dd3c804b4a8d5f52a7fc7f87e7fd6b374b8e36a4ca03db428"}, + {file = "watchfiles-0.24.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2dadf8a8014fde6addfd3c379e6ed1a981c8f0a48292d662e27cabfe4239c83c"}, + {file = "watchfiles-0.24.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6509ed3f467b79d95fc62a98229f79b1a60d1b93f101e1c61d10c95a46a84f43"}, + {file = "watchfiles-0.24.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8360f7314a070c30e4c976b183d1d8d1585a4a50c5cb603f431cebcbb4f66327"}, + {file = "watchfiles-0.24.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:316449aefacf40147a9efaf3bd7c9bdd35aaba9ac5d708bd1eb5763c9a02bef5"}, + {file = "watchfiles-0.24.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:73bde715f940bea845a95247ea3e5eb17769ba1010efdc938ffcb967c634fa61"}, + {file = "watchfiles-0.24.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3770e260b18e7f4e576edca4c0a639f704088602e0bc921c5c2e721e3acb8d15"}, + {file = "watchfiles-0.24.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa0fd7248cf533c259e59dc593a60973a73e881162b1a2f73360547132742823"}, + {file = "watchfiles-0.24.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:d7a2e3b7f5703ffbd500dabdefcbc9eafeff4b9444bbdd5d83d79eedf8428fab"}, + {file = "watchfiles-0.24.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = 
"sha256:d831ee0a50946d24a53821819b2327d5751b0c938b12c0653ea5be7dea9c82ec"}, + {file = "watchfiles-0.24.0-cp311-none-win32.whl", hash = "sha256:49d617df841a63b4445790a254013aea2120357ccacbed00253f9c2b5dc24e2d"}, + {file = "watchfiles-0.24.0-cp311-none-win_amd64.whl", hash = "sha256:d3dcb774e3568477275cc76554b5a565024b8ba3a0322f77c246bc7111c5bb9c"}, + {file = "watchfiles-0.24.0-cp311-none-win_arm64.whl", hash = "sha256:9301c689051a4857d5b10777da23fafb8e8e921bcf3abe6448a058d27fb67633"}, + {file = "watchfiles-0.24.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:7211b463695d1e995ca3feb38b69227e46dbd03947172585ecb0588f19b0d87a"}, + {file = "watchfiles-0.24.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4b8693502d1967b00f2fb82fc1e744df128ba22f530e15b763c8d82baee15370"}, + {file = "watchfiles-0.24.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cdab9555053399318b953a1fe1f586e945bc8d635ce9d05e617fd9fe3a4687d6"}, + {file = "watchfiles-0.24.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:34e19e56d68b0dad5cff62273107cf5d9fbaf9d75c46277aa5d803b3ef8a9e9b"}, + {file = "watchfiles-0.24.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:41face41f036fee09eba33a5b53a73e9a43d5cb2c53dad8e61fa6c9f91b5a51e"}, + {file = "watchfiles-0.24.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5148c2f1ea043db13ce9b0c28456e18ecc8f14f41325aa624314095b6aa2e9ea"}, + {file = "watchfiles-0.24.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7e4bd963a935aaf40b625c2499f3f4f6bbd0c3776f6d3bc7c853d04824ff1c9f"}, + {file = "watchfiles-0.24.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c79d7719d027b7a42817c5d96461a99b6a49979c143839fc37aa5748c322f234"}, + {file = "watchfiles-0.24.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:32aa53a9a63b7f01ed32e316e354e81e9da0e6267435c7243bf8ae0f10b428ef"}, + {file = "watchfiles-0.24.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:ce72dba6a20e39a0c628258b5c308779b8697f7676c254a845715e2a1039b968"}, + {file = "watchfiles-0.24.0-cp312-none-win32.whl", hash = "sha256:d9018153cf57fc302a2a34cb7564870b859ed9a732d16b41a9b5cb2ebed2d444"}, + {file = "watchfiles-0.24.0-cp312-none-win_amd64.whl", hash = "sha256:551ec3ee2a3ac9cbcf48a4ec76e42c2ef938a7e905a35b42a1267fa4b1645896"}, + {file = "watchfiles-0.24.0-cp312-none-win_arm64.whl", hash = "sha256:b52a65e4ea43c6d149c5f8ddb0bef8d4a1e779b77591a458a893eb416624a418"}, + {file = "watchfiles-0.24.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:3d2e3ab79a1771c530233cadfd277fcc762656d50836c77abb2e5e72b88e3a48"}, + {file = "watchfiles-0.24.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:327763da824817b38ad125dcd97595f942d720d32d879f6c4ddf843e3da3fe90"}, + {file = "watchfiles-0.24.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bd82010f8ab451dabe36054a1622870166a67cf3fce894f68895db6f74bbdc94"}, + {file = "watchfiles-0.24.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d64ba08db72e5dfd5c33be1e1e687d5e4fcce09219e8aee893a4862034081d4e"}, + {file = "watchfiles-0.24.0-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1cf1f6dd7825053f3d98f6d33f6464ebdd9ee95acd74ba2c34e183086900a827"}, + {file = "watchfiles-0.24.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:43e3e37c15a8b6fe00c1bce2473cfa8eb3484bbeecf3aefbf259227e487a03df"}, + {file = 
"watchfiles-0.24.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:88bcd4d0fe1d8ff43675360a72def210ebad3f3f72cabfeac08d825d2639b4ab"}, + {file = "watchfiles-0.24.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:999928c6434372fde16c8f27143d3e97201160b48a614071261701615a2a156f"}, + {file = "watchfiles-0.24.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:30bbd525c3262fd9f4b1865cb8d88e21161366561cd7c9e1194819e0a33ea86b"}, + {file = "watchfiles-0.24.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:edf71b01dec9f766fb285b73930f95f730bb0943500ba0566ae234b5c1618c18"}, + {file = "watchfiles-0.24.0-cp313-none-win32.whl", hash = "sha256:f4c96283fca3ee09fb044f02156d9570d156698bc3734252175a38f0e8975f07"}, + {file = "watchfiles-0.24.0-cp313-none-win_amd64.whl", hash = "sha256:a974231b4fdd1bb7f62064a0565a6b107d27d21d9acb50c484d2cdba515b9366"}, + {file = "watchfiles-0.24.0-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:ee82c98bed9d97cd2f53bdb035e619309a098ea53ce525833e26b93f673bc318"}, + {file = "watchfiles-0.24.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:fd92bbaa2ecdb7864b7600dcdb6f2f1db6e0346ed425fbd01085be04c63f0b05"}, + {file = "watchfiles-0.24.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f83df90191d67af5a831da3a33dd7628b02a95450e168785586ed51e6d28943c"}, + {file = "watchfiles-0.24.0-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fca9433a45f18b7c779d2bae7beeec4f740d28b788b117a48368d95a3233ed83"}, + {file = "watchfiles-0.24.0-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b995bfa6bf01a9e09b884077a6d37070464b529d8682d7691c2d3b540d357a0c"}, + {file = "watchfiles-0.24.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ed9aba6e01ff6f2e8285e5aa4154e2970068fe0fc0998c4380d0e6278222269b"}, + {file = "watchfiles-0.24.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e5171ef898299c657685306d8e1478a45e9303ddcd8ac5fed5bd52ad4ae0b69b"}, + {file = "watchfiles-0.24.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4933a508d2f78099162da473841c652ad0de892719043d3f07cc83b33dfd9d91"}, + {file = "watchfiles-0.24.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:95cf3b95ea665ab03f5a54765fa41abf0529dbaf372c3b83d91ad2cfa695779b"}, + {file = "watchfiles-0.24.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:01def80eb62bd5db99a798d5e1f5f940ca0a05986dcfae21d833af7a46f7ee22"}, + {file = "watchfiles-0.24.0-cp38-none-win32.whl", hash = "sha256:4d28cea3c976499475f5b7a2fec6b3a36208656963c1a856d328aeae056fc5c1"}, + {file = "watchfiles-0.24.0-cp38-none-win_amd64.whl", hash = "sha256:21ab23fdc1208086d99ad3f69c231ba265628014d4aed31d4e8746bd59e88cd1"}, + {file = "watchfiles-0.24.0-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:b665caeeda58625c3946ad7308fbd88a086ee51ccb706307e5b1fa91556ac886"}, + {file = "watchfiles-0.24.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5c51749f3e4e269231510da426ce4a44beb98db2dce9097225c338f815b05d4f"}, + {file = "watchfiles-0.24.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:82b2509f08761f29a0fdad35f7e1638b8ab1adfa2666d41b794090361fb8b855"}, + {file = "watchfiles-0.24.0-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9a60e2bf9dc6afe7f743e7c9b149d1fdd6dbf35153c78fe3a14ae1a9aee3d98b"}, + {file = "watchfiles-0.24.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:f7d9b87c4c55e3ea8881dfcbf6d61ea6775fffed1fedffaa60bd047d3c08c430"}, + {file = "watchfiles-0.24.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:78470906a6be5199524641f538bd2c56bb809cd4bf29a566a75051610bc982c3"}, + {file = "watchfiles-0.24.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:07cdef0c84c03375f4e24642ef8d8178e533596b229d32d2bbd69e5128ede02a"}, + {file = "watchfiles-0.24.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d337193bbf3e45171c8025e291530fb7548a93c45253897cd764a6a71c937ed9"}, + {file = "watchfiles-0.24.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:ec39698c45b11d9694a1b635a70946a5bad066b593af863460a8e600f0dff1ca"}, + {file = "watchfiles-0.24.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:2e28d91ef48eab0afb939fa446d8ebe77e2f7593f5f463fd2bb2b14132f95b6e"}, + {file = "watchfiles-0.24.0-cp39-none-win32.whl", hash = "sha256:7138eff8baa883aeaa074359daabb8b6c1e73ffe69d5accdc907d62e50b1c0da"}, + {file = "watchfiles-0.24.0-cp39-none-win_amd64.whl", hash = "sha256:b3ef2c69c655db63deb96b3c3e587084612f9b1fa983df5e0c3379d41307467f"}, + {file = "watchfiles-0.24.0-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:632676574429bee8c26be8af52af20e0c718cc7f5f67f3fb658c71928ccd4f7f"}, + {file = "watchfiles-0.24.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:a2a9891723a735d3e2540651184be6fd5b96880c08ffe1a98bae5017e65b544b"}, + {file = "watchfiles-0.24.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4a7fa2bc0efef3e209a8199fd111b8969fe9db9c711acc46636686331eda7dd4"}, + {file = "watchfiles-0.24.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:01550ccf1d0aed6ea375ef259706af76ad009ef5b0203a3a4cce0f6024f9b68a"}, + {file = "watchfiles-0.24.0-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:96619302d4374de5e2345b2b622dc481257a99431277662c30f606f3e22f42be"}, + {file = "watchfiles-0.24.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:85d5f0c7771dcc7a26c7a27145059b6bb0ce06e4e751ed76cdf123d7039b60b5"}, + {file = "watchfiles-0.24.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:951088d12d339690a92cef2ec5d3cfd957692834c72ffd570ea76a6790222777"}, + {file = "watchfiles-0.24.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:49fb58bcaa343fedc6a9e91f90195b20ccb3135447dc9e4e2570c3a39565853e"}, + {file = "watchfiles-0.24.0.tar.gz", hash = "sha256:afb72325b74fa7a428c009c1b8be4b4d7c2afedafb2982827ef2156646df2fe1"}, +] + +[package.dependencies] +anyio = ">=3.0.0" + [[package]] name = "xxhash" version = "3.5.0" @@ -2235,4 +2965,4 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "8806276cf99eb02f5b03a4190df6c5b9c72656376fde6a7b17f93b1c080c170e" +content-hash = "fa1f9c8a653400e9abd89c4af03449e709b106f9e02f625547cb0c9b99da3ca6" diff --git a/pyproject.toml b/pyproject.toml index 2d0de05..7480d41 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,6 +8,8 @@ readme = "README.md" [tool.poetry.dependencies] python = "^3.10" dspy-ai = "^2.4.16" +torch = "^2.4.1" +modal = "^0.64.131" [build-system] diff --git a/readme.md b/readme.md index 2f7b3cc..e0a3e8c 100644 --- a/readme.md +++ b/readme.md @@ -10,7 +10,17 @@ curl -X 'POST' 'https://rawsh--vllm-qwen-serve.modal.run/v1/completions' -H ], "max_tokens": 200, "stop": ["\n\n## Step "], - "temperature": 1 + "temperature": 0.7 +}' + +curl -X 
'POST' 'https://rawsh--vllm-gemma-serve.modal.run/v1/completions' -H 'accept: application/json' -H 'Authorization: Bearer 9FF74944EED19865193F979942FB1' -H 'Content-Type: application/json' -d '{ + "model": "rawsh/mirrorgemma-2-2b-SFT", + "prompt": [ + "Find the least positive integer such that when its leftmost digit is deleted, the resulting integer is 1/29 of the original integer.\n\n" + ], + "max_tokens": 200, + "stop": ["\n"], + "temperature": 0.7 }' ```
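The same completion request can also be issued from Python. Below is a minimal sketch using `requests`, with the endpoint URL, bearer token, model name, and sampling parameters taken verbatim from the curl example in the readme hunk above; it assumes the Modal-served vLLM endpoint follows the standard OpenAI-compatible completions response schema (`choices[0].text`), which is not confirmed anywhere else in this diff:

```python
import requests

# Mirror of the curl call above against the Modal-hosted vLLM server.
# URL, token, and payload are copied from the readme; the response-parsing
# line assumes the usual OpenAI completions schema.
resp = requests.post(
    "https://rawsh--vllm-gemma-serve.modal.run/v1/completions",
    headers={
        "Authorization": "Bearer 9FF74944EED19865193F979942FB1",
        "Content-Type": "application/json",
    },
    json={
        "model": "rawsh/mirrorgemma-2-2b-SFT",
        "prompt": [
            "Find the least positive integer such that when its leftmost "
            "digit is deleted, the resulting integer is 1/29 of the "
            "original integer.\n\n"
        ],
        "max_tokens": 200,
        "stop": ["\n"],   # stop at the end of a single reasoning step
        "temperature": 0.7,
    },
    timeout=120,
)
resp.raise_for_status()
print(resp.json()["choices"][0]["text"])
```

Stopping on `"\n"` (versus `"\n\n## Step "` in the Qwen example) reflects that the SFT model emits steps separated by blank lines rather than `## Step n:` headers, matching the step-prefix stripping done in the training script.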