-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'develop' into feature/revised-programming-feedback
# Conflicts: # llm_core/llm_core/utils/llm_utils.py # modules/modeling/module_modeling_llm/poetry.lock # modules/modeling/module_modeling_llm/pyproject.toml # modules/programming/module_programming_llm/module_programming_llm/config.py # modules/programming/module_programming_llm/module_programming_llm/generate_graded_suggestions_by_file.py # modules/programming/module_programming_llm/module_programming_llm/generate_non_graded_suggestions_by_file.py # modules/programming/module_programming_llm/module_programming_llm/generate_summary_by_file.py # modules/programming/module_programming_llm/module_programming_llm/helpers/models/model_config.py # modules/programming/module_programming_llm/module_programming_llm/helpers/models/openai.py # modules/programming/module_programming_llm/module_programming_llm/helpers/models/replicate.py # modules/programming/module_programming_llm/module_programming_llm/split_grading_instructions_by_file.py # modules/programming/module_programming_llm/module_programming_llm/split_problem_statement_by_file.py # modules/programming/module_programming_llm/poetry.lock # modules/programming/module_programming_llm/pyproject.toml # modules/text/module_text_llm/poetry.lock # modules/text/module_text_llm/pyproject.toml
- Loading branch information
Showing
45 changed files
with
4,177 additions
and
2,262 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
# Comment out the variables that you define somewhere else
# Environment variables are overwritten by .env file

PRODUCTION=0
SECRET=12345abcdef
DATABASE_URL=sqlite:///../data/data.sqlite


################################################################
# LLM Credentials                                              #
################################################################

# Default model to use
# See below for options, available models are also logged on startup
LLM_DEFAULT_MODEL="azure_openai_gpt-4o"
# Prices (USD per 1M tokens) used to estimate the cost of each request
LLM_DEFAULT_MODEL_COST_PER_MILLION_INPUT_TOKEN=5
LLM_DEFAULT_MODEL_COST_PER_MILLION_OUTPUT_TOKEN=15

# Enable LLM-as-a-judge approach 0 = disabled, 1 = enabled
LLM_ENABLE_LLM_AS_A_JUDGE=1
# Evaluation model to use for the LLM-as-a-judge approach [Only important if you want to use it in the /evaluate endpoint]
# See below for options, available models are also logged on startup
LLM_EVALUATION_MODEL="azure_openai_gpt-4o"

# Standard OpenAI (Non-Azure) [leave blank if not used]
# Model names prefixed with `openai_` followed by the model name, e.g. `openai_text-davinci-003`
# A list of models can be found in `module_text_llm/helpers/models/openai.py` (openai_models)
OPENAI_API_KEY="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"

# Azure OpenAI [leave blank if not used]
# Model names prefixed with `azure_openai_` followed by the deployment id, e.g. `azure_openai_gpt-35`
AZURE_OPENAI_API_KEY="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
AZURE_OPENAI_ENDPOINT="https://ase-eu01.openai.azure.com/" # change base if needed
OPENAI_API_VERSION="2024-06-01" # change version if needed

# LangSmith (can be used for tracing LLMs) [leave blank if not used]
# See https://docs.smith.langchain.com
# LANGCHAIN_TRACING_V2=true
# LANGCHAIN_ENDPOINT="https://api.smith.langchain.com"
# LANGCHAIN_API_KEY="XXX"
# LANGCHAIN_PROJECT="XXX"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
# syntax=docker/dockerfile:1

# This is the Dockerfile for the shared llm package.
# Its output is used as a dependency in the module_* Dockerfiles.

FROM python:3.11 AS llm_core

WORKDIR /code

# Poetry (pinned for reproducible builds)
RUN pip install --no-cache-dir poetry==1.5.0

# Copy only the dependency manifests first so this expensive layer is
# cached as long as the dependencies do not change.
COPY pyproject.toml poetry.lock ./
COPY --from=athena /code /athena

# --no-root: the package source has not been copied yet, so install only
# the dependencies here; the package itself is built as a wheel below.
RUN poetry config virtualenvs.create false \
    && poetry install --no-interaction --no-ansi --no-root

# Project files
COPY . ./

# Build the wheel consumed by the module_* images
RUN poetry build -f wheel
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
"""Load environment variables for local development.

Values from a `.env` file in the working directory override any
variables already present in the process environment.
"""
from dotenv import load_dotenv

load_dotenv(override=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
import os | ||
|
||
from langchain.callbacks.base import BaseCallbackHandler | ||
from langchain_core.outputs import LLMResult | ||
from langchain_core.messages.ai import UsageMetadata | ||
|
||
from athena import emit_meta, get_meta | ||
|
||
|
||
class UsageHandler(BaseCallbackHandler):
    """LangChain callback that accumulates LLM token usage and cost metadata.

    After every completed LLM call it updates two meta entries via
    ``emit_meta``:

    - ``totalUsage``: running token totals and estimated cost across calls
    - ``llmRequests``: one record per generation with per-request details
    """

    def on_llm_end(self, response: LLMResult, **kwargs) -> None:
        """Record token counts and estimated cost for a finished LLM call.

        Cost rates (USD per million tokens) are read from the environment
        and default to 0.0 when unset, so cost tracking degrades gracefully.
        """
        cost_per_million_input_tokens = float(os.environ.get("LLM_DEFAULT_MODEL_COST_PER_MILLION_INPUT_TOKEN", 0.0))
        cost_per_million_output_tokens = float(os.environ.get("LLM_DEFAULT_MODEL_COST_PER_MILLION_OUTPUT_TOKEN", 0.0))

        meta = get_meta()

        # Continue from previously emitted totals so repeated calls accumulate.
        total_usage = meta.get("totalUsage", {"numInputTokens": 0, "numOutputTokens": 0, "numTotalTokens": 0, "cost": 0})
        llm_calls = meta.get("llmRequests", [])

        for generations in response.generations:
            for generation in generations:
                message = generation.dict()["message"]
                generation_usage: UsageMetadata = message["usage_metadata"]
                model_name = message["response_metadata"].get("model_name", None)

                total_usage["numInputTokens"] += generation_usage["input_tokens"]
                total_usage["numOutputTokens"] += generation_usage["output_tokens"]
                total_usage["numTotalTokens"] += generation_usage["total_tokens"]

                # BUGFIX: input tokens were previously priced at the
                # output-token rate, overstating cost whenever the rates differ.
                total_usage["cost"] += int(generation_usage["input_tokens"]) * cost_per_million_input_tokens / 1_000_000
                total_usage["cost"] += int(generation_usage["output_tokens"]) * cost_per_million_output_tokens / 1_000_000

                llm_calls.append({
                    "model": model_name,
                    "costPerMillionInputToken": cost_per_million_input_tokens,
                    "costPerMillionOutputToken": cost_per_million_output_tokens,
                    "numInputTokens": generation_usage["input_tokens"],
                    "numOutputTokens": generation_usage["output_tokens"],
                    "numTotalTokens": generation_usage["total_tokens"],
                })

        emit_meta("totalUsage", total_usage)
        emit_meta("llmRequests", llm_calls)
File renamed without changes.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.