From 3e336dc0619d96e373bf499dfd2f2f00593cad0e Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Sun, 5 Nov 2023 21:54:36 +0100 Subject: [PATCH 01/54] add endpoint --- athena/athena/endpoints.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/athena/athena/endpoints.py b/athena/athena/endpoints.py index 8e9d4dcc5..eac88656f 100644 --- a/athena/athena/endpoints.py +++ b/athena/athena/endpoints.py @@ -2,7 +2,7 @@ import inspect from fastapi import Depends, BackgroundTasks from pydantic import BaseModel, ValidationError -from typing import TypeVar, Callable, List, Union, Any, Coroutine, Type +from typing import TypeVar, Callable, Dict, List, Union, Any, Coroutine, Type from athena.app import app from athena.authenticate import authenticated @@ -358,4 +358,12 @@ def config_schema_provider(cls: Type[C]) -> Type[C]: async def wrapper(): return cls.schema() - return cls \ No newline at end of file + return cls + +def evaluation_provider(func: Union[ + Callable[[E, S, List[F], List[F]], Dict[int, Any]], + Callable[[E, S, List[F], List[F]], Coroutine[Any, Any, Dict[int, Any]]], + Callable[[E, S, List[F], List[F], C], Dict[int, Any]], + Callable[[E, S, List[F], List[F], C], Coroutine[Any, Any, Dict[int, Any]]] +]): + pass \ No newline at end of file From 2fb28f00182568feab5ca78605c10e13b61deeea Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Sun, 5 Nov 2023 22:09:19 +0100 Subject: [PATCH 02/54] add evaluation_provider --- athena/athena/endpoints.py | 64 ++++++++++++++++++++++++++++++++++---- 1 file changed, 58 insertions(+), 6 deletions(-) diff --git a/athena/athena/endpoints.py b/athena/athena/endpoints.py index eac88656f..a762a37d0 100644 --- a/athena/athena/endpoints.py +++ b/athena/athena/endpoints.py @@ -2,7 +2,7 @@ import inspect from fastapi import Depends, BackgroundTasks from pydantic import BaseModel, ValidationError -from typing import TypeVar, Callable, Dict, List, Union, Any, Coroutine, Type +from typing import TypeVar, Callable, List, Union, Any, Coroutine, Type from athena.app import app from athena.authenticate import authenticated @@ -361,9 +361,61 @@ async def wrapper(): return cls def evaluation_provider(func: Union[ - Callable[[E, S, List[F], List[F]], Dict[int, Any]], - Callable[[E, S, List[F], List[F]], Coroutine[Any, Any, Dict[int, Any]]], - Callable[[E, S, List[F], List[F], C], Dict[int, Any]], - Callable[[E, S, List[F], List[F], C], Coroutine[Any, Any, Dict[int, Any]]] + Callable[[E, S, List[F], List[F]], Any], + Callable[[E, S, List[F], List[F]], Coroutine[Any, Any, Any]] ]): - pass \ No newline at end of file + """ + Provide evaluated feedback to the Assessment Module Manager. + + Note: The evaluation provider is usually called during the research and development phase (by the Playground). + Return arbitrary evaluation results. + + This decorator can be used with several types of functions: synchronous or asynchronous. + + Examples: + Below are some examples of possible functions that you can decorate with this decorator: + + Without using module config (both synchronous and asynchronous forms): + >>> @evaluation_provider + ... def sync_evaluate_feedback( + ... exercise: Exercise, submission: Submission, + ... true_feedbacks: List[Feedback], predicted_feedbacks: List[Feedback] + ... ) -> Any: + ... 
# evaluate predicted feedback here and return evaluation results + + >>> @feedback_provider + ... async def async_evaluate_feedback( + ... exercise: Exercise, submission: Submission, + ... true_feedbacks: List[Feedback], predicted_feedbacks: List[Feedback] + ... ) -> Any: + ... # evaluate predicted feedback here and return evaluation results + """ + exercise_type = inspect.signature(func).parameters["exercise"].annotation + submission_type = inspect.signature(func).parameters["submission"].annotation + feedback_type = inspect.signature(func).parameters["predicted_feedbacks"].annotation.__args__[0] + + @app.post("/evaluation", responses=module_responses) + @authenticated + @with_meta + async def wrapper( + exercise: exercise_type, + submission: submission_type, + true_feedbacks: List[feedback_type], + predicted_feedbacks: List[feedback_type], + ): + # Retrieve existing metadata for the exercise, submission and feedback + exercise.meta.update(get_stored_exercise_meta(exercise) or {}) + submission.meta.update(get_stored_submission_meta(submission) or {}) + for feedback in true_feedbacks: + feedback.meta.update(get_stored_feedback_meta(feedback) or {}) + for feedback in predicted_feedbacks: + feedback.meta.update(get_stored_feedback_meta(feedback) or {}) + + # Call the actual provider + if inspect.iscoroutinefunction(func): + evaluation = await func(exercise, submission, true_feedbacks, predicted_feedbacks) + else: + evaluation = func(exercise, submission, true_feedbacks, predicted_feedbacks) + + return evaluation + return wrapper \ No newline at end of file From 38f6055373b7fb24a34749b3fcd6f0e46c6fc273 Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Sun, 5 Nov 2023 22:09:27 +0100 Subject: [PATCH 03/54] add new line --- athena/athena/endpoints.py | 1 + 1 file changed, 1 insertion(+) diff --git a/athena/athena/endpoints.py b/athena/athena/endpoints.py index a762a37d0..411edaf0a 100644 --- a/athena/athena/endpoints.py +++ b/athena/athena/endpoints.py @@ -360,6 +360,7 @@ async def wrapper(): return cls + def evaluation_provider(func: Union[ Callable[[E, S, List[F], List[F]], Any], Callable[[E, S, List[F], List[F]], Coroutine[Any, Any, Any]] From 92ca4ed74f8e77cbe3dc349fa691f74c2b7b8f28 Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Sun, 5 Nov 2023 22:10:28 +0100 Subject: [PATCH 04/54] add evaluation_provider to export --- athena/athena/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/athena/athena/__init__.py b/athena/athena/__init__.py index 90fb46e62..8a67e5315 100644 --- a/athena/athena/__init__.py +++ b/athena/athena/__init__.py @@ -6,7 +6,7 @@ from .schemas import ExerciseType, GradingCriterion, StructuredGradingInstruction from .metadata import emit_meta, get_meta from .experiment import get_experiment_environment -from .endpoints import submission_selector, submissions_consumer, feedback_consumer, feedback_provider, config_schema_provider # type: ignore +from .endpoints import submission_selector, submissions_consumer, feedback_consumer, feedback_provider, config_schema_provider, evaluation_provider # type: ignore @app.get("/") @@ -28,6 +28,7 @@ def run_module(): "feedback_consumer", "feedback_provider", "config_schema_provider", + "evaluation_provider", "emit_meta", "get_meta", "get_experiment_environment", From 433cd7fda70dbe38890797a33bed5b475632b572 Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Sun, 5 Nov 2023 22:25:35 +0100 Subject: [PATCH 05/54] add example evaluation endpoint --- module_example/module_example/__main__.py | 30 +++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/module_example/module_example/__main__.py b/module_example/module_example/__main__.py index 7bdef91fe..182f160bb 100644 --- a/module_example/module_example/__main__.py +++ b/module_example/module_example/__main__.py @@ -1,10 +1,11 @@ """ Entry point for the module_example module. """ -from typing import List +import random +from typing import List, Any from pydantic import BaseModel, Field -from athena import app, config_schema_provider, submissions_consumer, submission_selector, feedback_consumer, feedback_provider, emit_meta +from athena import app, config_schema_provider, submissions_consumer, submission_selector, feedback_consumer, feedback_provider, evaluation_provider, emit_meta from athena.programming import Exercise, Submission, Feedback from athena.logger import logger from athena.storage import store_exercise, store_submissions, store_feedback @@ -139,5 +140,30 @@ def suggest_feedback(exercise: Exercise, submission: Submission, module_config: ] +# Only if it makes sense for a module (Optional) +@evaluation_provider +def evaluate_feedback(exercise: Exercise, submission: Submission, true_feedbacks: List[Feedback], predicted_feedbacks: List[Feedback]) -> Any: + logger.info( + "evaluate_feedback: Evaluation for submission %d of exercise %d was requested with %d true and %d predicted feedbacks", + submission.id, exercise.id, len(true_feedbacks), len(predicted_feedbacks) + ) + + # Do something with the true and predicted feedback and return the evaluation result + # Generate some example evaluation result + evaluation_results = [] + true_feedback_embeddings = [random.random() for _ in true_feedbacks] + predicted_feedback_embeddings = [random.random() for _ in predicted_feedbacks] + for feedback, embedding in 
zip(predicted_feedbacks, predicted_feedback_embeddings): + feedback_evaluation = { + "feedback_id": feedback.id, + "embedding": embedding, + "has_match": len([t for t in true_feedback_embeddings if abs(t - embedding) < 0.1]) > 0, + "correctness": random.random() + } + evaluation_results.append(feedback_evaluation) + + return evaluation_results + + if __name__ == "__main__": app.start() From 9b4e2c97367cc8bfb8fe1b35caa8c0ae45f29bda Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Sun, 5 Nov 2023 22:44:22 +0100 Subject: [PATCH 06/54] add playground ui --- .../view_mode/module_requests/index.tsx | 7 +- .../module_requests/request_evaluation.tsx | 172 ++++++++++++++++++ .../src/hooks/athena/request_evaluation.ts | 31 ++++ 3 files changed, 208 insertions(+), 2 deletions(-) create mode 100644 playground/src/components/view_mode/module_requests/request_evaluation.tsx create mode 100644 playground/src/hooks/athena/request_evaluation.ts diff --git a/playground/src/components/view_mode/module_requests/index.tsx b/playground/src/components/view_mode/module_requests/index.tsx index dca29946d..dcda17bf2 100644 --- a/playground/src/components/view_mode/module_requests/index.tsx +++ b/playground/src/components/view_mode/module_requests/index.tsx @@ -3,12 +3,14 @@ import type { ModuleMeta } from "@/model/health_response"; import { useState } from "react"; import { ModuleProvider } from "@/hooks/module_context"; +import ModuleAndConfigSelect from "@/components/selectors/module_and_config_select"; import GetConfigSchema from "@/components/view_mode/module_requests/get_config_schema"; import SendSubmissions from "@/components/view_mode/module_requests/send_submissions"; +import SelectSubmission from "@/components/view_mode/module_requests/request_submission_selection"; import SendFeedbacks from "@/components/view_mode/module_requests/send_feedbacks"; import RequestFeedbackSuggestions from "@/components/view_mode/module_requests/request_feedback_suggestions"; -import 
SelectSubmission from "@/components/view_mode/module_requests/request_submission_selection"; -import ModuleAndConfigSelect from "@/components/selectors/module_and_config_select"; +import RequestEvaluation from "@/components/view_mode/module_requests/request_evaluation"; + export default function ModuleRequests() { const [moduleAndConfig, setModuleAndConfig] = useState<{ module: ModuleMeta; moduleConfig: any } | undefined>(undefined); @@ -34,6 +36,7 @@ export default function ModuleRequests() { + )} diff --git a/playground/src/components/view_mode/module_requests/request_evaluation.tsx b/playground/src/components/view_mode/module_requests/request_evaluation.tsx new file mode 100644 index 000000000..c98ac6d27 --- /dev/null +++ b/playground/src/components/view_mode/module_requests/request_evaluation.tsx @@ -0,0 +1,172 @@ +import type { Submission } from "@/model/submission"; +import type { Exercise } from "@/model/exercise"; +import type { Feedback } from "@/model/feedback"; +import type ModuleResponse from "@/model/module_response"; + +import { useEffect, useState } from "react"; + +import { useModule } from "@/hooks/module_context"; +import { useBaseInfo } from "@/hooks/base_info_context"; +import useRequestEvaluation from "@/hooks/athena/request_evaluation"; +import useFeedbacks from "@/hooks/playground/feedbacks"; + +import ExerciseSelect from "@/components/selectors/exercise_select"; +import SubmissionSelect from "@/components/selectors/submission_select"; +import ModuleResponseView from "@/components/module_response_view"; +import Disclosure from "@/components/disclosure"; +import ExerciseDetail from "@/components/details/exercise_detail"; +import SubmissionDetail from "@/components/details/submission_detail"; + +export default function RequestEvaluation() { + const { module } = useModule(); + const { dataMode } = useBaseInfo(); + + const [exercise, setExercise] = useState(undefined); + const [submission, setSubmission] = useState( + undefined + ); + + const 
[predictedFeedbacks, setPredictedFeedbacks] = useState([]); + + const { + data: trueFeedbacks, + isLoading: isLoadingTrueFeedbacks, + error: errorTrueFeedbacks, + } = useFeedbacks(exercise, submission); + + const { + data: response, + isLoading, + error, + mutate, + reset, + } = useRequestEvaluation(); + + useEffect(() => setExercise(undefined), [module, dataMode]); + + return ( +
+

+ Request Evaluation from Athena{" "} + (OPTIONAL) +

+

+ Evaluate a list of feedback suggestions during the research and + development phase. Compare the predicted feedback with the actual + feedback using the function annotated with{" "} + @evaluation_provider. Each module can implement custom + metrics to evaluate the feedback suggestions during evaluation and + respond with arbitrary evaluation results. +

+ { + setExercise(exercise); + reset(); + setSubmission(undefined); + setPredictedFeedbacks([]); + }} + disabled={isLoading} + /> + {exercise && ( + <> + { + setSubmission(submission); + setPredictedFeedbacks([]); + }} + disabled={isLoading} + /> +
+ + {submission && + (trueFeedbacks ? ( + +

+ The following feedbacks given by the tutor in the past. +

+ f.submission_id === submission.id + )} + /> +
+ ) : ( +
+ No true feedbacks available +
+ ))} + {submission && ( + +

+ Provide feedback as predicted feedbacks to + test the evaluation. +

+ f.submission_id === submission.id + )} + onFeedbacksChange={setPredictedFeedbacks} + /> +
+ )} + {isLoadingTrueFeedbacks && ( +
Loading feedbacks...
+ )} + {errorTrueFeedbacks && ( +
+ Failed to load feedbacks +
+ )} +
+ + )} + + +
+ ); +} diff --git a/playground/src/hooks/athena/request_evaluation.ts b/playground/src/hooks/athena/request_evaluation.ts new file mode 100644 index 000000000..d6ccc9f60 --- /dev/null +++ b/playground/src/hooks/athena/request_evaluation.ts @@ -0,0 +1,31 @@ +import type { Exercise } from "@/model/exercise"; +import type { Submission } from "@/model/submission"; +import type ModuleResponse from "@/model/module_response"; + +import { UseMutationOptions, useMutation } from "react-query"; +import { AthenaError, useAthenaFetcher } from "@/hooks/athena_fetcher"; +import { Feedback } from "@/model/feedback"; + +/** + * Requests an evaluation for an exercise and a submission given the true and predicted feedbacks from an Athena module. + * + * @example + * const { data, isLoading, error, mutate } = useRequestEvaluation(); + * mutate({ exercise, submission, trueFeedbacks, predictedFeedbacks }); + * + * @param options The react-query options. + */ +export default function useRequestEvaluation( + options: Omit< + UseMutationOptions, + "mutationFn" + > = {} +) { + const athenaFetcher = useAthenaFetcher(); + return useMutation({ + mutationFn: async ({ exercise, submission, trueFeedbacks, predictedFeedbacks }) => { + return await athenaFetcher("/evaluation", { exercise, submission, true_feedbacks: trueFeedbacks, predicted_feedbacks: predictedFeedbacks }); + }, + ...options, + }); +} From 5f39b8c6d2c28ed945a61f6a24ee33f7900f9688 Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Mon, 6 Nov 2023 16:22:16 +0100 Subject: [PATCH 07/54] add automatic evaluation --- .../conduct_experiment/index.tsx | 17 +-- .../src/hooks/batch_module_experiment.ts | 108 +++++++++++++++++- playground/src/model/automatic_evaluation.ts | 3 + 3 files changed, 117 insertions(+), 11 deletions(-) create mode 100644 playground/src/model/automatic_evaluation.ts diff --git a/playground/src/components/view_mode/evaluation_mode/conduct_experiment/index.tsx b/playground/src/components/view_mode/evaluation_mode/conduct_experiment/index.tsx index f91cc7b3e..3c78e1ecb 100644 --- a/playground/src/components/view_mode/evaluation_mode/conduct_experiment/index.tsx +++ b/playground/src/components/view_mode/evaluation_mode/conduct_experiment/index.tsx @@ -65,6 +65,12 @@ export default function ConductExperiment({ data: data.manualRatings, }); } + if (data.automaticEvaluation) { + files.push({ + name: `${experiment.exerciseType}_automatic_evaluation_${moduleConfigurations[index].name}_${experiment.id}_run-${data.automaticEvaluation.runId}`, + data: data.automaticEvaluation, + }); + } } return files; }) @@ -102,14 +108,11 @@ export default function ConductExperiment({ return; } - if ( - !data.type || - (data.type !== "results" && data.type !== "manualRatings") - ) { - alert("No correct type found in the data i.e. 'results' or 'manualRatings'"); + if (!data.type || !["results", "manualRatings", "automaticEvaluation"].includes(data.type)) { + alert("No correct type found in the data i.e. 
'results', 'manualRatings', or 'automaticEvaluation'."); return; } - const type = data.type as "results" | "manualRatings"; + const type = data.type as "results" | "manualRatings" | "automaticEvaluation"; try { moduleViewRef.importData(data); @@ -209,7 +212,7 @@ export default function ConductExperiment({ // If all files have been read, sort and import if (filesProcessed === files.length) { - // Sort the array by 'type', 'results' first and then 'manualRatings' + // Sort the array by 'type', 'results' first and then 'manualRatings' or 'automaticEvaluation' const sortedData = fileDataArray.sort((a, b) => { if (a.type === "results" && b.type !== "results") { return -1; diff --git a/playground/src/hooks/batch_module_experiment.ts b/playground/src/hooks/batch_module_experiment.ts index ba99982c2..a2ff10fb0 100644 --- a/playground/src/hooks/batch_module_experiment.ts +++ b/playground/src/hooks/batch_module_experiment.ts @@ -1,5 +1,6 @@ import type { Feedback } from "@/model/feedback"; import type { ManualRating } from "@/model/manual_rating"; +import type { AutomaticEvaluation } from "@/model/automatic_evaluation"; import type { Experiment } from "@/components/view_mode/evaluation_mode/define_experiment"; import type { ModuleConfiguration } from "@/components/view_mode/evaluation_mode/configure_modules"; @@ -9,6 +10,7 @@ import { useSendFeedbacks } from "./athena/send_feedbacks"; import useRequestSubmissionSelection from "./athena/request_submission_selection"; import useRequestFeedbackSuggestions from "./athena/request_feedback_suggestions"; import useSendSubmissions from "./athena/send_submissions"; +import useRequestEvaluation from "./athena/request_evaluation"; import { useExperimentIdentifiersSetRunId } from "./experiment_identifiers_context"; export type ExperimentStep = @@ -50,6 +52,11 @@ export default function useBatchModuleExperiment(experiment: Experiment, moduleC Map >(new Map()); + // Stores automatic evaluation of submissions + const 
[submissionsWithAutomaticEvaluation, setSubmissionsWithAutomaticEvaluation] = useState< + Map + >(new Map()); + const [processingStep, setProcessingStep] = useState< ExperimentStep | undefined >(undefined); @@ -95,6 +102,19 @@ export default function useBatchModuleExperiment(experiment: Experiment, moduleC }, } : {} ), + ...( + submissionsWithAutomaticEvaluation.size > 0 ? { + automaticEvaluation: { + type: "automaticEvaluation", + runId: data.runId, + experimentId: experiment.id, + moduleConfigurationId: moduleConfiguration.id, + submissionsWithAutomaticEvaluation: Object.fromEntries( + submissionsWithAutomaticEvaluation + ), + }, + } : {} + ), }; }; @@ -108,6 +128,7 @@ export default function useBatchModuleExperiment(experiment: Experiment, moduleC throw new Error("Invalid results data"); } + setProcessingStep(undefined); setData(() => ({ runId: importedData.runId, step: importedData.step, @@ -134,7 +155,22 @@ export default function useBatchModuleExperiment(experiment: Experiment, moduleC ) )); return; + } else if (importedData.type === "automaticEvaluation") { + // Relies on the fact that the automatic evaluations have to be imported after the results + if (importedData.runId !== data.runId) { + throw new Error("Run ID does not match, have you imported the results first?"); + } + if (importedData.submissionsWithAutomaticEvaluation === undefined) { + throw new Error("Invalid automatic evaluation data"); + } + setSubmissionsWithAutomaticEvaluation(() => new Map( + Object.entries(importedData.submissionsWithAutomaticEvaluation).map( + ([key, value]) => [Number(key), value as any] + ) + )); + return; } + throw new Error("Unknown import data type"); }; @@ -158,6 +194,7 @@ export default function useBatchModuleExperiment(experiment: Experiment, moduleC const sendFeedbacks = useSendFeedbacks(); const requestSubmissionSelection = useRequestSubmissionSelection(); const requestFeedbackSuggestions = useRequestFeedbackSuggestions(); + const requestEvaluation = 
useRequestEvaluation(); // 1. Send submissions to Athena const stepSendSubmissions = () => { @@ -338,10 +375,70 @@ export default function useBatchModuleExperiment(experiment: Experiment, moduleC setData((prevState) => ({ ...prevState, - step: "finished", + step: "finished", // Automatic evaluation is done separately })); }; + // 4. Automatic evaluation (after results are 'finished') + const stepAutomaticEvaluation = async () => { + setProcessingStep("finished"); + + console.log("Running automatic evaluation..."); + + let remainingSubmissions = experiment.evaluationSubmissions.filter( + (submission) => !submissionsWithAutomaticEvaluation.has(submission.id) + ); + + let index = 0; + for (const submission of remainingSubmissions) { + console.log( + `Evaluating... (${index + 1}/${ + remainingSubmissions.length + })` + ); + + const predictedFeedbacks = data.submissionsWithFeedbackSuggestions.get( + submission.id + )?.suggestions ?? []; + + if (predictedFeedbacks.length === 0) { + // Skip if there are no predicted feedbacks + setSubmissionsWithAutomaticEvaluation((prevState) => { + const newMap = new Map(prevState); + newMap.set(submission.id, {}); + return newMap; + }); + continue; + } + + try { + const response = await requestEvaluation.mutateAsync({ + exercise: experiment.exercise, + submission, + trueFeedbacks: experiment.tutorFeedbacks.filter( + (feedback) => feedback.submission_id === submission.id + ), + predictedFeedbacks: predictedFeedbacks, + }); + if (!isMounted.current) { + return; + } + console.log(`Received evaluation for submission ${submission.id}:`, response.data); + + setSubmissionsWithAutomaticEvaluation((prevState) => { + const newMap = new Map(prevState); + newMap.set(submission.id, response.data); + return newMap; + }); + } catch (error) { + console.error( + `Error while evaluating submission ${submission.id}:`, + error + ); + } + } + }; + useEffect(() => { isMounted.current = true; return () => { @@ -375,10 +472,12 @@ export default function 
useBatchModuleExperiment(experiment: Experiment, moduleC processingStep !== "generatingFeedbackSuggestions" ) { stepGenerateFeedbackSuggestions(); + } else if ( + data.step === "finished" && + processingStep !== "finished" + ) { + stepAutomaticEvaluation(); } - // TODO: Add automatic evaluation step here - // Note: Evaluate tutor feedback more globally to not do it multiple times - // Note 2: Actually, I probably want to have it in parallel with the feedback suggestions for the interactive mode! }, [data.step]); return { @@ -394,6 +493,7 @@ export default function useBatchModuleExperiment(experiment: Experiment, moduleC sendFeedbacks, requestSubmissionSelection, requestFeedbackSuggestions, + requestEvaluation, }, }; } diff --git a/playground/src/model/automatic_evaluation.ts b/playground/src/model/automatic_evaluation.ts new file mode 100644 index 000000000..fb55b9c86 --- /dev/null +++ b/playground/src/model/automatic_evaluation.ts @@ -0,0 +1,3 @@ +export type AutomaticEvaluation = { + [module: string]: any; +}; From 2667cab2b270305becc9f3b054e40a5bfb2af81d Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Mon, 6 Nov 2023 17:22:55 +0100 Subject: [PATCH 08/54] add automatic evaluation --- .../endpoints/health_endpoint.py | 5 +- .../module/list_modules.py | 1 + .../module/module.py | 1 + assessment_module_manager/modules.ini | 5 ++ .../module_requests/request_evaluation.tsx | 4 +- .../src/hooks/athena/request_evaluation.ts | 62 ++++++++++++++++--- playground/src/hooks/athena_fetcher.ts | 42 ++++++++----- .../src/hooks/batch_module_experiment.ts | 11 +++- playground/src/hooks/module_context.tsx | 2 +- playground/src/model/health_response.ts | 1 + 10 files changed, 103 insertions(+), 31 deletions(-) diff --git a/assessment_module_manager/assessment_module_manager/endpoints/health_endpoint.py b/assessment_module_manager/assessment_module_manager/endpoints/health_endpoint.py index e1a8d252f..85879b673 100644 --- a/assessment_module_manager/assessment_module_manager/endpoints/health_endpoint.py +++ b/assessment_module_manager/assessment_module_manager/endpoints/health_endpoint.py @@ -27,6 +27,7 @@ class HealthResponse(BaseModel): """ Response indicating whether the Assessment Module Manager is healthy, and whether all the modules are healthy (i.e. reachable). + Additional information about the modules is also provided. 
""" status: str = Field(const=True, default="ok", example="ok") modules: dict = Field( @@ -35,7 +36,8 @@ class HealthResponse(BaseModel): "module_example": { "url": "http://localhost:5001", "type": "programming", - "healthy": True + "healthy": True, + "supportsEvaluation": True } } ] @@ -56,6 +58,7 @@ async def get_health() -> HealthResponse: "url": module.url, "type": module.type, "healthy": await is_healthy(module), + "supportsEvaluation": module.supports_evaluation } for module in get_modules() } diff --git a/assessment_module_manager/assessment_module_manager/module/list_modules.py b/assessment_module_manager/assessment_module_manager/module/list_modules.py index b2862e489..e9f18f106 100644 --- a/assessment_module_manager/assessment_module_manager/module/list_modules.py +++ b/assessment_module_manager/assessment_module_manager/module/list_modules.py @@ -18,6 +18,7 @@ def list_modules() -> List[Module]: name=module, url=cast(AnyHttpUrl, modules_config[module]["url"]), type=ExerciseType(modules_config[module]["type"]), + supports_evaluation=modules_config[module].getboolean("supports_evaluation"), ) for module in modules_config.sections() ] diff --git a/assessment_module_manager/assessment_module_manager/module/module.py b/assessment_module_manager/assessment_module_manager/module/module.py index 35dbb6da1..65e99931f 100644 --- a/assessment_module_manager/assessment_module_manager/module/module.py +++ b/assessment_module_manager/assessment_module_manager/module/module.py @@ -8,3 +8,4 @@ class Module(BaseModel): name: str = Field(example="module_example") url: AnyHttpUrl = Field(example="http://localhost:5001") type: ExerciseType = Field(example=ExerciseType.text) + supports_evaluation: bool = Field(description="Whether the module supports evaluation", example=True) diff --git a/assessment_module_manager/modules.ini b/assessment_module_manager/modules.ini index 0dde7b074..3402183f0 100644 --- a/assessment_module_manager/modules.ini +++ 
b/assessment_module_manager/modules.ini @@ -1,19 +1,24 @@ [module_example] url = http://localhost:5001 type = programming +supports_evaluation = true [module_programming_llm] url = http://localhost:5002 type = programming +supports_evaluation = false [module_text_llm] url = http://localhost:5003 type = text +supports_evaluation = false [module_text_cofee] url = http://localhost:5004 type = text +supports_evaluation = false [module_programming_themisml] url = http://localhost:5005 type = programming +supports_evaluation = false diff --git a/playground/src/components/view_mode/module_requests/request_evaluation.tsx b/playground/src/components/view_mode/module_requests/request_evaluation.tsx index c98ac6d27..f004708d6 100644 --- a/playground/src/components/view_mode/module_requests/request_evaluation.tsx +++ b/playground/src/components/view_mode/module_requests/request_evaluation.tsx @@ -40,7 +40,7 @@ export default function RequestEvaluation() { error, mutate, reset, - } = useRequestEvaluation(); + } = useRequestEvaluation(undefined, true) // onlyUseContextModule = true for module requests only useEffect(() => setExercise(undefined), [module, dataMode]); @@ -130,7 +130,7 @@ export default function RequestEvaluation() { )} diff --git a/playground/src/hooks/athena/request_evaluation.ts b/playground/src/hooks/athena/request_evaluation.ts index d6ccc9f60..76227258f 100644 --- a/playground/src/hooks/athena/request_evaluation.ts +++ b/playground/src/hooks/athena/request_evaluation.ts @@ -5,26 +5,74 @@ import type ModuleResponse from "@/model/module_response"; import { UseMutationOptions, useMutation } from "react-query"; import { AthenaError, useAthenaFetcher } from "@/hooks/athena_fetcher"; import { Feedback } from "@/model/feedback"; +import { useModule } from "@/hooks/module_context"; +import useHealth from "@/hooks/health"; /** - * Requests an evaluation for an exercise and a submission given the true and predicted feedbacks from an Athena module. 
+ * Requests an evaluation for an exercise and a submission given the true and predicted feedbacks from healthy Athena modules. + * + * @param options The react-query options. + * @param onlyUseContextModule - If true, only the context module is used for the evaluation. Otherwise, all healthy modules are used. * * @example * const { data, isLoading, error, mutate } = useRequestEvaluation(); * mutate({ exercise, submission, trueFeedbacks, predictedFeedbacks }); - * - * @param options The react-query options. */ export default function useRequestEvaluation( options: Omit< - UseMutationOptions, + UseMutationOptions< + ModuleResponse[] | undefined, + AthenaError, + { + exercise: Exercise; + submission: Submission; + trueFeedbacks: Feedback[]; + predictedFeedbacks: Feedback[]; + } + >, "mutationFn" - > = {} + > = {}, + onlyUseContextModule = false ) { const athenaFetcher = useAthenaFetcher(); + const { module: contextModule } = useModule(); + const { data: health } = useHealth(); + return useMutation({ - mutationFn: async ({ exercise, submission, trueFeedbacks, predictedFeedbacks }) => { - return await athenaFetcher("/evaluation", { exercise, submission, true_feedbacks: trueFeedbacks, predicted_feedbacks: predictedFeedbacks }); + mutationFn: async ({ + exercise, + submission, + trueFeedbacks, + predictedFeedbacks, + }) => { + const modules = onlyUseContextModule + ? [contextModule] + : Object.values(health?.modules ?? 
{}).filter( + (module) => module.healthy && module.type === contextModule.type + ); + + const results = await Promise.allSettled( + modules.map((module) => + athenaFetcher( + "/evaluation", + { + exercise, + submission, + true_feedbacks: trueFeedbacks, + predicted_feedbacks: predictedFeedbacks, + }, + { module: module, moduleConfig: undefined } + ) + ) + ); + + return results.flatMap((result) => { + if (result.status === "fulfilled") { + return [result.value]; + } else { + return []; + } + }); }, ...options, }); diff --git a/playground/src/hooks/athena_fetcher.ts b/playground/src/hooks/athena_fetcher.ts index 1f23f20ba..69b9a2278 100644 --- a/playground/src/hooks/athena_fetcher.ts +++ b/playground/src/hooks/athena_fetcher.ts @@ -1,4 +1,5 @@ import type ModuleResponse from "@/model/module_response"; +import type { Module } from "@/hooks/module_context"; import baseUrl from "@/helpers/base_url"; import { useBaseInfo } from "@/hooks/base_info_context"; @@ -36,27 +37,34 @@ export class AthenaError extends Error { * @returns A function that can be used to fetch data from the module or that returns undefined if the module is not set. 
*/ export function useAthenaFetcher() { - const { module, moduleConfig } = useModule(); + const { module: contextModule, moduleConfig: contextModuleConfig } = useModule(); const { athenaUrl, athenaSecret } = useBaseInfo(); const { experimentId, moduleConfigurationId, runId } = useExperimentIdentifiers(); - const headers: { [key: string]: string } = {}; - if (moduleConfig) { - headers["X-Module-Config"] = JSON.stringify(moduleConfig); - } - if (experimentId) { - headers["X-Experiment-ID"] = experimentId; - } - if (moduleConfigurationId) { - headers["X-Module-Configuration-ID"] = moduleConfigurationId; - } - if (runId) { - headers["X-Run-ID"] = runId; - } - return ( - async (moduleRoute: string, body?: any) => { - const url = `${athenaUrl}/modules/${module.type}/${module.name}${moduleRoute}`; + async (moduleRoute: string, body?: any, overrideModule?: Module) => { + let targetModule = contextModule; + let targetModuleConfig = contextModuleConfig; + if (overrideModule) { + targetModule = overrideModule.module; + targetModuleConfig = overrideModule.moduleConfig; + } + + const headers: { [key: string]: string } = {}; + if (targetModuleConfig) { + headers["X-Module-Config"] = JSON.stringify(targetModuleConfig); + } + if (experimentId) { + headers["X-Experiment-ID"] = experimentId; + } + if (moduleConfigurationId) { + headers["X-Module-Configuration-ID"] = moduleConfigurationId; + } + if (runId) { + headers["X-Run-ID"] = runId; + } + + const url = `${athenaUrl}/modules/${targetModule.type}/${targetModule.name}${moduleRoute}`; const response = await fetch( `${baseUrl}/api/athena_request?${new URLSearchParams({ url: url, diff --git a/playground/src/hooks/batch_module_experiment.ts b/playground/src/hooks/batch_module_experiment.ts index a2ff10fb0..7c6f9f68d 100644 --- a/playground/src/hooks/batch_module_experiment.ts +++ b/playground/src/hooks/batch_module_experiment.ts @@ -412,7 +412,7 @@ export default function useBatchModuleExperiment(experiment: Experiment, moduleC } try 
{ - const response = await requestEvaluation.mutateAsync({ + const responses = await requestEvaluation.mutateAsync({ exercise: experiment.exercise, submission, trueFeedbacks: experiment.tutorFeedbacks.filter( @@ -423,11 +423,16 @@ export default function useBatchModuleExperiment(experiment: Experiment, moduleC if (!isMounted.current) { return; } - console.log(`Received evaluation for submission ${submission.id}:`, response.data); + + const data = Object.fromEntries( + responses.map((response) => [response.module_name, response.data]) + ); + + console.log(`Received evaluation for submission ${submission.id}:`, data); setSubmissionsWithAutomaticEvaluation((prevState) => { const newMap = new Map(prevState); - newMap.set(submission.id, response.data); + newMap.set(submission.id, data); return newMap; }); } catch (error) { diff --git a/playground/src/hooks/module_context.tsx b/playground/src/hooks/module_context.tsx index c2a96232e..f7aab7666 100644 --- a/playground/src/hooks/module_context.tsx +++ b/playground/src/hooks/module_context.tsx @@ -2,7 +2,7 @@ import type { ModuleMeta } from '@/model/health_response'; import { ReactNode, createContext, useContext, useReducer } from 'react'; -type Module = { +export type Module = { module: ModuleMeta; moduleConfig: any; }; diff --git a/playground/src/model/health_response.ts b/playground/src/model/health_response.ts index 7502b21d9..4ffe27de9 100644 --- a/playground/src/model/health_response.ts +++ b/playground/src/model/health_response.ts @@ -2,6 +2,7 @@ export type ModuleMeta = { name: string; type: string; healthy: boolean; + supportsEvaluation: boolean; }; export type HealthResponse = { From 7dbb316f1a7b003bff65c24c38092b9c5f0c2451 Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Tue, 7 Nov 2023 16:34:34 +0100 Subject: [PATCH 09/54] add UI changes --- .../batch_module_experiment.tsx | 64 ++++-- .../module_experiment_progress.tsx | 193 +++++++++++------- .../src/hooks/batch_module_experiment.ts | 22 +- 3 files changed, 177 insertions(+), 102 deletions(-) diff --git a/playground/src/components/view_mode/evaluation_mode/conduct_experiment/batch_module_experiment.tsx b/playground/src/components/view_mode/evaluation_mode/conduct_experiment/batch_module_experiment.tsx index d2e9fdb02..560d8b204 100644 --- a/playground/src/components/view_mode/evaluation_mode/conduct_experiment/batch_module_experiment.tsx +++ b/playground/src/components/view_mode/evaluation_mode/conduct_experiment/batch_module_experiment.tsx @@ -2,7 +2,12 @@ import type { Submission } from "@/model/submission"; import type { Experiment } from "../define_experiment"; import type { ExperimentStep } from "@/hooks/batch_module_experiment"; -import React, { useImperativeHandle, useState, ForwardedRef, useEffect } from "react"; +import React, { + useImperativeHandle, + useState, + ForwardedRef, + useEffect, +} from "react"; import Modal from "react-modal"; import { FullScreenHandle } from "react-full-screen"; @@ -14,6 +19,7 @@ import { ModuleConfiguration } from "../configure_modules"; import ModuleExperimentProgress from "./module_experiment_progress"; import SubmissionDetail from "@/components/details/submission_detail"; import ModuleConfigSelect from "@/components/selectors/module_config_select"; +import { twMerge } from "tailwind-merge"; type ConductBatchModuleExperimentProps = { experiment: Experiment; @@ -53,7 +59,10 @@ const ConductBatchModuleExperiment = React.forwardRef< ref: ForwardedRef ) => { const { data: health } = useHealth(); - const moduleExperiment = useBatchModuleExperiment(experiment, moduleConfiguration); + const moduleExperiment = useBatchModuleExperiment( + experiment, + moduleConfiguration + ); const [showProgress, setShowProgress] = 
useState(true); const [isConfigModalOpen, setConfigModalOpen] = useState(false); @@ -88,14 +97,6 @@ const ConductBatchModuleExperiment = React.forwardRef<

{moduleConfiguration.name}

- {moduleExperiment.continueAfterTraining && ( - - )}
) { return ( - + { @@ -86,10 +86,11 @@ export default function ModuleExperimentProgress({ className={twMerge( "flex items-center justify-center w-6 h-6 border rounded-full shrink-0", stepToIndex(data.step) >= 2 - ? stepToIndex(data.step) > 2 || moduleExperiment.continueAfterTraining - ? "text-green-500 border-green-500" - : "text-yellow-500 border-yellow-500" - : "text-gray-500 border-gray-500" + ? stepToIndex(data.step) > 2 || + moduleExperiment.continueAfterTraining + ? "text-green-500 border-green-500" + : "text-yellow-500 border-yellow-500" + : "text-gray-500 border-gray-500" )} > 2 @@ -98,10 +99,11 @@ export default function ModuleExperimentProgress({ className={twMerge( "flex flex-col", stepToIndex(data.step) >= 2 - ? stepToIndex(data.step) > 2 || moduleExperiment.continueAfterTraining - ? "text-green-500" - : "text-yellow-500" - : "text-gray-500" + ? stepToIndex(data.step) > 2 || + moduleExperiment.continueAfterTraining + ? "text-green-500" + : "text-yellow-500" + : "text-gray-500" )} > Sending Training Feedback @@ -129,70 +131,121 @@ export default function ModuleExperimentProgress({ )} {/* Generate Feedback Suggestions */} -
  • - 3 - ? "text-green-500 border-green-500" - : stepToIndex(data.step) === 3 - ? "text-yellow-500 border-yellow-500" - : "text-gray-500 border-gray-500" - )} - > - {experiment.trainingSubmissions ? 3 : 2} - -
    3 - ? "text-green-500" - : stepToIndex(data.step) === 3 - ? "text-yellow-500" - : "text-gray-500" - )} - > - Generating Feedback Suggestions - {moduleRequests.requestFeedbackSuggestions.isLoading && ( - - Generating feedback suggestions... ( - {data.submissionsWithFeedbackSuggestions.size + 1}/ - {experiment.evaluationSubmissions.length}) - - )} - {moduleRequests.requestFeedbackSuggestions.isError && ( - - {moduleRequests.requestFeedbackSuggestions.error.message} - - )} - {moduleRequests.requestFeedbackSuggestions.isSuccess && ( - - Generated feedback suggestions ( - {data.submissionsWithFeedbackSuggestions.size}/ - {experiment.evaluationSubmissions.length}) - - )} +
  • +
    + 3 + ? "text-green-500 border-green-500" + : stepToIndex(data.step) === 3 + ? "text-yellow-500 border-yellow-500" + : "text-gray-500 border-gray-500" + )} + > + {experiment.trainingSubmissions ? 3 : 2} + +
    3 + ? "text-green-500" + : stepToIndex(data.step) === 3 + ? "text-yellow-500" + : "text-gray-500" + )} + > + Generating Feedback Suggestions + {moduleRequests.requestFeedbackSuggestions.isLoading && ( + + Generating feedback suggestions... ( + {data.submissionsWithFeedbackSuggestions.size + 1}/ + {experiment.evaluationSubmissions.length}) + + )} + {moduleRequests.requestFeedbackSuggestions.isError && ( + + {moduleRequests.requestFeedbackSuggestions.error.message} + + )} + {moduleRequests.requestFeedbackSuggestions.isSuccess && ( + + Generated feedback suggestions ( + {data.submissionsWithFeedbackSuggestions.size}/ + {experiment.evaluationSubmissions.length}) + + )} +
    + {moduleExperiment.continueAfterTraining && ( + + )}
  • -
  • - - {experiment.trainingSubmissions ? 4 : 3} - -
    - Finished + + {/* Run Automatic Evaluation */} +
  • +
    + + {experiment.trainingSubmissions ? 4 : 3} + +
    + Run Automatic Evaluation + {moduleRequests.requestEvaluation.isLoading && ( + + Evaluating submissions... ( + {(submissionsWithAutomaticEvaluation?.size ?? 0) + 1}/ + {experiment.evaluationSubmissions.length}) + + )} + {moduleRequests.requestEvaluation.isError && ( + + {moduleRequests.requestEvaluation.error.message} + + )} + {moduleRequests.requestEvaluation.isSuccess && ( + + Evaluated submissions ( + {submissionsWithAutomaticEvaluation?.size ?? 0}/ + {experiment.evaluationSubmissions.length}) + + )} +
    + {moduleExperiment.continueWithAutomaticEvaluation && ( + + )}
  • ); diff --git a/playground/src/hooks/batch_module_experiment.ts b/playground/src/hooks/batch_module_experiment.ts index 7c6f9f68d..89480a30a 100644 --- a/playground/src/hooks/batch_module_experiment.ts +++ b/playground/src/hooks/batch_module_experiment.ts @@ -54,8 +54,8 @@ export default function useBatchModuleExperiment(experiment: Experiment, moduleC // Stores automatic evaluation of submissions const [submissionsWithAutomaticEvaluation, setSubmissionsWithAutomaticEvaluation] = useState< - Map - >(new Map()); + Map | undefined + >(undefined); const [processingStep, setProcessingStep] = useState< ExperimentStep | undefined @@ -103,7 +103,7 @@ export default function useBatchModuleExperiment(experiment: Experiment, moduleC } : {} ), ...( - submissionsWithAutomaticEvaluation.size > 0 ? { + submissionsWithAutomaticEvaluation && submissionsWithAutomaticEvaluation.size > 0 ? { automaticEvaluation: { type: "automaticEvaluation", runId: data.runId, @@ -189,6 +189,10 @@ export default function useBatchModuleExperiment(experiment: Experiment, moduleC })); }) : undefined; + const continueWithAutomaticEvaluation = (data.step === "finished" && submissionsWithAutomaticEvaluation === undefined) ? 
(() => { + stepAutomaticEvaluation(); + }) : undefined; + // Module requests const sendSubmissions = useSendSubmissions(); const sendFeedbacks = useSendFeedbacks(); @@ -386,7 +390,7 @@ export default function useBatchModuleExperiment(experiment: Experiment, moduleC console.log("Running automatic evaluation..."); let remainingSubmissions = experiment.evaluationSubmissions.filter( - (submission) => !submissionsWithAutomaticEvaluation.has(submission.id) + (submission) => !submissionsWithAutomaticEvaluation?.has(submission.id) ); let index = 0; @@ -477,20 +481,18 @@ export default function useBatchModuleExperiment(experiment: Experiment, moduleC processingStep !== "generatingFeedbackSuggestions" ) { stepGenerateFeedbackSuggestions(); - } else if ( - data.step === "finished" && - processingStep !== "finished" - ) { - stepAutomaticEvaluation(); - } + } + // Automatic evaluation is triggered manually }, [data.step]); return { data, submissionsWithManualRatings, + submissionsWithAutomaticEvaluation, getManualRatingsSetter, startExperiment, continueAfterTraining, + continueWithAutomaticEvaluation, exportData, importData, moduleRequests: { From 5053a4aa54d3d8767e370f4d365d5d3f0ef00b85 Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Tue, 7 Nov 2023 16:57:53 +0100 Subject: [PATCH 10/54] fix color --- .../conduct_experiment/module_experiment_progress.tsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/playground/src/components/view_mode/evaluation_mode/conduct_experiment/module_experiment_progress.tsx b/playground/src/components/view_mode/evaluation_mode/conduct_experiment/module_experiment_progress.tsx index 345c9e67a..75d0e9964 100644 --- a/playground/src/components/view_mode/evaluation_mode/conduct_experiment/module_experiment_progress.tsx +++ b/playground/src/components/view_mode/evaluation_mode/conduct_experiment/module_experiment_progress.tsx @@ -197,7 +197,7 @@ export default function ModuleExperimentProgress({ submissionsWithAutomaticEvaluation?.size === data.submissionsWithFeedbackSuggestions.size ? "text-green-500 border-green-500" - : stepToIndex(data.step) === 4 + : stepToIndex(data.step) === 4 && submissionsWithAutomaticEvaluation !== undefined ? "text-yellow-500 border-yellow-500" : "text-gray-500 border-gray-500" )} @@ -211,7 +211,7 @@ export default function ModuleExperimentProgress({ submissionsWithAutomaticEvaluation?.size === data.submissionsWithFeedbackSuggestions.size ? "text-green-500" - : stepToIndex(data.step) === 4 + : stepToIndex(data.step) === 4 && submissionsWithAutomaticEvaluation !== undefined ? "text-yellow-500" : "text-gray-500" )} From e9bcb263309ecfb9a299dbfa706f72f2fd466ffd Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Tue, 7 Nov 2023 17:18:41 +0100 Subject: [PATCH 11/54] add evaluation model --- module_text_llm/.env.example | 6 ++++++ .../module_text_llm/helpers/models/__init__.py | 12 +++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/module_text_llm/.env.example b/module_text_llm/.env.example index 2ecf0a8f4..aedc5bdba 100644 --- a/module_text_llm/.env.example +++ b/module_text_llm/.env.example @@ -14,6 +14,12 @@ DATABASE_URL=sqlite:///../data/data.sqlite # See below for options, available models are also logged on startup LLM_DEFAULT_MODEL="azure_openai_gpt-35" +# Enable LLM-as-a-judge approach 0 = disabled, 1 = enabled +LLM_ENABLE_LLM_AS_A_JUDGE=1 +# Evaluation model to use for the LLM-as-a-judge approach [Only important if you want to use it in the /evaluate endpoint] +# See below for options, available models are also logged on startup +LLM_EVALUATION_MODEL="azure_openai_gpt-4" + # Standard OpenAI (Non-Azure) [leave blank if not used] # Model names prefixed with `openai_` followed by the model name, e.g. 
`openai_text-davinci-003` # A list of models can be found in `module_text_llm/helpers/models/openai.py` (openai_models) diff --git a/module_text_llm/module_text_llm/helpers/models/__init__.py b/module_text_llm/module_text_llm/helpers/models/__init__.py index 4d2fe5a65..144bcf923 100644 --- a/module_text_llm/module_text_llm/helpers/models/__init__.py +++ b/module_text_llm/module_text_llm/helpers/models/__init__.py @@ -1,10 +1,16 @@ import os -from typing import Type, Union, List +from typing import Type, Union, List, Optional +from langchain.base_language import BaseLanguageModel + from module_text_llm.helpers.models.model_config import ModelConfig DefaultModelConfig: Type[ModelConfig] default_model_name = os.environ.get("LLM_DEFAULT_MODEL") +evaluation_model_name = os.environ.get("LLM_EVALUATION_MODEL") + +# Model used during evaluation for judging the output (should be a more powerful model) +evaluation_model: Optional[BaseLanguageModel] = None types: List[Type[ModelConfig]] = [] try: @@ -12,6 +18,8 @@ types.append(openai_config.OpenAIModelConfig) if default_model_name in openai_config.available_models: DefaultModelConfig = openai_config.OpenAIModelConfig + if evaluation_model_name in openai_config.available_models: + evaluation_model = openai_config.available_models[evaluation_model_name] except AttributeError: pass @@ -20,6 +28,8 @@ types.append(replicate_config.ReplicateModelConfig) if default_model_name in replicate_config.available_models: DefaultModelConfig = replicate_config.ReplicateModelConfig + if evaluation_model_name in replicate_config.available_models: + evaluation_model = replicate_config.available_models[evaluation_model_name] except AttributeError: pass From cabed57f55d9decb2fbad1a7bbba038e295ab55d Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
@evaluation_provider
async def evaluate_feedback(
    exercise: Exercise, submission: Submission,
    true_feedbacks: List[Feedback], predicted_feedbacks: List[Feedback],
) -> Any:
    """Evaluate predicted feedback against tutor ("true") feedback for one submission.

    Called through the Athena `/evaluation` endpoint (usually by the Playground
    during research/development runs).

    Args:
        exercise: The text exercise the submission belongs to.
        submission: The submission that was assessed.
        true_feedbacks: Feedback given by human tutors (ground truth).
        predicted_feedbacks: Feedback suggestions produced by this module.

    Returns:
        A dict of named evaluation results. Currently the only entry is
        "llm-as-a-judge" (added only when enabled via the environment), holding
        the per-feedback accuracy estimation from ``generate_evaluation``.
    """
    logger.info(
        "evaluate_feedback: Evaluation for submission %d of exercise %d was requested with %d true and %d predicted feedbacks",
        submission.id, exercise.id, len(true_feedbacks), len(predicted_feedbacks)
    )

    evaluation = {}
    # .env.example documents LLM_ENABLE_LLM_AS_A_JUDGE as "0 = disabled,
    # 1 = enabled". A plain bool(os.environ.get(...)) is wrong for that
    # contract: bool("0") is True, so setting the variable to 0 would still
    # enable the judge. Compare against the literal "1" instead.
    if os.environ.get("LLM_ENABLE_LLM_AS_A_JUDGE", "0") == "1":
        evaluation["llm-as-a-judge"] = await generate_evaluation(exercise, submission, true_feedbacks, predicted_feedbacks)

    return evaluation
+ + +async def generate_evaluation( + exercise: Exercise, + submission: Submission, + true_feedbacks: List[Feedback], + predicted_feedbacks: List[Feedback] +) -> Dict[int, dict]: + + if evaluation_model is None: + raise EnvironmentError("No evaluation model available, please set up LLM_EVALUATION_MODEL correctly" + "by setting it to one of the available models logged during startup.") + max_input_tokens = 3000 + + def feedback_to_dict(feedback: Feedback): + line_start, line_end = get_line_range_from_index_range( + feedback.index_start, feedback.index_end, submission.text) + return { + "id": feedback.id, + "title": feedback.title, + "description": feedback.description, + "line_start": line_start, + "line_end": line_end, + "credits": feedback.credits + } + + prompt_input = { + "submission": add_sentence_numbers(submission.text), + "true_feedbacks": json.dumps([feedback_to_dict(feedback) for feedback in true_feedbacks]), + "predicted_feedbacks": json.dumps([feedback_to_dict(feedback) for feedback in predicted_feedbacks]), + } + + chat_prompt = get_chat_prompt_with_formatting_instructions( + model=evaluation_model, + system_message=system_message, + human_message=human_message, + pydantic_object=Evaluation + ) + + # Check if the prompt is too long and omit features if necessary (in order of importance) + omittable_features = ["submission"] + prompt_input, should_run = check_prompt_length_and_omit_features_if_necessary( + prompt=chat_prompt, + prompt_input=prompt_input, + max_input_tokens=max_input_tokens, + omittable_features=omittable_features, + debug=False + ) + + if not should_run: + logger.warning("Evaluation input too long. Skipping.") + return {} + + result = await predict_and_parse( + model=evaluation_model, + chat_prompt=chat_prompt, + prompt_input=prompt_input, + pydantic_object=Evaluation, + tags=[ + f"exercise-{exercise.id}", + f"submission-{submission.id}", + "evaluation" + ] + ) + + if result is None: + logger.warning("Evaluation failed. 
def get_line_range_from_index_range(index_start: Optional[int], index_end: Optional[int], content: str) -> Tuple[Optional[int], Optional[int]]:
    """Map a character index range in ``content`` to a 1-based sentence-line range.

    Inverse companion of ``get_index_range_from_line_range``: sentences are the
    "lines" here, as produced by ``get_sentence_spans``.

    Args:
        index_start: Start character index of the range, or None.
        index_end: End character index of the range, or None.
        content: The submission text the indices refer to.

    Returns:
        ``(line_start, line_end)`` 1-based sentence numbers, or ``(None, None)``
        when both indices are None. Either element may be None when the
        corresponding index does not fall inside any sentence span.
    """
    if index_start is None and index_end is None:
        return None, None

    # Fill a missing bound from the other one. Explicit None checks are
    # required: `index_start or index_end` would wrongly replace a legitimate
    # start index of 0 (falsy) with the end index.
    if index_start is None:
        index_start = index_end if index_end is not None else 0
    if index_end is None:
        index_end = index_start

    if index_start > index_end:
        index_start, index_end = index_end, index_start

    sentence_spans = get_sentence_spans(content)

    line_start = None
    line_end = None

    # Sentence spans are ordered, so the first span containing index_end is
    # also the last one we need to look at.
    for line_number, (start_index, end_index) in enumerate(sentence_spans, start=1):
        if start_index <= index_start < end_index:
            line_start = line_number
        if start_index <= index_end <= end_index:
            line_end = line_number
            break

    return line_start, line_end
+ +# Score Criteria +Accept feedback that is useful to the tutor, meaning that it can be applied to the submission with minor or no modification. \ +Our goal is to reduce the workload of tutors and reduce their cognitive load. \ +Reject feedback that is not useful and would burden the tutor. + +Put the focus on the description of the feedback, the title is optional. \ +The `line_start` and `line_end` should make sense with respect to the submission but do not need to be exact. \ +Credits should make sense with respect to the feedback and the submission but also do not need to be exact. + +# Submission (with sentence numbers : ): +{submission} + +# Example (Human) Feedback: +{true_feedbacks} +""" + +human_message = """\ +### Model Output: +{predicted_feedbacks} +""" \ No newline at end of file From 2595d5c60daa86a50555984be9510e97c46e04e6 Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Tue, 7 Nov 2023 17:55:01 +0100 Subject: [PATCH 13/54] fix ui issue and some var naming --- module_text_llm/module_text_llm/__main__.py | 2 +- playground/src/hooks/batch_module_experiment.ts | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/module_text_llm/module_text_llm/__main__.py b/module_text_llm/module_text_llm/__main__.py index e3f7d7769..cc6c42baa 100644 --- a/module_text_llm/module_text_llm/__main__.py +++ b/module_text_llm/module_text_llm/__main__.py @@ -47,7 +47,7 @@ async def evaluate_feedback( evaluation = {} if bool(os.environ.get("LLM_ENABLE_LLM_AS_A_JUDGE")): - evaluation["llm-as-a-judge"] = await generate_evaluation(exercise, submission, true_feedbacks, predicted_feedbacks) + evaluation["llm_as_a_judge"] = await generate_evaluation(exercise, submission, true_feedbacks, predicted_feedbacks) return evaluation diff --git a/playground/src/hooks/batch_module_experiment.ts b/playground/src/hooks/batch_module_experiment.ts index 89480a30a..e2610fbb8 100644 --- a/playground/src/hooks/batch_module_experiment.ts +++ 
b/playground/src/hooks/batch_module_experiment.ts @@ -190,6 +190,7 @@ export default function useBatchModuleExperiment(experiment: Experiment, moduleC }) : undefined; const continueWithAutomaticEvaluation = (data.step === "finished" && submissionsWithAutomaticEvaluation === undefined) ? (() => { + setSubmissionsWithAutomaticEvaluation((prevState) => new Map(prevState)); stepAutomaticEvaluation(); }) : undefined; From 27397579cddde0f3060c53d449075d9691d16a0f Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Tue, 7 Nov 2023 18:54:25 +0100 Subject: [PATCH 14/54] fix line break --- playground/src/components/details/exercise_detail/common.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/playground/src/components/details/exercise_detail/common.tsx b/playground/src/components/details/exercise_detail/common.tsx index 852b19a66..8c583a292 100644 --- a/playground/src/components/details/exercise_detail/common.tsx +++ b/playground/src/components/details/exercise_detail/common.tsx @@ -54,7 +54,7 @@ export default function CommonExerciseDetail({ Missing criterion title )} - Grading Criterion {criterion.id} + Grading Criterion {criterion.id} {criterion.structured_grading_instructions.map( From 6b383e3c3729745143aa8505397bca93681d66a5 Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
def get_statistics(runs: List[Run]):
    """Aggregate timing and token usage over a collection of LangSmith runs.

    A run missing a field (e.g. no ``end_time`` because it is still open, or
    unrecorded token counts) is skipped for that field only; an empty input
    yields all-zero statistics.
    """
    response_time = 0
    prompt_tokens = 0
    completion_tokens = 0
    total_tokens = 0
    for run in runs:
        if run.end_time is not None:
            response_time += (run.end_time - run.start_time).total_seconds()
        if run.prompt_tokens is not None:
            prompt_tokens += run.prompt_tokens
        if run.completion_tokens is not None:
            completion_tokens += run.completion_tokens
        if run.total_tokens is not None:
            total_tokens += run.total_tokens
    return {
        "response_time": response_time,
        "prompt_tokens": prompt_tokens,
        "completion_tokens": completion_tokens,
        "total_tokens": total_tokens,
    }
[[package]] name = "aiohttp" version = "3.8.6" description = "Async http client/server framework (asyncio)" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -112,6 +113,7 @@ speedups = ["Brotli", "aiodns", "cchardet"] name = "aiosignal" version = "1.3.1" description = "aiosignal: a list of registered asynchronous callbacks" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -126,6 +128,7 @@ frozenlist = ">=1.1.0" name = "anyio" version = "3.7.1" description = "High level compatibility layer for multiple asynchronous event loop implementations" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -146,6 +149,7 @@ trio = ["trio (<0.22)"] name = "astroid" version = "2.15.8" description = "An abstract syntax tree for Python with inference support." +category = "dev" optional = false python-versions = ">=3.7.2" files = [ @@ -161,6 +165,7 @@ wrapt = {version = ">=1.14,<2", markers = "python_version >= \"3.11\""} name = "async-timeout" version = "4.0.3" description = "Timeout context manager for asyncio programs" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -172,6 +177,7 @@ files = [ name = "athena" version = "1.0.0" description = "This is a helper module for easier development of Athena modules. It provides communication functionality with the Assessment Module manager, as well as helper functions for storage." +category = "main" optional = false python-versions = "3.11.*" files = [] @@ -193,6 +199,7 @@ url = "../athena" name = "attrs" version = "23.1.0" description = "Classes Without Boilerplate" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -211,6 +218,7 @@ tests-no-zope = ["cloudpickle", "hypothesis", "mypy (>=1.1.1)", "pympler", "pyte name = "certifi" version = "2023.7.22" description = "Python package for providing Mozilla's CA Bundle." 
+category = "main" optional = false python-versions = ">=3.6" files = [ @@ -222,6 +230,7 @@ files = [ name = "charset-normalizer" version = "3.3.1" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." +category = "main" optional = false python-versions = ">=3.7.0" files = [ @@ -321,6 +330,7 @@ files = [ name = "click" version = "8.1.7" description = "Composable command line interface toolkit" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -335,6 +345,7 @@ colorama = {version = "*", markers = "platform_system == \"Windows\""} name = "colorama" version = "0.4.6" description = "Cross-platform colored terminal text." +category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" files = [ @@ -346,6 +357,7 @@ files = [ name = "dataclasses-json" version = "0.6.1" description = "Easily serialize dataclasses to and from JSON." +category = "main" optional = false python-versions = ">=3.7,<4.0" files = [ @@ -361,6 +373,7 @@ typing-inspect = ">=0.4.0,<1" name = "dill" version = "0.3.7" description = "serialize all of Python" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -375,6 +388,7 @@ graph = ["objgraph (>=1.7.2)"] name = "dodgy" version = "0.2.1" description = "Dodgy: Searches for dodgy looking lines in Python code" +category = "dev" optional = false python-versions = "*" files = [ @@ -386,6 +400,7 @@ files = [ name = "fastapi" version = "0.96.1" description = "FastAPI framework, high performance, easy to learn, fast to code, ready for production" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -407,6 +422,7 @@ test = ["anyio[trio] (>=3.2.1,<4.0.0)", "black (==23.1.0)", "coverage[toml] (>=6 name = "flake8" version = "2.3.0" description = "the modular source code checker: pep8, pyflakes and co" +category = "dev" optional = false python-versions = "*" files = [ @@ -423,6 
+439,7 @@ pyflakes = ">=0.8.1" name = "flake8-polyfill" version = "1.0.2" description = "Polyfill package for Flake8 plugins" +category = "dev" optional = false python-versions = "*" files = [ @@ -437,6 +454,7 @@ flake8 = "*" name = "frozenlist" version = "1.4.0" description = "A list-like structure which implements collections.abc.MutableSequence" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -507,6 +525,7 @@ files = [ name = "gitdb" version = "4.0.11" description = "Git Object Database" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -521,6 +540,7 @@ smmap = ">=3.0.1,<6" name = "gitpython" version = "3.1.40" description = "GitPython is a Python library used to interact with Git repositories" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -538,6 +558,7 @@ test = ["black", "coverage[toml]", "ddt (>=1.1.1,!=1.4.3)", "mock", "mypy", "pre name = "greenlet" version = "3.0.1" description = "Lightweight in-process concurrent programming" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -608,6 +629,7 @@ test = ["objgraph", "psutil"] name = "h11" version = "0.14.0" description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -619,6 +641,7 @@ files = [ name = "httpcore" version = "0.17.3" description = "A minimal low-level HTTP client." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -630,16 +653,17 @@ files = [ anyio = ">=3.0,<5.0" certifi = "*" h11 = ">=0.13,<0.15" -sniffio = "==1.*" +sniffio = ">=1.0.0,<2.0.0" [package.extras] http2 = ["h2 (>=3,<5)"] -socks = ["socksio (==1.*)"] +socks = ["socksio (>=1.0.0,<2.0.0)"] [[package]] name = "httpx" version = "0.24.1" description = "The next generation HTTP client." 
+category = "main" optional = false python-versions = ">=3.7" files = [ @@ -655,14 +679,15 @@ sniffio = "*" [package.extras] brotli = ["brotli", "brotlicffi"] -cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"] +cli = ["click (>=8.0.0,<9.0.0)", "pygments (>=2.0.0,<3.0.0)", "rich (>=10,<14)"] http2 = ["h2 (>=3,<5)"] -socks = ["socksio (==1.*)"] +socks = ["socksio (>=1.0.0,<2.0.0)"] [[package]] name = "idna" version = "3.4" description = "Internationalized Domain Names in Applications (IDNA)" +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -674,6 +699,7 @@ files = [ name = "isort" version = "5.12.0" description = "A Python utility / library to sort Python imports." +category = "dev" optional = false python-versions = ">=3.8.0" files = [ @@ -691,6 +717,7 @@ requirements-deprecated-finder = ["pip-api", "pipreqs"] name = "joblib" version = "1.3.2" description = "Lightweight pipelining with Python functions" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -702,6 +729,7 @@ files = [ name = "jsonpatch" version = "1.33" description = "Apply JSON-Patches (RFC 6902)" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*" files = [ @@ -716,6 +744,7 @@ jsonpointer = ">=1.9" name = "jsonpointer" version = "2.4" description = "Identify specific nodes in a JSON document (RFC 6901)" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*" files = [ @@ -727,6 +756,7 @@ files = [ name = "langchain" version = "0.0.325" description = "Building applications with LLMs through composability" +category = "main" optional = false python-versions = ">=3.8.1,<4.0" files = [ @@ -764,13 +794,14 @@ text-helpers = ["chardet (>=5.1.0,<6.0.0)"] [[package]] name = "langsmith" -version = "0.0.52" +version = "0.0.60" description = "Client library to connect to the LangSmith LLM Tracing and Evaluation 
Platform." +category = "main" optional = false python-versions = ">=3.8.1,<4.0" files = [ - {file = "langsmith-0.0.52-py3-none-any.whl", hash = "sha256:d02a0ade5a53b36143084e57003ed38ccbdf5fc15a5a0eb14f8989ceaee0b807"}, - {file = "langsmith-0.0.52.tar.gz", hash = "sha256:1dc29082d257deea1859cb22c53d9481ca5c4a37f3af40c0f9d300fb8adc91db"}, + {file = "langsmith-0.0.60-py3-none-any.whl", hash = "sha256:94f9ef9898fa5fb5afed72538bb3ccca9a92a841b37654d699c732a76c623379"}, + {file = "langsmith-0.0.60.tar.gz", hash = "sha256:f63513398d8d4530e3aa552926924c8443ac9d21c3812f303fa20fa2c44a9a42"}, ] [package.dependencies] @@ -781,6 +812,7 @@ requests = ">=2,<3" name = "lazy-object-proxy" version = "1.9.0" description = "A fast and thorough lazy object proxy." +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -826,6 +858,7 @@ files = [ name = "marshmallow" version = "3.20.1" description = "A lightweight library for converting complex datatypes to and from native Python datatypes." +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -846,6 +879,7 @@ tests = ["pytest", "pytz", "simplejson"] name = "mccabe" version = "0.7.0" description = "McCabe checker, plugin for flake8" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -857,6 +891,7 @@ files = [ name = "multidict" version = "6.0.4" description = "multidict implementation" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -940,6 +975,7 @@ files = [ name = "mypy" version = "1.6.1" description = "Optional static typing for Python" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -985,6 +1021,7 @@ reports = ["lxml"] name = "mypy-extensions" version = "1.0.0" description = "Type system extensions for programs checked with the mypy type checker." 
+category = "main" optional = false python-versions = ">=3.5" files = [ @@ -996,6 +1033,7 @@ files = [ name = "nltk" version = "3.8.1" description = "Natural Language Toolkit" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1021,6 +1059,7 @@ twitter = ["twython"] name = "numpy" version = "1.26.1" description = "Fundamental package for array computing in Python" +category = "main" optional = false python-versions = "<3.13,>=3.9" files = [ @@ -1062,6 +1101,7 @@ files = [ name = "openai" version = "0.27.10" description = "Python client library for the OpenAI API" +category = "main" optional = false python-versions = ">=3.7.1" files = [ @@ -1076,7 +1116,7 @@ tqdm = "*" [package.extras] datalib = ["numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"] -dev = ["black (>=21.6b0,<22.0)", "pytest (==6.*)", "pytest-asyncio", "pytest-mock"] +dev = ["black (>=21.6b0,<22.0)", "pytest (>=6.0.0,<7.0.0)", "pytest-asyncio", "pytest-mock"] embeddings = ["matplotlib", "numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)", "plotly", "scikit-learn (>=1.0.2)", "scipy", "tenacity (>=8.0.1)"] wandb = ["numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)", "wandb"] @@ -1084,6 +1124,7 @@ wandb = ["numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1 name = "packaging" version = "23.2" description = "Core utilities for Python packages" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1095,6 +1136,7 @@ files = [ name = "pep8" version = "1.7.1" description = "Python style guide checker" +category = "dev" optional = false python-versions = "*" files = [ @@ -1106,6 +1148,7 @@ files = [ name = "pep8-naming" version = "0.10.0" description = "Check PEP-8 naming conventions, plugin for flake8" +category = "dev" optional = false python-versions = "*" files = [ @@ -1120,6 +1163,7 @@ flake8-polyfill = ">=1.0.2,<2" name = "platformdirs" version = "3.11.0" 
description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1135,6 +1179,7 @@ test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4)", "pytest-co name = "prospector" version = "1.10.3" description = "Prospector is a tool to analyse Python code by aggregating the result of other tools." +category = "dev" optional = false python-versions = ">=3.7.2,<4.0" files = [ @@ -1174,6 +1219,7 @@ with-vulture = ["vulture (>=1.5)"] name = "psycopg2" version = "2.9.9" description = "psycopg2 - Python-PostgreSQL Database Adapter" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1181,8 +1227,6 @@ files = [ {file = "psycopg2-2.9.9-cp310-cp310-win_amd64.whl", hash = "sha256:426f9f29bde126913a20a96ff8ce7d73fd8a216cfb323b1f04da402d452853c3"}, {file = "psycopg2-2.9.9-cp311-cp311-win32.whl", hash = "sha256:ade01303ccf7ae12c356a5e10911c9e1c51136003a9a1d92f7aa9d010fb98372"}, {file = "psycopg2-2.9.9-cp311-cp311-win_amd64.whl", hash = "sha256:121081ea2e76729acfb0673ff33755e8703d45e926e416cb59bae3a86c6a4981"}, - {file = "psycopg2-2.9.9-cp312-cp312-win32.whl", hash = "sha256:d735786acc7dd25815e89cc4ad529a43af779db2e25aa7c626de864127e5a024"}, - {file = "psycopg2-2.9.9-cp312-cp312-win_amd64.whl", hash = "sha256:a7653d00b732afb6fc597e29c50ad28087dcb4fbfb28e86092277a559ae4e693"}, {file = "psycopg2-2.9.9-cp37-cp37m-win32.whl", hash = "sha256:5e0d98cade4f0e0304d7d6f25bbfbc5bd186e07b38eac65379309c4ca3193efa"}, {file = "psycopg2-2.9.9-cp37-cp37m-win_amd64.whl", hash = "sha256:7e2dacf8b009a1c1e843b5213a87f7c544b2b042476ed7755be813eaf4e8347a"}, {file = "psycopg2-2.9.9-cp38-cp38-win32.whl", hash = "sha256:ff432630e510709564c01dafdbe996cb552e0b9f3f065eb89bdce5bd31fabf4c"}, @@ -1196,6 +1240,7 @@ files = [ name = "pycodestyle" version = "2.11.1" description = "Python style guide checker" +category = "dev" optional = false python-versions 
= ">=3.8" files = [ @@ -1207,6 +1252,7 @@ files = [ name = "pydantic" version = "1.10.13" description = "Data validation and settings management using python type hints" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1259,6 +1305,7 @@ email = ["email-validator (>=1.0.3)"] name = "pydocstyle" version = "6.3.0" description = "Python docstring style checker" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -1276,6 +1323,7 @@ toml = ["tomli (>=1.2.3)"] name = "pyflakes" version = "2.5.0" description = "passive checker of Python programs" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -1287,6 +1335,7 @@ files = [ name = "pylint" version = "2.17.7" description = "python code static checker" +category = "dev" optional = false python-versions = ">=3.7.2" files = [ @@ -1311,6 +1360,7 @@ testutils = ["gitpython (>3)"] name = "pylint-celery" version = "0.3" description = "pylint-celery is a Pylint plugin to aid Pylint in recognising and understandingerrors caused when using the Celery library" +category = "dev" optional = false python-versions = "*" files = [ @@ -1326,6 +1376,7 @@ pylint-plugin-utils = ">=0.2.1" name = "pylint-django" version = "2.5.3" description = "A Pylint plugin to help Pylint understand the Django web framework" +category = "dev" optional = false python-versions = "*" files = [ @@ -1345,6 +1396,7 @@ with-django = ["Django"] name = "pylint-flask" version = "0.6" description = "pylint-flask is a Pylint plugin to aid Pylint in recognizing and understanding errors caused when using Flask" +category = "dev" optional = false python-versions = "*" files = [ @@ -1358,6 +1410,7 @@ pylint-plugin-utils = ">=0.2.1" name = "pylint-plugin-utils" version = "0.7" description = "Utilities and helpers for writing Pylint plugins" +category = "dev" optional = false python-versions = ">=3.6.2" files = [ @@ -1372,6 +1425,7 @@ pylint = ">=1.7" name = "python-dotenv" version = "1.0.0" description = 
"Read key-value pairs from a .env file and set them as environment variables" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -1386,6 +1440,7 @@ cli = ["click (>=5.0)"] name = "pyyaml" version = "6.0.1" description = "YAML parser and emitter for Python" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1394,7 +1449,6 @@ files = [ {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, - {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, @@ -1402,15 +1456,8 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, - {file = 
"PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, - {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, - {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, - {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, - {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, - {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, @@ -1427,7 +1474,6 @@ files = [ {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, {file = 
"PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, - {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, @@ -1435,7 +1481,6 @@ files = [ {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, - {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, @@ -1445,6 +1490,7 @@ files = [ name = "regex" version = "2023.10.3" description = "Alternative 
regular expression module, to replace re." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1542,6 +1588,7 @@ files = [ name = "replicate" version = "0.11.0" description = "Python client for Replicate" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -1561,6 +1608,7 @@ dev = ["black", "mypy", "pytest", "responses", "ruff"] name = "requests" version = "2.31.0" description = "Python HTTP for Humans." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1582,6 +1630,7 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] name = "requirements-detector" version = "1.2.2" description = "Python tool to find and list requirements of a Python project" +category = "dev" optional = false python-versions = ">=3.7,<4.0" files = [ @@ -1599,6 +1648,7 @@ toml = ">=0.10.2,<0.11.0" name = "semver" version = "3.0.2" description = "Python helper for Semantic Versioning (https://semver.org)" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1610,6 +1660,7 @@ files = [ name = "setoptconf-tmp" version = "0.3.1" description = "A module for retrieving program settings from various sources in a consistant method." +category = "dev" optional = false python-versions = "*" files = [ @@ -1624,6 +1675,7 @@ yaml = ["pyyaml"] name = "smmap" version = "5.0.1" description = "A pure Python implementation of a sliding window memory map manager" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1635,6 +1687,7 @@ files = [ name = "sniffio" version = "1.3.0" description = "Sniff out which async library your code is running under" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1646,6 +1699,7 @@ files = [ name = "snowballstemmer" version = "2.2.0" description = "This package provides 29 stemmers for 28 languages generated from Snowball algorithms." 
+category = "dev" optional = false python-versions = "*" files = [ @@ -1657,6 +1711,7 @@ files = [ name = "sqlalchemy" version = "2.0.22" description = "Database Abstraction Library" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1744,6 +1799,7 @@ sqlcipher = ["sqlcipher3-binary"] name = "starlette" version = "0.27.0" description = "The little ASGI library that shines." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1761,6 +1817,7 @@ full = ["httpx (>=0.22.0)", "itsdangerous", "jinja2", "python-multipart", "pyyam name = "tenacity" version = "8.2.3" description = "Retry code until it succeeds" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1775,6 +1832,7 @@ doc = ["reno", "sphinx", "tornado (>=4.5)"] name = "tiktoken" version = "0.4.0" description = "tiktoken is a fast BPE tokeniser for use with OpenAI's models" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -1820,6 +1878,7 @@ blobfile = ["blobfile (>=2)"] name = "toml" version = "0.10.2" description = "Python Library for Tom's Obvious, Minimal Language" +category = "dev" optional = false python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -1831,6 +1890,7 @@ files = [ name = "tomlkit" version = "0.12.1" description = "Style preserving TOML library" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1842,6 +1902,7 @@ files = [ name = "tqdm" version = "4.66.1" description = "Fast, Extensible Progress Meter" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1862,6 +1923,7 @@ telegram = ["requests"] name = "typing-extensions" version = "4.8.0" description = "Backported and Experimental Type Hints for Python 3.8+" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -1873,6 +1935,7 @@ files = [ name = "typing-inspect" version = "0.9.0" description = "Runtime inspection utilities for typing module." 
+category = "main" optional = false python-versions = "*" files = [ @@ -1888,6 +1951,7 @@ typing-extensions = ">=3.7.4" name = "urllib3" version = "2.0.7" description = "HTTP library with thread-safe connection pooling, file post, and more." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1905,6 +1969,7 @@ zstd = ["zstandard (>=0.18.0)"] name = "uvicorn" version = "0.23.2" description = "The lightning-fast ASGI server." +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -1923,6 +1988,7 @@ standard = ["colorama (>=0.4)", "httptools (>=0.5.0)", "python-dotenv (>=0.13)", name = "wrapt" version = "1.15.0" description = "Module for decorators, wrappers and monkey patching." +category = "dev" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" files = [ @@ -2007,6 +2073,7 @@ files = [ name = "yarl" version = "1.9.2" description = "Yet another URL library" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2093,4 +2160,4 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" python-versions = "3.11.*" -content-hash = "680a5df064fcdd1cac69f7130fe0cc41571497de32b7797be0f88a0aa4e7d098" +content-hash = "844e1ad75ca9b73100279326d787a4621e504c69482e4348051b214e941fd49d" diff --git a/module_text_llm/pyproject.toml b/module_text_llm/pyproject.toml index e3d7ba38f..9610767ea 100644 --- a/module_text_llm/pyproject.toml +++ b/module_text_llm/pyproject.toml @@ -15,6 +15,7 @@ nltk = "^3.8.1" gitpython = "^3.1.37" replicate = "^0.11.0" tiktoken = "^0.4.0" +langsmith = "^0.0.60" [tool.poetry.scripts] module = "athena:run_module" diff --git a/playground/src/pages/api/athena_request.ts b/playground/src/pages/api/athena_request.ts index de94908fc..b3971db81 100644 --- a/playground/src/pages/api/athena_request.ts +++ b/playground/src/pages/api/athena_request.ts @@ -17,7 +17,20 @@ export default async function handler( const url = req.query.url; let response; const secret = 
req.headers["authorization"] as string; - const moduleConfig = req.headers["x-module-config"] as string | undefined; + const forwardHeaders = [ + "X-Module-Config", + "X-Experiment-ID", + "X-Module-Configuration-ID", + "X-Run-ID", + ] + + const headers = Object.fromEntries( + forwardHeaders.flatMap((header) => { + const value = req.headers[header.toLowerCase()] as string | undefined; + return value ? [[header, value]] : []; + }) + ) + if (!secret) { console.warn("No secret provided"); } @@ -27,7 +40,7 @@ export default async function handler( "Content-Type": "application/json", Accept: "application/json", "Authorization": secret, - ...(moduleConfig && { "X-Module-Config": moduleConfig }), + ...headers, }, method: req.method, ...(req.method === "POST" ? { body: JSON.stringify(req.body) } : {}), From e46df2ce5dae50a7d7ca6935115593d21c0867b9 Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Tue, 7 Nov 2023 20:04:14 +0100 Subject: [PATCH 16/54] inline statistics --- module_text_llm/module_text_llm/__main__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/module_text_llm/module_text_llm/__main__.py b/module_text_llm/module_text_llm/__main__.py index 0c9bfd29d..9b81dfedb 100644 --- a/module_text_llm/module_text_llm/__main__.py +++ b/module_text_llm/module_text_llm/__main__.py @@ -84,13 +84,13 @@ def get_statistics(runs: List[Run]): if suggestion_runs: evaluation["runs"]["suggestions"] = { "count": len(suggestion_runs), - "statistics": get_statistics(suggestion_runs), + **get_statistics(suggestion_runs), "runs": [json.loads(run.json()) for run in suggestion_runs] } if evaluation_runs: evaluation["runs"]["evaluation"] = { "count": len(evaluation_runs), - "statistics": get_statistics(evaluation_runs), + **get_statistics(evaluation_runs), "runs": [json.loads(run.json()) for run in evaluation_runs] } From 8d50922ad9f4a1cb5d1595fe7ff51bc9a0e472dc Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Tue, 7 Nov 2023 20:46:36 +0100 Subject: [PATCH 17/54] add sgi evaluation --- module_text_llm/module_text_llm/__main__.py | 56 +++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/module_text_llm/module_text_llm/__main__.py b/module_text_llm/module_text_llm/__main__.py index 9b81dfedb..75897b438 100644 --- a/module_text_llm/module_text_llm/__main__.py +++ b/module_text_llm/module_text_llm/__main__.py @@ -94,6 +94,62 @@ def get_statistics(runs: List[Run]): "runs": [json.loads(run.json()) for run in evaluation_runs] } + actual_feedback_count = len(true_feedbacks) + actual_feedback_with_grading_instructions = [] + suggestions_count = len(predicted_feedbacks) + suggestions_with_grading_instructions = [] + + # Init usage counts for SGIs + actual_sgi_usage = { + sgi.id: 0 for criterion in exercise.grading_criteria or [] for sgi in criterion.structured_grading_instructions + } + suggested_sgi_usage = { + sgi.id: 0 for criterion in exercise.grading_criteria or [] for sgi in criterion.structured_grading_instructions + } + + # Count SGIs in actual feedbacks + for feedback in true_feedbacks: + if feedback.structured_grading_instruction_id: + actual_feedback_with_grading_instructions.append(feedback) + actual_sgi_usage[feedback.structured_grading_instruction_id] += 1 + + # Count SGIs in suggested feedbacks + for feedback in predicted_feedbacks: + if feedback.structured_grading_instruction_id: + suggestions_with_grading_instructions.append(feedback) + suggested_sgi_usage[feedback.structured_grading_instruction_id] += 1 + + actual_feedback_with_grading_instructions_count = len(actual_feedback_with_grading_instructions) + suggestions_with_grading_instructions_count = len(suggestions_with_grading_instructions) + + # Match SGIs + matched_feedback = 0 + unmatched_feedback = actual_feedback_count - actual_feedback_with_grading_instructions_count + unmatched_suggestions = suggestions_count - suggestions_with_grading_instructions_count + + for feedback in 
actual_feedback_with_grading_instructions: + for index, suggestion in enumerate(suggestions_with_grading_instructions): + if feedback.structured_grading_instruction_id == suggestion.structured_grading_instruction_id: + matched_feedback += 1 + del suggestions_with_grading_instructions[index] + break + else: + unmatched_feedback += 1 + + unmatched_suggestions += len(suggestions_with_grading_instructions) + + evaluation["feedback_statistics"] = { + "actual_feedback_count": actual_feedback_count, + "suggestions_count": suggestions_count, + "actual_feedback_with_grading_instructions_count": actual_feedback_with_grading_instructions_count, + "suggestions_with_grading_instructions_count": suggestions_with_grading_instructions_count, + "actual_sgi_usage": actual_sgi_usage, + "suggested_sgi_usage": suggested_sgi_usage, + "matched_feedback": matched_feedback, + "unmatched_feedback": unmatched_feedback, + "unmatched_suggestions": unmatched_suggestions, + } + return evaluation if __name__ == "__main__": From a753b8a789d0bb1bb30b95a064a06911493afe38 Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Tue, 7 Nov 2023 21:04:54 +0100 Subject: [PATCH 18/54] refactor --- module_text_llm/module_text_llm/__main__.py | 117 +++--------------- module_text_llm/module_text_llm/evaluation.py | 117 ++++++++++++++++++ 2 files changed, 131 insertions(+), 103 deletions(-) create mode 100644 module_text_llm/module_text_llm/evaluation.py diff --git a/module_text_llm/module_text_llm/__main__.py b/module_text_llm/module_text_llm/__main__.py index 75897b438..430f7b7fb 100644 --- a/module_text_llm/module_text_llm/__main__.py +++ b/module_text_llm/module_text_llm/__main__.py @@ -4,14 +4,13 @@ import nltk import tiktoken -from langsmith import Client as LangsmithClient -from langsmith.schemas import Run -from athena import app, get_experiment_environment, submission_selector, submissions_consumer, feedback_consumer, feedback_provider, evaluation_provider +from athena import app, submission_selector, submissions_consumer, feedback_consumer, feedback_provider, evaluation_provider from athena.text import Exercise, Submission, Feedback from athena.logger import logger from module_text_llm.config import Configuration +from module_text_llm.evaluation import get_feedback_statistics, get_llm_statistics from module_text_llm.generate_suggestions import generate_suggestions from module_text_llm.generate_evaluation import generate_evaluation @@ -44,112 +43,24 @@ async def evaluate_feedback( true_feedbacks: List[Feedback], predicted_feedbacks: List[Feedback], ) -> Any: logger.info( - "evaluate_feedback: Evaluation for submission %d of exercise %d was requested with %d true and %d predicted feedbacks", - submission.id, exercise.id, len(true_feedbacks), len(predicted_feedbacks) + "evaluate_feedback: Evaluation for submission %d of exercise %d was requested with %d true and %d predicted feedbacks", + submission.id, exercise.id, len( + true_feedbacks), len(predicted_feedbacks) ) - + evaluation = {} + + # 1. 
LLM as a judge if bool(os.environ.get("LLM_ENABLE_LLM_AS_A_JUDGE")): evaluation["llm_as_a_judge"] = await generate_evaluation(exercise, submission, true_feedbacks, predicted_feedbacks) - # Gather LLM token usage and response times + # 2. LangSmith runs, token usage, and respose times if bool(os.environ.get("LANGCHAIN_TRACING_V2")): - experiment = get_experiment_environment() - client = LangsmithClient() - project_name = os.environ.get("LANGCHAIN_PROJECT") - runs = list(client.list_runs( - project_name=project_name, - filter=f'and(has(tags, "run-{experiment.run_id}"), has(tags, "submission-{submission.id}"))' - )) - logger.info("evaluate_feedback: Found %d runs for submission %d of exercise %d.", len(runs), submission.id, exercise.id) - - def get_statistics(runs: List[Run]): - return { - "response_time": sum((run.end_time - run.start_time).total_seconds() for run in runs if run.end_time is not None), - "prompt_tokens": sum(run.prompt_tokens for run in runs if run.prompt_tokens is not None), - "completion_tokens": sum(run.completion_tokens for run in runs if run.completion_tokens is not None), - "total_tokens": sum(run.total_tokens for run in runs if run.total_tokens is not None), - } - - suggestion_runs = [] - evaluation_runs = [] - for run in runs: - if "evaluation" in (run.tags or []): - evaluation_runs.append(run) - else: - suggestion_runs.append(run) - - if suggestion_runs or evaluation_runs: - evaluation["runs"] = {} - if suggestion_runs: - evaluation["runs"]["suggestions"] = { - "count": len(suggestion_runs), - **get_statistics(suggestion_runs), - "runs": [json.loads(run.json()) for run in suggestion_runs] - } - if evaluation_runs: - evaluation["runs"]["evaluation"] = { - "count": len(evaluation_runs), - **get_statistics(evaluation_runs), - "runs": [json.loads(run.json()) for run in evaluation_runs] - } - - actual_feedback_count = len(true_feedbacks) - actual_feedback_with_grading_instructions = [] - suggestions_count = len(predicted_feedbacks) - 
suggestions_with_grading_instructions = [] - - # Init usage counts for SGIs - actual_sgi_usage = { - sgi.id: 0 for criterion in exercise.grading_criteria or [] for sgi in criterion.structured_grading_instructions - } - suggested_sgi_usage = { - sgi.id: 0 for criterion in exercise.grading_criteria or [] for sgi in criterion.structured_grading_instructions - } - - # Count SGIs in actual feedbacks - for feedback in true_feedbacks: - if feedback.structured_grading_instruction_id: - actual_feedback_with_grading_instructions.append(feedback) - actual_sgi_usage[feedback.structured_grading_instruction_id] += 1 - - # Count SGIs in suggested feedbacks - for feedback in predicted_feedbacks: - if feedback.structured_grading_instruction_id: - suggestions_with_grading_instructions.append(feedback) - suggested_sgi_usage[feedback.structured_grading_instruction_id] += 1 - - actual_feedback_with_grading_instructions_count = len(actual_feedback_with_grading_instructions) - suggestions_with_grading_instructions_count = len(suggestions_with_grading_instructions) - - # Match SGIs - matched_feedback = 0 - unmatched_feedback = actual_feedback_count - actual_feedback_with_grading_instructions_count - unmatched_suggestions = suggestions_count - suggestions_with_grading_instructions_count - - for feedback in actual_feedback_with_grading_instructions: - for index, suggestion in enumerate(suggestions_with_grading_instructions): - if feedback.structured_grading_instruction_id == suggestion.structured_grading_instruction_id: - matched_feedback += 1 - del suggestions_with_grading_instructions[index] - break - else: - unmatched_feedback += 1 - - unmatched_suggestions += len(suggestions_with_grading_instructions) - - evaluation["feedback_statistics"] = { - "actual_feedback_count": actual_feedback_count, - "suggestions_count": suggestions_count, - "actual_feedback_with_grading_instructions_count": actual_feedback_with_grading_instructions_count, - "suggestions_with_grading_instructions_count": 
suggestions_with_grading_instructions_count, - "actual_sgi_usage": actual_sgi_usage, - "suggested_sgi_usage": suggested_sgi_usage, - "matched_feedback": matched_feedback, - "unmatched_feedback": unmatched_feedback, - "unmatched_suggestions": unmatched_suggestions, - } - + evaluation["llm_statistics"] = get_llm_statistics(submission) + + # 3. Feedback statistics + evaluation["feedback_statistics"] = get_feedback_statistics(exercise, submission, true_feedbacks, predicted_feedbacks) + return evaluation if __name__ == "__main__": diff --git a/module_text_llm/module_text_llm/evaluation.py b/module_text_llm/module_text_llm/evaluation.py new file mode 100644 index 000000000..2d6989892 --- /dev/null +++ b/module_text_llm/module_text_llm/evaluation.py @@ -0,0 +1,117 @@ +import json +import os +from typing import List + +from langsmith import Client as LangSmithClient +from langsmith.schemas import Run + +from athena import get_experiment_environment +from athena.text import Exercise, Submission, Feedback + + +def get_llm_statistics(submission: Submission): + experiment = get_experiment_environment() + client = LangSmithClient() + project_name = os.environ.get("LANGCHAIN_PROJECT") + runs = list(client.list_runs( + project_name=project_name, + filter=f'and(has(tags, "run-{experiment.run_id}"), has(tags, "submission-{submission.id}"))' + )) + + def get_statistics(runs: List[Run]): + return { + "response_time": sum((run.end_time - run.start_time).total_seconds() for run in runs if run.end_time is not None), + "prompt_tokens": sum(run.prompt_tokens for run in runs if run.prompt_tokens is not None), + "completion_tokens": sum(run.completion_tokens for run in runs if run.completion_tokens is not None), + "total_tokens": sum(run.total_tokens for run in runs if run.total_tokens is not None), + } + + suggestion_runs = [] + evaluation_runs = [] + for run in runs: + if "evaluation" in (run.tags or []): + evaluation_runs.append(run) + else: + suggestion_runs.append(run) + + 
llm_statistics = {} + if suggestion_runs or evaluation_runs: + if suggestion_runs: + llm_statistics["suggestions"] = { + "count": len(suggestion_runs), + **get_statistics(suggestion_runs), + "runs": [json.loads(run.json()) for run in suggestion_runs] + } + if evaluation_runs: + llm_statistics["evaluation"] = { + "count": len(evaluation_runs), + **get_statistics(evaluation_runs), + "runs": [json.loads(run.json()) for run in evaluation_runs] + } + + return llm_statistics + + +def get_feedback_statistics(exercise: Exercise, submission: Submission, + true_feedbacks: List[Feedback], predicted_feedbacks: List[Feedback]): + actual_feedback_count = len(true_feedbacks) + actual_feedback_with_grading_instructions = [] + suggestions_count = len(predicted_feedbacks) + suggestions_with_grading_instructions = [] + + # Init usage counts for SGIs + actual_sgi_usage = { + sgi.id: 0 for criterion in exercise.grading_criteria or [] for sgi in criterion.structured_grading_instructions + } + suggested_sgi_usage = { + sgi.id: 0 for criterion in exercise.grading_criteria or [] for sgi in criterion.structured_grading_instructions + } + + # Count SGIs in actual feedbacks + for feedback in true_feedbacks: + if feedback.structured_grading_instruction_id: + actual_feedback_with_grading_instructions.append(feedback) + actual_sgi_usage[feedback.structured_grading_instruction_id] += 1 + + # Count SGIs in suggested feedbacks + for feedback in predicted_feedbacks: + if feedback.structured_grading_instruction_id: + suggestions_with_grading_instructions.append(feedback) + suggested_sgi_usage[feedback.structured_grading_instruction_id] += 1 + + actual_feedback_with_grading_instructions_count = len( + actual_feedback_with_grading_instructions) + suggestions_with_grading_instructions_count = len( + suggestions_with_grading_instructions) + + # Match SGIs + matched_feedback = 0 + unmatched_feedback = actual_feedback_count - \ + actual_feedback_with_grading_instructions_count + unmatched_suggestions = 
suggestions_count - \ + suggestions_with_grading_instructions_count + + for feedback in actual_feedback_with_grading_instructions: + for index, suggestion in enumerate(suggestions_with_grading_instructions): + if feedback.structured_grading_instruction_id == suggestion.structured_grading_instruction_id: + matched_feedback += 1 + del suggestions_with_grading_instructions[index] + break + else: + unmatched_feedback += 1 + + unmatched_suggestions += len(suggestions_with_grading_instructions) + + feedback_statistics = { + "actual_feedback_count": actual_feedback_count, + "suggestions_count": suggestions_count, + "actual_feedback_with_grading_instructions_count": actual_feedback_with_grading_instructions_count, + "suggestions_with_grading_instructions_count": suggestions_with_grading_instructions_count, + "actual_sgi_usage": actual_sgi_usage, + "suggested_sgi_usage": suggested_sgi_usage, + "matched_feedback": matched_feedback, + "unmatched_feedback": unmatched_feedback, + "unmatched_suggestions": unmatched_suggestions, + } + + return feedback_statistics From fa0bde50196d9b3af4817aa06892dae25125df69 Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Tue, 7 Nov 2023 21:05:24 +0100 Subject: [PATCH 19/54] remove unused --- module_text_llm/module_text_llm/__main__.py | 2 +- module_text_llm/module_text_llm/evaluation.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/module_text_llm/module_text_llm/__main__.py b/module_text_llm/module_text_llm/__main__.py index 430f7b7fb..1fbeb4cf8 100644 --- a/module_text_llm/module_text_llm/__main__.py +++ b/module_text_llm/module_text_llm/__main__.py @@ -59,7 +59,7 @@ async def evaluate_feedback( evaluation["llm_statistics"] = get_llm_statistics(submission) # 3. 
Feedback statistics - evaluation["feedback_statistics"] = get_feedback_statistics(exercise, submission, true_feedbacks, predicted_feedbacks) + evaluation["feedback_statistics"] = get_feedback_statistics(exercise, true_feedbacks, predicted_feedbacks) return evaluation diff --git a/module_text_llm/module_text_llm/evaluation.py b/module_text_llm/module_text_llm/evaluation.py index 2d6989892..055dc2c94 100644 --- a/module_text_llm/module_text_llm/evaluation.py +++ b/module_text_llm/module_text_llm/evaluation.py @@ -52,8 +52,7 @@ def get_statistics(runs: List[Run]): return llm_statistics -def get_feedback_statistics(exercise: Exercise, submission: Submission, - true_feedbacks: List[Feedback], predicted_feedbacks: List[Feedback]): +def get_feedback_statistics(exercise: Exercise, true_feedbacks: List[Feedback], predicted_feedbacks: List[Feedback]): actual_feedback_count = len(true_feedbacks) actual_feedback_with_grading_instructions = [] suggestions_count = len(predicted_feedbacks) From a4d7d8cf5eb2206241472a0e084df4c4d6625cc0 Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Tue, 7 Nov 2023 21:36:17 +0100 Subject: [PATCH 20/54] update ini --- assessment_module_manager/modules.docker.ini | 7 ++++++- assessment_module_manager/modules.ini | 4 ++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/assessment_module_manager/modules.docker.ini b/assessment_module_manager/modules.docker.ini index db9d22854..340efa797 100644 --- a/assessment_module_manager/modules.docker.ini +++ b/assessment_module_manager/modules.docker.ini @@ -1,19 +1,24 @@ [module_example] url = http://module-example:5001 type = programming +supports_evaluation = false [module_programming_llm] url = http://module-programming-llm:5002 type = programming +supports_evaluation = false [module_text_llm] url = http://module-text-llm:5003 type = text +supports_evaluation = true [module_text_cofee] url = http://module-text-cofee:5004 type = text +supports_evaluation = false [module_programming_themisml] url = http://module-programming-themisml:5005 -type = programming \ No newline at end of file +type = programming +supports_evaluation = false \ No newline at end of file diff --git a/assessment_module_manager/modules.ini b/assessment_module_manager/modules.ini index 3402183f0..70745eb78 100644 --- a/assessment_module_manager/modules.ini +++ b/assessment_module_manager/modules.ini @@ -1,7 +1,7 @@ [module_example] url = http://localhost:5001 type = programming -supports_evaluation = true +supports_evaluation = false [module_programming_llm] url = http://localhost:5002 @@ -11,7 +11,7 @@ supports_evaluation = false [module_text_llm] url = http://localhost:5003 type = text -supports_evaluation = false +supports_evaluation = true [module_text_cofee] url = http://localhost:5004 From fd4fdab3cf714298a5ccbd0dd333c4c6f9708c23 Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Tue, 7 Nov 2023 21:40:51 +0100 Subject: [PATCH 21/54] only use selected modules --- playground/src/hooks/athena/request_evaluation.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/playground/src/hooks/athena/request_evaluation.ts b/playground/src/hooks/athena/request_evaluation.ts index 76227258f..620fb362b 100644 --- a/playground/src/hooks/athena/request_evaluation.ts +++ b/playground/src/hooks/athena/request_evaluation.ts @@ -48,7 +48,7 @@ export default function useRequestEvaluation( const modules = onlyUseContextModule ? [contextModule] : Object.values(health?.modules ?? {}).filter( - (module) => module.healthy && module.type === contextModule.type + (module) => module.healthy && module.type === contextModule.type && module.supportsEvaluation ); const results = await Promise.allSettled( From f2c173661b83234099347bdca9e57d3d31a1ba91 Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Tue, 7 Nov 2023 22:49:19 +0100 Subject: [PATCH 22/54] remove skip --- module_text_llm/module_text_llm/__main__.py | 2 +- playground/src/hooks/batch_module_experiment.ts | 10 ---------- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/module_text_llm/module_text_llm/__main__.py b/module_text_llm/module_text_llm/__main__.py index 1fbeb4cf8..49d069635 100644 --- a/module_text_llm/module_text_llm/__main__.py +++ b/module_text_llm/module_text_llm/__main__.py @@ -51,7 +51,7 @@ async def evaluate_feedback( evaluation = {} # 1. LLM as a judge - if bool(os.environ.get("LLM_ENABLE_LLM_AS_A_JUDGE")): + if len(predicted_feedbacks) > 0 and bool(os.environ.get("LLM_ENABLE_LLM_AS_A_JUDGE")): evaluation["llm_as_a_judge"] = await generate_evaluation(exercise, submission, true_feedbacks, predicted_feedbacks) # 2. 
LangSmith runs, token usage, and respose times diff --git a/playground/src/hooks/batch_module_experiment.ts b/playground/src/hooks/batch_module_experiment.ts index e2610fbb8..778365ade 100644 --- a/playground/src/hooks/batch_module_experiment.ts +++ b/playground/src/hooks/batch_module_experiment.ts @@ -406,16 +406,6 @@ export default function useBatchModuleExperiment(experiment: Experiment, moduleC submission.id )?.suggestions ?? []; - if (predictedFeedbacks.length === 0) { - // Skip if there are no predicted feedbacks - setSubmissionsWithAutomaticEvaluation((prevState) => { - const newMap = new Map(prevState); - newMap.set(submission.id, {}); - return newMap; - }); - continue; - } - try { const responses = await requestEvaluation.mutateAsync({ exercise: experiment.exercise, From 33f2a2b580fe7948754317b195172a469abf7328 Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Sun, 5 Nov 2023 21:54:36 +0100 Subject: [PATCH 23/54] add endpoint --- athena/athena/endpoints.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/athena/athena/endpoints.py b/athena/athena/endpoints.py index 8e9d4dcc5..eac88656f 100644 --- a/athena/athena/endpoints.py +++ b/athena/athena/endpoints.py @@ -2,7 +2,7 @@ import inspect from fastapi import Depends, BackgroundTasks from pydantic import BaseModel, ValidationError -from typing import TypeVar, Callable, List, Union, Any, Coroutine, Type +from typing import TypeVar, Callable, Dict, List, Union, Any, Coroutine, Type from athena.app import app from athena.authenticate import authenticated @@ -358,4 +358,12 @@ def config_schema_provider(cls: Type[C]) -> Type[C]: async def wrapper(): return cls.schema() - return cls \ No newline at end of file + return cls + +def evaluation_provider(func: Union[ + Callable[[E, S, List[F], List[F]], Dict[int, Any]], + Callable[[E, S, List[F], List[F]], Coroutine[Any, Any, Dict[int, Any]]], + Callable[[E, S, List[F], List[F], C], Dict[int, Any]], + Callable[[E, S, List[F], 
List[F], C], Coroutine[Any, Any, Dict[int, Any]]] +]): + pass \ No newline at end of file From 368ebcca320cb5eee4d25724df52157354f2933f Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Sun, 5 Nov 2023 22:09:19 +0100 Subject: [PATCH 24/54] add evaluation_provider --- athena/athena/endpoints.py | 64 ++++++++++++++++++++++++++++++++++---- 1 file changed, 58 insertions(+), 6 deletions(-) diff --git a/athena/athena/endpoints.py b/athena/athena/endpoints.py index eac88656f..a762a37d0 100644 --- a/athena/athena/endpoints.py +++ b/athena/athena/endpoints.py @@ -2,7 +2,7 @@ import inspect from fastapi import Depends, BackgroundTasks from pydantic import BaseModel, ValidationError -from typing import TypeVar, Callable, Dict, List, Union, Any, Coroutine, Type +from typing import TypeVar, Callable, List, Union, Any, Coroutine, Type from athena.app import app from athena.authenticate import authenticated @@ -361,9 +361,61 @@ async def wrapper(): return cls def evaluation_provider(func: Union[ - Callable[[E, S, List[F], List[F]], Dict[int, Any]], - Callable[[E, S, List[F], List[F]], Coroutine[Any, Any, Dict[int, Any]]], - Callable[[E, S, List[F], List[F], C], Dict[int, Any]], - Callable[[E, S, List[F], List[F], C], Coroutine[Any, Any, Dict[int, Any]]] + Callable[[E, S, List[F], List[F]], Any], + Callable[[E, S, List[F], List[F]], Coroutine[Any, Any, Any]] ]): - pass \ No newline at end of file + """ + Provide evaluated feedback to the Assessment Module Manager. + + Note: The evaluation provider is usually called during the research and development phase (by the Playground). + Return arbitrary evaluation results. + + This decorator can be used with several types of functions: synchronous or asynchronous. + + Examples: + Below are some examples of possible functions that you can decorate with this decorator: + + Without using module config (both synchronous and asynchronous forms): + >>> @evaluation_provider + ... def sync_evaluate_feedback( + ... 
exercise: Exercise, submission: Submission, + ... true_feedbacks: List[Feedback], predicted_feedbacks: List[Feedback] + ... ) -> Any: + ... # evaluate predicted feedback here and return evaluation results + + >>> @feedback_provider + ... async def async_evaluate_feedback( + ... exercise: Exercise, submission: Submission, + ... true_feedbacks: List[Feedback], predicted_feedbacks: List[Feedback] + ... ) -> Any: + ... # evaluate predicted feedback here and return evaluation results + """ + exercise_type = inspect.signature(func).parameters["exercise"].annotation + submission_type = inspect.signature(func).parameters["submission"].annotation + feedback_type = inspect.signature(func).parameters["predicted_feedbacks"].annotation.__args__[0] + + @app.post("/evaluation", responses=module_responses) + @authenticated + @with_meta + async def wrapper( + exercise: exercise_type, + submission: submission_type, + true_feedbacks: List[feedback_type], + predicted_feedbacks: List[feedback_type], + ): + # Retrieve existing metadata for the exercise, submission and feedback + exercise.meta.update(get_stored_exercise_meta(exercise) or {}) + submission.meta.update(get_stored_submission_meta(submission) or {}) + for feedback in true_feedbacks: + feedback.meta.update(get_stored_feedback_meta(feedback) or {}) + for feedback in predicted_feedbacks: + feedback.meta.update(get_stored_feedback_meta(feedback) or {}) + + # Call the actual provider + if inspect.iscoroutinefunction(func): + evaluation = await func(exercise, submission, true_feedbacks, predicted_feedbacks) + else: + evaluation = func(exercise, submission, true_feedbacks, predicted_feedbacks) + + return evaluation + return wrapper \ No newline at end of file From 5c681065ebd40f77745fe22cc5234ee1e2d42141 Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Sun, 5 Nov 2023 22:09:27 +0100 Subject: [PATCH 25/54] add new line --- athena/athena/endpoints.py | 1 + 1 file changed, 1 insertion(+) diff --git a/athena/athena/endpoints.py b/athena/athena/endpoints.py index a762a37d0..411edaf0a 100644 --- a/athena/athena/endpoints.py +++ b/athena/athena/endpoints.py @@ -360,6 +360,7 @@ async def wrapper(): return cls + def evaluation_provider(func: Union[ Callable[[E, S, List[F], List[F]], Any], Callable[[E, S, List[F], List[F]], Coroutine[Any, Any, Any]] From 7afd65520a30e6eb1a42b782b8c0854d79c02240 Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Sun, 5 Nov 2023 22:10:28 +0100 Subject: [PATCH 26/54] add evaluation_provider to export --- athena/athena/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/athena/athena/__init__.py b/athena/athena/__init__.py index 90fb46e62..8a67e5315 100644 --- a/athena/athena/__init__.py +++ b/athena/athena/__init__.py @@ -6,7 +6,7 @@ from .schemas import ExerciseType, GradingCriterion, StructuredGradingInstruction from .metadata import emit_meta, get_meta from .experiment import get_experiment_environment -from .endpoints import submission_selector, submissions_consumer, feedback_consumer, feedback_provider, config_schema_provider # type: ignore +from .endpoints import submission_selector, submissions_consumer, feedback_consumer, feedback_provider, config_schema_provider, evaluation_provider # type: ignore @app.get("/") @@ -28,6 +28,7 @@ def run_module(): "feedback_consumer", "feedback_provider", "config_schema_provider", + "evaluation_provider", "emit_meta", "get_meta", "get_experiment_environment", From 192614433d139a1ae56a8ccb999e300c8516435a Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Sun, 5 Nov 2023 22:25:35 +0100 Subject: [PATCH 27/54] add example evaluation endpoint --- module_example/module_example/__main__.py | 30 +++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/module_example/module_example/__main__.py b/module_example/module_example/__main__.py index 7bdef91fe..182f160bb 100644 --- a/module_example/module_example/__main__.py +++ b/module_example/module_example/__main__.py @@ -1,10 +1,11 @@ """ Entry point for the module_example module. """ -from typing import List +import random +from typing import List, Any from pydantic import BaseModel, Field -from athena import app, config_schema_provider, submissions_consumer, submission_selector, feedback_consumer, feedback_provider, emit_meta +from athena import app, config_schema_provider, submissions_consumer, submission_selector, feedback_consumer, feedback_provider, evaluation_provider, emit_meta from athena.programming import Exercise, Submission, Feedback from athena.logger import logger from athena.storage import store_exercise, store_submissions, store_feedback @@ -139,5 +140,30 @@ def suggest_feedback(exercise: Exercise, submission: Submission, module_config: ] +# Only if it makes sense for a module (Optional) +@evaluation_provider +def evaluate_feedback(exercise: Exercise, submission: Submission, true_feedbacks: List[Feedback], predicted_feedbacks: List[Feedback]) -> Any: + logger.info( + "evaluate_feedback: Evaluation for submission %d of exercise %d was requested with %d true and %d predicted feedbacks", + submission.id, exercise.id, len(true_feedbacks), len(predicted_feedbacks) + ) + + # Do something with the true and predicted feedback and return the evaluation result + # Generate some example evaluation result + evaluation_results = [] + true_feedback_embeddings = [random.random() for _ in true_feedbacks] + predicted_feedback_embeddings = [random.random() for _ in predicted_feedbacks] + for feedback, embedding in 
zip(predicted_feedbacks, predicted_feedback_embeddings): + feedback_evaluation = { + "feedback_id": feedback.id, + "embedding": embedding, + "has_match": len([t for t in true_feedback_embeddings if abs(t - embedding) < 0.1]) > 0, + "correctness": random.random() + } + evaluation_results.append(feedback_evaluation) + + return evaluation_results + + if __name__ == "__main__": app.start() From db5e5180576299bdd8fc25dbcf75817b98d4d168 Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Sun, 5 Nov 2023 22:44:22 +0100 Subject: [PATCH 28/54] add playground ui --- .../view_mode/module_requests/index.tsx | 7 +- .../module_requests/request_evaluation.tsx | 172 ++++++++++++++++++ .../src/hooks/athena/request_evaluation.ts | 31 ++++ 3 files changed, 208 insertions(+), 2 deletions(-) create mode 100644 playground/src/components/view_mode/module_requests/request_evaluation.tsx create mode 100644 playground/src/hooks/athena/request_evaluation.ts diff --git a/playground/src/components/view_mode/module_requests/index.tsx b/playground/src/components/view_mode/module_requests/index.tsx index dca29946d..dcda17bf2 100644 --- a/playground/src/components/view_mode/module_requests/index.tsx +++ b/playground/src/components/view_mode/module_requests/index.tsx @@ -3,12 +3,14 @@ import type { ModuleMeta } from "@/model/health_response"; import { useState } from "react"; import { ModuleProvider } from "@/hooks/module_context"; +import ModuleAndConfigSelect from "@/components/selectors/module_and_config_select"; import GetConfigSchema from "@/components/view_mode/module_requests/get_config_schema"; import SendSubmissions from "@/components/view_mode/module_requests/send_submissions"; +import SelectSubmission from "@/components/view_mode/module_requests/request_submission_selection"; import SendFeedbacks from "@/components/view_mode/module_requests/send_feedbacks"; import RequestFeedbackSuggestions from "@/components/view_mode/module_requests/request_feedback_suggestions"; -import 
SelectSubmission from "@/components/view_mode/module_requests/request_submission_selection"; -import ModuleAndConfigSelect from "@/components/selectors/module_and_config_select"; +import RequestEvaluation from "@/components/view_mode/module_requests/request_evaluation"; + export default function ModuleRequests() { const [moduleAndConfig, setModuleAndConfig] = useState<{ module: ModuleMeta; moduleConfig: any } | undefined>(undefined); @@ -34,6 +36,7 @@ export default function ModuleRequests() { +
    )} diff --git a/playground/src/components/view_mode/module_requests/request_evaluation.tsx b/playground/src/components/view_mode/module_requests/request_evaluation.tsx new file mode 100644 index 000000000..c98ac6d27 --- /dev/null +++ b/playground/src/components/view_mode/module_requests/request_evaluation.tsx @@ -0,0 +1,172 @@ +import type { Submission } from "@/model/submission"; +import type { Exercise } from "@/model/exercise"; +import type { Feedback } from "@/model/feedback"; +import type ModuleResponse from "@/model/module_response"; + +import { useEffect, useState } from "react"; + +import { useModule } from "@/hooks/module_context"; +import { useBaseInfo } from "@/hooks/base_info_context"; +import useRequestEvaluation from "@/hooks/athena/request_evaluation"; +import useFeedbacks from "@/hooks/playground/feedbacks"; + +import ExerciseSelect from "@/components/selectors/exercise_select"; +import SubmissionSelect from "@/components/selectors/submission_select"; +import ModuleResponseView from "@/components/module_response_view"; +import Disclosure from "@/components/disclosure"; +import ExerciseDetail from "@/components/details/exercise_detail"; +import SubmissionDetail from "@/components/details/submission_detail"; + +export default function RequestEvaluation() { + const { module } = useModule(); + const { dataMode } = useBaseInfo(); + + const [exercise, setExercise] = useState(undefined); + const [submission, setSubmission] = useState( + undefined + ); + + const [predictedFeedbacks, setPredictedFeedbacks] = useState([]); + + const { + data: trueFeedbacks, + isLoading: isLoadingTrueFeedbacks, + error: errorTrueFeedbacks, + } = useFeedbacks(exercise, submission); + + const { + data: response, + isLoading, + error, + mutate, + reset, + } = useRequestEvaluation(); + + useEffect(() => setExercise(undefined), [module, dataMode]); + + return ( +
    +

    + Request Evaluation from Athena{" "} + (OPTIONAL) +

    +

    + Evaluate a list of feedback suggestions during the research and + development phase. Compare the predicted feedback with the actual + feedback using the function annotated with{" "} + @evaluation_provider. Each module can implement custom + metrics to evaluate the feedback suggestions during evaluation and + respond with arbitrary evaluation results. +

    + { + setExercise(exercise); + reset(); + setSubmission(undefined); + setPredictedFeedbacks([]); + }} + disabled={isLoading} + /> + {exercise && ( + <> + { + setSubmission(submission); + setPredictedFeedbacks([]); + }} + disabled={isLoading} + /> +
    + + {submission && + (trueFeedbacks ? ( + +

    + The following feedbacks given by the tutor in the past. +

    + f.submission_id === submission.id + )} + /> +
    + ) : ( +
    + No true feedbacks available +
    + ))} + {submission && ( + +

    + Provide feedback as predicted feedbacks to + test the evaluation. +

    + f.submission_id === submission.id + )} + onFeedbacksChange={setPredictedFeedbacks} + /> +
    + )} + {isLoadingTrueFeedbacks && ( +
    Loading feedbacks...
    + )} + {errorTrueFeedbacks && ( +
    + Failed to load feedbacks +
    + )} +
    + + )} + + +
    + ); +} diff --git a/playground/src/hooks/athena/request_evaluation.ts b/playground/src/hooks/athena/request_evaluation.ts new file mode 100644 index 000000000..d6ccc9f60 --- /dev/null +++ b/playground/src/hooks/athena/request_evaluation.ts @@ -0,0 +1,31 @@ +import type { Exercise } from "@/model/exercise"; +import type { Submission } from "@/model/submission"; +import type ModuleResponse from "@/model/module_response"; + +import { UseMutationOptions, useMutation } from "react-query"; +import { AthenaError, useAthenaFetcher } from "@/hooks/athena_fetcher"; +import { Feedback } from "@/model/feedback"; + +/** + * Requests an evaluation for an exercise and a submission given the true and predicted feedbacks from an Athena module. + * + * @example + * const { data, isLoading, error, mutate } = useRequestEvaluation(); + * mutate({ exercise, submission, trueFeedbacks, predictedFeedbacks }); + * + * @param options The react-query options. + */ +export default function useRequestEvaluation( + options: Omit< + UseMutationOptions, + "mutationFn" + > = {} +) { + const athenaFetcher = useAthenaFetcher(); + return useMutation({ + mutationFn: async ({ exercise, submission, trueFeedbacks, predictedFeedbacks }) => { + return await athenaFetcher("/evaluation", { exercise, submission, true_feedbacks: trueFeedbacks, predicted_feedbacks: predictedFeedbacks }); + }, + ...options, + }); +} From 2da43911793e73d9ac3bebec313ecf89d991ee81 Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Mon, 6 Nov 2023 16:22:16 +0100 Subject: [PATCH 29/54] add automatic evaluation --- .../conduct_experiment/index.tsx | 17 +-- .../src/hooks/batch_module_experiment.ts | 108 +++++++++++++++++- playground/src/model/automatic_evaluation.ts | 3 + 3 files changed, 117 insertions(+), 11 deletions(-) create mode 100644 playground/src/model/automatic_evaluation.ts diff --git a/playground/src/components/view_mode/evaluation_mode/conduct_experiment/index.tsx b/playground/src/components/view_mode/evaluation_mode/conduct_experiment/index.tsx index f91cc7b3e..3c78e1ecb 100644 --- a/playground/src/components/view_mode/evaluation_mode/conduct_experiment/index.tsx +++ b/playground/src/components/view_mode/evaluation_mode/conduct_experiment/index.tsx @@ -65,6 +65,12 @@ export default function ConductExperiment({ data: data.manualRatings, }); } + if (data.automaticEvaluation) { + files.push({ + name: `${experiment.exerciseType}_automatic_evaluation_${moduleConfigurations[index].name}_${experiment.id}_run-${data.automaticEvaluation.runId}`, + data: data.automaticEvaluation, + }); + } } return files; }) @@ -102,14 +108,11 @@ export default function ConductExperiment({ return; } - if ( - !data.type || - (data.type !== "results" && data.type !== "manualRatings") - ) { - alert("No correct type found in the data i.e. 'results' or 'manualRatings'"); + if (!data.type || !["results", "manualRatings", "automaticEvaluation"].includes(data.type)) { + alert("No correct type found in the data i.e. 
'results', 'manualRatings', or 'automaticEvaluation'."); return; } - const type = data.type as "results" | "manualRatings"; + const type = data.type as "results" | "manualRatings" | "automaticEvaluation"; try { moduleViewRef.importData(data); @@ -209,7 +212,7 @@ export default function ConductExperiment({ // If all files have been read, sort and import if (filesProcessed === files.length) { - // Sort the array by 'type', 'results' first and then 'manualRatings' + // Sort the array by 'type', 'results' first and then 'manualRatings' or 'automaticEvaluation' const sortedData = fileDataArray.sort((a, b) => { if (a.type === "results" && b.type !== "results") { return -1; diff --git a/playground/src/hooks/batch_module_experiment.ts b/playground/src/hooks/batch_module_experiment.ts index ba99982c2..a2ff10fb0 100644 --- a/playground/src/hooks/batch_module_experiment.ts +++ b/playground/src/hooks/batch_module_experiment.ts @@ -1,5 +1,6 @@ import type { Feedback } from "@/model/feedback"; import type { ManualRating } from "@/model/manual_rating"; +import type { AutomaticEvaluation } from "@/model/automatic_evaluation"; import type { Experiment } from "@/components/view_mode/evaluation_mode/define_experiment"; import type { ModuleConfiguration } from "@/components/view_mode/evaluation_mode/configure_modules"; @@ -9,6 +10,7 @@ import { useSendFeedbacks } from "./athena/send_feedbacks"; import useRequestSubmissionSelection from "./athena/request_submission_selection"; import useRequestFeedbackSuggestions from "./athena/request_feedback_suggestions"; import useSendSubmissions from "./athena/send_submissions"; +import useRequestEvaluation from "./athena/request_evaluation"; import { useExperimentIdentifiersSetRunId } from "./experiment_identifiers_context"; export type ExperimentStep = @@ -50,6 +52,11 @@ export default function useBatchModuleExperiment(experiment: Experiment, moduleC Map >(new Map()); + // Stores automatic evaluation of submissions + const 
[submissionsWithAutomaticEvaluation, setSubmissionsWithAutomaticEvaluation] = useState< + Map + >(new Map()); + const [processingStep, setProcessingStep] = useState< ExperimentStep | undefined >(undefined); @@ -95,6 +102,19 @@ export default function useBatchModuleExperiment(experiment: Experiment, moduleC }, } : {} ), + ...( + submissionsWithAutomaticEvaluation.size > 0 ? { + automaticEvaluation: { + type: "automaticEvaluation", + runId: data.runId, + experimentId: experiment.id, + moduleConfigurationId: moduleConfiguration.id, + submissionsWithAutomaticEvaluation: Object.fromEntries( + submissionsWithAutomaticEvaluation + ), + }, + } : {} + ), }; }; @@ -108,6 +128,7 @@ export default function useBatchModuleExperiment(experiment: Experiment, moduleC throw new Error("Invalid results data"); } + setProcessingStep(undefined); setData(() => ({ runId: importedData.runId, step: importedData.step, @@ -134,7 +155,22 @@ export default function useBatchModuleExperiment(experiment: Experiment, moduleC ) )); return; + } else if (importedData.type === "automaticEvaluation") { + // Relies on the fact that the automatic evaluations have to be imported after the results + if (importedData.runId !== data.runId) { + throw new Error("Run ID does not match, have you imported the results first?"); + } + if (importedData.submissionsWithAutomaticEvaluation === undefined) { + throw new Error("Invalid automatic evaluation data"); + } + setSubmissionsWithAutomaticEvaluation(() => new Map( + Object.entries(importedData.submissionsWithAutomaticEvaluation).map( + ([key, value]) => [Number(key), value as any] + ) + )); + return; } + throw new Error("Unknown import data type"); }; @@ -158,6 +194,7 @@ export default function useBatchModuleExperiment(experiment: Experiment, moduleC const sendFeedbacks = useSendFeedbacks(); const requestSubmissionSelection = useRequestSubmissionSelection(); const requestFeedbackSuggestions = useRequestFeedbackSuggestions(); + const requestEvaluation = 
useRequestEvaluation(); // 1. Send submissions to Athena const stepSendSubmissions = () => { @@ -338,10 +375,70 @@ export default function useBatchModuleExperiment(experiment: Experiment, moduleC setData((prevState) => ({ ...prevState, - step: "finished", + step: "finished", // Automatic evaluation is done separately })); }; + // 4. Automatic evaluation (after results are 'finished') + const stepAutomaticEvaluation = async () => { + setProcessingStep("finished"); + + console.log("Running automatic evaluation..."); + + let remainingSubmissions = experiment.evaluationSubmissions.filter( + (submission) => !submissionsWithAutomaticEvaluation.has(submission.id) + ); + + let index = 0; + for (const submission of remainingSubmissions) { + console.log( + `Evaluating... (${index + 1}/${ + remainingSubmissions.length + })` + ); + + const predictedFeedbacks = data.submissionsWithFeedbackSuggestions.get( + submission.id + )?.suggestions ?? []; + + if (predictedFeedbacks.length === 0) { + // Skip if there are no predicted feedbacks + setSubmissionsWithAutomaticEvaluation((prevState) => { + const newMap = new Map(prevState); + newMap.set(submission.id, {}); + return newMap; + }); + continue; + } + + try { + const response = await requestEvaluation.mutateAsync({ + exercise: experiment.exercise, + submission, + trueFeedbacks: experiment.tutorFeedbacks.filter( + (feedback) => feedback.submission_id === submission.id + ), + predictedFeedbacks: predictedFeedbacks, + }); + if (!isMounted.current) { + return; + } + console.log(`Received evaluation for submission ${submission.id}:`, response.data); + + setSubmissionsWithAutomaticEvaluation((prevState) => { + const newMap = new Map(prevState); + newMap.set(submission.id, response.data); + return newMap; + }); + } catch (error) { + console.error( + `Error while evaluating submission ${submission.id}:`, + error + ); + } + } + }; + useEffect(() => { isMounted.current = true; return () => { @@ -375,10 +472,12 @@ export default function 
useBatchModuleExperiment(experiment: Experiment, moduleC processingStep !== "generatingFeedbackSuggestions" ) { stepGenerateFeedbackSuggestions(); + } else if ( + data.step === "finished" && + processingStep !== "finished" + ) { + stepAutomaticEvaluation(); } - // TODO: Add automatic evaluation step here - // Note: Evaluate tutor feedback more globally to not do it multiple times - // Note 2: Actually, I probably want to have it in parallel with the feedback suggestions for the interactive mode! }, [data.step]); return { @@ -394,6 +493,7 @@ export default function useBatchModuleExperiment(experiment: Experiment, moduleC sendFeedbacks, requestSubmissionSelection, requestFeedbackSuggestions, + requestEvaluation, }, }; } diff --git a/playground/src/model/automatic_evaluation.ts b/playground/src/model/automatic_evaluation.ts new file mode 100644 index 000000000..fb55b9c86 --- /dev/null +++ b/playground/src/model/automatic_evaluation.ts @@ -0,0 +1,3 @@ +export type AutomaticEvaluation = { + [module: string]: any; +}; From a8589dc5a1a241b63a6680ffc44128102b7835b5 Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Mon, 6 Nov 2023 17:22:55 +0100 Subject: [PATCH 30/54] add automatic evaluation --- .../endpoints/health_endpoint.py | 5 +- .../module/list_modules.py | 1 + .../module/module.py | 1 + assessment_module_manager/modules.ini | 5 ++ .../module_requests/request_evaluation.tsx | 4 +- .../src/hooks/athena/request_evaluation.ts | 62 ++++++++++++++++--- playground/src/hooks/athena_fetcher.ts | 42 ++++++++----- .../src/hooks/batch_module_experiment.ts | 11 +++- playground/src/hooks/module_context.tsx | 2 +- playground/src/model/health_response.ts | 1 + 10 files changed, 103 insertions(+), 31 deletions(-) diff --git a/assessment_module_manager/assessment_module_manager/endpoints/health_endpoint.py b/assessment_module_manager/assessment_module_manager/endpoints/health_endpoint.py index e1a8d252f..85879b673 100644 --- a/assessment_module_manager/assessment_module_manager/endpoints/health_endpoint.py +++ b/assessment_module_manager/assessment_module_manager/endpoints/health_endpoint.py @@ -27,6 +27,7 @@ class HealthResponse(BaseModel): """ Response indicating whether the Assessment Module Manager is healthy, and whether all the modules are healthy (i.e. reachable). + Additional information about the modules is also provided. 
""" status: str = Field(const=True, default="ok", example="ok") modules: dict = Field( @@ -35,7 +36,8 @@ class HealthResponse(BaseModel): "module_example": { "url": "http://localhost:5001", "type": "programming", - "healthy": True + "healthy": True, + "supportsEvaluation": True } } ] @@ -56,6 +58,7 @@ async def get_health() -> HealthResponse: "url": module.url, "type": module.type, "healthy": await is_healthy(module), + "supportsEvaluation": module.supports_evaluation } for module in get_modules() } diff --git a/assessment_module_manager/assessment_module_manager/module/list_modules.py b/assessment_module_manager/assessment_module_manager/module/list_modules.py index b2862e489..e9f18f106 100644 --- a/assessment_module_manager/assessment_module_manager/module/list_modules.py +++ b/assessment_module_manager/assessment_module_manager/module/list_modules.py @@ -18,6 +18,7 @@ def list_modules() -> List[Module]: name=module, url=cast(AnyHttpUrl, modules_config[module]["url"]), type=ExerciseType(modules_config[module]["type"]), + supports_evaluation=modules_config[module].getboolean("supports_evaluation"), ) for module in modules_config.sections() ] diff --git a/assessment_module_manager/assessment_module_manager/module/module.py b/assessment_module_manager/assessment_module_manager/module/module.py index 35dbb6da1..65e99931f 100644 --- a/assessment_module_manager/assessment_module_manager/module/module.py +++ b/assessment_module_manager/assessment_module_manager/module/module.py @@ -8,3 +8,4 @@ class Module(BaseModel): name: str = Field(example="module_example") url: AnyHttpUrl = Field(example="http://localhost:5001") type: ExerciseType = Field(example=ExerciseType.text) + supports_evaluation: bool = Field(description="Whether the module supports evaluation", example=True) diff --git a/assessment_module_manager/modules.ini b/assessment_module_manager/modules.ini index 0dde7b074..3402183f0 100644 --- a/assessment_module_manager/modules.ini +++ 
b/assessment_module_manager/modules.ini @@ -1,19 +1,24 @@ [module_example] url = http://localhost:5001 type = programming +supports_evaluation = true [module_programming_llm] url = http://localhost:5002 type = programming +supports_evaluation = false [module_text_llm] url = http://localhost:5003 type = text +supports_evaluation = false [module_text_cofee] url = http://localhost:5004 type = text +supports_evaluation = false [module_programming_themisml] url = http://localhost:5005 type = programming +supports_evaluation = false diff --git a/playground/src/components/view_mode/module_requests/request_evaluation.tsx b/playground/src/components/view_mode/module_requests/request_evaluation.tsx index c98ac6d27..f004708d6 100644 --- a/playground/src/components/view_mode/module_requests/request_evaluation.tsx +++ b/playground/src/components/view_mode/module_requests/request_evaluation.tsx @@ -40,7 +40,7 @@ export default function RequestEvaluation() { error, mutate, reset, - } = useRequestEvaluation(); + } = useRequestEvaluation(undefined, true) // onlyUseContextModule = true for module requests only useEffect(() => setExercise(undefined), [module, dataMode]); @@ -130,7 +130,7 @@ export default function RequestEvaluation() { )} diff --git a/playground/src/hooks/athena/request_evaluation.ts b/playground/src/hooks/athena/request_evaluation.ts index d6ccc9f60..76227258f 100644 --- a/playground/src/hooks/athena/request_evaluation.ts +++ b/playground/src/hooks/athena/request_evaluation.ts @@ -5,26 +5,74 @@ import type ModuleResponse from "@/model/module_response"; import { UseMutationOptions, useMutation } from "react-query"; import { AthenaError, useAthenaFetcher } from "@/hooks/athena_fetcher"; import { Feedback } from "@/model/feedback"; +import { useModule } from "@/hooks/module_context"; +import useHealth from "@/hooks/health"; /** - * Requests an evaluation for an exercise and a submission given the true and predicted feedbacks from an Athena module. 
+ * Requests an evaluation for an exercise and a submission given the true and predicted feedbacks from healthy Athena modules. + * + * @param options The react-query options. + * @param onlyUseContextModule - If true, only the context module is used for the evaluation. Otherwise, all healthy modules are used. * * @example * const { data, isLoading, error, mutate } = useRequestEvaluation(); * mutate({ exercise, submission, trueFeedbacks, predictedFeedbacks }); - * - * @param options The react-query options. */ export default function useRequestEvaluation( options: Omit< - UseMutationOptions, + UseMutationOptions< + ModuleResponse[] | undefined, + AthenaError, + { + exercise: Exercise; + submission: Submission; + trueFeedbacks: Feedback[]; + predictedFeedbacks: Feedback[]; + } + >, "mutationFn" - > = {} + > = {}, + onlyUseContextModule = false ) { const athenaFetcher = useAthenaFetcher(); + const { module: contextModule } = useModule(); + const { data: health } = useHealth(); + return useMutation({ - mutationFn: async ({ exercise, submission, trueFeedbacks, predictedFeedbacks }) => { - return await athenaFetcher("/evaluation", { exercise, submission, true_feedbacks: trueFeedbacks, predicted_feedbacks: predictedFeedbacks }); + mutationFn: async ({ + exercise, + submission, + trueFeedbacks, + predictedFeedbacks, + }) => { + const modules = onlyUseContextModule + ? [contextModule] + : Object.values(health?.modules ?? 
{}).filter( + (module) => module.healthy && module.type === contextModule.type + ); + + const results = await Promise.allSettled( + modules.map((module) => + athenaFetcher( + "/evaluation", + { + exercise, + submission, + true_feedbacks: trueFeedbacks, + predicted_feedbacks: predictedFeedbacks, + }, + { module: module, moduleConfig: undefined } + ) + ) + ); + + return results.flatMap((result) => { + if (result.status === "fulfilled") { + return [result.value]; + } else { + return []; + } + }); }, ...options, }); diff --git a/playground/src/hooks/athena_fetcher.ts b/playground/src/hooks/athena_fetcher.ts index 1f23f20ba..69b9a2278 100644 --- a/playground/src/hooks/athena_fetcher.ts +++ b/playground/src/hooks/athena_fetcher.ts @@ -1,4 +1,5 @@ import type ModuleResponse from "@/model/module_response"; +import type { Module } from "@/hooks/module_context"; import baseUrl from "@/helpers/base_url"; import { useBaseInfo } from "@/hooks/base_info_context"; @@ -36,27 +37,34 @@ export class AthenaError extends Error { * @returns A function that can be used to fetch data from the module or that returns undefined if the module is not set. 
*/ export function useAthenaFetcher() { - const { module, moduleConfig } = useModule(); + const { module: contextModule, moduleConfig: contextModuleConfig } = useModule(); const { athenaUrl, athenaSecret } = useBaseInfo(); const { experimentId, moduleConfigurationId, runId } = useExperimentIdentifiers(); - const headers: { [key: string]: string } = {}; - if (moduleConfig) { - headers["X-Module-Config"] = JSON.stringify(moduleConfig); - } - if (experimentId) { - headers["X-Experiment-ID"] = experimentId; - } - if (moduleConfigurationId) { - headers["X-Module-Configuration-ID"] = moduleConfigurationId; - } - if (runId) { - headers["X-Run-ID"] = runId; - } - return ( - async (moduleRoute: string, body?: any) => { - const url = `${athenaUrl}/modules/${module.type}/${module.name}${moduleRoute}`; + async (moduleRoute: string, body?: any, overrideModule?: Module) => { + let targetModule = contextModule; + let targetModuleConfig = contextModuleConfig; + if (overrideModule) { + targetModule = overrideModule.module; + targetModuleConfig = overrideModule.moduleConfig; + } + + const headers: { [key: string]: string } = {}; + if (targetModuleConfig) { + headers["X-Module-Config"] = JSON.stringify(targetModuleConfig); + } + if (experimentId) { + headers["X-Experiment-ID"] = experimentId; + } + if (moduleConfigurationId) { + headers["X-Module-Configuration-ID"] = moduleConfigurationId; + } + if (runId) { + headers["X-Run-ID"] = runId; + } + + const url = `${athenaUrl}/modules/${targetModule.type}/${targetModule.name}${moduleRoute}`; const response = await fetch( `${baseUrl}/api/athena_request?${new URLSearchParams({ url: url, diff --git a/playground/src/hooks/batch_module_experiment.ts b/playground/src/hooks/batch_module_experiment.ts index a2ff10fb0..7c6f9f68d 100644 --- a/playground/src/hooks/batch_module_experiment.ts +++ b/playground/src/hooks/batch_module_experiment.ts @@ -412,7 +412,7 @@ export default function useBatchModuleExperiment(experiment: Experiment, moduleC } try 
{ - const response = await requestEvaluation.mutateAsync({ + const responses = await requestEvaluation.mutateAsync({ exercise: experiment.exercise, submission, trueFeedbacks: experiment.tutorFeedbacks.filter( @@ -423,11 +423,16 @@ export default function useBatchModuleExperiment(experiment: Experiment, moduleC if (!isMounted.current) { return; } - console.log(`Received evaluation for submission ${submission.id}:`, response.data); + + const data = Object.fromEntries( + responses.map((response) => [response.module_name, response.data]) + ); + + console.log(`Received evaluation for submission ${submission.id}:`, data); setSubmissionsWithAutomaticEvaluation((prevState) => { const newMap = new Map(prevState); - newMap.set(submission.id, response.data); + newMap.set(submission.id, data); return newMap; }); } catch (error) { diff --git a/playground/src/hooks/module_context.tsx b/playground/src/hooks/module_context.tsx index c2a96232e..f7aab7666 100644 --- a/playground/src/hooks/module_context.tsx +++ b/playground/src/hooks/module_context.tsx @@ -2,7 +2,7 @@ import type { ModuleMeta } from '@/model/health_response'; import { ReactNode, createContext, useContext, useReducer } from 'react'; -type Module = { +export type Module = { module: ModuleMeta; moduleConfig: any; }; diff --git a/playground/src/model/health_response.ts b/playground/src/model/health_response.ts index 7502b21d9..4ffe27de9 100644 --- a/playground/src/model/health_response.ts +++ b/playground/src/model/health_response.ts @@ -2,6 +2,7 @@ export type ModuleMeta = { name: string; type: string; healthy: boolean; + supportsEvaluation: boolean; }; export type HealthResponse = { From 39c729dc879d3ab08f7596278ff11c387c1b5640 Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Tue, 7 Nov 2023 16:34:34 +0100 Subject: [PATCH 31/54] add UI changes --- .../batch_module_experiment.tsx | 64 ++++-- .../module_experiment_progress.tsx | 193 +++++++++++------- .../src/hooks/batch_module_experiment.ts | 22 +- 3 files changed, 177 insertions(+), 102 deletions(-) diff --git a/playground/src/components/view_mode/evaluation_mode/conduct_experiment/batch_module_experiment.tsx b/playground/src/components/view_mode/evaluation_mode/conduct_experiment/batch_module_experiment.tsx index d2e9fdb02..560d8b204 100644 --- a/playground/src/components/view_mode/evaluation_mode/conduct_experiment/batch_module_experiment.tsx +++ b/playground/src/components/view_mode/evaluation_mode/conduct_experiment/batch_module_experiment.tsx @@ -2,7 +2,12 @@ import type { Submission } from "@/model/submission"; import type { Experiment } from "../define_experiment"; import type { ExperimentStep } from "@/hooks/batch_module_experiment"; -import React, { useImperativeHandle, useState, ForwardedRef, useEffect } from "react"; +import React, { + useImperativeHandle, + useState, + ForwardedRef, + useEffect, +} from "react"; import Modal from "react-modal"; import { FullScreenHandle } from "react-full-screen"; @@ -14,6 +19,7 @@ import { ModuleConfiguration } from "../configure_modules"; import ModuleExperimentProgress from "./module_experiment_progress"; import SubmissionDetail from "@/components/details/submission_detail"; import ModuleConfigSelect from "@/components/selectors/module_config_select"; +import { twMerge } from "tailwind-merge"; type ConductBatchModuleExperimentProps = { experiment: Experiment; @@ -53,7 +59,10 @@ const ConductBatchModuleExperiment = React.forwardRef< ref: ForwardedRef ) => { const { data: health } = useHealth(); - const moduleExperiment = useBatchModuleExperiment(experiment, moduleConfiguration); + const moduleExperiment = useBatchModuleExperiment( + experiment, + moduleConfiguration + ); const [showProgress, setShowProgress] = 
useState(true); const [isConfigModalOpen, setConfigModalOpen] = useState(false); @@ -88,14 +97,6 @@ const ConductBatchModuleExperiment = React.forwardRef<

    {moduleConfiguration.name}

    - {moduleExperiment.continueAfterTraining && ( - - )}
    ) { return ( - + { @@ -86,10 +86,11 @@ export default function ModuleExperimentProgress({ className={twMerge( "flex items-center justify-center w-6 h-6 border rounded-full shrink-0", stepToIndex(data.step) >= 2 - ? stepToIndex(data.step) > 2 || moduleExperiment.continueAfterTraining - ? "text-green-500 border-green-500" - : "text-yellow-500 border-yellow-500" - : "text-gray-500 border-gray-500" + ? stepToIndex(data.step) > 2 || + moduleExperiment.continueAfterTraining + ? "text-green-500 border-green-500" + : "text-yellow-500 border-yellow-500" + : "text-gray-500 border-gray-500" )} > 2 @@ -98,10 +99,11 @@ export default function ModuleExperimentProgress({ className={twMerge( "flex flex-col", stepToIndex(data.step) >= 2 - ? stepToIndex(data.step) > 2 || moduleExperiment.continueAfterTraining - ? "text-green-500" - : "text-yellow-500" - : "text-gray-500" + ? stepToIndex(data.step) > 2 || + moduleExperiment.continueAfterTraining + ? "text-green-500" + : "text-yellow-500" + : "text-gray-500" )} > Sending Training Feedback @@ -129,70 +131,121 @@ export default function ModuleExperimentProgress({ )} {/* Generate Feedback Suggestions */} -
  • - 3 - ? "text-green-500 border-green-500" - : stepToIndex(data.step) === 3 - ? "text-yellow-500 border-yellow-500" - : "text-gray-500 border-gray-500" - )} - > - {experiment.trainingSubmissions ? 3 : 2} - -
    3 - ? "text-green-500" - : stepToIndex(data.step) === 3 - ? "text-yellow-500" - : "text-gray-500" - )} - > - Generating Feedback Suggestions - {moduleRequests.requestFeedbackSuggestions.isLoading && ( - - Generating feedback suggestions... ( - {data.submissionsWithFeedbackSuggestions.size + 1}/ - {experiment.evaluationSubmissions.length}) - - )} - {moduleRequests.requestFeedbackSuggestions.isError && ( - - {moduleRequests.requestFeedbackSuggestions.error.message} - - )} - {moduleRequests.requestFeedbackSuggestions.isSuccess && ( - - Generated feedback suggestions ( - {data.submissionsWithFeedbackSuggestions.size}/ - {experiment.evaluationSubmissions.length}) - - )} +
  • +
    + 3 + ? "text-green-500 border-green-500" + : stepToIndex(data.step) === 3 + ? "text-yellow-500 border-yellow-500" + : "text-gray-500 border-gray-500" + )} + > + {experiment.trainingSubmissions ? 3 : 2} + +
    3 + ? "text-green-500" + : stepToIndex(data.step) === 3 + ? "text-yellow-500" + : "text-gray-500" + )} + > + Generating Feedback Suggestions + {moduleRequests.requestFeedbackSuggestions.isLoading && ( + + Generating feedback suggestions... ( + {data.submissionsWithFeedbackSuggestions.size + 1}/ + {experiment.evaluationSubmissions.length}) + + )} + {moduleRequests.requestFeedbackSuggestions.isError && ( + + {moduleRequests.requestFeedbackSuggestions.error.message} + + )} + {moduleRequests.requestFeedbackSuggestions.isSuccess && ( + + Generated feedback suggestions ( + {data.submissionsWithFeedbackSuggestions.size}/ + {experiment.evaluationSubmissions.length}) + + )} +
    + {moduleExperiment.continueAfterTraining && ( + + )}
  • -
  • - - {experiment.trainingSubmissions ? 4 : 3} - -
    - Finished + + {/* Run Automatic Evaluation */} +
  • +
    + + {experiment.trainingSubmissions ? 4 : 3} + +
    + Run Automatic Evaluation + {moduleRequests.requestEvaluation.isLoading && ( + + Evaluating submissions... ( + {(submissionsWithAutomaticEvaluation?.size ?? 0) + 1}/ + {experiment.evaluationSubmissions.length}) + + )} + {moduleRequests.requestEvaluation.isError && ( + + {moduleRequests.requestEvaluation.error.message} + + )} + {moduleRequests.requestEvaluation.isSuccess && ( + + Evaluated submissions ( + {submissionsWithAutomaticEvaluation?.size ?? 0}/ + {experiment.evaluationSubmissions.length}) + + )} +
    + {moduleExperiment.continueWithAutomaticEvaluation && ( + + )}
  • ); diff --git a/playground/src/hooks/batch_module_experiment.ts b/playground/src/hooks/batch_module_experiment.ts index 7c6f9f68d..89480a30a 100644 --- a/playground/src/hooks/batch_module_experiment.ts +++ b/playground/src/hooks/batch_module_experiment.ts @@ -54,8 +54,8 @@ export default function useBatchModuleExperiment(experiment: Experiment, moduleC // Stores automatic evaluation of submissions const [submissionsWithAutomaticEvaluation, setSubmissionsWithAutomaticEvaluation] = useState< - Map - >(new Map()); + Map | undefined + >(undefined); const [processingStep, setProcessingStep] = useState< ExperimentStep | undefined @@ -103,7 +103,7 @@ export default function useBatchModuleExperiment(experiment: Experiment, moduleC } : {} ), ...( - submissionsWithAutomaticEvaluation.size > 0 ? { + submissionsWithAutomaticEvaluation && submissionsWithAutomaticEvaluation.size > 0 ? { automaticEvaluation: { type: "automaticEvaluation", runId: data.runId, @@ -189,6 +189,10 @@ export default function useBatchModuleExperiment(experiment: Experiment, moduleC })); }) : undefined; + const continueWithAutomaticEvaluation = (data.step === "finished" && submissionsWithAutomaticEvaluation === undefined) ? 
(() => { + stepAutomaticEvaluation(); + }) : undefined; + // Module requests const sendSubmissions = useSendSubmissions(); const sendFeedbacks = useSendFeedbacks(); @@ -386,7 +390,7 @@ export default function useBatchModuleExperiment(experiment: Experiment, moduleC console.log("Running automatic evaluation..."); let remainingSubmissions = experiment.evaluationSubmissions.filter( - (submission) => !submissionsWithAutomaticEvaluation.has(submission.id) + (submission) => !submissionsWithAutomaticEvaluation?.has(submission.id) ); let index = 0; @@ -477,20 +481,18 @@ export default function useBatchModuleExperiment(experiment: Experiment, moduleC processingStep !== "generatingFeedbackSuggestions" ) { stepGenerateFeedbackSuggestions(); - } else if ( - data.step === "finished" && - processingStep !== "finished" - ) { - stepAutomaticEvaluation(); - } + } + // Automatic evaluation is triggered manually }, [data.step]); return { data, submissionsWithManualRatings, + submissionsWithAutomaticEvaluation, getManualRatingsSetter, startExperiment, continueAfterTraining, + continueWithAutomaticEvaluation, exportData, importData, moduleRequests: { From fdb073dc6f3d7cfaaf10ec6610851a41a70cdca9 Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Tue, 7 Nov 2023 16:57:53 +0100 Subject: [PATCH 32/54] fix color --- .../conduct_experiment/module_experiment_progress.tsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/playground/src/components/view_mode/evaluation_mode/conduct_experiment/module_experiment_progress.tsx b/playground/src/components/view_mode/evaluation_mode/conduct_experiment/module_experiment_progress.tsx index 345c9e67a..75d0e9964 100644 --- a/playground/src/components/view_mode/evaluation_mode/conduct_experiment/module_experiment_progress.tsx +++ b/playground/src/components/view_mode/evaluation_mode/conduct_experiment/module_experiment_progress.tsx @@ -197,7 +197,7 @@ export default function ModuleExperimentProgress({ submissionsWithAutomaticEvaluation?.size === data.submissionsWithFeedbackSuggestions.size ? "text-green-500 border-green-500" - : stepToIndex(data.step) === 4 + : stepToIndex(data.step) === 4 && submissionsWithAutomaticEvaluation !== undefined ? "text-yellow-500 border-yellow-500" : "text-gray-500 border-gray-500" )} @@ -211,7 +211,7 @@ export default function ModuleExperimentProgress({ submissionsWithAutomaticEvaluation?.size === data.submissionsWithFeedbackSuggestions.size ? "text-green-500" - : stepToIndex(data.step) === 4 + : stepToIndex(data.step) === 4 && submissionsWithAutomaticEvaluation !== undefined ? "text-yellow-500" : "text-gray-500" )} From d0838f5aca3507235615dedc26ac5543c219ec4b Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Tue, 7 Nov 2023 17:18:41 +0100 Subject: [PATCH 33/54] add evaluation model --- module_text_llm/.env.example | 6 ++++++ .../module_text_llm/helpers/models/__init__.py | 12 +++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/module_text_llm/.env.example b/module_text_llm/.env.example index 2ecf0a8f4..aedc5bdba 100644 --- a/module_text_llm/.env.example +++ b/module_text_llm/.env.example @@ -14,6 +14,12 @@ DATABASE_URL=sqlite:///../data/data.sqlite # See below for options, available models are also logged on startup LLM_DEFAULT_MODEL="azure_openai_gpt-35" +# Enable LLM-as-a-judge approach 0 = disabled, 1 = enabled +LLM_ENABLE_LLM_AS_A_JUDGE=1 +# Evaluation model to use for the LLM-as-a-judge approach [Only important if you want to use it in the /evaluate endpoint] +# See below for options, available models are also logged on startup +LLM_EVALUATION_MODEL="azure_openai_gpt-4" + # Standard OpenAI (Non-Azure) [leave blank if not used] # Model names prefixed with `openai_` followed by the model name, e.g. 
`openai_text-davinci-003` # A list of models can be found in `module_text_llm/helpers/models/openai.py` (openai_models) diff --git a/module_text_llm/module_text_llm/helpers/models/__init__.py b/module_text_llm/module_text_llm/helpers/models/__init__.py index 4d2fe5a65..144bcf923 100644 --- a/module_text_llm/module_text_llm/helpers/models/__init__.py +++ b/module_text_llm/module_text_llm/helpers/models/__init__.py @@ -1,10 +1,16 @@ import os -from typing import Type, Union, List +from typing import Type, Union, List, Optional +from langchain.base_language import BaseLanguageModel + from module_text_llm.helpers.models.model_config import ModelConfig DefaultModelConfig: Type[ModelConfig] default_model_name = os.environ.get("LLM_DEFAULT_MODEL") +evaluation_model_name = os.environ.get("LLM_EVALUATION_MODEL") + +# Model used during evaluation for judging the output (should be a more powerful model) +evaluation_model: Optional[BaseLanguageModel] = None types: List[Type[ModelConfig]] = [] try: @@ -12,6 +18,8 @@ types.append(openai_config.OpenAIModelConfig) if default_model_name in openai_config.available_models: DefaultModelConfig = openai_config.OpenAIModelConfig + if evaluation_model_name in openai_config.available_models: + evaluation_model = openai_config.available_models[evaluation_model_name] except AttributeError: pass @@ -20,6 +28,8 @@ types.append(replicate_config.ReplicateModelConfig) if default_model_name in replicate_config.available_models: DefaultModelConfig = replicate_config.ReplicateModelConfig + if evaluation_model_name in replicate_config.available_models: + evaluation_model = replicate_config.available_models[evaluation_model_name] except AttributeError: pass From 05608aa98c73480b48841dbd12a8f82df1fc8be9 Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Tue, 7 Nov 2023 17:30:33 +0100 Subject: [PATCH 34/54] add llm as a judge --- module_text_llm/module_text_llm/__main__.py | 22 ++++- .../module_text_llm/generate_evaluation.py | 95 +++++++++++++++++++ .../module_text_llm/helpers/utils.py | 25 +++++ .../prompts/generate_evaluation.py | 26 +++++ 4 files changed, 166 insertions(+), 2 deletions(-) create mode 100644 module_text_llm/module_text_llm/generate_evaluation.py create mode 100644 module_text_llm/module_text_llm/prompts/generate_evaluation.py diff --git a/module_text_llm/module_text_llm/__main__.py b/module_text_llm/module_text_llm/__main__.py index e9bf8d448..e3f7d7769 100644 --- a/module_text_llm/module_text_llm/__main__.py +++ b/module_text_llm/module_text_llm/__main__.py @@ -1,14 +1,16 @@ -from typing import List +import os +from typing import List, Any import nltk import tiktoken -from athena import app, submission_selector, submissions_consumer, feedback_consumer, feedback_provider +from athena import app, submission_selector, submissions_consumer, feedback_consumer, feedback_provider, evaluation_provider from athena.text import Exercise, Submission, Feedback from athena.logger import logger from module_text_llm.config import Configuration from module_text_llm.generate_suggestions import generate_suggestions +from module_text_llm.generate_evaluation import generate_evaluation @submissions_consumer @@ -33,6 +35,22 @@ async def suggest_feedback(exercise: Exercise, submission: Submission, module_co return await generate_suggestions(exercise, submission, module_config.approach, module_config.debug) +@evaluation_provider +async def evaluate_feedback( + exercise: Exercise, submission: Submission, + true_feedbacks: List[Feedback], predicted_feedbacks: List[Feedback], +) -> Any: + logger.info( + "evaluate_feedback: Evaluation for submission %d of exercise %d was requested with %d true and %d predicted feedbacks", + submission.id, exercise.id, len(true_feedbacks), len(predicted_feedbacks) + ) + + 
evaluation = {} + if bool(os.environ.get("LLM_ENABLE_LLM_AS_A_JUDGE")): + evaluation["llm-as-a-judge"] = await generate_evaluation(exercise, submission, true_feedbacks, predicted_feedbacks) + + return evaluation + if __name__ == "__main__": nltk.download("punkt") tiktoken.get_encoding("cl100k_base") diff --git a/module_text_llm/module_text_llm/generate_evaluation.py b/module_text_llm/module_text_llm/generate_evaluation.py new file mode 100644 index 000000000..4c4a9969b --- /dev/null +++ b/module_text_llm/module_text_llm/generate_evaluation.py @@ -0,0 +1,95 @@ +from typing import List, Sequence, Dict, Literal +from pydantic import BaseModel, Field +import json + +from athena.text import Exercise, Submission, Feedback +from athena.logger import logger + +from module_text_llm.helpers.models import evaluation_model +from module_text_llm.helpers.llm_utils import ( + get_chat_prompt_with_formatting_instructions, + check_prompt_length_and_omit_features_if_necessary, + predict_and_parse +) +from module_text_llm.helpers.utils import add_sentence_numbers, get_line_range_from_index_range +from module_text_llm.prompts.generate_evaluation import system_message, human_message + + +class AccuracyMetric(BaseModel): + id: int = Field(..., description="Feedback ID") + reasoning: str = Field(..., description="Step-by-step critical reasoning of the labels") + acceptance_label: Literal["accepted", "rejected"] = Field(..., description="Estimated acceptance label") + level_of_needed_modification_label: Literal["no", "minor", "major"] = Field(..., description="Estimated level of needed modification") + +class Evaluation(BaseModel): + metrics: Sequence[AccuracyMetric] = Field(...) 
+ + +async def generate_evaluation( + exercise: Exercise, + submission: Submission, + true_feedbacks: List[Feedback], + predicted_feedbacks: List[Feedback] +) -> Dict[int, dict]: + + if evaluation_model is None: + raise EnvironmentError("No evaluation model available, please set up LLM_EVALUATION_MODEL correctly" + "by setting it to one of the available models logged during startup.") + max_input_tokens = 3000 + + def feedback_to_dict(feedback: Feedback): + line_start, line_end = get_line_range_from_index_range( + feedback.index_start, feedback.index_end, submission.text) + return { + "id": feedback.id, + "title": feedback.title, + "description": feedback.description, + "line_start": line_start, + "line_end": line_end, + "credits": feedback.credits + } + + prompt_input = { + "submission": add_sentence_numbers(submission.text), + "true_feedbacks": json.dumps([feedback_to_dict(feedback) for feedback in true_feedbacks]), + "predicted_feedbacks": json.dumps([feedback_to_dict(feedback) for feedback in predicted_feedbacks]), + } + + chat_prompt = get_chat_prompt_with_formatting_instructions( + model=evaluation_model, + system_message=system_message, + human_message=human_message, + pydantic_object=Evaluation + ) + + # Check if the prompt is too long and omit features if necessary (in order of importance) + omittable_features = ["submission"] + prompt_input, should_run = check_prompt_length_and_omit_features_if_necessary( + prompt=chat_prompt, + prompt_input=prompt_input, + max_input_tokens=max_input_tokens, + omittable_features=omittable_features, + debug=False + ) + + if not should_run: + logger.warning("Evaluation input too long. Skipping.") + return {} + + result = await predict_and_parse( + model=evaluation_model, + chat_prompt=chat_prompt, + prompt_input=prompt_input, + pydantic_object=Evaluation, + tags=[ + f"exercise-{exercise.id}", + f"submission-{submission.id}", + "evaluation" + ] + ) + + if result is None: + logger.warning("Evaluation failed. 
Skipping.") + return {} + + return { item.id: item.dict() for item in result.metrics } diff --git a/module_text_llm/module_text_llm/helpers/utils.py b/module_text_llm/module_text_llm/helpers/utils.py index 2ed05aec5..24cf41024 100644 --- a/module_text_llm/module_text_llm/helpers/utils.py +++ b/module_text_llm/module_text_llm/helpers/utils.py @@ -92,3 +92,28 @@ def get_index_range_from_line_range(line_start: Optional[int], line_end: Optiona line_end_index = min(max(int(line_end), 0), len(sentence_spans) - 1) return sentence_spans[line_start_index][0], sentence_spans[line_end_index][1] + + +def get_line_range_from_index_range(index_start: Optional[int], index_end: Optional[int], content: str) -> Tuple[Optional[int], Optional[int]]: + if index_start is None and index_end is None: + return None, None + + index_start = index_start or index_end or 0 + index_end = index_end or index_start or 0 + + if index_start > index_end: + index_start, index_end = index_end, index_start + + sentence_spans = get_sentence_spans(content) + + line_start = None + line_end = None + + for line_number, (start_index, end_index) in enumerate(sentence_spans, start=1): + if start_index <= index_start < end_index: + line_start = line_number + if start_index <= index_end <= end_index: + line_end = line_number + break + + return line_start, line_end \ No newline at end of file diff --git a/module_text_llm/module_text_llm/prompts/generate_evaluation.py b/module_text_llm/module_text_llm/prompts/generate_evaluation.py new file mode 100644 index 000000000..10daa84a4 --- /dev/null +++ b/module_text_llm/module_text_llm/prompts/generate_evaluation.py @@ -0,0 +1,26 @@ +system_message = """\ +You are now an evaluator for feedback accuracy generated by a machine-learning system. + +# Task +Your task is to estimate if a human tutor would accept or reject the feedback suggestion and how much modification is needed to make the feedback useful. 
+ +# Score Criteria +Accept feedback that is useful to the tutor, meaning that it can be applied to the submission with minor or no modification. \ +Our goal is to reduce the workload of tutors and reduce their cognitive load. \ +Reject feedback that is not useful and would burden the tutor. + +Put the focus on the description of the feedback, the title is optional. \ +The `line_start` and `line_end` should make sense with respect to the submission but do not need to be exact. \ +Credits should make sense with respect to the feedback and the submission but also do not need to be exact. + +# Submission (with sentence numbers : ): +{submission} + +# Example (Human) Feedback: +{true_feedbacks} +""" + +human_message = """\ +### Model Output: +{predicted_feedbacks} +""" \ No newline at end of file From c68ba0f96a67500b0bcacf7ba813ad09bd8f97dc Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Tue, 7 Nov 2023 17:55:01 +0100 Subject: [PATCH 35/54] fix ui issue and some var naming --- module_text_llm/module_text_llm/__main__.py | 2 +- playground/src/hooks/batch_module_experiment.ts | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/module_text_llm/module_text_llm/__main__.py b/module_text_llm/module_text_llm/__main__.py index e3f7d7769..cc6c42baa 100644 --- a/module_text_llm/module_text_llm/__main__.py +++ b/module_text_llm/module_text_llm/__main__.py @@ -47,7 +47,7 @@ async def evaluate_feedback( evaluation = {} if bool(os.environ.get("LLM_ENABLE_LLM_AS_A_JUDGE")): - evaluation["llm-as-a-judge"] = await generate_evaluation(exercise, submission, true_feedbacks, predicted_feedbacks) + evaluation["llm_as_a_judge"] = await generate_evaluation(exercise, submission, true_feedbacks, predicted_feedbacks) return evaluation diff --git a/playground/src/hooks/batch_module_experiment.ts b/playground/src/hooks/batch_module_experiment.ts index 89480a30a..e2610fbb8 100644 --- a/playground/src/hooks/batch_module_experiment.ts +++ 
b/playground/src/hooks/batch_module_experiment.ts @@ -190,6 +190,7 @@ export default function useBatchModuleExperiment(experiment: Experiment, moduleC }) : undefined; const continueWithAutomaticEvaluation = (data.step === "finished" && submissionsWithAutomaticEvaluation === undefined) ? (() => { + setSubmissionsWithAutomaticEvaluation((prevState) => new Map(prevState)); stepAutomaticEvaluation(); }) : undefined; From afb1892727bb9292c4862048195bda24b8d7b729 Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Tue, 7 Nov 2023 18:54:25 +0100 Subject: [PATCH 36/54] fix line break --- playground/src/components/details/exercise_detail/common.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/playground/src/components/details/exercise_detail/common.tsx b/playground/src/components/details/exercise_detail/common.tsx index 852b19a66..8c583a292 100644 --- a/playground/src/components/details/exercise_detail/common.tsx +++ b/playground/src/components/details/exercise_detail/common.tsx @@ -54,7 +54,7 @@ export default function CommonExerciseDetail({ Missing criterion title )} - Grading Criterion {criterion.id} + Grading Criterion {criterion.id} {criterion.structured_grading_instructions.map( From 2a1d4b636ba65cbe504f6be4b1601bd1a4e6cc99 Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Tue, 7 Nov 2023 19:58:53 +0100 Subject: [PATCH 37/54] add langsmith logging --- module_text_llm/module_text_llm/__main__.py | 47 ++++++++- module_text_llm/poetry.lock | 111 ++++++++++++++++---- module_text_llm/pyproject.toml | 1 + playground/src/pages/api/athena_request.ts | 17 ++- 4 files changed, 151 insertions(+), 25 deletions(-) diff --git a/module_text_llm/module_text_llm/__main__.py b/module_text_llm/module_text_llm/__main__.py index cc6c42baa..0c9bfd29d 100644 --- a/module_text_llm/module_text_llm/__main__.py +++ b/module_text_llm/module_text_llm/__main__.py @@ -1,10 +1,13 @@ +import json import os from typing import List, Any import nltk import tiktoken +from langsmith import Client as LangsmithClient +from langsmith.schemas import Run -from athena import app, submission_selector, submissions_consumer, feedback_consumer, feedback_provider, evaluation_provider +from athena import app, get_experiment_environment, submission_selector, submissions_consumer, feedback_consumer, feedback_provider, evaluation_provider from athena.text import Exercise, Submission, Feedback from athena.logger import logger @@ -49,6 +52,48 @@ async def evaluate_feedback( if bool(os.environ.get("LLM_ENABLE_LLM_AS_A_JUDGE")): evaluation["llm_as_a_judge"] = await generate_evaluation(exercise, submission, true_feedbacks, predicted_feedbacks) + # Gather LLM token usage and response times + if bool(os.environ.get("LANGCHAIN_TRACING_V2")): + experiment = get_experiment_environment() + client = LangsmithClient() + project_name = os.environ.get("LANGCHAIN_PROJECT") + runs = list(client.list_runs( + project_name=project_name, + filter=f'and(has(tags, "run-{experiment.run_id}"), has(tags, "submission-{submission.id}"))' + )) + logger.info("evaluate_feedback: Found %d runs for submission %d of exercise %d.", len(runs), submission.id, exercise.id) + + def get_statistics(runs: List[Run]): + return { + "response_time": sum((run.end_time - run.start_time).total_seconds() for run in 
runs if run.end_time is not None), + "prompt_tokens": sum(run.prompt_tokens for run in runs if run.prompt_tokens is not None), + "completion_tokens": sum(run.completion_tokens for run in runs if run.completion_tokens is not None), + "total_tokens": sum(run.total_tokens for run in runs if run.total_tokens is not None), + } + + suggestion_runs = [] + evaluation_runs = [] + for run in runs: + if "evaluation" in (run.tags or []): + evaluation_runs.append(run) + else: + suggestion_runs.append(run) + + if suggestion_runs or evaluation_runs: + evaluation["runs"] = {} + if suggestion_runs: + evaluation["runs"]["suggestions"] = { + "count": len(suggestion_runs), + "statistics": get_statistics(suggestion_runs), + "runs": [json.loads(run.json()) for run in suggestion_runs] + } + if evaluation_runs: + evaluation["runs"]["evaluation"] = { + "count": len(evaluation_runs), + "statistics": get_statistics(evaluation_runs), + "runs": [json.loads(run.json()) for run in evaluation_runs] + } + return evaluation if __name__ == "__main__": diff --git a/module_text_llm/poetry.lock b/module_text_llm/poetry.lock index 96c269625..28e23ad99 100644 --- a/module_text_llm/poetry.lock +++ b/module_text_llm/poetry.lock @@ -1,9 +1,10 @@ -# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.4.2 and should not be changed by hand. 
[[package]] name = "aiohttp" version = "3.8.6" description = "Async http client/server framework (asyncio)" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -112,6 +113,7 @@ speedups = ["Brotli", "aiodns", "cchardet"] name = "aiosignal" version = "1.3.1" description = "aiosignal: a list of registered asynchronous callbacks" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -126,6 +128,7 @@ frozenlist = ">=1.1.0" name = "anyio" version = "3.7.1" description = "High level compatibility layer for multiple asynchronous event loop implementations" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -146,6 +149,7 @@ trio = ["trio (<0.22)"] name = "astroid" version = "2.15.8" description = "An abstract syntax tree for Python with inference support." +category = "dev" optional = false python-versions = ">=3.7.2" files = [ @@ -161,6 +165,7 @@ wrapt = {version = ">=1.14,<2", markers = "python_version >= \"3.11\""} name = "async-timeout" version = "4.0.3" description = "Timeout context manager for asyncio programs" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -172,6 +177,7 @@ files = [ name = "athena" version = "1.0.0" description = "This is a helper module for easier development of Athena modules. It provides communication functionality with the Assessment Module manager, as well as helper functions for storage." +category = "main" optional = false python-versions = "3.11.*" files = [] @@ -193,6 +199,7 @@ url = "../athena" name = "attrs" version = "23.1.0" description = "Classes Without Boilerplate" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -211,6 +218,7 @@ tests-no-zope = ["cloudpickle", "hypothesis", "mypy (>=1.1.1)", "pympler", "pyte name = "certifi" version = "2023.7.22" description = "Python package for providing Mozilla's CA Bundle." 
+category = "main" optional = false python-versions = ">=3.6" files = [ @@ -222,6 +230,7 @@ files = [ name = "charset-normalizer" version = "3.3.1" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." +category = "main" optional = false python-versions = ">=3.7.0" files = [ @@ -321,6 +330,7 @@ files = [ name = "click" version = "8.1.7" description = "Composable command line interface toolkit" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -335,6 +345,7 @@ colorama = {version = "*", markers = "platform_system == \"Windows\""} name = "colorama" version = "0.4.6" description = "Cross-platform colored terminal text." +category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" files = [ @@ -346,6 +357,7 @@ files = [ name = "dataclasses-json" version = "0.6.1" description = "Easily serialize dataclasses to and from JSON." +category = "main" optional = false python-versions = ">=3.7,<4.0" files = [ @@ -361,6 +373,7 @@ typing-inspect = ">=0.4.0,<1" name = "dill" version = "0.3.7" description = "serialize all of Python" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -375,6 +388,7 @@ graph = ["objgraph (>=1.7.2)"] name = "dodgy" version = "0.2.1" description = "Dodgy: Searches for dodgy looking lines in Python code" +category = "dev" optional = false python-versions = "*" files = [ @@ -386,6 +400,7 @@ files = [ name = "fastapi" version = "0.96.1" description = "FastAPI framework, high performance, easy to learn, fast to code, ready for production" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -407,6 +422,7 @@ test = ["anyio[trio] (>=3.2.1,<4.0.0)", "black (==23.1.0)", "coverage[toml] (>=6 name = "flake8" version = "2.3.0" description = "the modular source code checker: pep8, pyflakes and co" +category = "dev" optional = false python-versions = "*" files = [ @@ -423,6 
+439,7 @@ pyflakes = ">=0.8.1" name = "flake8-polyfill" version = "1.0.2" description = "Polyfill package for Flake8 plugins" +category = "dev" optional = false python-versions = "*" files = [ @@ -437,6 +454,7 @@ flake8 = "*" name = "frozenlist" version = "1.4.0" description = "A list-like structure which implements collections.abc.MutableSequence" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -507,6 +525,7 @@ files = [ name = "gitdb" version = "4.0.11" description = "Git Object Database" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -521,6 +540,7 @@ smmap = ">=3.0.1,<6" name = "gitpython" version = "3.1.40" description = "GitPython is a Python library used to interact with Git repositories" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -538,6 +558,7 @@ test = ["black", "coverage[toml]", "ddt (>=1.1.1,!=1.4.3)", "mock", "mypy", "pre name = "greenlet" version = "3.0.1" description = "Lightweight in-process concurrent programming" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -608,6 +629,7 @@ test = ["objgraph", "psutil"] name = "h11" version = "0.14.0" description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -619,6 +641,7 @@ files = [ name = "httpcore" version = "0.17.3" description = "A minimal low-level HTTP client." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -630,16 +653,17 @@ files = [ anyio = ">=3.0,<5.0" certifi = "*" h11 = ">=0.13,<0.15" -sniffio = "==1.*" +sniffio = ">=1.0.0,<2.0.0" [package.extras] http2 = ["h2 (>=3,<5)"] -socks = ["socksio (==1.*)"] +socks = ["socksio (>=1.0.0,<2.0.0)"] [[package]] name = "httpx" version = "0.24.1" description = "The next generation HTTP client." 
+category = "main" optional = false python-versions = ">=3.7" files = [ @@ -655,14 +679,15 @@ sniffio = "*" [package.extras] brotli = ["brotli", "brotlicffi"] -cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"] +cli = ["click (>=8.0.0,<9.0.0)", "pygments (>=2.0.0,<3.0.0)", "rich (>=10,<14)"] http2 = ["h2 (>=3,<5)"] -socks = ["socksio (==1.*)"] +socks = ["socksio (>=1.0.0,<2.0.0)"] [[package]] name = "idna" version = "3.4" description = "Internationalized Domain Names in Applications (IDNA)" +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -674,6 +699,7 @@ files = [ name = "isort" version = "5.12.0" description = "A Python utility / library to sort Python imports." +category = "dev" optional = false python-versions = ">=3.8.0" files = [ @@ -691,6 +717,7 @@ requirements-deprecated-finder = ["pip-api", "pipreqs"] name = "joblib" version = "1.3.2" description = "Lightweight pipelining with Python functions" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -702,6 +729,7 @@ files = [ name = "jsonpatch" version = "1.33" description = "Apply JSON-Patches (RFC 6902)" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*" files = [ @@ -716,6 +744,7 @@ jsonpointer = ">=1.9" name = "jsonpointer" version = "2.4" description = "Identify specific nodes in a JSON document (RFC 6901)" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*" files = [ @@ -727,6 +756,7 @@ files = [ name = "langchain" version = "0.0.325" description = "Building applications with LLMs through composability" +category = "main" optional = false python-versions = ">=3.8.1,<4.0" files = [ @@ -764,13 +794,14 @@ text-helpers = ["chardet (>=5.1.0,<6.0.0)"] [[package]] name = "langsmith" -version = "0.0.52" +version = "0.0.60" description = "Client library to connect to the LangSmith LLM Tracing and Evaluation 
Platform." +category = "main" optional = false python-versions = ">=3.8.1,<4.0" files = [ - {file = "langsmith-0.0.52-py3-none-any.whl", hash = "sha256:d02a0ade5a53b36143084e57003ed38ccbdf5fc15a5a0eb14f8989ceaee0b807"}, - {file = "langsmith-0.0.52.tar.gz", hash = "sha256:1dc29082d257deea1859cb22c53d9481ca5c4a37f3af40c0f9d300fb8adc91db"}, + {file = "langsmith-0.0.60-py3-none-any.whl", hash = "sha256:94f9ef9898fa5fb5afed72538bb3ccca9a92a841b37654d699c732a76c623379"}, + {file = "langsmith-0.0.60.tar.gz", hash = "sha256:f63513398d8d4530e3aa552926924c8443ac9d21c3812f303fa20fa2c44a9a42"}, ] [package.dependencies] @@ -781,6 +812,7 @@ requests = ">=2,<3" name = "lazy-object-proxy" version = "1.9.0" description = "A fast and thorough lazy object proxy." +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -826,6 +858,7 @@ files = [ name = "marshmallow" version = "3.20.1" description = "A lightweight library for converting complex datatypes to and from native Python datatypes." +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -846,6 +879,7 @@ tests = ["pytest", "pytz", "simplejson"] name = "mccabe" version = "0.7.0" description = "McCabe checker, plugin for flake8" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -857,6 +891,7 @@ files = [ name = "multidict" version = "6.0.4" description = "multidict implementation" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -940,6 +975,7 @@ files = [ name = "mypy" version = "1.6.1" description = "Optional static typing for Python" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -985,6 +1021,7 @@ reports = ["lxml"] name = "mypy-extensions" version = "1.0.0" description = "Type system extensions for programs checked with the mypy type checker." 
+category = "main" optional = false python-versions = ">=3.5" files = [ @@ -996,6 +1033,7 @@ files = [ name = "nltk" version = "3.8.1" description = "Natural Language Toolkit" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1021,6 +1059,7 @@ twitter = ["twython"] name = "numpy" version = "1.26.1" description = "Fundamental package for array computing in Python" +category = "main" optional = false python-versions = "<3.13,>=3.9" files = [ @@ -1062,6 +1101,7 @@ files = [ name = "openai" version = "0.27.10" description = "Python client library for the OpenAI API" +category = "main" optional = false python-versions = ">=3.7.1" files = [ @@ -1076,7 +1116,7 @@ tqdm = "*" [package.extras] datalib = ["numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"] -dev = ["black (>=21.6b0,<22.0)", "pytest (==6.*)", "pytest-asyncio", "pytest-mock"] +dev = ["black (>=21.6b0,<22.0)", "pytest (>=6.0.0,<7.0.0)", "pytest-asyncio", "pytest-mock"] embeddings = ["matplotlib", "numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)", "plotly", "scikit-learn (>=1.0.2)", "scipy", "tenacity (>=8.0.1)"] wandb = ["numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)", "wandb"] @@ -1084,6 +1124,7 @@ wandb = ["numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1 name = "packaging" version = "23.2" description = "Core utilities for Python packages" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1095,6 +1136,7 @@ files = [ name = "pep8" version = "1.7.1" description = "Python style guide checker" +category = "dev" optional = false python-versions = "*" files = [ @@ -1106,6 +1148,7 @@ files = [ name = "pep8-naming" version = "0.10.0" description = "Check PEP-8 naming conventions, plugin for flake8" +category = "dev" optional = false python-versions = "*" files = [ @@ -1120,6 +1163,7 @@ flake8-polyfill = ">=1.0.2,<2" name = "platformdirs" version = "3.11.0" 
description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1135,6 +1179,7 @@ test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4)", "pytest-co name = "prospector" version = "1.10.3" description = "Prospector is a tool to analyse Python code by aggregating the result of other tools." +category = "dev" optional = false python-versions = ">=3.7.2,<4.0" files = [ @@ -1174,6 +1219,7 @@ with-vulture = ["vulture (>=1.5)"] name = "psycopg2" version = "2.9.9" description = "psycopg2 - Python-PostgreSQL Database Adapter" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1181,8 +1227,6 @@ files = [ {file = "psycopg2-2.9.9-cp310-cp310-win_amd64.whl", hash = "sha256:426f9f29bde126913a20a96ff8ce7d73fd8a216cfb323b1f04da402d452853c3"}, {file = "psycopg2-2.9.9-cp311-cp311-win32.whl", hash = "sha256:ade01303ccf7ae12c356a5e10911c9e1c51136003a9a1d92f7aa9d010fb98372"}, {file = "psycopg2-2.9.9-cp311-cp311-win_amd64.whl", hash = "sha256:121081ea2e76729acfb0673ff33755e8703d45e926e416cb59bae3a86c6a4981"}, - {file = "psycopg2-2.9.9-cp312-cp312-win32.whl", hash = "sha256:d735786acc7dd25815e89cc4ad529a43af779db2e25aa7c626de864127e5a024"}, - {file = "psycopg2-2.9.9-cp312-cp312-win_amd64.whl", hash = "sha256:a7653d00b732afb6fc597e29c50ad28087dcb4fbfb28e86092277a559ae4e693"}, {file = "psycopg2-2.9.9-cp37-cp37m-win32.whl", hash = "sha256:5e0d98cade4f0e0304d7d6f25bbfbc5bd186e07b38eac65379309c4ca3193efa"}, {file = "psycopg2-2.9.9-cp37-cp37m-win_amd64.whl", hash = "sha256:7e2dacf8b009a1c1e843b5213a87f7c544b2b042476ed7755be813eaf4e8347a"}, {file = "psycopg2-2.9.9-cp38-cp38-win32.whl", hash = "sha256:ff432630e510709564c01dafdbe996cb552e0b9f3f065eb89bdce5bd31fabf4c"}, @@ -1196,6 +1240,7 @@ files = [ name = "pycodestyle" version = "2.11.1" description = "Python style guide checker" +category = "dev" optional = false python-versions 
= ">=3.8" files = [ @@ -1207,6 +1252,7 @@ files = [ name = "pydantic" version = "1.10.13" description = "Data validation and settings management using python type hints" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1259,6 +1305,7 @@ email = ["email-validator (>=1.0.3)"] name = "pydocstyle" version = "6.3.0" description = "Python docstring style checker" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -1276,6 +1323,7 @@ toml = ["tomli (>=1.2.3)"] name = "pyflakes" version = "2.5.0" description = "passive checker of Python programs" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -1287,6 +1335,7 @@ files = [ name = "pylint" version = "2.17.7" description = "python code static checker" +category = "dev" optional = false python-versions = ">=3.7.2" files = [ @@ -1311,6 +1360,7 @@ testutils = ["gitpython (>3)"] name = "pylint-celery" version = "0.3" description = "pylint-celery is a Pylint plugin to aid Pylint in recognising and understandingerrors caused when using the Celery library" +category = "dev" optional = false python-versions = "*" files = [ @@ -1326,6 +1376,7 @@ pylint-plugin-utils = ">=0.2.1" name = "pylint-django" version = "2.5.3" description = "A Pylint plugin to help Pylint understand the Django web framework" +category = "dev" optional = false python-versions = "*" files = [ @@ -1345,6 +1396,7 @@ with-django = ["Django"] name = "pylint-flask" version = "0.6" description = "pylint-flask is a Pylint plugin to aid Pylint in recognizing and understanding errors caused when using Flask" +category = "dev" optional = false python-versions = "*" files = [ @@ -1358,6 +1410,7 @@ pylint-plugin-utils = ">=0.2.1" name = "pylint-plugin-utils" version = "0.7" description = "Utilities and helpers for writing Pylint plugins" +category = "dev" optional = false python-versions = ">=3.6.2" files = [ @@ -1372,6 +1425,7 @@ pylint = ">=1.7" name = "python-dotenv" version = "1.0.0" description = 
"Read key-value pairs from a .env file and set them as environment variables" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -1386,6 +1440,7 @@ cli = ["click (>=5.0)"] name = "pyyaml" version = "6.0.1" description = "YAML parser and emitter for Python" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1394,7 +1449,6 @@ files = [ {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, - {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, @@ -1402,15 +1456,8 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, - {file = 
"PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, - {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, - {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, - {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, - {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, - {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, @@ -1427,7 +1474,6 @@ files = [ {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, {file = 
"PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, - {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, @@ -1435,7 +1481,6 @@ files = [ {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, - {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, @@ -1445,6 +1490,7 @@ files = [ name = "regex" version = "2023.10.3" description = "Alternative 
regular expression module, to replace re." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1542,6 +1588,7 @@ files = [ name = "replicate" version = "0.11.0" description = "Python client for Replicate" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -1561,6 +1608,7 @@ dev = ["black", "mypy", "pytest", "responses", "ruff"] name = "requests" version = "2.31.0" description = "Python HTTP for Humans." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1582,6 +1630,7 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] name = "requirements-detector" version = "1.2.2" description = "Python tool to find and list requirements of a Python project" +category = "dev" optional = false python-versions = ">=3.7,<4.0" files = [ @@ -1599,6 +1648,7 @@ toml = ">=0.10.2,<0.11.0" name = "semver" version = "3.0.2" description = "Python helper for Semantic Versioning (https://semver.org)" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1610,6 +1660,7 @@ files = [ name = "setoptconf-tmp" version = "0.3.1" description = "A module for retrieving program settings from various sources in a consistant method." +category = "dev" optional = false python-versions = "*" files = [ @@ -1624,6 +1675,7 @@ yaml = ["pyyaml"] name = "smmap" version = "5.0.1" description = "A pure Python implementation of a sliding window memory map manager" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1635,6 +1687,7 @@ files = [ name = "sniffio" version = "1.3.0" description = "Sniff out which async library your code is running under" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1646,6 +1699,7 @@ files = [ name = "snowballstemmer" version = "2.2.0" description = "This package provides 29 stemmers for 28 languages generated from Snowball algorithms." 
+category = "dev" optional = false python-versions = "*" files = [ @@ -1657,6 +1711,7 @@ files = [ name = "sqlalchemy" version = "2.0.22" description = "Database Abstraction Library" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1744,6 +1799,7 @@ sqlcipher = ["sqlcipher3-binary"] name = "starlette" version = "0.27.0" description = "The little ASGI library that shines." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1761,6 +1817,7 @@ full = ["httpx (>=0.22.0)", "itsdangerous", "jinja2", "python-multipart", "pyyam name = "tenacity" version = "8.2.3" description = "Retry code until it succeeds" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1775,6 +1832,7 @@ doc = ["reno", "sphinx", "tornado (>=4.5)"] name = "tiktoken" version = "0.4.0" description = "tiktoken is a fast BPE tokeniser for use with OpenAI's models" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -1820,6 +1878,7 @@ blobfile = ["blobfile (>=2)"] name = "toml" version = "0.10.2" description = "Python Library for Tom's Obvious, Minimal Language" +category = "dev" optional = false python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -1831,6 +1890,7 @@ files = [ name = "tomlkit" version = "0.12.1" description = "Style preserving TOML library" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1842,6 +1902,7 @@ files = [ name = "tqdm" version = "4.66.1" description = "Fast, Extensible Progress Meter" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1862,6 +1923,7 @@ telegram = ["requests"] name = "typing-extensions" version = "4.8.0" description = "Backported and Experimental Type Hints for Python 3.8+" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -1873,6 +1935,7 @@ files = [ name = "typing-inspect" version = "0.9.0" description = "Runtime inspection utilities for typing module." 
+category = "main" optional = false python-versions = "*" files = [ @@ -1888,6 +1951,7 @@ typing-extensions = ">=3.7.4" name = "urllib3" version = "2.0.7" description = "HTTP library with thread-safe connection pooling, file post, and more." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1905,6 +1969,7 @@ zstd = ["zstandard (>=0.18.0)"] name = "uvicorn" version = "0.23.2" description = "The lightning-fast ASGI server." +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -1923,6 +1988,7 @@ standard = ["colorama (>=0.4)", "httptools (>=0.5.0)", "python-dotenv (>=0.13)", name = "wrapt" version = "1.15.0" description = "Module for decorators, wrappers and monkey patching." +category = "dev" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" files = [ @@ -2007,6 +2073,7 @@ files = [ name = "yarl" version = "1.9.2" description = "Yet another URL library" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2093,4 +2160,4 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" python-versions = "3.11.*" -content-hash = "680a5df064fcdd1cac69f7130fe0cc41571497de32b7797be0f88a0aa4e7d098" +content-hash = "844e1ad75ca9b73100279326d787a4621e504c69482e4348051b214e941fd49d" diff --git a/module_text_llm/pyproject.toml b/module_text_llm/pyproject.toml index e3d7ba38f..9610767ea 100644 --- a/module_text_llm/pyproject.toml +++ b/module_text_llm/pyproject.toml @@ -15,6 +15,7 @@ nltk = "^3.8.1" gitpython = "^3.1.37" replicate = "^0.11.0" tiktoken = "^0.4.0" +langsmith = "^0.0.60" [tool.poetry.scripts] module = "athena:run_module" diff --git a/playground/src/pages/api/athena_request.ts b/playground/src/pages/api/athena_request.ts index de94908fc..b3971db81 100644 --- a/playground/src/pages/api/athena_request.ts +++ b/playground/src/pages/api/athena_request.ts @@ -17,7 +17,20 @@ export default async function handler( const url = req.query.url; let response; const secret = 
req.headers["authorization"] as string; - const moduleConfig = req.headers["x-module-config"] as string | undefined; + const forwardHeaders = [ + "X-Module-Config", + "X-Experiment-ID", + "X-Module-Configuration-ID", + "X-Run-ID", + ] + + const headers = Object.fromEntries( + forwardHeaders.flatMap((header) => { + const value = req.headers[header.toLowerCase()] as string | undefined; + return value ? [[header, value]] : []; + }) + ) + if (!secret) { console.warn("No secret provided"); } @@ -27,7 +40,7 @@ export default async function handler( "Content-Type": "application/json", Accept: "application/json", "Authorization": secret, - ...(moduleConfig && { "X-Module-Config": moduleConfig }), + ...headers, }, method: req.method, ...(req.method === "POST" ? { body: JSON.stringify(req.body) } : {}), From e845e1f76e6f0d03714c194f94c284be4a1d5e3c Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Tue, 7 Nov 2023 20:04:14 +0100 Subject: [PATCH 38/54] inline statistics --- module_text_llm/module_text_llm/__main__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/module_text_llm/module_text_llm/__main__.py b/module_text_llm/module_text_llm/__main__.py index 0c9bfd29d..9b81dfedb 100644 --- a/module_text_llm/module_text_llm/__main__.py +++ b/module_text_llm/module_text_llm/__main__.py @@ -84,13 +84,13 @@ def get_statistics(runs: List[Run]): if suggestion_runs: evaluation["runs"]["suggestions"] = { "count": len(suggestion_runs), - "statistics": get_statistics(suggestion_runs), + **get_statistics(suggestion_runs), "runs": [json.loads(run.json()) for run in suggestion_runs] } if evaluation_runs: evaluation["runs"]["evaluation"] = { "count": len(evaluation_runs), - "statistics": get_statistics(evaluation_runs), + **get_statistics(evaluation_runs), "runs": [json.loads(run.json()) for run in evaluation_runs] } From d17e48609dc3f2d3137d27d23ba37ea495db9f52 Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Tue, 7 Nov 2023 20:46:36 +0100 Subject: [PATCH 39/54] add sgi evaluation --- module_text_llm/module_text_llm/__main__.py | 56 +++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/module_text_llm/module_text_llm/__main__.py b/module_text_llm/module_text_llm/__main__.py index 9b81dfedb..75897b438 100644 --- a/module_text_llm/module_text_llm/__main__.py +++ b/module_text_llm/module_text_llm/__main__.py @@ -94,6 +94,62 @@ def get_statistics(runs: List[Run]): "runs": [json.loads(run.json()) for run in evaluation_runs] } + actual_feedback_count = len(true_feedbacks) + actual_feedback_with_grading_instructions = [] + suggestions_count = len(predicted_feedbacks) + suggestions_with_grading_instructions = [] + + # Init usage counts for SGIs + actual_sgi_usage = { + sgi.id: 0 for criterion in exercise.grading_criteria or [] for sgi in criterion.structured_grading_instructions + } + suggested_sgi_usage = { + sgi.id: 0 for criterion in exercise.grading_criteria or [] for sgi in criterion.structured_grading_instructions + } + + # Count SGIs in actual feedbacks + for feedback in true_feedbacks: + if feedback.structured_grading_instruction_id: + actual_feedback_with_grading_instructions.append(feedback) + actual_sgi_usage[feedback.structured_grading_instruction_id] += 1 + + # Count SGIs in suggested feedbacks + for feedback in predicted_feedbacks: + if feedback.structured_grading_instruction_id: + suggestions_with_grading_instructions.append(feedback) + suggested_sgi_usage[feedback.structured_grading_instruction_id] += 1 + + actual_feedback_with_grading_instructions_count = len(actual_feedback_with_grading_instructions) + suggestions_with_grading_instructions_count = len(suggestions_with_grading_instructions) + + # Match SGIs + matched_feedback = 0 + unmatched_feedback = actual_feedback_count - actual_feedback_with_grading_instructions_count + unmatched_suggestions = suggestions_count - suggestions_with_grading_instructions_count + + for feedback in 
actual_feedback_with_grading_instructions: + for index, suggestion in enumerate(suggestions_with_grading_instructions): + if feedback.structured_grading_instruction_id == suggestion.structured_grading_instruction_id: + matched_feedback += 1 + del suggestions_with_grading_instructions[index] + break + else: + unmatched_feedback += 1 + + unmatched_suggestions += len(suggestions_with_grading_instructions) + + evaluation["feedback_statistics"] = { + "actual_feedback_count": actual_feedback_count, + "suggestions_count": suggestions_count, + "actual_feedback_with_grading_instructions_count": actual_feedback_with_grading_instructions_count, + "suggestions_with_grading_instructions_count": suggestions_with_grading_instructions_count, + "actual_sgi_usage": actual_sgi_usage, + "suggested_sgi_usage": suggested_sgi_usage, + "matched_feedback": matched_feedback, + "unmatched_feedback": unmatched_feedback, + "unmatched_suggestions": unmatched_suggestions, + } + return evaluation if __name__ == "__main__": From 52664610226f0c4faa1f0aa5fce92135c403841d Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Tue, 7 Nov 2023 21:04:54 +0100 Subject: [PATCH 40/54] refactor --- module_text_llm/module_text_llm/__main__.py | 117 +++--------------- module_text_llm/module_text_llm/evaluation.py | 117 ++++++++++++++++++ 2 files changed, 131 insertions(+), 103 deletions(-) create mode 100644 module_text_llm/module_text_llm/evaluation.py diff --git a/module_text_llm/module_text_llm/__main__.py b/module_text_llm/module_text_llm/__main__.py index 75897b438..430f7b7fb 100644 --- a/module_text_llm/module_text_llm/__main__.py +++ b/module_text_llm/module_text_llm/__main__.py @@ -4,14 +4,13 @@ import nltk import tiktoken -from langsmith import Client as LangsmithClient -from langsmith.schemas import Run -from athena import app, get_experiment_environment, submission_selector, submissions_consumer, feedback_consumer, feedback_provider, evaluation_provider +from athena import app, submission_selector, submissions_consumer, feedback_consumer, feedback_provider, evaluation_provider from athena.text import Exercise, Submission, Feedback from athena.logger import logger from module_text_llm.config import Configuration +from module_text_llm.evaluation import get_feedback_statistics, get_llm_statistics from module_text_llm.generate_suggestions import generate_suggestions from module_text_llm.generate_evaluation import generate_evaluation @@ -44,112 +43,24 @@ async def evaluate_feedback( true_feedbacks: List[Feedback], predicted_feedbacks: List[Feedback], ) -> Any: logger.info( - "evaluate_feedback: Evaluation for submission %d of exercise %d was requested with %d true and %d predicted feedbacks", - submission.id, exercise.id, len(true_feedbacks), len(predicted_feedbacks) + "evaluate_feedback: Evaluation for submission %d of exercise %d was requested with %d true and %d predicted feedbacks", + submission.id, exercise.id, len( + true_feedbacks), len(predicted_feedbacks) ) - + evaluation = {} + + # 1. 
LLM as a judge if bool(os.environ.get("LLM_ENABLE_LLM_AS_A_JUDGE")): evaluation["llm_as_a_judge"] = await generate_evaluation(exercise, submission, true_feedbacks, predicted_feedbacks) - # Gather LLM token usage and response times + # 2. LangSmith runs, token usage, and respose times if bool(os.environ.get("LANGCHAIN_TRACING_V2")): - experiment = get_experiment_environment() - client = LangsmithClient() - project_name = os.environ.get("LANGCHAIN_PROJECT") - runs = list(client.list_runs( - project_name=project_name, - filter=f'and(has(tags, "run-{experiment.run_id}"), has(tags, "submission-{submission.id}"))' - )) - logger.info("evaluate_feedback: Found %d runs for submission %d of exercise %d.", len(runs), submission.id, exercise.id) - - def get_statistics(runs: List[Run]): - return { - "response_time": sum((run.end_time - run.start_time).total_seconds() for run in runs if run.end_time is not None), - "prompt_tokens": sum(run.prompt_tokens for run in runs if run.prompt_tokens is not None), - "completion_tokens": sum(run.completion_tokens for run in runs if run.completion_tokens is not None), - "total_tokens": sum(run.total_tokens for run in runs if run.total_tokens is not None), - } - - suggestion_runs = [] - evaluation_runs = [] - for run in runs: - if "evaluation" in (run.tags or []): - evaluation_runs.append(run) - else: - suggestion_runs.append(run) - - if suggestion_runs or evaluation_runs: - evaluation["runs"] = {} - if suggestion_runs: - evaluation["runs"]["suggestions"] = { - "count": len(suggestion_runs), - **get_statistics(suggestion_runs), - "runs": [json.loads(run.json()) for run in suggestion_runs] - } - if evaluation_runs: - evaluation["runs"]["evaluation"] = { - "count": len(evaluation_runs), - **get_statistics(evaluation_runs), - "runs": [json.loads(run.json()) for run in evaluation_runs] - } - - actual_feedback_count = len(true_feedbacks) - actual_feedback_with_grading_instructions = [] - suggestions_count = len(predicted_feedbacks) - 
suggestions_with_grading_instructions = [] - - # Init usage counts for SGIs - actual_sgi_usage = { - sgi.id: 0 for criterion in exercise.grading_criteria or [] for sgi in criterion.structured_grading_instructions - } - suggested_sgi_usage = { - sgi.id: 0 for criterion in exercise.grading_criteria or [] for sgi in criterion.structured_grading_instructions - } - - # Count SGIs in actual feedbacks - for feedback in true_feedbacks: - if feedback.structured_grading_instruction_id: - actual_feedback_with_grading_instructions.append(feedback) - actual_sgi_usage[feedback.structured_grading_instruction_id] += 1 - - # Count SGIs in suggested feedbacks - for feedback in predicted_feedbacks: - if feedback.structured_grading_instruction_id: - suggestions_with_grading_instructions.append(feedback) - suggested_sgi_usage[feedback.structured_grading_instruction_id] += 1 - - actual_feedback_with_grading_instructions_count = len(actual_feedback_with_grading_instructions) - suggestions_with_grading_instructions_count = len(suggestions_with_grading_instructions) - - # Match SGIs - matched_feedback = 0 - unmatched_feedback = actual_feedback_count - actual_feedback_with_grading_instructions_count - unmatched_suggestions = suggestions_count - suggestions_with_grading_instructions_count - - for feedback in actual_feedback_with_grading_instructions: - for index, suggestion in enumerate(suggestions_with_grading_instructions): - if feedback.structured_grading_instruction_id == suggestion.structured_grading_instruction_id: - matched_feedback += 1 - del suggestions_with_grading_instructions[index] - break - else: - unmatched_feedback += 1 - - unmatched_suggestions += len(suggestions_with_grading_instructions) - - evaluation["feedback_statistics"] = { - "actual_feedback_count": actual_feedback_count, - "suggestions_count": suggestions_count, - "actual_feedback_with_grading_instructions_count": actual_feedback_with_grading_instructions_count, - "suggestions_with_grading_instructions_count": 
suggestions_with_grading_instructions_count, - "actual_sgi_usage": actual_sgi_usage, - "suggested_sgi_usage": suggested_sgi_usage, - "matched_feedback": matched_feedback, - "unmatched_feedback": unmatched_feedback, - "unmatched_suggestions": unmatched_suggestions, - } - + evaluation["llm_statistics"] = get_llm_statistics(submission) + + # 3. Feedback statistics + evaluation["feedback_statistics"] = get_feedback_statistics(exercise, submission, true_feedbacks, predicted_feedbacks) + return evaluation if __name__ == "__main__": diff --git a/module_text_llm/module_text_llm/evaluation.py b/module_text_llm/module_text_llm/evaluation.py new file mode 100644 index 000000000..2d6989892 --- /dev/null +++ b/module_text_llm/module_text_llm/evaluation.py @@ -0,0 +1,117 @@ +import json +import os +from typing import List + +from langsmith import Client as LangSmithClient +from langsmith.schemas import Run + +from athena import get_experiment_environment +from athena.text import Exercise, Submission, Feedback + + +def get_llm_statistics(submission: Submission): + experiment = get_experiment_environment() + client = LangSmithClient() + project_name = os.environ.get("LANGCHAIN_PROJECT") + runs = list(client.list_runs( + project_name=project_name, + filter=f'and(has(tags, "run-{experiment.run_id}"), has(tags, "submission-{submission.id}"))' + )) + + def get_statistics(runs: List[Run]): + return { + "response_time": sum((run.end_time - run.start_time).total_seconds() for run in runs if run.end_time is not None), + "prompt_tokens": sum(run.prompt_tokens for run in runs if run.prompt_tokens is not None), + "completion_tokens": sum(run.completion_tokens for run in runs if run.completion_tokens is not None), + "total_tokens": sum(run.total_tokens for run in runs if run.total_tokens is not None), + } + + suggestion_runs = [] + evaluation_runs = [] + for run in runs: + if "evaluation" in (run.tags or []): + evaluation_runs.append(run) + else: + suggestion_runs.append(run) + + 
llm_statistics = {} + if suggestion_runs or evaluation_runs: + if suggestion_runs: + llm_statistics["suggestions"] = { + "count": len(suggestion_runs), + **get_statistics(suggestion_runs), + "runs": [json.loads(run.json()) for run in suggestion_runs] + } + if evaluation_runs: + llm_statistics["evaluation"] = { + "count": len(evaluation_runs), + **get_statistics(evaluation_runs), + "runs": [json.loads(run.json()) for run in evaluation_runs] + } + + return llm_statistics + + +def get_feedback_statistics(exercise: Exercise, submission: Submission, + true_feedbacks: List[Feedback], predicted_feedbacks: List[Feedback]): + actual_feedback_count = len(true_feedbacks) + actual_feedback_with_grading_instructions = [] + suggestions_count = len(predicted_feedbacks) + suggestions_with_grading_instructions = [] + + # Init usage counts for SGIs + actual_sgi_usage = { + sgi.id: 0 for criterion in exercise.grading_criteria or [] for sgi in criterion.structured_grading_instructions + } + suggested_sgi_usage = { + sgi.id: 0 for criterion in exercise.grading_criteria or [] for sgi in criterion.structured_grading_instructions + } + + # Count SGIs in actual feedbacks + for feedback in true_feedbacks: + if feedback.structured_grading_instruction_id: + actual_feedback_with_grading_instructions.append(feedback) + actual_sgi_usage[feedback.structured_grading_instruction_id] += 1 + + # Count SGIs in suggested feedbacks + for feedback in predicted_feedbacks: + if feedback.structured_grading_instruction_id: + suggestions_with_grading_instructions.append(feedback) + suggested_sgi_usage[feedback.structured_grading_instruction_id] += 1 + + actual_feedback_with_grading_instructions_count = len( + actual_feedback_with_grading_instructions) + suggestions_with_grading_instructions_count = len( + suggestions_with_grading_instructions) + + # Match SGIs + matched_feedback = 0 + unmatched_feedback = actual_feedback_count - \ + actual_feedback_with_grading_instructions_count + unmatched_suggestions = 
suggestions_count - \ + suggestions_with_grading_instructions_count + + for feedback in actual_feedback_with_grading_instructions: + for index, suggestion in enumerate(suggestions_with_grading_instructions): + if feedback.structured_grading_instruction_id == suggestion.structured_grading_instruction_id: + matched_feedback += 1 + del suggestions_with_grading_instructions[index] + break + else: + unmatched_feedback += 1 + + unmatched_suggestions += len(suggestions_with_grading_instructions) + + feedback_statistics = { + "actual_feedback_count": actual_feedback_count, + "suggestions_count": suggestions_count, + "actual_feedback_with_grading_instructions_count": actual_feedback_with_grading_instructions_count, + "suggestions_with_grading_instructions_count": suggestions_with_grading_instructions_count, + "actual_sgi_usage": actual_sgi_usage, + "suggested_sgi_usage": suggested_sgi_usage, + "matched_feedback": matched_feedback, + "unmatched_feedback": unmatched_feedback, + "unmatched_suggestions": unmatched_suggestions, + } + + return feedback_statistics From 9f7494ad925aa405ad5b19f7c748c16df20f9e2b Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Tue, 7 Nov 2023 21:05:24 +0100 Subject: [PATCH 41/54] remove unused --- module_text_llm/module_text_llm/__main__.py | 2 +- module_text_llm/module_text_llm/evaluation.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/module_text_llm/module_text_llm/__main__.py b/module_text_llm/module_text_llm/__main__.py index 430f7b7fb..1fbeb4cf8 100644 --- a/module_text_llm/module_text_llm/__main__.py +++ b/module_text_llm/module_text_llm/__main__.py @@ -59,7 +59,7 @@ async def evaluate_feedback( evaluation["llm_statistics"] = get_llm_statistics(submission) # 3. 
Feedback statistics - evaluation["feedback_statistics"] = get_feedback_statistics(exercise, submission, true_feedbacks, predicted_feedbacks) + evaluation["feedback_statistics"] = get_feedback_statistics(exercise, true_feedbacks, predicted_feedbacks) return evaluation diff --git a/module_text_llm/module_text_llm/evaluation.py b/module_text_llm/module_text_llm/evaluation.py index 2d6989892..055dc2c94 100644 --- a/module_text_llm/module_text_llm/evaluation.py +++ b/module_text_llm/module_text_llm/evaluation.py @@ -52,8 +52,7 @@ def get_statistics(runs: List[Run]): return llm_statistics -def get_feedback_statistics(exercise: Exercise, submission: Submission, - true_feedbacks: List[Feedback], predicted_feedbacks: List[Feedback]): +def get_feedback_statistics(exercise: Exercise, true_feedbacks: List[Feedback], predicted_feedbacks: List[Feedback]): actual_feedback_count = len(true_feedbacks) actual_feedback_with_grading_instructions = [] suggestions_count = len(predicted_feedbacks) From d44178b7f3c97e190d627a9b602e2f3b1b5e982c Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Tue, 7 Nov 2023 21:36:17 +0100 Subject: [PATCH 42/54] update ini --- assessment_module_manager/modules.docker.ini | 7 ++++++- assessment_module_manager/modules.ini | 4 ++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/assessment_module_manager/modules.docker.ini b/assessment_module_manager/modules.docker.ini index db9d22854..340efa797 100644 --- a/assessment_module_manager/modules.docker.ini +++ b/assessment_module_manager/modules.docker.ini @@ -1,19 +1,24 @@ [module_example] url = http://module-example:5001 type = programming +supports_evaluation = false [module_programming_llm] url = http://module-programming-llm:5002 type = programming +supports_evaluation = false [module_text_llm] url = http://module-text-llm:5003 type = text +supports_evaluation = true [module_text_cofee] url = http://module-text-cofee:5004 type = text +supports_evaluation = false [module_programming_themisml] url = http://module-programming-themisml:5005 -type = programming \ No newline at end of file +type = programming +supports_evaluation = false \ No newline at end of file diff --git a/assessment_module_manager/modules.ini b/assessment_module_manager/modules.ini index 3402183f0..70745eb78 100644 --- a/assessment_module_manager/modules.ini +++ b/assessment_module_manager/modules.ini @@ -1,7 +1,7 @@ [module_example] url = http://localhost:5001 type = programming -supports_evaluation = true +supports_evaluation = false [module_programming_llm] url = http://localhost:5002 @@ -11,7 +11,7 @@ supports_evaluation = false [module_text_llm] url = http://localhost:5003 type = text -supports_evaluation = false +supports_evaluation = true [module_text_cofee] url = http://localhost:5004 From 42f8210f37a3254da2701b9848438faf91f752c9 Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Tue, 7 Nov 2023 21:40:51 +0100 Subject: [PATCH 43/54] only use selected modules --- playground/src/hooks/athena/request_evaluation.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/playground/src/hooks/athena/request_evaluation.ts b/playground/src/hooks/athena/request_evaluation.ts index 76227258f..620fb362b 100644 --- a/playground/src/hooks/athena/request_evaluation.ts +++ b/playground/src/hooks/athena/request_evaluation.ts @@ -48,7 +48,7 @@ export default function useRequestEvaluation( const modules = onlyUseContextModule ? [contextModule] : Object.values(health?.modules ?? {}).filter( - (module) => module.healthy && module.type === contextModule.type + (module) => module.healthy && module.type === contextModule.type && module.supportsEvaluation ); const results = await Promise.allSettled( From cc5693dbcaa0916276a8ccc3ff65d1e2df3b7824 Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Tue, 7 Nov 2023 22:49:19 +0100 Subject: [PATCH 44/54] remove skip --- module_text_llm/module_text_llm/__main__.py | 2 +- playground/src/hooks/batch_module_experiment.ts | 10 ---------- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/module_text_llm/module_text_llm/__main__.py b/module_text_llm/module_text_llm/__main__.py index 1fbeb4cf8..49d069635 100644 --- a/module_text_llm/module_text_llm/__main__.py +++ b/module_text_llm/module_text_llm/__main__.py @@ -51,7 +51,7 @@ async def evaluate_feedback( evaluation = {} # 1. LLM as a judge - if bool(os.environ.get("LLM_ENABLE_LLM_AS_A_JUDGE")): + if len(predicted_feedbacks) > 0 and bool(os.environ.get("LLM_ENABLE_LLM_AS_A_JUDGE")): evaluation["llm_as_a_judge"] = await generate_evaluation(exercise, submission, true_feedbacks, predicted_feedbacks) # 2. 
LangSmith runs, token usage, and respose times diff --git a/playground/src/hooks/batch_module_experiment.ts b/playground/src/hooks/batch_module_experiment.ts index e2610fbb8..778365ade 100644 --- a/playground/src/hooks/batch_module_experiment.ts +++ b/playground/src/hooks/batch_module_experiment.ts @@ -406,16 +406,6 @@ export default function useBatchModuleExperiment(experiment: Experiment, moduleC submission.id )?.suggestions ?? []; - if (predictedFeedbacks.length === 0) { - // Skip if there are no predicted feedbacks - setSubmissionsWithAutomaticEvaluation((prevState) => { - const newMap = new Map(prevState); - newMap.set(submission.id, {}); - return newMap; - }); - continue; - } - try { const responses = await requestEvaluation.mutateAsync({ exercise: experiment.exercise, From 9462ec56921eb2fe32a114b3c16c836b42119f17 Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Tue, 7 Nov 2023 23:39:18 +0100 Subject: [PATCH 45/54] add retries --- playground/src/hooks/athena/request_evaluation.ts | 1 + playground/src/hooks/athena/request_feedback_suggestions.ts | 1 + playground/src/hooks/athena/request_submission_selection.ts | 1 + playground/src/hooks/athena/send_feedbacks.ts | 1 + playground/src/hooks/athena/send_submissions.ts | 1 + 5 files changed, 5 insertions(+) diff --git a/playground/src/hooks/athena/request_evaluation.ts b/playground/src/hooks/athena/request_evaluation.ts index 620fb362b..d1d9082de 100644 --- a/playground/src/hooks/athena/request_evaluation.ts +++ b/playground/src/hooks/athena/request_evaluation.ts @@ -74,6 +74,7 @@ export default function useRequestEvaluation( } }); }, + retry: 3, ...options, }); } diff --git a/playground/src/hooks/athena/request_feedback_suggestions.ts b/playground/src/hooks/athena/request_feedback_suggestions.ts index 5a58470dd..0040fd763 100644 --- a/playground/src/hooks/athena/request_feedback_suggestions.ts +++ b/playground/src/hooks/athena/request_feedback_suggestions.ts @@ -40,6 +40,7 @@ export default function 
useRequestFeedbackSuggestions( } return response; }, + retry: 3, ...options, }); } diff --git a/playground/src/hooks/athena/request_submission_selection.ts b/playground/src/hooks/athena/request_submission_selection.ts index 4190592b9..fad4f8ec7 100644 --- a/playground/src/hooks/athena/request_submission_selection.ts +++ b/playground/src/hooks/athena/request_submission_selection.ts @@ -26,6 +26,7 @@ export default function useRequestSubmissionSelection( const submissionIds = submissions.map((submission) => submission.id) return await athenaFetcher("/select_submission", { exercise, submission_ids: submissionIds }); }, + retry: 3, ...options, }); } diff --git a/playground/src/hooks/athena/send_feedbacks.ts b/playground/src/hooks/athena/send_feedbacks.ts index c06278838..509fc056e 100644 --- a/playground/src/hooks/athena/send_feedbacks.ts +++ b/playground/src/hooks/athena/send_feedbacks.ts @@ -26,6 +26,7 @@ export function useSendFeedbacks( mutationFn: async ({ exercise, submission, feedbacks }) => { return await athenaFetcher("/feedbacks", { exercise, submission, feedbacks }); }, + retry: 3, ...options, }); } \ No newline at end of file diff --git a/playground/src/hooks/athena/send_submissions.ts b/playground/src/hooks/athena/send_submissions.ts index 3b91414ce..12003035a 100644 --- a/playground/src/hooks/athena/send_submissions.ts +++ b/playground/src/hooks/athena/send_submissions.ts @@ -25,6 +25,7 @@ export default function useSendSubmissions( mutationFn: async ({ exercise, submissions }) => { return await athenaFetcher("/submissions", { exercise, submissions }); }, + retry: 3, ...options, }); } From 292d588dc2cf495e3003daca040a4d120aa4864f Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Wed, 8 Nov 2023 11:01:26 +0100 Subject: [PATCH 46/54] enable example module evaluation support for now --- assessment_module_manager/modules.docker.ini | 2 +- assessment_module_manager/modules.ini | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/assessment_module_manager/modules.docker.ini b/assessment_module_manager/modules.docker.ini index 340efa797..4c5f74e9b 100644 --- a/assessment_module_manager/modules.docker.ini +++ b/assessment_module_manager/modules.docker.ini @@ -1,7 +1,7 @@ [module_example] url = http://module-example:5001 type = programming -supports_evaluation = false +supports_evaluation = true [module_programming_llm] url = http://module-programming-llm:5002 diff --git a/assessment_module_manager/modules.ini b/assessment_module_manager/modules.ini index 70745eb78..73fd4c91b 100644 --- a/assessment_module_manager/modules.ini +++ b/assessment_module_manager/modules.ini @@ -1,7 +1,7 @@ [module_example] url = http://localhost:5001 type = programming -supports_evaluation = false +supports_evaluation = true [module_programming_llm] url = http://localhost:5002 From 55482931a242d697c7969e552c8481cb2574f46b Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Wed, 8 Nov 2023 23:25:21 +0100 Subject: [PATCH 47/54] fix filter --- module_text_llm/module_text_llm/evaluation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/module_text_llm/module_text_llm/evaluation.py b/module_text_llm/module_text_llm/evaluation.py index 055dc2c94..fe5701937 100644 --- a/module_text_llm/module_text_llm/evaluation.py +++ b/module_text_llm/module_text_llm/evaluation.py @@ -15,7 +15,7 @@ def get_llm_statistics(submission: Submission): project_name = os.environ.get("LANGCHAIN_PROJECT") runs = list(client.list_runs( project_name=project_name, - filter=f'and(has(tags, "run-{experiment.run_id}"), has(tags, "submission-{submission.id}"))' + filter=f'has(tags, "submission-{submission.id}")' if experiment.run_id is None else f'and(has(tags, "run-{experiment.run_id}"), has(tags, "submission-{submission.id}"))' )) def get_statistics(runs: List[Run]): From 8de0ee696767c8bedf6e52d4c0ac13c812bb233f Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Thu, 9 Nov 2023 10:57:34 +0100 Subject: [PATCH 48/54] implement feedbacl --- athena/athena/endpoints.py | 4 +--- module_text_llm/module_text_llm/evaluation.py | 21 +++++++------------ 2 files changed, 9 insertions(+), 16 deletions(-) diff --git a/athena/athena/endpoints.py b/athena/athena/endpoints.py index 411edaf0a..72787eb31 100644 --- a/athena/athena/endpoints.py +++ b/athena/athena/endpoints.py @@ -407,9 +407,7 @@ async def wrapper( # Retrieve existing metadata for the exercise, submission and feedback exercise.meta.update(get_stored_exercise_meta(exercise) or {}) submission.meta.update(get_stored_submission_meta(submission) or {}) - for feedback in true_feedbacks: - feedback.meta.update(get_stored_feedback_meta(feedback) or {}) - for feedback in predicted_feedbacks: + for feedback in true_feedbacks + predicted_feedbacks: feedback.meta.update(get_stored_feedback_meta(feedback) or {}) # Call the actual provider diff --git 
a/module_text_llm/module_text_llm/evaluation.py b/module_text_llm/module_text_llm/evaluation.py index fe5701937..74d9e4db7 100644 --- a/module_text_llm/module_text_llm/evaluation.py +++ b/module_text_llm/module_text_llm/evaluation.py @@ -78,34 +78,29 @@ def get_feedback_statistics(exercise: Exercise, true_feedbacks: List[Feedback], suggestions_with_grading_instructions.append(feedback) suggested_sgi_usage[feedback.structured_grading_instruction_id] += 1 - actual_feedback_with_grading_instructions_count = len( - actual_feedback_with_grading_instructions) - suggestions_with_grading_instructions_count = len( - suggestions_with_grading_instructions) + unmatched_suggestions_with_grading_instructions = suggestions_with_grading_instructions.copy() # Match SGIs matched_feedback = 0 - unmatched_feedback = actual_feedback_count - \ - actual_feedback_with_grading_instructions_count - unmatched_suggestions = suggestions_count - \ - suggestions_with_grading_instructions_count + unmatched_feedback = actual_feedback_count - len(actual_feedback_with_grading_instructions) + unmatched_suggestions = suggestions_count - len(suggestions_with_grading_instructions) for feedback in actual_feedback_with_grading_instructions: - for index, suggestion in enumerate(suggestions_with_grading_instructions): + for index, suggestion in enumerate(unmatched_suggestions_with_grading_instructions): if feedback.structured_grading_instruction_id == suggestion.structured_grading_instruction_id: matched_feedback += 1 - del suggestions_with_grading_instructions[index] + del unmatched_suggestions_with_grading_instructions[index] break else: unmatched_feedback += 1 - unmatched_suggestions += len(suggestions_with_grading_instructions) + unmatched_suggestions += len(unmatched_suggestions_with_grading_instructions) feedback_statistics = { "actual_feedback_count": actual_feedback_count, "suggestions_count": suggestions_count, - "actual_feedback_with_grading_instructions_count": 
actual_feedback_with_grading_instructions_count, - "suggestions_with_grading_instructions_count": suggestions_with_grading_instructions_count, + "actual_feedback_with_grading_instructions_count": len(actual_feedback_with_grading_instructions), + "suggestions_with_grading_instructions_count":len(suggestions_with_grading_instructions), "actual_sgi_usage": actual_sgi_usage, "suggested_sgi_usage": suggested_sgi_usage, "matched_feedback": matched_feedback, From 0edea59c89771ea0884d7dc442cab79b4ec1ef06 Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Thu, 9 Nov 2023 11:01:10 +0100 Subject: [PATCH 49/54] update retry --- playground/src/hooks/athena/request_evaluation.ts | 1 - .../src/hooks/athena/request_feedback_suggestions.ts | 1 - .../src/hooks/athena/request_submission_selection.ts | 1 - playground/src/hooks/athena/send_feedbacks.ts | 1 - playground/src/hooks/athena/send_submissions.ts | 1 - playground/src/hooks/batch_module_experiment.ts | 12 +++++++----- 6 files changed, 7 insertions(+), 10 deletions(-) diff --git a/playground/src/hooks/athena/request_evaluation.ts b/playground/src/hooks/athena/request_evaluation.ts index d1d9082de..620fb362b 100644 --- a/playground/src/hooks/athena/request_evaluation.ts +++ b/playground/src/hooks/athena/request_evaluation.ts @@ -74,7 +74,6 @@ export default function useRequestEvaluation( } }); }, - retry: 3, ...options, }); } diff --git a/playground/src/hooks/athena/request_feedback_suggestions.ts b/playground/src/hooks/athena/request_feedback_suggestions.ts index 0040fd763..5a58470dd 100644 --- a/playground/src/hooks/athena/request_feedback_suggestions.ts +++ b/playground/src/hooks/athena/request_feedback_suggestions.ts @@ -40,7 +40,6 @@ export default function useRequestFeedbackSuggestions( } return response; }, - retry: 3, ...options, }); } diff --git a/playground/src/hooks/athena/request_submission_selection.ts b/playground/src/hooks/athena/request_submission_selection.ts index fad4f8ec7..4190592b9 100644 --- 
a/playground/src/hooks/athena/request_submission_selection.ts +++ b/playground/src/hooks/athena/request_submission_selection.ts @@ -26,7 +26,6 @@ export default function useRequestSubmissionSelection( const submissionIds = submissions.map((submission) => submission.id) return await athenaFetcher("/select_submission", { exercise, submission_ids: submissionIds }); }, - retry: 3, ...options, }); } diff --git a/playground/src/hooks/athena/send_feedbacks.ts b/playground/src/hooks/athena/send_feedbacks.ts index 509fc056e..c06278838 100644 --- a/playground/src/hooks/athena/send_feedbacks.ts +++ b/playground/src/hooks/athena/send_feedbacks.ts @@ -26,7 +26,6 @@ export function useSendFeedbacks( mutationFn: async ({ exercise, submission, feedbacks }) => { return await athenaFetcher("/feedbacks", { exercise, submission, feedbacks }); }, - retry: 3, ...options, }); } \ No newline at end of file diff --git a/playground/src/hooks/athena/send_submissions.ts b/playground/src/hooks/athena/send_submissions.ts index 12003035a..3b91414ce 100644 --- a/playground/src/hooks/athena/send_submissions.ts +++ b/playground/src/hooks/athena/send_submissions.ts @@ -25,7 +25,6 @@ export default function useSendSubmissions( mutationFn: async ({ exercise, submissions }) => { return await athenaFetcher("/submissions", { exercise, submissions }); }, - retry: 3, ...options, }); } diff --git a/playground/src/hooks/batch_module_experiment.ts b/playground/src/hooks/batch_module_experiment.ts index 778365ade..e999631cc 100644 --- a/playground/src/hooks/batch_module_experiment.ts +++ b/playground/src/hooks/batch_module_experiment.ts @@ -195,11 +195,13 @@ export default function useBatchModuleExperiment(experiment: Experiment, moduleC }) : undefined; // Module requests - const sendSubmissions = useSendSubmissions(); - const sendFeedbacks = useSendFeedbacks(); - const requestSubmissionSelection = useRequestSubmissionSelection(); - const requestFeedbackSuggestions = useRequestFeedbackSuggestions(); - const 
requestEvaluation = useRequestEvaluation(); + // By default useMutation does not retry, but we want to retry a few times to not get stuck + // If we still get stuck we can just `Export` -> `Cancel Experiment` -> `Import` again to continue for now + const sendSubmissions = useSendSubmissions({ retry: 3 }); + const sendFeedbacks = useSendFeedbacks({ retry: 3 }); + const requestSubmissionSelection = useRequestSubmissionSelection({ retry: 3 }); + const requestFeedbackSuggestions = useRequestFeedbackSuggestions({ retry: 3 }); + const requestEvaluation = useRequestEvaluation({ retry: 3 }); // 1. Send submissions to Athena const stepSendSubmissions = () => { From b4b529e6c0e72b7835db647ae7d907b03c9088a8 Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Thu, 9 Nov 2023 11:26:00 +0100 Subject: [PATCH 50/54] validate grading instruction id --- .../generate_suggestions_by_file.py | 9 ++++++++- module_text_llm/module_text_llm/generate_suggestions.py | 9 ++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py index 3d2238b31..60a43b6ad 100644 --- a/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py +++ b/module_programming_llm/module_programming_llm/generate_suggestions_by_file.py @@ -227,12 +227,19 @@ async def generate_suggestions_by_file(exercise: Exercise, submission: Submissio ] ) + grading_instruction_ids = set( + grading_instruction.id + for criterion in exercise.grading_criteria or [] + for grading_instruction in criterion.structured_grading_instructions + ) + feedbacks: List[Feedback] = [] for prompt_input, result in zip(prompt_inputs, results): file_path = prompt_input["file_path"] if result is None: continue for feedback in result.feedbacks: + grading_instruction_id = feedback.grading_instruction_id if feedback.grading_instruction_id in grading_instruction_ids 
else None feedbacks.append(Feedback( exercise_id=exercise.id, submission_id=submission.id, @@ -242,7 +249,7 @@ async def generate_suggestions_by_file(exercise: Exercise, submission: Submissio line_start=feedback.line_start, line_end=feedback.line_end, credits=feedback.credits, - structured_grading_instruction_id=feedback.grading_instruction_id, + structured_grading_instruction_id=grading_instruction_id, meta={} )) diff --git a/module_text_llm/module_text_llm/generate_suggestions.py b/module_text_llm/module_text_llm/generate_suggestions.py index a279d98c8..d43563ad6 100644 --- a/module_text_llm/module_text_llm/generate_suggestions.py +++ b/module_text_llm/module_text_llm/generate_suggestions.py @@ -94,9 +94,16 @@ async def generate_suggestions(exercise: Exercise, submission: Submission, confi if result is None: return [] + grading_instruction_ids = set( + grading_instruction.id + for criterion in exercise.grading_criteria or [] + for grading_instruction in criterion.structured_grading_instructions + ) + feedbacks = [] for feedback in result.feedbacks: index_start, index_end = get_index_range_from_line_range(feedback.line_start, feedback.line_end, submission.text) + grading_instruction_id = feedback.grading_instruction_id if feedback.grading_instruction_id in grading_instruction_ids else None feedbacks.append(Feedback( exercise_id=exercise.id, submission_id=submission.id, @@ -105,7 +112,7 @@ async def generate_suggestions(exercise: Exercise, submission: Submission, confi index_start=index_start, index_end=index_end, credits=feedback.credits, - structured_grading_instruction_id=feedback.grading_instruction_id, + structured_grading_instruction_id=grading_instruction_id, meta={} )) From 331f3538e2afcf88e0a4c6dd2a6086fc63385ce5 Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Thu, 9 Nov 2023 11:28:16 +0100 Subject: [PATCH 51/54] add additional check --- module_text_llm/module_text_llm/evaluation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/module_text_llm/module_text_llm/evaluation.py b/module_text_llm/module_text_llm/evaluation.py index 74d9e4db7..f0af94e3e 100644 --- a/module_text_llm/module_text_llm/evaluation.py +++ b/module_text_llm/module_text_llm/evaluation.py @@ -68,13 +68,13 @@ def get_feedback_statistics(exercise: Exercise, true_feedbacks: List[Feedback], # Count SGIs in actual feedbacks for feedback in true_feedbacks: - if feedback.structured_grading_instruction_id: + if feedback.structured_grading_instruction_id and feedback.structured_grading_instruction_id in actual_sgi_usage: actual_feedback_with_grading_instructions.append(feedback) actual_sgi_usage[feedback.structured_grading_instruction_id] += 1 # Count SGIs in suggested feedbacks for feedback in predicted_feedbacks: - if feedback.structured_grading_instruction_id: + if feedback.structured_grading_instruction_id and feedback.structured_grading_instruction_id in suggested_sgi_usage: suggestions_with_grading_instructions.append(feedback) suggested_sgi_usage[feedback.structured_grading_instruction_id] += 1 From 4eac2951d107708be7a69e54e5fe3ef5959b033b Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Thu, 9 Nov 2023 11:39:11 +0100 Subject: [PATCH 52/54] fix index --- playground/src/hooks/batch_module_experiment.ts | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/playground/src/hooks/batch_module_experiment.ts b/playground/src/hooks/batch_module_experiment.ts index e999631cc..8c256bb5c 100644 --- a/playground/src/hooks/batch_module_experiment.ts +++ b/playground/src/hooks/batch_module_experiment.ts @@ -396,10 +396,11 @@ export default function useBatchModuleExperiment(experiment: Experiment, moduleC (submission) => !submissionsWithAutomaticEvaluation?.has(submission.id) ); - let index = 0; + let num = 0; for (const submission of remainingSubmissions) { + num += 1; console.log( - `Evaluating... (${index + 1}/${ + `Evaluating... (${num}/${ remainingSubmissions.length })` ); From 0fe6e6d0ea0faac645bb85665bb40836aad1ed04 Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Sat, 11 Nov 2023 12:29:17 +0100 Subject: [PATCH 53/54] add docs --- docs/module/structure.rst | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/docs/module/structure.rst b/docs/module/structure.rst index 31c206f55..126ab8b98 100644 --- a/docs/module/structure.rst +++ b/docs/module/structure.rst @@ -94,7 +94,7 @@ Example: ) ] -Provide Config Schema +Provide Config Schema (Optional) ~~~~~~~~~~~~~~~~~~~~~~ Get a schema for config options of the module as json schema. The config complying to the schema can then be provided in the header of a request `X-Module-Config` to override the default values. The module can decorate one pydantic model with ``@config_schema_provider`` to provide the schema and should have default values set for all fields as default configuration. The configuration class can be appended to the function signature of all other decorators to provide the configuration to the function. 
@@ -108,6 +108,37 @@ Example: debug: bool = Field(False, description="Whether the module is in debug mode.") ... +Provide Evaluation (Optional) +~~~~~~~~~~~~~~~~~~ +Get an arbitrary evaluation for a submission with historical ``true_feedback`` and feedback suggestions ``predicted_feedback``. The Playground would usually call this when conducting an evaluation during an experiment. The module will receive the request at the function annotated with ``@evaluation_provider``. + +If you want to have the ``/evaluation`` endpoint available during the Playground evaluation mode, you need to set ``supports_evaluation = true`` in the ``modules.ini`` and ``modules.docker.ini`` files. + +Example: + .. code-block:: python + + from athena import * + + @evaluation_provider + def evaluate_feedback(exercise: Exercise, submission: Submission, true_feedbacks: List[Feedback], predicted_feedbacks: List[Feedback]) -> Any: + # Do something with the true and predicted feedback and return the evaluation result + ... + # Example: Generate some example evaluation result + evaluation_results = [] + true_feedback_embeddings = [random.random() for _ in true_feedbacks] + predicted_feedback_embeddings = [random.random() for _ in predicted_feedbacks] + for feedback, embedding in zip(predicted_feedbacks, predicted_feedback_embeddings): + feedback_evaluation = { + "feedback_id": feedback.id, + "embedding": embedding, + "has_match": len([t for t in true_feedback_embeddings if abs(t - embedding) < 0.1]) > 0, + "correctness": random.random() + } + evaluation_results.append(feedback_evaluation) + ... + # Return arbitrary evaluation results + return evaluation_results + Environment Variables --------------------- You should provide at least the following environment variables for your module to work properly: From 2b7f2122e3c16299d3ff7e86d88bed7c8665b3fc Mon Sep 17 00:00:00 2001 From: "Felix T.J. 
Dietrich" Date: Sun, 12 Nov 2023 19:32:10 +0100 Subject: [PATCH 54/54] fix text module --- module_text_llm/poetry.lock | 513 ++++++++++++++++----------------- module_text_llm/pyproject.toml | 2 +- 2 files changed, 256 insertions(+), 259 deletions(-) diff --git a/module_text_llm/poetry.lock b/module_text_llm/poetry.lock index 8db9814e0..f5206889c 100644 --- a/module_text_llm/poetry.lock +++ b/module_text_llm/poetry.lock @@ -228,102 +228,102 @@ files = [ [[package]] name = "charset-normalizer" -version = "3.3.1" +version = "3.3.2" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." category = "main" optional = false python-versions = ">=3.7.0" files = [ - {file = "charset-normalizer-3.3.1.tar.gz", hash = "sha256:d9137a876020661972ca6eec0766d81aef8a5627df628b664b234b73396e727e"}, - {file = "charset_normalizer-3.3.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:8aee051c89e13565c6bd366813c386939f8e928af93c29fda4af86d25b73d8f8"}, - {file = "charset_normalizer-3.3.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:352a88c3df0d1fa886562384b86f9a9e27563d4704ee0e9d56ec6fcd270ea690"}, - {file = "charset_normalizer-3.3.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:223b4d54561c01048f657fa6ce41461d5ad8ff128b9678cfe8b2ecd951e3f8a2"}, - {file = "charset_normalizer-3.3.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4f861d94c2a450b974b86093c6c027888627b8082f1299dfd5a4bae8e2292821"}, - {file = "charset_normalizer-3.3.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1171ef1fc5ab4693c5d151ae0fdad7f7349920eabbaca6271f95969fa0756c2d"}, - {file = "charset_normalizer-3.3.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28f512b9a33235545fbbdac6a330a510b63be278a50071a336afc1b78781b147"}, - {file = "charset_normalizer-3.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:c0e842112fe3f1a4ffcf64b06dc4c61a88441c2f02f373367f7b4c1aa9be2ad5"}, - {file = "charset_normalizer-3.3.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3f9bc2ce123637a60ebe819f9fccc614da1bcc05798bbbaf2dd4ec91f3e08846"}, - {file = "charset_normalizer-3.3.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:f194cce575e59ffe442c10a360182a986535fd90b57f7debfaa5c845c409ecc3"}, - {file = "charset_normalizer-3.3.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:9a74041ba0bfa9bc9b9bb2cd3238a6ab3b7618e759b41bd15b5f6ad958d17605"}, - {file = "charset_normalizer-3.3.1-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:b578cbe580e3b41ad17b1c428f382c814b32a6ce90f2d8e39e2e635d49e498d1"}, - {file = "charset_normalizer-3.3.1-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:6db3cfb9b4fcecb4390db154e75b49578c87a3b9979b40cdf90d7e4b945656e1"}, - {file = "charset_normalizer-3.3.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:debb633f3f7856f95ad957d9b9c781f8e2c6303ef21724ec94bea2ce2fcbd056"}, - {file = "charset_normalizer-3.3.1-cp310-cp310-win32.whl", hash = "sha256:87071618d3d8ec8b186d53cb6e66955ef2a0e4fa63ccd3709c0c90ac5a43520f"}, - {file = "charset_normalizer-3.3.1-cp310-cp310-win_amd64.whl", hash = "sha256:e372d7dfd154009142631de2d316adad3cc1c36c32a38b16a4751ba78da2a397"}, - {file = "charset_normalizer-3.3.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ae4070f741f8d809075ef697877fd350ecf0b7c5837ed68738607ee0a2c572cf"}, - {file = "charset_normalizer-3.3.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:58e875eb7016fd014c0eea46c6fa92b87b62c0cb31b9feae25cbbe62c919f54d"}, - {file = "charset_normalizer-3.3.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:dbd95e300367aa0827496fe75a1766d198d34385a58f97683fe6e07f89ca3e3c"}, - {file = "charset_normalizer-3.3.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:de0b4caa1c8a21394e8ce971997614a17648f94e1cd0640fbd6b4d14cab13a72"}, - {file = "charset_normalizer-3.3.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:985c7965f62f6f32bf432e2681173db41336a9c2611693247069288bcb0c7f8b"}, - {file = "charset_normalizer-3.3.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a15c1fe6d26e83fd2e5972425a772cca158eae58b05d4a25a4e474c221053e2d"}, - {file = "charset_normalizer-3.3.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ae55d592b02c4349525b6ed8f74c692509e5adffa842e582c0f861751701a673"}, - {file = "charset_normalizer-3.3.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:be4d9c2770044a59715eb57c1144dedea7c5d5ae80c68fb9959515037cde2008"}, - {file = "charset_normalizer-3.3.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:851cf693fb3aaef71031237cd68699dded198657ec1e76a76eb8be58c03a5d1f"}, - {file = "charset_normalizer-3.3.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:31bbaba7218904d2eabecf4feec0d07469284e952a27400f23b6628439439fa7"}, - {file = "charset_normalizer-3.3.1-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:871d045d6ccc181fd863a3cd66ee8e395523ebfbc57f85f91f035f50cee8e3d4"}, - {file = "charset_normalizer-3.3.1-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:501adc5eb6cd5f40a6f77fbd90e5ab915c8fd6e8c614af2db5561e16c600d6f3"}, - {file = "charset_normalizer-3.3.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:f5fb672c396d826ca16a022ac04c9dce74e00a1c344f6ad1a0fdc1ba1f332213"}, - {file = "charset_normalizer-3.3.1-cp311-cp311-win32.whl", hash = "sha256:bb06098d019766ca16fc915ecaa455c1f1cd594204e7f840cd6258237b5079a8"}, - {file = "charset_normalizer-3.3.1-cp311-cp311-win_amd64.whl", hash = "sha256:8af5a8917b8af42295e86b64903156b4f110a30dca5f3b5aedea123fbd638bff"}, - {file = "charset_normalizer-3.3.1-cp312-cp312-macosx_10_9_universal2.whl", hash = 
"sha256:7ae8e5142dcc7a49168f4055255dbcced01dc1714a90a21f87448dc8d90617d1"}, - {file = "charset_normalizer-3.3.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:5b70bab78accbc672f50e878a5b73ca692f45f5b5e25c8066d748c09405e6a55"}, - {file = "charset_normalizer-3.3.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5ceca5876032362ae73b83347be8b5dbd2d1faf3358deb38c9c88776779b2e2f"}, - {file = "charset_normalizer-3.3.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:34d95638ff3613849f473afc33f65c401a89f3b9528d0d213c7037c398a51296"}, - {file = "charset_normalizer-3.3.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9edbe6a5bf8b56a4a84533ba2b2f489d0046e755c29616ef8830f9e7d9cf5728"}, - {file = "charset_normalizer-3.3.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f6a02a3c7950cafaadcd46a226ad9e12fc9744652cc69f9e5534f98b47f3bbcf"}, - {file = "charset_normalizer-3.3.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:10b8dd31e10f32410751b3430996f9807fc4d1587ca69772e2aa940a82ab571a"}, - {file = "charset_normalizer-3.3.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:edc0202099ea1d82844316604e17d2b175044f9bcb6b398aab781eba957224bd"}, - {file = "charset_normalizer-3.3.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:b891a2f68e09c5ef989007fac11476ed33c5c9994449a4e2c3386529d703dc8b"}, - {file = "charset_normalizer-3.3.1-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:71ef3b9be10070360f289aea4838c784f8b851be3ba58cf796262b57775c2f14"}, - {file = "charset_normalizer-3.3.1-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:55602981b2dbf8184c098bc10287e8c245e351cd4fdcad050bd7199d5a8bf514"}, - {file = "charset_normalizer-3.3.1-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:46fb9970aa5eeca547d7aa0de5d4b124a288b42eaefac677bde805013c95725c"}, - {file = 
"charset_normalizer-3.3.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:520b7a142d2524f999447b3a0cf95115df81c4f33003c51a6ab637cbda9d0bf4"}, - {file = "charset_normalizer-3.3.1-cp312-cp312-win32.whl", hash = "sha256:8ec8ef42c6cd5856a7613dcd1eaf21e5573b2185263d87d27c8edcae33b62a61"}, - {file = "charset_normalizer-3.3.1-cp312-cp312-win_amd64.whl", hash = "sha256:baec8148d6b8bd5cee1ae138ba658c71f5b03e0d69d5907703e3e1df96db5e41"}, - {file = "charset_normalizer-3.3.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:63a6f59e2d01310f754c270e4a257426fe5a591dc487f1983b3bbe793cf6bac6"}, - {file = "charset_normalizer-3.3.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1d6bfc32a68bc0933819cfdfe45f9abc3cae3877e1d90aac7259d57e6e0f85b1"}, - {file = "charset_normalizer-3.3.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4f3100d86dcd03c03f7e9c3fdb23d92e32abbca07e7c13ebd7ddfbcb06f5991f"}, - {file = "charset_normalizer-3.3.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:39b70a6f88eebe239fa775190796d55a33cfb6d36b9ffdd37843f7c4c1b5dc67"}, - {file = "charset_normalizer-3.3.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4e12f8ee80aa35e746230a2af83e81bd6b52daa92a8afaef4fea4a2ce9b9f4fa"}, - {file = "charset_normalizer-3.3.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7b6cefa579e1237ce198619b76eaa148b71894fb0d6bcf9024460f9bf30fd228"}, - {file = "charset_normalizer-3.3.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:61f1e3fb621f5420523abb71f5771a204b33c21d31e7d9d86881b2cffe92c47c"}, - {file = "charset_normalizer-3.3.1-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:4f6e2a839f83a6a76854d12dbebde50e4b1afa63e27761549d006fa53e9aa80e"}, - {file = "charset_normalizer-3.3.1-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:1ec937546cad86d0dce5396748bf392bb7b62a9eeb8c66efac60e947697f0e58"}, - {file 
= "charset_normalizer-3.3.1-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:82ca51ff0fc5b641a2d4e1cc8c5ff108699b7a56d7f3ad6f6da9dbb6f0145b48"}, - {file = "charset_normalizer-3.3.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:633968254f8d421e70f91c6ebe71ed0ab140220469cf87a9857e21c16687c034"}, - {file = "charset_normalizer-3.3.1-cp37-cp37m-win32.whl", hash = "sha256:c0c72d34e7de5604df0fde3644cc079feee5e55464967d10b24b1de268deceb9"}, - {file = "charset_normalizer-3.3.1-cp37-cp37m-win_amd64.whl", hash = "sha256:63accd11149c0f9a99e3bc095bbdb5a464862d77a7e309ad5938fbc8721235ae"}, - {file = "charset_normalizer-3.3.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5a3580a4fdc4ac05f9e53c57f965e3594b2f99796231380adb2baaab96e22761"}, - {file = "charset_normalizer-3.3.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2465aa50c9299d615d757c1c888bc6fef384b7c4aec81c05a0172b4400f98557"}, - {file = "charset_normalizer-3.3.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:cb7cd68814308aade9d0c93c5bd2ade9f9441666f8ba5aa9c2d4b389cb5e2a45"}, - {file = "charset_normalizer-3.3.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:91e43805ccafa0a91831f9cd5443aa34528c0c3f2cc48c4cb3d9a7721053874b"}, - {file = "charset_normalizer-3.3.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:854cc74367180beb327ab9d00f964f6d91da06450b0855cbbb09187bcdb02de5"}, - {file = "charset_normalizer-3.3.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c15070ebf11b8b7fd1bfff7217e9324963c82dbdf6182ff7050519e350e7ad9f"}, - {file = "charset_normalizer-3.3.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2c4c99f98fc3a1835af8179dcc9013f93594d0670e2fa80c83aa36346ee763d2"}, - {file = "charset_normalizer-3.3.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3fb765362688821404ad6cf86772fc54993ec11577cd5a92ac44b4c2ba52155b"}, - {file = 
"charset_normalizer-3.3.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:dced27917823df984fe0c80a5c4ad75cf58df0fbfae890bc08004cd3888922a2"}, - {file = "charset_normalizer-3.3.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a66bcdf19c1a523e41b8e9d53d0cedbfbac2e93c649a2e9502cb26c014d0980c"}, - {file = "charset_normalizer-3.3.1-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:ecd26be9f112c4f96718290c10f4caea6cc798459a3a76636b817a0ed7874e42"}, - {file = "charset_normalizer-3.3.1-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:3f70fd716855cd3b855316b226a1ac8bdb3caf4f7ea96edcccc6f484217c9597"}, - {file = "charset_normalizer-3.3.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:17a866d61259c7de1bdadef418a37755050ddb4b922df8b356503234fff7932c"}, - {file = "charset_normalizer-3.3.1-cp38-cp38-win32.whl", hash = "sha256:548eefad783ed787b38cb6f9a574bd8664468cc76d1538215d510a3cd41406cb"}, - {file = "charset_normalizer-3.3.1-cp38-cp38-win_amd64.whl", hash = "sha256:45f053a0ece92c734d874861ffe6e3cc92150e32136dd59ab1fb070575189c97"}, - {file = "charset_normalizer-3.3.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:bc791ec3fd0c4309a753f95bb6c749ef0d8ea3aea91f07ee1cf06b7b02118f2f"}, - {file = "charset_normalizer-3.3.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:0c8c61fb505c7dad1d251c284e712d4e0372cef3b067f7ddf82a7fa82e1e9a93"}, - {file = "charset_normalizer-3.3.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:2c092be3885a1b7899cd85ce24acedc1034199d6fca1483fa2c3a35c86e43041"}, - {file = "charset_normalizer-3.3.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c2000c54c395d9e5e44c99dc7c20a64dc371f777faf8bae4919ad3e99ce5253e"}, - {file = "charset_normalizer-3.3.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4cb50a0335382aac15c31b61d8531bc9bb657cfd848b1d7158009472189f3d62"}, - {file = "charset_normalizer-3.3.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:c30187840d36d0ba2893bc3271a36a517a717f9fd383a98e2697ee890a37c273"}, - {file = "charset_normalizer-3.3.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fe81b35c33772e56f4b6cf62cf4aedc1762ef7162a31e6ac7fe5e40d0149eb67"}, - {file = "charset_normalizer-3.3.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d0bf89afcbcf4d1bb2652f6580e5e55a840fdf87384f6063c4a4f0c95e378656"}, - {file = "charset_normalizer-3.3.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:06cf46bdff72f58645434d467bf5228080801298fbba19fe268a01b4534467f5"}, - {file = "charset_normalizer-3.3.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:3c66df3f41abee950d6638adc7eac4730a306b022570f71dd0bd6ba53503ab57"}, - {file = "charset_normalizer-3.3.1-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:cd805513198304026bd379d1d516afbf6c3c13f4382134a2c526b8b854da1c2e"}, - {file = "charset_normalizer-3.3.1-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:9505dc359edb6a330efcd2be825fdb73ee3e628d9010597aa1aee5aa63442e97"}, - {file = "charset_normalizer-3.3.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:31445f38053476a0c4e6d12b047b08ced81e2c7c712e5a1ad97bc913256f91b2"}, - {file = "charset_normalizer-3.3.1-cp39-cp39-win32.whl", hash = "sha256:bd28b31730f0e982ace8663d108e01199098432a30a4c410d06fe08fdb9e93f4"}, - {file = "charset_normalizer-3.3.1-cp39-cp39-win_amd64.whl", hash = "sha256:555fe186da0068d3354cdf4bbcbc609b0ecae4d04c921cc13e209eece7720727"}, - {file = "charset_normalizer-3.3.1-py3-none-any.whl", hash = "sha256:800561453acdecedaac137bf09cd719c7a440b6800ec182f077bb8e7025fb708"}, + {file = "charset-normalizer-3.3.2.tar.gz", hash = "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3"}, + {file = 
"charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:06435b539f889b1f6f4ac1758871aae42dc3a8c0e24ac9e60c2384973ad73027"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9063e24fdb1e498ab71cb7419e24622516c4a04476b17a2dab57e8baa30d6e03"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6897af51655e3691ff853668779c7bad41579facacf5fd7253b0133308cf000d"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1d3193f4a680c64b4b6a9115943538edb896edc190f0b222e73761716519268e"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cd70574b12bb8a4d2aaa0094515df2463cb429d8536cfb6c7ce983246983e5a6"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8465322196c8b4d7ab6d1e049e4c5cb460d0394da4a27d23cc242fbf0034b6b5"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a9a8e9031d613fd2009c182b69c7b2c1ef8239a0efb1df3f7c8da66d5dd3d537"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:beb58fe5cdb101e3a055192ac291b7a21e3b7ef4f67fa1d74e331a7f2124341c"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:e06ed3eb3218bc64786f7db41917d4e686cc4856944f53d5bdf83a6884432e12"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:2e81c7b9c8979ce92ed306c249d46894776a909505d8f5a4ba55b14206e3222f"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:572c3763a264ba47b3cf708a44ce965d98555f618ca42c926a9c1616d8f34269"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = 
"sha256:fd1abc0d89e30cc4e02e4064dc67fcc51bd941eb395c502aac3ec19fab46b519"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-win32.whl", hash = "sha256:3d47fa203a7bd9c5b6cee4736ee84ca03b8ef23193c0d1ca99b5089f72645c73"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:10955842570876604d404661fbccbc9c7e684caf432c09c715ec38fbae45ae09"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:802fe99cca7457642125a8a88a084cef28ff0cf9407060f7b93dca5aa25480db"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:573f6eac48f4769d667c4442081b1794f52919e7edada77495aaed9236d13a96"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:549a3a73da901d5bc3ce8d24e0600d1fa85524c10287f6004fbab87672bf3e1e"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f27273b60488abe721a075bcca6d7f3964f9f6f067c8c4c605743023d7d3944f"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ceae2f17a9c33cb48e3263960dc5fc8005351ee19db217e9b1bb15d28c02574"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:65f6f63034100ead094b8744b3b97965785388f308a64cf8d7c34f2f2e5be0c4"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:753f10e867343b4511128c6ed8c82f7bec3bd026875576dfd88483c5c73b2fd8"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4a78b2b446bd7c934f5dcedc588903fb2f5eec172f3d29e52a9096a43722adfc"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e537484df0d8f426ce2afb2d0f8e1c3d0b114b83f8850e5f2fbea0e797bd82ae"}, + {file = 
"charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:eb6904c354526e758fda7167b33005998fb68c46fbc10e013ca97f21ca5c8887"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:deb6be0ac38ece9ba87dea880e438f25ca3eddfac8b002a2ec3d9183a454e8ae"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4ab2fe47fae9e0f9dee8c04187ce5d09f48eabe611be8259444906793ab7cbce"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:80402cd6ee291dcb72644d6eac93785fe2c8b9cb30893c1af5b8fdd753b9d40f"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-win32.whl", hash = "sha256:7cd13a2e3ddeed6913a65e66e94b51d80a041145a026c27e6bb76c31a853c6ab"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:663946639d296df6a2bb2aa51b60a2454ca1cb29835324c640dafb5ff2131a77"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:0b2b64d2bb6d3fb9112bafa732def486049e63de9618b5843bcdd081d8144cd8"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:ddbb2551d7e0102e7252db79ba445cdab71b26640817ab1e3e3648dad515003b"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:55086ee1064215781fff39a1af09518bc9255b50d6333f2e4c74ca09fac6a8f6"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f4a014bc36d3c57402e2977dada34f9c12300af536839dc38c0beab8878f38a"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a10af20b82360ab00827f916a6058451b723b4e65030c5a18577c8b2de5b3389"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8d756e44e94489e49571086ef83b2bb8ce311e730092d2c34ca8f7d925cb20aa"}, + {file = 
"charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ac7ffc7ad6d040517be39eb591cac5ff87416c2537df6ba3cba3bae290c0fed"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7ed9e526742851e8d5cc9e6cf41427dfc6068d4f5a3bb03659444b4cabf6bc26"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:8bdb58ff7ba23002a4c5808d608e4e6c687175724f54a5dade5fa8c67b604e4d"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:6b3251890fff30ee142c44144871185dbe13b11bab478a88887a639655be1068"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:b4a23f61ce87adf89be746c8a8974fe1c823c891d8f86eb218bb957c924bb143"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:efcb3f6676480691518c177e3b465bcddf57cea040302f9f4e6e191af91174d4"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-win32.whl", hash = "sha256:d965bba47ddeec8cd560687584e88cf699fd28f192ceb452d1d7ee807c5597b7"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:96b02a3dc4381e5494fad39be677abcb5e6634bf7b4fa83a6dd3112607547001"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:95f2a5796329323b8f0512e09dbb7a1860c46a39da62ecb2324f116fa8fdc85c"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c002b4ffc0be611f0d9da932eb0f704fe2602a9a949d1f738e4c34c75b0863d5"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a981a536974bbc7a512cf44ed14938cf01030a99e9b3a06dd59578882f06f985"}, + {file = 
"charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3287761bc4ee9e33561a7e058c72ac0938c4f57fe49a09eae428fd88aafe7bb6"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:42cb296636fcc8b0644486d15c12376cb9fa75443e00fb25de0b8602e64c1714"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a55554a2fa0d408816b3b5cedf0045f4b8e1a6065aec45849de2d6f3f8e9786"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:c083af607d2515612056a31f0a8d9e0fcb5876b7bfc0abad3ecd275bc4ebc2d5"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:87d1351268731db79e0f8e745d92493ee2841c974128ef629dc518b937d9194c"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:bd8f7df7d12c2db9fab40bdd87a7c09b1530128315d047a086fa3ae3435cb3a8"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:c180f51afb394e165eafe4ac2936a14bee3eb10debc9d9e4db8958fe36afe711"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:8c622a5fe39a48f78944a87d4fb8a53ee07344641b0562c540d840748571b811"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-win32.whl", hash = "sha256:db364eca23f876da6f9e16c9da0df51aa4f104a972735574842618b8c6d999d4"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-win_amd64.whl", hash = "sha256:86216b5cee4b06df986d214f664305142d9c76df9b6512be2738aa72a2048f99"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:6463effa3186ea09411d50efc7d85360b38d5f09b870c48e4600f63af490e56a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6c4caeef8fa63d06bd437cd4bdcf3ffefe6738fb1b25951440d80dc7df8c03ac"}, + {file = 
"charset_normalizer-3.3.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:37e55c8e51c236f95b033f6fb391d7d7970ba5fe7ff453dad675e88cf303377a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb69256e180cb6c8a894fee62b3afebae785babc1ee98b81cdf68bbca1987f33"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ae5f4161f18c61806f411a13b0310bea87f987c7d2ecdbdaad0e94eb2e404238"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b2b0a0c0517616b6869869f8c581d4eb2dd83a4d79e0ebcb7d373ef9956aeb0a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:45485e01ff4d3630ec0d9617310448a8702f70e9c01906b0d0118bdf9d124cf2"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eb00ed941194665c332bf8e078baf037d6c35d7c4f3102ea2d4f16ca94a26dc8"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:2127566c664442652f024c837091890cb1942c30937add288223dc895793f898"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a50aebfa173e157099939b17f18600f72f84eed3049e743b68ad15bd69b6bf99"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:4d0d1650369165a14e14e1e47b372cfcb31d6ab44e6e33cb2d4e57265290044d"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:923c0c831b7cfcb071580d3f46c4baf50f174be571576556269530f4bbd79d04"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:06a81e93cd441c56a9b65d8e1d043daeb97a3d0856d177d5c90ba85acb3db087"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-win32.whl", hash = "sha256:6ef1d82a3af9d3eecdba2321dc1b3c238245d890843e040e41e470ffa64c3e25"}, + {file = 
"charset_normalizer-3.3.2-cp38-cp38-win_amd64.whl", hash = "sha256:eb8821e09e916165e160797a6c17edda0679379a4be5c716c260e836e122f54b"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c235ebd9baae02f1b77bcea61bce332cb4331dc3617d254df3323aa01ab47bd4"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5b4c145409bef602a690e7cfad0a15a55c13320ff7a3ad7ca59c13bb8ba4d45d"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:68d1f8a9e9e37c1223b656399be5d6b448dea850bed7d0f87a8311f1ff3dabb0"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22afcb9f253dac0696b5a4be4a1c0f8762f8239e21b99680099abd9b2b1b2269"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e27ad930a842b4c5eb8ac0016b0a54f5aebbe679340c26101df33424142c143c"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1f79682fbe303db92bc2b1136016a38a42e835d932bab5b3b1bfcfbf0640e519"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b261ccdec7821281dade748d088bb6e9b69e6d15b30652b74cbbac25e280b796"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:122c7fa62b130ed55f8f285bfd56d5f4b4a5b503609d181f9ad85e55c89f4185"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d0eccceffcb53201b5bfebb52600a5fb483a20b61da9dbc885f8b103cbe7598c"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:9f96df6923e21816da7e0ad3fd47dd8f94b2a5ce594e00677c0013018b813458"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:7f04c839ed0b6b98b1a7501a002144b76c18fb1c1850c8b98d458ac269e26ed2"}, + {file 
= "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:34d1c8da1e78d2e001f363791c98a272bb734000fcef47a491c1e3b0505657a8"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ff8fa367d09b717b2a17a052544193ad76cd49979c805768879cb63d9ca50561"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-win32.whl", hash = "sha256:aed38f6e4fb3f5d6bf81bfa990a07806be9d83cf7bacef998ab1a9bd660a581f"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:b01b88d45a6fcb69667cd6d2f7a9aeb4bf53760d7fc536bf679ec94fe9f3ff3d"}, + {file = "charset_normalizer-3.3.2-py3-none-any.whl", hash = "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc"}, ] [[package]] @@ -355,14 +355,14 @@ files = [ [[package]] name = "dataclasses-json" -version = "0.6.1" +version = "0.6.2" description = "Easily serialize dataclasses to and from JSON." category = "main" optional = false python-versions = ">=3.7,<4.0" files = [ - {file = "dataclasses_json-0.6.1-py3-none-any.whl", hash = "sha256:1bd8418a61fe3d588bb0079214d7fb71d44937da40742b787256fd53b26b6c80"}, - {file = "dataclasses_json-0.6.1.tar.gz", hash = "sha256:a53c220c35134ce08211a1057fd0e5bf76dc5331627c6b241cacbc570a89faae"}, + {file = "dataclasses_json-0.6.2-py3-none-any.whl", hash = "sha256:71816ced3d0f55a2c5bc1a813ace1b8d4234e79a08744269a7cf84d6f7c06e99"}, + {file = "dataclasses_json-0.6.2.tar.gz", hash = "sha256:1b934c1bd63e775880946b8361a902d7de86e894bab8098eab27c010f95724d1"}, ] [package.dependencies] @@ -973,39 +973,39 @@ files = [ [[package]] name = "mypy" -version = "1.6.1" +version = "1.7.0" description = "Optional static typing for Python" category = "main" optional = false python-versions = ">=3.8" files = [ - {file = "mypy-1.6.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e5012e5cc2ac628177eaac0e83d622b2dd499e28253d4107a08ecc59ede3fc2c"}, - {file = "mypy-1.6.1-cp310-cp310-macosx_11_0_arm64.whl", hash = 
"sha256:d8fbb68711905f8912e5af474ca8b78d077447d8f3918997fecbf26943ff3cbb"}, - {file = "mypy-1.6.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21a1ad938fee7d2d96ca666c77b7c494c3c5bd88dff792220e1afbebb2925b5e"}, - {file = "mypy-1.6.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b96ae2c1279d1065413965c607712006205a9ac541895004a1e0d4f281f2ff9f"}, - {file = "mypy-1.6.1-cp310-cp310-win_amd64.whl", hash = "sha256:40b1844d2e8b232ed92e50a4bd11c48d2daa351f9deee6c194b83bf03e418b0c"}, - {file = "mypy-1.6.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:81af8adaa5e3099469e7623436881eff6b3b06db5ef75e6f5b6d4871263547e5"}, - {file = "mypy-1.6.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8c223fa57cb154c7eab5156856c231c3f5eace1e0bed9b32a24696b7ba3c3245"}, - {file = "mypy-1.6.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a8032e00ce71c3ceb93eeba63963b864bf635a18f6c0c12da6c13c450eedb183"}, - {file = "mypy-1.6.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:4c46b51de523817a0045b150ed11b56f9fff55f12b9edd0f3ed35b15a2809de0"}, - {file = "mypy-1.6.1-cp311-cp311-win_amd64.whl", hash = "sha256:19f905bcfd9e167159b3d63ecd8cb5e696151c3e59a1742e79bc3bcb540c42c7"}, - {file = "mypy-1.6.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:82e469518d3e9a321912955cc702d418773a2fd1e91c651280a1bda10622f02f"}, - {file = "mypy-1.6.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d4473c22cc296425bbbce7e9429588e76e05bc7342da359d6520b6427bf76660"}, - {file = "mypy-1.6.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:59a0d7d24dfb26729e0a068639a6ce3500e31d6655df8557156c51c1cb874ce7"}, - {file = "mypy-1.6.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:cfd13d47b29ed3bbaafaff7d8b21e90d827631afda134836962011acb5904b71"}, - {file = "mypy-1.6.1-cp312-cp312-win_amd64.whl", hash = "sha256:eb4f18589d196a4cbe5290b435d135dee96567e07c2b2d43b5c4621b6501531a"}, - {file = 
"mypy-1.6.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:41697773aa0bf53ff917aa077e2cde7aa50254f28750f9b88884acea38a16169"}, - {file = "mypy-1.6.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7274b0c57737bd3476d2229c6389b2ec9eefeb090bbaf77777e9d6b1b5a9d143"}, - {file = "mypy-1.6.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bbaf4662e498c8c2e352da5f5bca5ab29d378895fa2d980630656178bd607c46"}, - {file = "mypy-1.6.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:bb8ccb4724f7d8601938571bf3f24da0da791fe2db7be3d9e79849cb64e0ae85"}, - {file = "mypy-1.6.1-cp38-cp38-win_amd64.whl", hash = "sha256:68351911e85145f582b5aa6cd9ad666c8958bcae897a1bfda8f4940472463c45"}, - {file = "mypy-1.6.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:49ae115da099dcc0922a7a895c1eec82c1518109ea5c162ed50e3b3594c71208"}, - {file = "mypy-1.6.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8b27958f8c76bed8edaa63da0739d76e4e9ad4ed325c814f9b3851425582a3cd"}, - {file = "mypy-1.6.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:925cd6a3b7b55dfba252b7c4561892311c5358c6b5a601847015a1ad4eb7d332"}, - {file = "mypy-1.6.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8f57e6b6927a49550da3d122f0cb983d400f843a8a82e65b3b380d3d7259468f"}, - {file = "mypy-1.6.1-cp39-cp39-win_amd64.whl", hash = "sha256:a43ef1c8ddfdb9575691720b6352761f3f53d85f1b57d7745701041053deff30"}, - {file = "mypy-1.6.1-py3-none-any.whl", hash = "sha256:4cbe68ef919c28ea561165206a2dcb68591c50f3bcf777932323bc208d949cf1"}, - {file = "mypy-1.6.1.tar.gz", hash = "sha256:4d01c00d09a0be62a4ca3f933e315455bde83f37f892ba4b08ce92f3cf44bcc1"}, + {file = "mypy-1.7.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5da84d7bf257fd8f66b4f759a904fd2c5a765f70d8b52dde62b521972a0a2357"}, + {file = "mypy-1.7.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a3637c03f4025f6405737570d6cbfa4f1400eb3c649317634d273687a09ffc2f"}, + {file = 
"mypy-1.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b633f188fc5ae1b6edca39dae566974d7ef4e9aaaae00bc36efe1f855e5173ac"}, + {file = "mypy-1.7.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:d6ed9a3997b90c6f891138e3f83fb8f475c74db4ccaa942a1c7bf99e83a989a1"}, + {file = "mypy-1.7.0-cp310-cp310-win_amd64.whl", hash = "sha256:1fe46e96ae319df21359c8db77e1aecac8e5949da4773c0274c0ef3d8d1268a9"}, + {file = "mypy-1.7.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:df67fbeb666ee8828f675fee724cc2cbd2e4828cc3df56703e02fe6a421b7401"}, + {file = "mypy-1.7.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a79cdc12a02eb526d808a32a934c6fe6df07b05f3573d210e41808020aed8b5d"}, + {file = "mypy-1.7.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f65f385a6f43211effe8c682e8ec3f55d79391f70a201575def73d08db68ead1"}, + {file = "mypy-1.7.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:0e81ffd120ee24959b449b647c4b2fbfcf8acf3465e082b8d58fd6c4c2b27e46"}, + {file = "mypy-1.7.0-cp311-cp311-win_amd64.whl", hash = "sha256:f29386804c3577c83d76520abf18cfcd7d68264c7e431c5907d250ab502658ee"}, + {file = "mypy-1.7.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:87c076c174e2c7ef8ab416c4e252d94c08cd4980a10967754f91571070bf5fbe"}, + {file = "mypy-1.7.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6cb8d5f6d0fcd9e708bb190b224089e45902cacef6f6915481806b0c77f7786d"}, + {file = "mypy-1.7.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d93e76c2256aa50d9c82a88e2f569232e9862c9982095f6d54e13509f01222fc"}, + {file = "mypy-1.7.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:cddee95dea7990e2215576fae95f6b78a8c12f4c089d7e4367564704e99118d3"}, + {file = "mypy-1.7.0-cp312-cp312-win_amd64.whl", hash = "sha256:d01921dbd691c4061a3e2ecdbfbfad029410c5c2b1ee88946bf45c62c6c91210"}, + {file = "mypy-1.7.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = 
"sha256:185cff9b9a7fec1f9f7d8352dff8a4c713b2e3eea9c6c4b5ff7f0edf46b91e41"}, + {file = "mypy-1.7.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7a7b1e399c47b18feb6f8ad4a3eef3813e28c1e871ea7d4ea5d444b2ac03c418"}, + {file = "mypy-1.7.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc9fe455ad58a20ec68599139ed1113b21f977b536a91b42bef3ffed5cce7391"}, + {file = "mypy-1.7.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:d0fa29919d2e720c8dbaf07d5578f93d7b313c3e9954c8ec05b6d83da592e5d9"}, + {file = "mypy-1.7.0-cp38-cp38-win_amd64.whl", hash = "sha256:2b53655a295c1ed1af9e96b462a736bf083adba7b314ae775563e3fb4e6795f5"}, + {file = "mypy-1.7.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c1b06b4b109e342f7dccc9efda965fc3970a604db70f8560ddfdee7ef19afb05"}, + {file = "mypy-1.7.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:bf7a2f0a6907f231d5e41adba1a82d7d88cf1f61a70335889412dec99feeb0f8"}, + {file = "mypy-1.7.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:551d4a0cdcbd1d2cccdcc7cb516bb4ae888794929f5b040bb51aae1846062901"}, + {file = "mypy-1.7.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:55d28d7963bef00c330cb6461db80b0b72afe2f3c4e2963c99517cf06454e665"}, + {file = "mypy-1.7.0-cp39-cp39-win_amd64.whl", hash = "sha256:870bd1ffc8a5862e593185a4c169804f2744112b4a7c55b93eb50f48e7a77010"}, + {file = "mypy-1.7.0-py3-none-any.whl", hash = "sha256:96650d9a4c651bc2a4991cf46f100973f656d69edc7faf91844e87fe627f7e96"}, + {file = "mypy-1.7.0.tar.gz", hash = "sha256:1e280b5697202efa698372d2f39e9a6713a0395a756b1c6bd48995f8d72690dc"}, ] [package.dependencies] @@ -1015,6 +1015,7 @@ typing-extensions = ">=4.1.0" [package.extras] dmypy = ["psutil (>=4.0)"] install-types = ["pip"] +mypyc = ["setuptools (>=50)"] reports = ["lxml"] [[package]] @@ -1161,14 +1162,14 @@ flake8-polyfill = ">=1.0.2,<2" [[package]] name = "platformdirs" -version = "3.11.0" +version = "4.0.0" description = "A small Python package for determining 
appropriate platform-specific dirs, e.g. a \"user data dir\"." category = "dev" optional = false python-versions = ">=3.7" files = [ - {file = "platformdirs-3.11.0-py3-none-any.whl", hash = "sha256:e9d171d00af68be50e9202731309c4e658fd8bc76f55c11c7dd760d023bda68e"}, - {file = "platformdirs-3.11.0.tar.gz", hash = "sha256:cf8ee52a3afdb965072dcc652433e0c7e3e40cf5ea1477cd4b3b1d2eb75495b3"}, + {file = "platformdirs-4.0.0-py3-none-any.whl", hash = "sha256:118c954d7e949b35437270383a3f2531e99dd93cf7ce4dc8340d3356d30f173b"}, + {file = "platformdirs-4.0.0.tar.gz", hash = "sha256:cb633b2bcf10c51af60beb0ab06d2f1d69064b43abf4c185ca6b28865f3f9731"}, ] [package.extras] @@ -1709,61 +1710,61 @@ files = [ [[package]] name = "sqlalchemy" -version = "2.0.22" +version = "2.0.23" description = "Database Abstraction Library" category = "main" optional = false python-versions = ">=3.7" files = [ - {file = "SQLAlchemy-2.0.22-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f146c61ae128ab43ea3a0955de1af7e1633942c2b2b4985ac51cc292daf33222"}, - {file = "SQLAlchemy-2.0.22-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:875de9414393e778b655a3d97d60465eb3fae7c919e88b70cc10b40b9f56042d"}, - {file = "SQLAlchemy-2.0.22-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:13790cb42f917c45c9c850b39b9941539ca8ee7917dacf099cc0b569f3d40da7"}, - {file = "SQLAlchemy-2.0.22-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e04ab55cf49daf1aeb8c622c54d23fa4bec91cb051a43cc24351ba97e1dd09f5"}, - {file = "SQLAlchemy-2.0.22-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:a42c9fa3abcda0dcfad053e49c4f752eef71ecd8c155221e18b99d4224621176"}, - {file = "SQLAlchemy-2.0.22-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:14cd3bcbb853379fef2cd01e7c64a5d6f1d005406d877ed9509afb7a05ff40a5"}, - {file = "SQLAlchemy-2.0.22-cp310-cp310-win32.whl", hash = "sha256:d143c5a9dada696bcfdb96ba2de4a47d5a89168e71d05a076e88a01386872f97"}, - {file = 
"SQLAlchemy-2.0.22-cp310-cp310-win_amd64.whl", hash = "sha256:ccd87c25e4c8559e1b918d46b4fa90b37f459c9b4566f1dfbce0eb8122571547"}, - {file = "SQLAlchemy-2.0.22-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4f6ff392b27a743c1ad346d215655503cec64405d3b694228b3454878bf21590"}, - {file = "SQLAlchemy-2.0.22-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f776c2c30f0e5f4db45c3ee11a5f2a8d9de68e81eb73ec4237de1e32e04ae81c"}, - {file = "SQLAlchemy-2.0.22-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c8f1792d20d2f4e875ce7a113f43c3561ad12b34ff796b84002a256f37ce9437"}, - {file = "SQLAlchemy-2.0.22-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d80eeb5189d7d4b1af519fc3f148fe7521b9dfce8f4d6a0820e8f5769b005051"}, - {file = "SQLAlchemy-2.0.22-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:69fd9e41cf9368afa034e1c81f3570afb96f30fcd2eb1ef29cb4d9371c6eece2"}, - {file = "SQLAlchemy-2.0.22-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:54bcceaf4eebef07dadfde424f5c26b491e4a64e61761dea9459103ecd6ccc95"}, - {file = "SQLAlchemy-2.0.22-cp311-cp311-win32.whl", hash = "sha256:7ee7ccf47aa503033b6afd57efbac6b9e05180f492aeed9fcf70752556f95624"}, - {file = "SQLAlchemy-2.0.22-cp311-cp311-win_amd64.whl", hash = "sha256:b560f075c151900587ade06706b0c51d04b3277c111151997ea0813455378ae0"}, - {file = "SQLAlchemy-2.0.22-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:2c9bac865ee06d27a1533471405ad240a6f5d83195eca481f9fc4a71d8b87df8"}, - {file = "SQLAlchemy-2.0.22-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:625b72d77ac8ac23da3b1622e2da88c4aedaee14df47c8432bf8f6495e655de2"}, - {file = "SQLAlchemy-2.0.22-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b39a6e21110204a8c08d40ff56a73ba542ec60bab701c36ce721e7990df49fb9"}, - {file = "SQLAlchemy-2.0.22-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:53a766cb0b468223cafdf63e2d37f14a4757476157927b09300c8c5832d88560"}, 
- {file = "SQLAlchemy-2.0.22-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:0e1ce8ebd2e040357dde01a3fb7d30d9b5736b3e54a94002641dfd0aa12ae6ce"}, - {file = "SQLAlchemy-2.0.22-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:505f503763a767556fa4deae5194b2be056b64ecca72ac65224381a0acab7ebe"}, - {file = "SQLAlchemy-2.0.22-cp312-cp312-win32.whl", hash = "sha256:154a32f3c7b00de3d090bc60ec8006a78149e221f1182e3edcf0376016be9396"}, - {file = "SQLAlchemy-2.0.22-cp312-cp312-win_amd64.whl", hash = "sha256:129415f89744b05741c6f0b04a84525f37fbabe5dc3774f7edf100e7458c48cd"}, - {file = "SQLAlchemy-2.0.22-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:3940677d341f2b685a999bffe7078697b5848a40b5f6952794ffcf3af150c301"}, - {file = "SQLAlchemy-2.0.22-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:55914d45a631b81a8a2cb1a54f03eea265cf1783241ac55396ec6d735be14883"}, - {file = "SQLAlchemy-2.0.22-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2096d6b018d242a2bcc9e451618166f860bb0304f590d205173d317b69986c95"}, - {file = "SQLAlchemy-2.0.22-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:19c6986cf2fb4bc8e0e846f97f4135a8e753b57d2aaaa87c50f9acbe606bd1db"}, - {file = "SQLAlchemy-2.0.22-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:6ac28bd6888fe3c81fbe97584eb0b96804bd7032d6100b9701255d9441373ec1"}, - {file = "SQLAlchemy-2.0.22-cp37-cp37m-win32.whl", hash = "sha256:cb9a758ad973e795267da334a92dd82bb7555cb36a0960dcabcf724d26299db8"}, - {file = "SQLAlchemy-2.0.22-cp37-cp37m-win_amd64.whl", hash = "sha256:40b1206a0d923e73aa54f0a6bd61419a96b914f1cd19900b6c8226899d9742ad"}, - {file = "SQLAlchemy-2.0.22-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:3aa1472bf44f61dd27987cd051f1c893b7d3b17238bff8c23fceaef4f1133868"}, - {file = "SQLAlchemy-2.0.22-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:56a7e2bb639df9263bf6418231bc2a92a773f57886d371ddb7a869a24919face"}, - {file = 
"SQLAlchemy-2.0.22-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ccca778c0737a773a1ad86b68bda52a71ad5950b25e120b6eb1330f0df54c3d0"}, - {file = "SQLAlchemy-2.0.22-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7c6c3e9350f9fb16de5b5e5fbf17b578811a52d71bb784cc5ff71acb7de2a7f9"}, - {file = "SQLAlchemy-2.0.22-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:564e9f9e4e6466273dbfab0e0a2e5fe819eec480c57b53a2cdee8e4fdae3ad5f"}, - {file = "SQLAlchemy-2.0.22-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:af66001d7b76a3fab0d5e4c1ec9339ac45748bc4a399cbc2baa48c1980d3c1f4"}, - {file = "SQLAlchemy-2.0.22-cp38-cp38-win32.whl", hash = "sha256:9e55dff5ec115316dd7a083cdc1a52de63693695aecf72bc53a8e1468ce429e5"}, - {file = "SQLAlchemy-2.0.22-cp38-cp38-win_amd64.whl", hash = "sha256:4e869a8ff7ee7a833b74868a0887e8462445ec462432d8cbeff5e85f475186da"}, - {file = "SQLAlchemy-2.0.22-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9886a72c8e6371280cb247c5d32c9c8fa141dc560124348762db8a8b236f8692"}, - {file = "SQLAlchemy-2.0.22-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a571bc8ac092a3175a1d994794a8e7a1f2f651e7c744de24a19b4f740fe95034"}, - {file = "SQLAlchemy-2.0.22-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8db5ba8b7da759b727faebc4289a9e6a51edadc7fc32207a30f7c6203a181592"}, - {file = "SQLAlchemy-2.0.22-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b0b3f2686c3f162123adba3cb8b626ed7e9b8433ab528e36ed270b4f70d1cdb"}, - {file = "SQLAlchemy-2.0.22-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0c1fea8c0abcb070ffe15311853abfda4e55bf7dc1d4889497b3403629f3bf00"}, - {file = "SQLAlchemy-2.0.22-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:4bb062784f37b2d75fd9b074c8ec360ad5df71f933f927e9e95c50eb8e05323c"}, - {file = "SQLAlchemy-2.0.22-cp39-cp39-win32.whl", hash = "sha256:58a3aba1bfb32ae7af68da3f277ed91d9f57620cf7ce651db96636790a78b736"}, - {file = 
"SQLAlchemy-2.0.22-cp39-cp39-win_amd64.whl", hash = "sha256:92e512a6af769e4725fa5b25981ba790335d42c5977e94ded07db7d641490a85"}, - {file = "SQLAlchemy-2.0.22-py3-none-any.whl", hash = "sha256:3076740335e4aaadd7deb3fe6dcb96b3015f1613bd190a4e1634e1b99b02ec86"}, - {file = "SQLAlchemy-2.0.22.tar.gz", hash = "sha256:5434cc601aa17570d79e5377f5fd45ff92f9379e2abed0be5e8c2fba8d353d2b"}, + {file = "SQLAlchemy-2.0.23-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:638c2c0b6b4661a4fd264f6fb804eccd392745c5887f9317feb64bb7cb03b3ea"}, + {file = "SQLAlchemy-2.0.23-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e3b5036aa326dc2df50cba3c958e29b291a80f604b1afa4c8ce73e78e1c9f01d"}, + {file = "SQLAlchemy-2.0.23-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:787af80107fb691934a01889ca8f82a44adedbf5ef3d6ad7d0f0b9ac557e0c34"}, + {file = "SQLAlchemy-2.0.23-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c14eba45983d2f48f7546bb32b47937ee2cafae353646295f0e99f35b14286ab"}, + {file = "SQLAlchemy-2.0.23-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:0666031df46b9badba9bed00092a1ffa3aa063a5e68fa244acd9f08070e936d3"}, + {file = "SQLAlchemy-2.0.23-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:89a01238fcb9a8af118eaad3ffcc5dedaacbd429dc6fdc43fe430d3a941ff965"}, + {file = "SQLAlchemy-2.0.23-cp310-cp310-win32.whl", hash = "sha256:cabafc7837b6cec61c0e1e5c6d14ef250b675fa9c3060ed8a7e38653bd732ff8"}, + {file = "SQLAlchemy-2.0.23-cp310-cp310-win_amd64.whl", hash = "sha256:87a3d6b53c39cd173990de2f5f4b83431d534a74f0e2f88bd16eabb5667e65c6"}, + {file = "SQLAlchemy-2.0.23-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d5578e6863eeb998980c212a39106ea139bdc0b3f73291b96e27c929c90cd8e1"}, + {file = "SQLAlchemy-2.0.23-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:62d9e964870ea5ade4bc870ac4004c456efe75fb50404c03c5fd61f8bc669a72"}, + {file = "SQLAlchemy-2.0.23-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:c80c38bd2ea35b97cbf7c21aeb129dcbebbf344ee01a7141016ab7b851464f8e"}, + {file = "SQLAlchemy-2.0.23-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75eefe09e98043cff2fb8af9796e20747ae870c903dc61d41b0c2e55128f958d"}, + {file = "SQLAlchemy-2.0.23-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:bd45a5b6c68357578263d74daab6ff9439517f87da63442d244f9f23df56138d"}, + {file = "SQLAlchemy-2.0.23-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:a86cb7063e2c9fb8e774f77fbf8475516d270a3e989da55fa05d08089d77f8c4"}, + {file = "SQLAlchemy-2.0.23-cp311-cp311-win32.whl", hash = "sha256:b41f5d65b54cdf4934ecede2f41b9c60c9f785620416e8e6c48349ab18643855"}, + {file = "SQLAlchemy-2.0.23-cp311-cp311-win_amd64.whl", hash = "sha256:9ca922f305d67605668e93991aaf2c12239c78207bca3b891cd51a4515c72e22"}, + {file = "SQLAlchemy-2.0.23-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:d0f7fb0c7527c41fa6fcae2be537ac137f636a41b4c5a4c58914541e2f436b45"}, + {file = "SQLAlchemy-2.0.23-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7c424983ab447dab126c39d3ce3be5bee95700783204a72549c3dceffe0fc8f4"}, + {file = "SQLAlchemy-2.0.23-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f508ba8f89e0a5ecdfd3761f82dda2a3d7b678a626967608f4273e0dba8f07ac"}, + {file = "SQLAlchemy-2.0.23-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6463aa765cf02b9247e38b35853923edbf2f6fd1963df88706bc1d02410a5577"}, + {file = "SQLAlchemy-2.0.23-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:e599a51acf3cc4d31d1a0cf248d8f8d863b6386d2b6782c5074427ebb7803bda"}, + {file = "SQLAlchemy-2.0.23-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:fd54601ef9cc455a0c61e5245f690c8a3ad67ddb03d3b91c361d076def0b4c60"}, + {file = "SQLAlchemy-2.0.23-cp312-cp312-win32.whl", hash = "sha256:42d0b0290a8fb0165ea2c2781ae66e95cca6e27a2fbe1016ff8db3112ac1e846"}, + {file = "SQLAlchemy-2.0.23-cp312-cp312-win_amd64.whl", hash = 
"sha256:227135ef1e48165f37590b8bfc44ed7ff4c074bf04dc8d6f8e7f1c14a94aa6ca"}, + {file = "SQLAlchemy-2.0.23-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:14aebfe28b99f24f8a4c1346c48bc3d63705b1f919a24c27471136d2f219f02d"}, + {file = "SQLAlchemy-2.0.23-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e983fa42164577d073778d06d2cc5d020322425a509a08119bdcee70ad856bf"}, + {file = "SQLAlchemy-2.0.23-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e0dc9031baa46ad0dd5a269cb7a92a73284d1309228be1d5935dac8fb3cae24"}, + {file = "SQLAlchemy-2.0.23-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:5f94aeb99f43729960638e7468d4688f6efccb837a858b34574e01143cf11f89"}, + {file = "SQLAlchemy-2.0.23-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:63bfc3acc970776036f6d1d0e65faa7473be9f3135d37a463c5eba5efcdb24c8"}, + {file = "SQLAlchemy-2.0.23-cp37-cp37m-win32.whl", hash = "sha256:f48ed89dd11c3c586f45e9eec1e437b355b3b6f6884ea4a4c3111a3358fd0c18"}, + {file = "SQLAlchemy-2.0.23-cp37-cp37m-win_amd64.whl", hash = "sha256:1e018aba8363adb0599e745af245306cb8c46b9ad0a6fc0a86745b6ff7d940fc"}, + {file = "SQLAlchemy-2.0.23-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:64ac935a90bc479fee77f9463f298943b0e60005fe5de2aa654d9cdef46c54df"}, + {file = "SQLAlchemy-2.0.23-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:c4722f3bc3c1c2fcc3702dbe0016ba31148dd6efcd2a2fd33c1b4897c6a19693"}, + {file = "SQLAlchemy-2.0.23-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4af79c06825e2836de21439cb2a6ce22b2ca129bad74f359bddd173f39582bf5"}, + {file = "SQLAlchemy-2.0.23-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:683ef58ca8eea4747737a1c35c11372ffeb84578d3aab8f3e10b1d13d66f2bc4"}, + {file = "SQLAlchemy-2.0.23-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:d4041ad05b35f1f4da481f6b811b4af2f29e83af253bf37c3c4582b2c68934ab"}, + {file = "SQLAlchemy-2.0.23-cp38-cp38-musllinux_1_1_x86_64.whl", 
hash = "sha256:aeb397de65a0a62f14c257f36a726945a7f7bb60253462e8602d9b97b5cbe204"}, + {file = "SQLAlchemy-2.0.23-cp38-cp38-win32.whl", hash = "sha256:42ede90148b73fe4ab4a089f3126b2cfae8cfefc955c8174d697bb46210c8306"}, + {file = "SQLAlchemy-2.0.23-cp38-cp38-win_amd64.whl", hash = "sha256:964971b52daab357d2c0875825e36584d58f536e920f2968df8d581054eada4b"}, + {file = "SQLAlchemy-2.0.23-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:616fe7bcff0a05098f64b4478b78ec2dfa03225c23734d83d6c169eb41a93e55"}, + {file = "SQLAlchemy-2.0.23-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0e680527245895aba86afbd5bef6c316831c02aa988d1aad83c47ffe92655e74"}, + {file = "SQLAlchemy-2.0.23-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9585b646ffb048c0250acc7dad92536591ffe35dba624bb8fd9b471e25212a35"}, + {file = "SQLAlchemy-2.0.23-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4895a63e2c271ffc7a81ea424b94060f7b3b03b4ea0cd58ab5bb676ed02f4221"}, + {file = "SQLAlchemy-2.0.23-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:cc1d21576f958c42d9aec68eba5c1a7d715e5fc07825a629015fe8e3b0657fb0"}, + {file = "SQLAlchemy-2.0.23-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:967c0b71156f793e6662dd839da54f884631755275ed71f1539c95bbada9aaab"}, + {file = "SQLAlchemy-2.0.23-cp39-cp39-win32.whl", hash = "sha256:0a8c6aa506893e25a04233bc721c6b6cf844bafd7250535abb56cb6cc1368884"}, + {file = "SQLAlchemy-2.0.23-cp39-cp39-win_amd64.whl", hash = "sha256:f3420d00d2cb42432c1d0e44540ae83185ccbbc67a6054dcc8ab5387add6620b"}, + {file = "SQLAlchemy-2.0.23-py3-none-any.whl", hash = "sha256:31952bbc527d633b9479f5f81e8b9dfada00b91d6baba021a869095f1a97006d"}, + {file = "SQLAlchemy-2.0.23.tar.gz", hash = "sha256:c1bda93cbbe4aa2aa0aa8655c5aeda505cd219ff3e8da91d1d329e143e4aff69"}, ] [package.dependencies] @@ -1773,6 +1774,7 @@ typing-extensions = ">=4.2.0" [package.extras] aiomysql = ["aiomysql (>=0.2.0)", "greenlet (!=0.4.17)"] +aioodbc = ["aioodbc", "greenlet 
(!=0.4.17)"] aiosqlite = ["aiosqlite", "greenlet (!=0.4.17)", "typing-extensions (!=3.10.0.1)"] asyncio = ["greenlet (!=0.4.17)"] asyncmy = ["asyncmy (>=0.2.3,!=0.2.4,!=0.2.6)", "greenlet (!=0.4.17)"] @@ -1783,7 +1785,7 @@ mssql-pyodbc = ["pyodbc"] mypy = ["mypy (>=0.910)"] mysql = ["mysqlclient (>=1.4.0)"] mysql-connector = ["mysql-connector-python"] -oracle = ["cx-oracle (>=7)"] +oracle = ["cx-oracle (>=8)"] oracle-oracledb = ["oracledb (>=1.0.1)"] postgresql = ["psycopg2 (>=2.7)"] postgresql-asyncpg = ["asyncpg", "greenlet (!=0.4.17)"] @@ -1888,14 +1890,14 @@ files = [ [[package]] name = "tomlkit" -version = "0.12.1" +version = "0.12.2" description = "Style preserving TOML library" category = "dev" optional = false python-versions = ">=3.7" files = [ - {file = "tomlkit-0.12.1-py3-none-any.whl", hash = "sha256:712cbd236609acc6a3e2e97253dfc52d4c2082982a88f61b640ecf0817eab899"}, - {file = "tomlkit-0.12.1.tar.gz", hash = "sha256:38e1ff8edb991273ec9f6181244a6a391ac30e9f5098e7535640ea6be97a7c86"}, + {file = "tomlkit-0.12.2-py3-none-any.whl", hash = "sha256:eeea7ac7563faeab0a1ed8fe12c2e5a51c61f933f2502f7e9db0241a65163ad0"}, + {file = "tomlkit-0.12.2.tar.gz", hash = "sha256:df32fab589a81f0d7dc525a4267b6d7a64ee99619cbd1eeb0fae32c1dd426977"}, ] [[package]] @@ -1986,87 +1988,82 @@ standard = ["colorama (>=0.4)", "httptools (>=0.5.0)", "python-dotenv (>=0.13)", [[package]] name = "wrapt" -version = "1.15.0" +version = "1.16.0" description = "Module for decorators, wrappers and monkey patching." 
category = "dev" optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" -files = [ - {file = "wrapt-1.15.0-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:ca1cccf838cd28d5a0883b342474c630ac48cac5df0ee6eacc9c7290f76b11c1"}, - {file = "wrapt-1.15.0-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:e826aadda3cae59295b95343db8f3d965fb31059da7de01ee8d1c40a60398b29"}, - {file = "wrapt-1.15.0-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:5fc8e02f5984a55d2c653f5fea93531e9836abbd84342c1d1e17abc4a15084c2"}, - {file = "wrapt-1.15.0-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:96e25c8603a155559231c19c0349245eeb4ac0096fe3c1d0be5c47e075bd4f46"}, - {file = "wrapt-1.15.0-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:40737a081d7497efea35ab9304b829b857f21558acfc7b3272f908d33b0d9d4c"}, - {file = "wrapt-1.15.0-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:f87ec75864c37c4c6cb908d282e1969e79763e0d9becdfe9fe5473b7bb1e5f09"}, - {file = "wrapt-1.15.0-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:1286eb30261894e4c70d124d44b7fd07825340869945c79d05bda53a40caa079"}, - {file = "wrapt-1.15.0-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:493d389a2b63c88ad56cdc35d0fa5752daac56ca755805b1b0c530f785767d5e"}, - {file = "wrapt-1.15.0-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:58d7a75d731e8c63614222bcb21dd992b4ab01a399f1f09dd82af17bbfc2368a"}, - {file = "wrapt-1.15.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:21f6d9a0d5b3a207cdf7acf8e58d7d13d463e639f0c7e01d82cdb671e6cb7923"}, - {file = "wrapt-1.15.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ce42618f67741d4697684e501ef02f29e758a123aa2d669e2d964ff734ee00ee"}, - {file = "wrapt-1.15.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:41d07d029dd4157ae27beab04d22b8e261eddfc6ecd64ff7000b10dc8b3a5727"}, - {file = "wrapt-1.15.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:54accd4b8bc202966bafafd16e69da9d5640ff92389d33d28555c5fd4f25ccb7"}, - {file = "wrapt-1.15.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2fbfbca668dd15b744418265a9607baa970c347eefd0db6a518aaf0cfbd153c0"}, - {file = "wrapt-1.15.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:76e9c727a874b4856d11a32fb0b389afc61ce8aaf281ada613713ddeadd1cfec"}, - {file = "wrapt-1.15.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:e20076a211cd6f9b44a6be58f7eeafa7ab5720eb796975d0c03f05b47d89eb90"}, - {file = "wrapt-1.15.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a74d56552ddbde46c246b5b89199cb3fd182f9c346c784e1a93e4dc3f5ec9975"}, - {file = "wrapt-1.15.0-cp310-cp310-win32.whl", hash = "sha256:26458da5653aa5b3d8dc8b24192f574a58984c749401f98fff994d41d3f08da1"}, - {file = "wrapt-1.15.0-cp310-cp310-win_amd64.whl", hash = "sha256:75760a47c06b5974aa5e01949bf7e66d2af4d08cb8c1d6516af5e39595397f5e"}, - {file = "wrapt-1.15.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ba1711cda2d30634a7e452fc79eabcadaffedf241ff206db2ee93dd2c89a60e7"}, - {file = "wrapt-1.15.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:56374914b132c702aa9aa9959c550004b8847148f95e1b824772d453ac204a72"}, - {file = "wrapt-1.15.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a89ce3fd220ff144bd9d54da333ec0de0399b52c9ac3d2ce34b569cf1a5748fb"}, - {file = "wrapt-1.15.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3bbe623731d03b186b3d6b0d6f51865bf598587c38d6f7b0be2e27414f7f214e"}, - {file = "wrapt-1.15.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3abbe948c3cbde2689370a262a8d04e32ec2dd4f27103669a45c6929bcdbfe7c"}, - {file = "wrapt-1.15.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:b67b819628e3b748fd3c2192c15fb951f549d0f47c0449af0764d7647302fda3"}, - 
{file = "wrapt-1.15.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:7eebcdbe3677e58dd4c0e03b4f2cfa346ed4049687d839adad68cc38bb559c92"}, - {file = "wrapt-1.15.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:74934ebd71950e3db69960a7da29204f89624dde411afbfb3b4858c1409b1e98"}, - {file = "wrapt-1.15.0-cp311-cp311-win32.whl", hash = "sha256:bd84395aab8e4d36263cd1b9308cd504f6cf713b7d6d3ce25ea55670baec5416"}, - {file = "wrapt-1.15.0-cp311-cp311-win_amd64.whl", hash = "sha256:a487f72a25904e2b4bbc0817ce7a8de94363bd7e79890510174da9d901c38705"}, - {file = "wrapt-1.15.0-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:4ff0d20f2e670800d3ed2b220d40984162089a6e2c9646fdb09b85e6f9a8fc29"}, - {file = "wrapt-1.15.0-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:9ed6aa0726b9b60911f4aed8ec5b8dd7bf3491476015819f56473ffaef8959bd"}, - {file = "wrapt-1.15.0-cp35-cp35m-manylinux2010_i686.whl", hash = "sha256:896689fddba4f23ef7c718279e42f8834041a21342d95e56922e1c10c0cc7afb"}, - {file = "wrapt-1.15.0-cp35-cp35m-manylinux2010_x86_64.whl", hash = "sha256:75669d77bb2c071333417617a235324a1618dba66f82a750362eccbe5b61d248"}, - {file = "wrapt-1.15.0-cp35-cp35m-win32.whl", hash = "sha256:fbec11614dba0424ca72f4e8ba3c420dba07b4a7c206c8c8e4e73f2e98f4c559"}, - {file = "wrapt-1.15.0-cp35-cp35m-win_amd64.whl", hash = "sha256:fd69666217b62fa5d7c6aa88e507493a34dec4fa20c5bd925e4bc12fce586639"}, - {file = "wrapt-1.15.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:b0724f05c396b0a4c36a3226c31648385deb6a65d8992644c12a4963c70326ba"}, - {file = "wrapt-1.15.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bbeccb1aa40ab88cd29e6c7d8585582c99548f55f9b2581dfc5ba68c59a85752"}, - {file = "wrapt-1.15.0-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:38adf7198f8f154502883242f9fe7333ab05a5b02de7d83aa2d88ea621f13364"}, - {file = 
"wrapt-1.15.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:578383d740457fa790fdf85e6d346fda1416a40549fe8db08e5e9bd281c6a475"}, - {file = "wrapt-1.15.0-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:a4cbb9ff5795cd66f0066bdf5947f170f5d63a9274f99bdbca02fd973adcf2a8"}, - {file = "wrapt-1.15.0-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:af5bd9ccb188f6a5fdda9f1f09d9f4c86cc8a539bd48a0bfdc97723970348418"}, - {file = "wrapt-1.15.0-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:b56d5519e470d3f2fe4aa7585f0632b060d532d0696c5bdfb5e8319e1d0f69a2"}, - {file = "wrapt-1.15.0-cp36-cp36m-win32.whl", hash = "sha256:77d4c1b881076c3ba173484dfa53d3582c1c8ff1f914c6461ab70c8428b796c1"}, - {file = "wrapt-1.15.0-cp36-cp36m-win_amd64.whl", hash = "sha256:077ff0d1f9d9e4ce6476c1a924a3332452c1406e59d90a2cf24aeb29eeac9420"}, - {file = "wrapt-1.15.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:5c5aa28df055697d7c37d2099a7bc09f559d5053c3349b1ad0c39000e611d317"}, - {file = "wrapt-1.15.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3a8564f283394634a7a7054b7983e47dbf39c07712d7b177b37e03f2467a024e"}, - {file = "wrapt-1.15.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:780c82a41dc493b62fc5884fb1d3a3b81106642c5c5c78d6a0d4cbe96d62ba7e"}, - {file = "wrapt-1.15.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e169e957c33576f47e21864cf3fc9ff47c223a4ebca8960079b8bd36cb014fd0"}, - {file = "wrapt-1.15.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:b02f21c1e2074943312d03d243ac4388319f2456576b2c6023041c4d57cd7019"}, - {file = "wrapt-1.15.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:f2e69b3ed24544b0d3dbe2c5c0ba5153ce50dcebb576fdc4696d52aa22db6034"}, - {file = "wrapt-1.15.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = 
"sha256:d787272ed958a05b2c86311d3a4135d3c2aeea4fc655705f074130aa57d71653"}, - {file = "wrapt-1.15.0-cp37-cp37m-win32.whl", hash = "sha256:02fce1852f755f44f95af51f69d22e45080102e9d00258053b79367d07af39c0"}, - {file = "wrapt-1.15.0-cp37-cp37m-win_amd64.whl", hash = "sha256:abd52a09d03adf9c763d706df707c343293d5d106aea53483e0ec8d9e310ad5e"}, - {file = "wrapt-1.15.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:cdb4f085756c96a3af04e6eca7f08b1345e94b53af8921b25c72f096e704e145"}, - {file = "wrapt-1.15.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:230ae493696a371f1dbffaad3dafbb742a4d27a0afd2b1aecebe52b740167e7f"}, - {file = "wrapt-1.15.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:63424c681923b9f3bfbc5e3205aafe790904053d42ddcc08542181a30a7a51bd"}, - {file = "wrapt-1.15.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d6bcbfc99f55655c3d93feb7ef3800bd5bbe963a755687cbf1f490a71fb7794b"}, - {file = "wrapt-1.15.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c99f4309f5145b93eca6e35ac1a988f0dc0a7ccf9ccdcd78d3c0adf57224e62f"}, - {file = "wrapt-1.15.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:b130fe77361d6771ecf5a219d8e0817d61b236b7d8b37cc045172e574ed219e6"}, - {file = "wrapt-1.15.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:96177eb5645b1c6985f5c11d03fc2dbda9ad24ec0f3a46dcce91445747e15094"}, - {file = "wrapt-1.15.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:d5fe3e099cf07d0fb5a1e23d399e5d4d1ca3e6dfcbe5c8570ccff3e9208274f7"}, - {file = "wrapt-1.15.0-cp38-cp38-win32.whl", hash = "sha256:abd8f36c99512755b8456047b7be10372fca271bf1467a1caa88db991e7c421b"}, - {file = "wrapt-1.15.0-cp38-cp38-win_amd64.whl", hash = "sha256:b06fa97478a5f478fb05e1980980a7cdf2712015493b44d0c87606c1513ed5b1"}, - {file = "wrapt-1.15.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = 
"sha256:2e51de54d4fb8fb50d6ee8327f9828306a959ae394d3e01a1ba8b2f937747d86"}, - {file = "wrapt-1.15.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0970ddb69bba00670e58955f8019bec4a42d1785db3faa043c33d81de2bf843c"}, - {file = "wrapt-1.15.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:76407ab327158c510f44ded207e2f76b657303e17cb7a572ffe2f5a8a48aa04d"}, - {file = "wrapt-1.15.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cd525e0e52a5ff16653a3fc9e3dd827981917d34996600bbc34c05d048ca35cc"}, - {file = "wrapt-1.15.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d37ac69edc5614b90516807de32d08cb8e7b12260a285ee330955604ed9dd29"}, - {file = "wrapt-1.15.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:078e2a1a86544e644a68422f881c48b84fef6d18f8c7a957ffd3f2e0a74a0d4a"}, - {file = "wrapt-1.15.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:2cf56d0e237280baed46f0b5316661da892565ff58309d4d2ed7dba763d984b8"}, - {file = "wrapt-1.15.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:7dc0713bf81287a00516ef43137273b23ee414fe41a3c14be10dd95ed98a2df9"}, - {file = "wrapt-1.15.0-cp39-cp39-win32.whl", hash = "sha256:46ed616d5fb42f98630ed70c3529541408166c22cdfd4540b88d5f21006b0eff"}, - {file = "wrapt-1.15.0-cp39-cp39-win_amd64.whl", hash = "sha256:eef4d64c650f33347c1f9266fa5ae001440b232ad9b98f1f43dfe7a79435c0a6"}, - {file = "wrapt-1.15.0-py3-none-any.whl", hash = "sha256:64b1df0f83706b4ef4cfb4fb0e4c2669100fd7ecacfb59e091fad300d4e04640"}, - {file = "wrapt-1.15.0.tar.gz", hash = "sha256:d06730c6aed78cee4126234cf2d071e01b44b915e725a6cb439a879ec9754a3a"}, +python-versions = ">=3.6" +files = [ + {file = "wrapt-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ffa565331890b90056c01db69c0fe634a776f8019c143a5ae265f9c6bc4bd6d4"}, + {file = "wrapt-1.16.0-cp310-cp310-macosx_11_0_arm64.whl", hash = 
"sha256:e4fdb9275308292e880dcbeb12546df7f3e0f96c6b41197e0cf37d2826359020"}, + {file = "wrapt-1.16.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb2dee3874a500de01c93d5c71415fcaef1d858370d405824783e7a8ef5db440"}, + {file = "wrapt-1.16.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2a88e6010048489cda82b1326889ec075a8c856c2e6a256072b28eaee3ccf487"}, + {file = "wrapt-1.16.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ac83a914ebaf589b69f7d0a1277602ff494e21f4c2f743313414378f8f50a4cf"}, + {file = "wrapt-1.16.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:73aa7d98215d39b8455f103de64391cb79dfcad601701a3aa0dddacf74911d72"}, + {file = "wrapt-1.16.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:807cc8543a477ab7422f1120a217054f958a66ef7314f76dd9e77d3f02cdccd0"}, + {file = "wrapt-1.16.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:bf5703fdeb350e36885f2875d853ce13172ae281c56e509f4e6eca049bdfb136"}, + {file = "wrapt-1.16.0-cp310-cp310-win32.whl", hash = "sha256:f6b2d0c6703c988d334f297aa5df18c45e97b0af3679bb75059e0e0bd8b1069d"}, + {file = "wrapt-1.16.0-cp310-cp310-win_amd64.whl", hash = "sha256:decbfa2f618fa8ed81c95ee18a387ff973143c656ef800c9f24fb7e9c16054e2"}, + {file = "wrapt-1.16.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1a5db485fe2de4403f13fafdc231b0dbae5eca4359232d2efc79025527375b09"}, + {file = "wrapt-1.16.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:75ea7d0ee2a15733684badb16de6794894ed9c55aa5e9903260922f0482e687d"}, + {file = "wrapt-1.16.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a452f9ca3e3267cd4d0fcf2edd0d035b1934ac2bd7e0e57ac91ad6b95c0c6389"}, + {file = "wrapt-1.16.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:43aa59eadec7890d9958748db829df269f0368521ba6dc68cc172d5d03ed8060"}, + {file = "wrapt-1.16.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:72554a23c78a8e7aa02abbd699d129eead8b147a23c56e08d08dfc29cfdddca1"}, + {file = "wrapt-1.16.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:d2efee35b4b0a347e0d99d28e884dfd82797852d62fcd7ebdeee26f3ceb72cf3"}, + {file = "wrapt-1.16.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:6dcfcffe73710be01d90cae08c3e548d90932d37b39ef83969ae135d36ef3956"}, + {file = "wrapt-1.16.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:eb6e651000a19c96f452c85132811d25e9264d836951022d6e81df2fff38337d"}, + {file = "wrapt-1.16.0-cp311-cp311-win32.whl", hash = "sha256:66027d667efe95cc4fa945af59f92c5a02c6f5bb6012bff9e60542c74c75c362"}, + {file = "wrapt-1.16.0-cp311-cp311-win_amd64.whl", hash = "sha256:aefbc4cb0a54f91af643660a0a150ce2c090d3652cf4052a5397fb2de549cd89"}, + {file = "wrapt-1.16.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:5eb404d89131ec9b4f748fa5cfb5346802e5ee8836f57d516576e61f304f3b7b"}, + {file = "wrapt-1.16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9090c9e676d5236a6948330e83cb89969f433b1943a558968f659ead07cb3b36"}, + {file = "wrapt-1.16.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94265b00870aa407bd0cbcfd536f17ecde43b94fb8d228560a1e9d3041462d73"}, + {file = "wrapt-1.16.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f2058f813d4f2b5e3a9eb2eb3faf8f1d99b81c3e51aeda4b168406443e8ba809"}, + {file = "wrapt-1.16.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:98b5e1f498a8ca1858a1cdbffb023bfd954da4e3fa2c0cb5853d40014557248b"}, + {file = "wrapt-1.16.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:14d7dc606219cdd7405133c713f2c218d4252f2a469003f8c46bb92d5d095d81"}, + 
{file = "wrapt-1.16.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:49aac49dc4782cb04f58986e81ea0b4768e4ff197b57324dcbd7699c5dfb40b9"}, + {file = "wrapt-1.16.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:418abb18146475c310d7a6dc71143d6f7adec5b004ac9ce08dc7a34e2babdc5c"}, + {file = "wrapt-1.16.0-cp312-cp312-win32.whl", hash = "sha256:685f568fa5e627e93f3b52fda002c7ed2fa1800b50ce51f6ed1d572d8ab3e7fc"}, + {file = "wrapt-1.16.0-cp312-cp312-win_amd64.whl", hash = "sha256:dcdba5c86e368442528f7060039eda390cc4091bfd1dca41e8046af7c910dda8"}, + {file = "wrapt-1.16.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:d462f28826f4657968ae51d2181a074dfe03c200d6131690b7d65d55b0f360f8"}, + {file = "wrapt-1.16.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a33a747400b94b6d6b8a165e4480264a64a78c8a4c734b62136062e9a248dd39"}, + {file = "wrapt-1.16.0-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b3646eefa23daeba62643a58aac816945cadc0afaf21800a1421eeba5f6cfb9c"}, + {file = "wrapt-1.16.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ebf019be5c09d400cf7b024aa52b1f3aeebeff51550d007e92c3c1c4afc2a40"}, + {file = "wrapt-1.16.0-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:0d2691979e93d06a95a26257adb7bfd0c93818e89b1406f5a28f36e0d8c1e1fc"}, + {file = "wrapt-1.16.0-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:1acd723ee2a8826f3d53910255643e33673e1d11db84ce5880675954183ec47e"}, + {file = "wrapt-1.16.0-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:bc57efac2da352a51cc4658878a68d2b1b67dbe9d33c36cb826ca449d80a8465"}, + {file = "wrapt-1.16.0-cp36-cp36m-win32.whl", hash = "sha256:da4813f751142436b075ed7aa012a8778aa43a99f7b36afe9b742d3ed8bdc95e"}, + {file = "wrapt-1.16.0-cp36-cp36m-win_amd64.whl", hash = "sha256:6f6eac2360f2d543cc875a0e5efd413b6cbd483cb3ad7ebf888884a6e0d2e966"}, + {file = 
"wrapt-1.16.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:a0ea261ce52b5952bf669684a251a66df239ec6d441ccb59ec7afa882265d593"}, + {file = "wrapt-1.16.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7bd2d7ff69a2cac767fbf7a2b206add2e9a210e57947dd7ce03e25d03d2de292"}, + {file = "wrapt-1.16.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9159485323798c8dc530a224bd3ffcf76659319ccc7bbd52e01e73bd0241a0c5"}, + {file = "wrapt-1.16.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a86373cf37cd7764f2201b76496aba58a52e76dedfaa698ef9e9688bfd9e41cf"}, + {file = "wrapt-1.16.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:73870c364c11f03ed072dda68ff7aea6d2a3a5c3fe250d917a429c7432e15228"}, + {file = "wrapt-1.16.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:b935ae30c6e7400022b50f8d359c03ed233d45b725cfdd299462f41ee5ffba6f"}, + {file = "wrapt-1.16.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:db98ad84a55eb09b3c32a96c576476777e87c520a34e2519d3e59c44710c002c"}, + {file = "wrapt-1.16.0-cp37-cp37m-win32.whl", hash = "sha256:9153ed35fc5e4fa3b2fe97bddaa7cbec0ed22412b85bcdaf54aeba92ea37428c"}, + {file = "wrapt-1.16.0-cp37-cp37m-win_amd64.whl", hash = "sha256:66dfbaa7cfa3eb707bbfcd46dab2bc6207b005cbc9caa2199bcbc81d95071a00"}, + {file = "wrapt-1.16.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1dd50a2696ff89f57bd8847647a1c363b687d3d796dc30d4dd4a9d1689a706f0"}, + {file = "wrapt-1.16.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:44a2754372e32ab315734c6c73b24351d06e77ffff6ae27d2ecf14cf3d229202"}, + {file = "wrapt-1.16.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e9723528b9f787dc59168369e42ae1c3b0d3fadb2f1a71de14531d321ee05b0"}, + {file = "wrapt-1.16.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:dbed418ba5c3dce92619656802cc5355cb679e58d0d89b50f116e4a9d5a9603e"}, + {file = "wrapt-1.16.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:941988b89b4fd6b41c3f0bfb20e92bd23746579736b7343283297c4c8cbae68f"}, + {file = "wrapt-1.16.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:6a42cd0cfa8ffc1915aef79cb4284f6383d8a3e9dcca70c445dcfdd639d51267"}, + {file = "wrapt-1.16.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:1ca9b6085e4f866bd584fb135a041bfc32cab916e69f714a7d1d397f8c4891ca"}, + {file = "wrapt-1.16.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:d5e49454f19ef621089e204f862388d29e6e8d8b162efce05208913dde5b9ad6"}, + {file = "wrapt-1.16.0-cp38-cp38-win32.whl", hash = "sha256:c31f72b1b6624c9d863fc095da460802f43a7c6868c5dda140f51da24fd47d7b"}, + {file = "wrapt-1.16.0-cp38-cp38-win_amd64.whl", hash = "sha256:490b0ee15c1a55be9c1bd8609b8cecd60e325f0575fc98f50058eae366e01f41"}, + {file = "wrapt-1.16.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9b201ae332c3637a42f02d1045e1d0cccfdc41f1f2f801dafbaa7e9b4797bfc2"}, + {file = "wrapt-1.16.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:2076fad65c6736184e77d7d4729b63a6d1ae0b70da4868adeec40989858eb3fb"}, + {file = "wrapt-1.16.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c5cd603b575ebceca7da5a3a251e69561bec509e0b46e4993e1cac402b7247b8"}, + {file = "wrapt-1.16.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b47cfad9e9bbbed2339081f4e346c93ecd7ab504299403320bf85f7f85c7d46c"}, + {file = "wrapt-1.16.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f8212564d49c50eb4565e502814f694e240c55551a5f1bc841d4fcaabb0a9b8a"}, + {file = "wrapt-1.16.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:5f15814a33e42b04e3de432e573aa557f9f0f56458745c2074952f564c50e664"}, + {file = 
"wrapt-1.16.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:db2e408d983b0e61e238cf579c09ef7020560441906ca990fe8412153e3b291f"}, + {file = "wrapt-1.16.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:edfad1d29c73f9b863ebe7082ae9321374ccb10879eeabc84ba3b69f2579d537"}, + {file = "wrapt-1.16.0-cp39-cp39-win32.whl", hash = "sha256:ed867c42c268f876097248e05b6117a65bcd1e63b779e916fe2e33cd6fd0d3c3"}, + {file = "wrapt-1.16.0-cp39-cp39-win_amd64.whl", hash = "sha256:eb1b046be06b0fce7249f1d025cd359b4b80fc1c3e24ad9eca33e0dcdb2e4a35"}, + {file = "wrapt-1.16.0-py3-none-any.whl", hash = "sha256:6906c4100a8fcbf2fa735f6059214bb13b97f75b1a61777fcf6432121ef12ef1"}, + {file = "wrapt-1.16.0.tar.gz", hash = "sha256:5f370f952971e7d17c7d1ead40e49f32345a7f7a5373571ef44d800d06b1899d"}, ] [[package]] @@ -2160,4 +2157,4 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" python-versions = "3.11.*" -content-hash = "5e0f58a4637859d0c3589028a35641afbd9989e458b3dab5a9cefc9136705e77" +content-hash = "53eeaab1a23bfa1b616447c8734e6fa34ed64242daaea0f94de00414e45460c8" diff --git a/module_text_llm/pyproject.toml b/module_text_llm/pyproject.toml index d2bde08fb..b5b313aa4 100644 --- a/module_text_llm/pyproject.toml +++ b/module_text_llm/pyproject.toml @@ -15,7 +15,7 @@ nltk = "^3.8.1" gitpython = "^3.1.37" replicate = "^0.11.0" tiktoken = "^0.4.0" -langsmith = "^0.0.60" +langsmith = "^0.0.63" [tool.poetry.scripts] module = "athena:run_module"