Add ollama support to llm_core #349

Closed
wants to merge 52 commits

Commits
412a006
add ollama model config
Oct 22, 2024
0efe62a
git add it to types and adjust predict and parse to recognise llama
Oct 22, 2024
cd20d3a
update env example and lock
Oct 22, 2024
6e7c814
Merge branch 'develop' into feature/ollama-support
maximiliansoelch Oct 24, 2024
76a3333
Merge branch 'develop' into feature/ollama-support
Nov 25, 2024
e8135eb
Merge branch 'develop' into feature/ollama-support
Dec 2, 2024
78c8b36
resolve conflicts
Dec 2, 2024
fc22aea
use try catch
Dec 2, 2024
d0cb7c9
Merge branch 'develop' into feature/ollama-support
EneaGore Dec 2, 2024
73f51cb
create CoT Approach for Ollama
Dec 2, 2024
5de2759
fix linting issues
Dec 2, 2024
9cde749
Merge branch 'develop' into feature/ollama-support
Dec 28, 2024
c98a8e5
base of strategy pattern
Dec 31, 2024
eff51b6
fix serialization issue, implement strategy pattern, automatically fi…
Dec 31, 2024
8123e8e
fix package/class discovery
Dec 31, 2024
bc920cf
cleanup
Dec 31, 2024
320c92d
refactor and document
Jan 1, 2025
07bc978
clean up
Jan 1, 2025
78e8a0c
fix linting issues
Jan 1, 2025
a912fac
type annotations
Jan 1, 2025
1f15a48
graceful handling of failed ollama instantiation
Jan 1, 2025
c150612
move approach discovery and strategyFactory to athena package
Jan 13, 2025
5d062dc
linting
Jan 13, 2025
119cf6b
comment
Jan 13, 2025
9045f56
Merge branch 'develop' into feature/ollama-support
EneaGore Jan 13, 2025
6216c26
Merge branch 'develop' into development/strategy-pattern
EneaGore Jan 13, 2025
ffee30c
Merge branch 'development/strategy-pattern' into feature/ollama-support
Jan 13, 2025
930e63f
adapt to strategy pattern
Jan 13, 2025
aad8789
prompt adjustments
Jan 13, 2025
7a6a29b
script to create approach
Jan 14, 2025
24983b9
Merge branch 'develop' into development/strategy-pattern
EneaGore Jan 14, 2025
da93b75
Merge branch 'develop' into feature/ollama-support
EneaGore Jan 14, 2025
3ed2ae2
fix linting issues
Jan 14, 2025
e382446
blank line linting
Jan 14, 2025
afed9f6
Update create_approach_script.py
EneaGore Jan 14, 2025
9f9e97c
Merge branch 'develop' into development/strategy-pattern
EneaGore Jan 15, 2025
200e0c4
Merge branch 'develop' into development/strategy-pattern
EneaGore Jan 16, 2025
63cdf6a
add is graded
Jan 16, 2025
e016cc9
conflicts
Jan 16, 2025
4dd1660
resolve conflicts
Jan 16, 2025
b391e13
import structure
Jan 16, 2025
37fc99b
linting
Jan 16, 2025
a12403d
merge
Jan 16, 2025
447b7ce
use keyword argument to avoid linting max positional arguments error
Jan 16, 2025
93e3aba
Merge branch 'development/strategy-pattern' into feature/ollama-support
Jan 16, 2025
ac4151b
update creating script
Jan 16, 2025
6985e2c
linting
Jan 16, 2025
3c5d2a3
working evaluation
Jan 16, 2025
f688a22
Merge branch 'o1' into feature/ollama-support
Jan 16, 2025
ac9b52d
adjust prompt
Jan 17, 2025
7c844f1
Merge branch 'develop' into feature/ollama-support
EneaGore Jan 20, 2025
c22b534
model agnostic approaches
Jan 21, 2025
14 changes: 13 additions & 1 deletion llm_core/llm_core/models/__init__.py
@@ -3,10 +3,11 @@
from langchain.base_language import BaseLanguageModel

from llm_core.models.model_config import ModelConfig

from athena.logger import logger

DefaultModelConfig: Type[ModelConfig]
MiniModelConfig: ModelConfig
OllamaModelConfig: ModelConfig
default_model_name = os.environ.get("LLM_DEFAULT_MODEL")
evaluation_model_name = os.environ.get("LLM_EVALUATION_MODEL")

@@ -20,10 +21,21 @@
    if default_model_name in openai_config.available_models:
        DefaultModelConfig = openai_config.OpenAIModelConfig
    if evaluation_model_name in openai_config.available_models:
        logger.info("Evaluation model: %s", evaluation_model_name)
        for model in openai_config.available_models:
            logger.info("Available openai models: %s", model)
        evaluation_model = openai_config.available_models[evaluation_model_name]
except AttributeError:
    pass

try:
    import llm_core.models.ollama as ollama_config  # type: ignore
    types.append(ollama_config.OllamaModelConfig)
    OllamaModelConfig = ollama_config.OllamaModelConfig(model_name="llama3.1:70b", max_tokens=1000, temperature=0, top_p=1, presence_penalty=0, frequency_penalty=0)
    # DefaultModelConfig = ollama_config.OllamaModelConfig
except AttributeError:
    pass

if not types:
    raise EnvironmentError(
        "No model configurations available, please set up at least one provider in the environment variables.")
115 changes: 115 additions & 0 deletions llm_core/llm_core/models/ollama.py
@@ -0,0 +1,115 @@
import requests
from enum import Enum
from llm_core.models.model_config import ModelConfig  # type: ignore
from pydantic import validator, Field, PositiveInt
from langchain.base_language import BaseLanguageModel
import os
from langchain_community.chat_models import ChatOllama  # type: ignore
from athena.logger import logger
from requests.exceptions import RequestException, Timeout

if os.environ.get('GPU_USER') and os.environ.get('GPU_PASSWORD') and os.environ.get('OLLAMA_ENDPOINT') is not None:
    try:
        if os.environ["GPU_USER"] and os.environ["GPU_PASSWORD"]:
            auth_header = {
                'Authorization': requests.auth._basic_auth_str(os.environ["GPU_USER"], os.environ["GPU_PASSWORD"])  # type: ignore
            }

        def get_ollama_models():
            url = os.environ["OLLAMA_ENDPOINT"] + "/api/tags"
            response = requests.get(url, auth=(os.environ["GPU_USER"], os.environ["GPU_PASSWORD"]))
            data = response.json()
            model_list = [model['name'] for model in data['models']]
            return model_list

        ollama_models = get_ollama_models()
        available_models = {}

        if [os.environ["OLLAMA_ENDPOINT"]]:
            available_models = {
                name: ChatOllama(
                    name=name,
                    model=name,
                    base_url=os.environ["OLLAMA_ENDPOINT"],
                    headers=auth_header,
                    format="json"
                ) for name in ollama_models
            }

        default_model_name = "llama3.1:70b"
        LlamaModel = Enum('LlamaModel', {name: name for name in available_models})  # type: ignore

        class OllamaModelConfig(ModelConfig):
            """Ollama LLM configuration."""
            logger.info("Available ollama models: %s", ", ".join(available_models.keys()))

            model_name: LlamaModel = Field(default=default_model_name,  # type: ignore
                                           description="The name of the model to use.")

            format: str = Field(default="json", description="The format to respond with")

            max_tokens: PositiveInt = Field(1000, description="")

            temperature: float = Field(default=0.0, ge=0, le=2, description="")

            top_p: float = Field(default=1, ge=0, le=1, description="")

            headers: dict = Field(default=auth_header, description="Headers for authentication")

            presence_penalty: float = Field(default=0, ge=-2, le=2, description="")

            frequency_penalty: float = Field(default=0, ge=-2, le=2, description="")

            base_url: str = Field(default="https://gpu-artemis.ase.cit.tum.de/ollama", description="Base URL where Ollama is hosted")

            @validator('max_tokens')
            def max_tokens_must_be_positive(cls, v):
                """
                Validate that max_tokens is a positive integer.
                """
                if v <= 0:
                    raise ValueError('max_tokens must be a positive integer')
                return v

            def get_model(self) -> BaseLanguageModel:
                """Get the model from the configuration.

                Returns:
                    BaseLanguageModel: The model.
                """
                print("Getting Model: ", self.model_name.value)

                model = available_models[self.model_name.value]
                kwargs = model.__dict__
                secrets = {secret: getattr(model, secret) for secret in model.lc_secrets.keys()}
                kwargs.update(secrets)

                model_kwargs = kwargs.get("model_kwargs", {})
                for attr, value in self.dict().items():
                    if attr == "model_name":
                        # Skip model_name
                        continue
                    if hasattr(model, attr):
                        # If the model has the attribute, add it to kwargs
                        kwargs[attr] = value
                    else:
                        # Otherwise, add it to model_kwargs (necessary for chat models)
                        model_kwargs[attr] = value
                kwargs["model_kwargs"] = model_kwargs

                allowed_fields = set(self.__fields__.keys())
                filtered_kwargs = {k: v for k, v in kwargs.items() if k in allowed_fields}
                filtered_kwargs["headers"] = auth_header
                filtered_kwargs["model"] = self.model_name.value

                # Initialize a copy of the model using the filtered kwargs
                model = model.__class__(**filtered_kwargs)

                return model

            class Config:
                title = 'Ollama'
    except Timeout:
        print("Connection timed out. Skipping server connection step.")
    except RequestException as e:
        print(f"Failed to connect to the server: {e}. Skipping this step.")
2 changes: 1 addition & 1 deletion llm_core/llm_core/models/openai.py
@@ -24,7 +24,7 @@
if openai_available:
    openai.api_type = "openai"
    for model in openai.models.list():
        if "gpt" in model.id:
        if ("gpt" in model.id or "o1" in model.id) and "audio" not in model.id and "realtime" not in model.id:
            available_models[OPENAI_PREFIX + model.id] = ChatOpenAI(model=model.id)

# Load Azure OpenAI models
10 changes: 9 additions & 1 deletion llm_core/llm_core/utils/llm_utils.py
@@ -107,4 +107,12 @@ def get_chat_prompt_with_formatting_instructions(
    system_message_prompt.prompt.partial_variables = {"format_instructions": output_parser.get_format_instructions()}
    system_message_prompt.prompt.input_variables.remove("format_instructions")
    human_message_prompt = HumanMessagePromptTemplate.from_template(human_message + "\n\nJSON response following the provided schema:")
    return ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt])

def get_simple_chat_prompt(
        system_message: str,
        human_message: str,
) -> ChatPromptTemplate:
    system_message_prompt = SystemMessagePromptTemplate.from_template(system_message)
    human_message_prompt = HumanMessagePromptTemplate.from_template(human_message)
    return ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt])
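The new get_simple_chat_prompt mirrors get_chat_prompt_with_formatting_instructions but skips the format-instruction plumbing, which the Ollama path does not need. A small sketch of how it can be used (the message texts are made up for illustration):

    # Illustration only: building a plain system/human prompt with the new helper.
    from llm_core.utils.llm_utils import get_simple_chat_prompt

    chat_prompt = get_simple_chat_prompt(
        system_message="You are a tutor assessing a student submission.",
        human_message="Submission:\n{submission}",
    )
    messages = chat_prompt.format_messages(submission="The water cycle consists of ...")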
55 changes: 53 additions & 2 deletions llm_core/llm_core/utils/predict_and_parse.py
@@ -4,9 +4,37 @@
from langchain_core.pydantic_v1 import BaseModel, ValidationError
from langchain_core.runnables import RunnableSequence
from athena import get_experiment_environment
from langchain_community.chat_models import ChatOllama  # type: ignore
from langchain.output_parsers import PydanticOutputParser
from langchain_openai import ChatOpenAI

T = TypeVar("T", bound=BaseModel)

def isOllama(model: BaseLanguageModel) -> bool:
    return isinstance(model, ChatOllama)

async def predict_plain_text(
        model: BaseLanguageModel,
        chat_prompt: ChatPromptTemplate,
        prompt_input: dict,
        tags: Optional[List[str]]) -> Optional[str]:
    """Predict plain text using the provided model and prompt.

    Args:
        model (BaseLanguageModel): The model to predict with.
        chat_prompt (ChatPromptTemplate): The prompt template to use.
        prompt_input (dict): Input parameters to use for the prompt.
        tags (Optional[List[str]]): List of tags to tag the prediction with.

    Returns:
        Optional[str]: Prediction as a string, or None if it failed.
    """
    try:
        chain = chat_prompt | model
        return await chain.ainvoke(prompt_input, config={"tags": tags})
    except Exception as e:
        raise ValueError("LLM prediction failed.") from e

async def predict_and_parse(
        model: BaseLanguageModel,
        chat_prompt: ChatPromptTemplate,
@@ -37,7 +65,30 @@ async def predict_and_parse(
    if experiment.run_id is not None:
        tags.append(f"run-{experiment.run_id}")

    if isOllama(model):
        try:
            outputParser = PydanticOutputParser(pydantic_object=pydantic_object)
            chain = chat_prompt | model
            llm_output = await chain.ainvoke(prompt_input, config={"tags": tags})
            try:
                # First, try to parse the raw Ollama output directly against the schema
                result = outputParser.parse(llm_output.content)
                return result
            except Exception:
                # Otherwise, fall back to a small OpenAI model that reformats
                # the Ollama output into the expected JSON schema
                outputModel = ChatOpenAI(model="gpt-4o-mini")
                structured_output_llm = outputModel.with_structured_output(pydantic_object, method="json_mode")
                chat_prompt = ChatPromptTemplate.from_messages(
                    [
                        ("system", "Your only task is to format the following output into json:"),
                        ("human", "{output}"),
                    ])
                chain = RunnableSequence(
                    chat_prompt,
                    structured_output_llm
                )
                return await chain.ainvoke(input={"output": llm_output.content}, config={"tags": tags})
        except ValidationError as e:
            raise ValueError(f"Could not parse output: {e}") from e

    if use_function_calling:
        structured_output_llm = model.with_structured_output(pydantic_object)
        chain = chat_prompt | structured_output_llm
@@ -63,4 +114,4 @@
        return await chain.ainvoke(prompt_input, config={"tags": tags})
    except ValidationError as e:
        raise ValueError(f"Could not parse output: {e}") from e

4 changes: 4 additions & 0 deletions modules/text/module_text_llm/.env.example
@@ -39,3 +39,7 @@ OPENAI_API_VERSION="2024-06-01" # change base if needed
# LANGCHAIN_ENDPOINT="https://api.smith.langchain.com"
# LANGCHAIN_API_KEY="XXX"
# LANGCHAIN_PROJECT="XXX"

GPU_USER=
GPU_PASSWORD=
OLLAMA_ENDPOINT= #'https://gpu-artemis.ase.cit.tum.de/ollama'
@@ -15,7 +15,7 @@
from module_text_llm.helpers.utils import add_sentence_numbers, get_index_range_from_line_range, format_grading_instructions
from module_text_llm.basic_approach.prompt_generate_suggestions import AssessmentModel

async def generate_suggestions(exercise: Exercise, submission: Submission, config: ApproachConfig, debug: bool, is_graded: bool) -> List[Feedback]:
async def generate_suggestions(exercise: Exercise, submission: Submission, config:ApproachConfig, debug: bool, is_graded: bool) -> List[Feedback]:
    model = config.model.get_model()  # type: ignore[attr-defined]
    prompt_input = {
        "max_points": exercise.max_points,
3 changes: 2 additions & 1 deletion modules/text/module_text_llm/module_text_llm/config.py
@@ -4,8 +4,9 @@

from module_text_llm.chain_of_thought_approach import ChainOfThoughtConfig
from module_text_llm.basic_approach import BasicApproachConfig
from module_text_llm.ollama_chain_of_thought_approach import OllamaChainOfThoughtConfig

ApproachConfigUnion = Union[BasicApproachConfig, ChainOfThoughtConfig]
ApproachConfigUnion = Union[BasicApproachConfig, ChainOfThoughtConfig, OllamaChainOfThoughtConfig]

@config_schema_provider
class Configuration(BaseModel):
@@ -23,7 +23,7 @@ class AccuracyMetric(BaseModel):
    level_of_needed_modification_label: Literal["no", "minor", "major"] = Field(..., description="Estimated level of needed modification")

class Evaluation(BaseModel):
    metrics: Sequence[AccuracyMetric] = Field(...)
    metrics: List[AccuracyMetric] = Field(..., description="List of accuracy metrics for each feedback")


async def generate_evaluation(
@@ -86,7 +86,8 @@ def feedback_to_dict(feedback: Feedback):
f"exercise-{exercise.id}",
f"submission-{submission.id}",
"evaluation"
]
],
use_function_calling=True
)

if result is None:
@@ -0,0 +1,22 @@
from pydantic import Field
from typing import Literal
from llm_core.models import ModelConfigType
from athena.text import Exercise, Submission
try:
    from llm_core.models import OllamaModelConfig
except ImportError as e:
    print(f"Warning: Failed to import models. {e}")
    OllamaModelConfig = None

from module_text_llm.approach_config import ApproachConfig
from module_text_llm.ollama_chain_of_thought_approach.prompt_generate_feedback import CoTGenerateSuggestionsPrompt
from module_text_llm.ollama_chain_of_thought_approach.prompt_thinking import ThinkingPrompt
from module_text_llm.ollama_chain_of_thought_approach.generate_suggestions import generate_suggestions

class OllamaChainOfThoughtConfig(ApproachConfig):
    type: Literal['ollama_chain_of_thought'] = 'ollama_chain_of_thought'
    model: ModelConfigType = Field(default=OllamaModelConfig)  # type: ignore
    thinking_prompt: ThinkingPrompt = Field(default=ThinkingPrompt())
    generate_suggestions_prompt: CoTGenerateSuggestionsPrompt = Field(default=CoTGenerateSuggestionsPrompt())

    async def generate_suggestions(self, exercise: Exercise, submission: Submission, config, *, debug: bool, is_graded: bool):
        return await generate_suggestions(exercise, submission, config, debug, is_graded)