
Commit

rebase onto develop
dmytropolityka committed Oct 29, 2024
1 parent 139cc19 commit 968c6a7
Showing 17 changed files with 516 additions and 2,917 deletions.
4 changes: 3 additions & 1 deletion llm_core/llm_core/models/model_config.py
@@ -4,7 +4,9 @@


class ModelConfig(BaseModel, ABC):

class Config:
protected_namespaces = ()

@abstractmethod
def get_model(self) -> BaseLanguageModel:
pass
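
The new protected_namespaces = () setting tells Pydantic not to reserve the "model_" prefix, so configuration fields such as model_name no longer trigger namespace-collision warnings. A minimal sketch of the pattern, assuming Pydantic v2 semantics; the OpenAIModelConfig class and its fields are illustrative and not part of this commit:

from pydantic import BaseModel

class OpenAIModelConfig(BaseModel):
    # Without protected_namespaces = (), Pydantic v2 warns that "model_name"
    # conflicts with its reserved "model_" namespace.
    model_name: str = "gpt-4o-mini"
    max_tokens: int = 1000

    class Config:
        protected_namespaces = ()
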
5 changes: 3 additions & 2 deletions llm_core/llm_core/utils/llm_utils.py
@@ -1,7 +1,7 @@
from typing import Type, TypeVar, List
from pydantic import BaseModel
import tiktoken
from langchain.chat_models import ChatOpenAI
from langchain_openai import ChatOpenAI
from langchain.base_language import BaseLanguageModel
from langchain.prompts import (
ChatPromptTemplate,
@@ -106,5 +106,6 @@ def get_chat_prompt_with_formatting_instructions(
system_message_prompt = SystemMessagePromptTemplate.from_template(system_message + "\n{format_instructions}")
system_message_prompt.prompt.partial_variables = {"format_instructions": output_parser.get_format_instructions()}
system_message_prompt.prompt.input_variables.remove("format_instructions")
human_message_prompt = HumanMessagePromptTemplate.from_template(human_message + "\n\nJSON response following the provided schema:")
human_message_prompt = HumanMessagePromptTemplate.from_template(
human_message + "\n\nJSON response following the provided schema:")
return ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt])
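
Alongside the prompt-formatting tweak, the ChatOpenAI import moves from the deprecated langchain.chat_models path to the langchain_openai partner package. A brief sketch of the updated import in use; the model name and temperature are illustrative values, not taken from this commit:

from langchain_openai import ChatOpenAI

# langchain_openai replaces the old langchain.chat_models import path.
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
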
55 changes: 42 additions & 13 deletions llm_core/llm_core/utils/predict_and_parse.py
@@ -1,17 +1,24 @@
from typing import Optional, Type, TypeVar, List

from langchain_core.exceptions import OutputParserException
from langchain_core.language_models import BaseLanguageModel
from langchain_core.output_parsers import PydanticOutputParser
from langchain_core.output_parsers.openai_functions import JsonOutputFunctionsParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, ValidationError
from langchain_core.runnables import RunnableSequence
from langchain_core.tracers import langchain
from langchain_core.utils.function_calling import convert_to_openai_function
from athena import get_experiment_environment
from athena.logger import logger
from .llm_utils import supports_function_calling

T = TypeVar("T", bound=BaseModel)

async def predict_and_parse(
model: BaseLanguageModel,
chat_prompt: ChatPromptTemplate,
prompt_input: dict,
pydantic_object: Type[T],
tags: Optional[List[str]]
) -> Optional[T]:
"""Predicts an LLM completion using the model and parses the output using the provided Pydantic model
@@ -26,6 +33,8 @@ async def predict_and_parse(
Returns:
Optional[T]: Parsed output, or None if it could not be parsed
"""
langchain.debug = True

experiment = get_experiment_environment()

tags = tags or []
@@ -36,13 +45,33 @@
if experiment.run_id is not None:
tags.append(f"run-{experiment.run_id}")

structured_output_llm = model.with_structured_output(pydantic_object, method="json_mode")
chain = RunnableSequence(
chat_prompt,
structured_output_llm
)
if supports_function_calling(model):
openai_functions = [convert_to_openai_function(pydantic_object)]

runnable = chat_prompt | model.bind(functions=openai_functions).with_retry(
retry_if_exception_type=(ValueError, OutputParserException),
wait_exponential_jitter=True,
stop_after_attempt=3,
) | JsonOutputFunctionsParser()

try:
output_dict = await runnable.ainvoke(prompt_input)
return pydantic_object.parse_obj(output_dict)
except (OutputParserException, ValidationError) as e:
logger.error("Exception type: %s, Message: %s", type(e).__name__, e)
return None

output_parser = PydanticOutputParser(pydantic_object=pydantic_object)

runnable = chat_prompt | model.with_retry(
retry_if_exception_type=(ValueError, OutputParserException),
wait_exponential_jitter=True,
stop_after_attempt=3,
) | output_parser

try:
return await chain.ainvoke(prompt_input, config={"tags": tags})
except ValidationError as e:
raise ValueError(f"Could not parse output: {e}") from e
output_dict = await runnable.ainvoke(prompt_input)
return pydantic_object.parse_obj(output_dict)
except (OutputParserException, ValidationError) as e:
logger.error("Exception type: %s, Message: %s", type(e).__name__, e)
return None
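
The rewritten predict_and_parse branches on model capabilities: models that support function calling are bound to an OpenAI function schema and parsed with JsonOutputFunctionsParser, while other models fall back to a PydanticOutputParser; both paths retry up to three attempts and now log and return None on parse failures instead of raising. A hedged usage sketch; the Summary model, prompt text, and model name are illustrative and not part of this commit:

import asyncio

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel
from langchain_openai import ChatOpenAI

from llm_core.utils.predict_and_parse import predict_and_parse


class Summary(BaseModel):
    title: str
    key_points: list


async def main():
    prompt = ChatPromptTemplate.from_messages([
        ("system", "Summarize the student submission."),
        ("human", "{submission}"),
    ])
    # Returns a parsed Summary instance, or None if the output could not be parsed.
    result = await predict_and_parse(
        model=ChatOpenAI(model="gpt-4o-mini"),
        chat_prompt=prompt,
        prompt_input={"submission": "def add(a, b): return a + b"},
        pydantic_object=Summary,
        tags=["example-run"],
    )
    print(result)


asyncio.run(main())
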
@@ -2,8 +2,7 @@

from athena.programming import Submission, Exercise, Feedback
from module_programming_llm.config import Configuration
from module_programming_llm.helpers.web_search import bulk_search
from module_programming_llm.helpers.models import ModelConfigType
from llm_core.models import ModelConfigType
from module_programming_llm.prompts import GenerateFileSummary, SplitProblemStatementByFile, \
SplitGradingInstructionsByFile, GenerateSuggestionsByFile, GenerateSuggestionsByFileOutput
from module_programming_llm.prompts.filter_out_solution.filter_out_solution import FilterOutSolution
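
From here on, the changed files swap their helper imports from module_programming_llm.helpers to the shared llm_core package: ModelConfigType now comes from llm_core.models, the prompt and token helpers from llm_core.utils.llm_utils, and predict_and_parse from its own llm_core.utils.predict_and_parse module. A representative import block after the change; the exact combination of helpers varies per file:

from llm_core.models import ModelConfigType
from llm_core.utils.llm_utils import (
    get_chat_prompt_with_formatting_instructions,
    num_tokens_from_string,
)
from llm_core.utils.predict_and_parse import predict_and_parse
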
@@ -2,7 +2,7 @@

from pydantic import BaseModel, Field

from module_programming_llm.helpers.models import ModelConfigType, DefaultModelConfig
from llm_core.models import ModelConfigType, DefaultModelConfig
from athena import config_schema_provider
from module_programming_llm.prompts import SplitProblemStatementByFile, SplitGradingInstructionsByFile, \
GenerateSuggestionsByFile, GenerateFileSummary
@@ -1,61 +1,22 @@
from typing import Sequence, List

from langchain.chains.qa_with_sources.retrieval import RetrievalQAWithSourcesChain
from langchain_community.document_loaders import AsyncChromiumLoader
from langchain_community.document_transformers import BeautifulSoupTransformer
from duckduckgo_search import DDGS
import re

from langchain_community.retrievers import WebResearchRetriever
from langchain_community.utilities import GoogleSearchAPIWrapper
from langchain_community.vectorstores import Chroma
from langchain_core.documents import Document
from langchain_core.tools import Tool
from langchain_openai import OpenAIEmbeddings

from module_programming_llm.helpers.models import ModelConfigType
from llm_core.models import ModelConfigType


def bulk_search(queries: Sequence[str], model: ModelConfigType) -> List[str]:
result = []
for query in queries:
result.append(answer_query(query, model))
return result
#
#
# def search(query: str) -> List[str]:
# results = DDGS().text(query, max_results=5)
# urls = []
# for result in results:
# url = result['href']
# urls.append(url)
#
# docs = get_page(urls)
#
# content = []
# for doc in docs:
# page_text = re.sub("\n\n+", "\n", doc.page_content)
# text = truncate(page_text)
# content.append(text)
#
# return content
#
#
# def get_page(urls: List[str]) -> Sequence[Document]:
# loader = AsyncChromiumLoader(urls, headless=True)
# html = loader.load()
#
# bs_transformer = BeautifulSoupTransformer()
# docs_transformed = bs_transformer.transform_documents(html, tags_to_extract=["p"], remove_unwanted_tags=["a"])
#
# return docs_transformed
#
#
# def truncate(text) -> str:
# words = text.split()
# truncated = " ".join(words[:1000])
#
# return truncated


def answer_query(query, model: ModelConfigType):
model = model.get_model() # type: ignore[attr-defined]
@@ -12,18 +12,18 @@
FeedbackModel as SuggestionsFeedbackModel
from .prompt import system_message as prompt_system_message, human_message as prompt_human_message
from pydantic import Field
from module_programming_llm.helpers.llm_utils import (
from llm_core.utils.llm_utils import (
get_chat_prompt_with_formatting_instructions,
predict_and_parse, num_tokens_from_string,
num_tokens_from_string,
check_prompt_length_and_omit_features_if_necessary,
)
from llm_core.utils.predict_and_parse import predict_and_parse
from module_programming_llm.helpers.utils import (
get_diff,
load_files_from_repo,
add_line_numbers
)
from ...helpers.models import ModelConfigType

from llm_core.models import ModelConfigType

class FilterOutSolution(PipelineStep[FilterOutSolutionInput, List[Optional[FilterOutSolutionOutput]]]):
"""A pipeline step to remove potential solutions from the output."""
@@ -9,16 +9,17 @@
from .generate_file_summary_output import FileDescription
from .prompt import system_message as prompt_system_message, human_message as prompt_human_message
from pydantic import Field
from module_programming_llm.helpers.llm_utils import (
from llm_core.utils.llm_utils import (
get_chat_prompt_with_formatting_instructions,
predict_and_parse, num_tokens_from_prompt,
num_tokens_from_prompt
)
from llm_core.utils.predict_and_parse import predict_and_parse
from module_programming_llm.helpers.utils import (
get_diff,
load_files_from_repo,
add_line_numbers
)
from ...helpers.models import ModelConfigType
from llm_core.models import ModelConfigType


class GenerateFileSummary(PipelineStep[GenerateFileSummaryInput, Optional[GenerateFileSummaryOutput]]):
@@ -9,17 +9,18 @@
from .generate_grading_criterion_output import GenerateGradingCriterionOutput
from .prompt import system_message as prompt_system_message, human_message as prompt_human_message
from pydantic import Field
from module_programming_llm.helpers.llm_utils import (
from llm_core.utils.llm_utils import (
get_chat_prompt_with_formatting_instructions,
predict_and_parse, num_tokens_from_string,
num_tokens_from_string,
check_prompt_length_and_omit_features_if_necessary, num_tokens_from_prompt,
)
from llm_core.utils.predict_and_parse import predict_and_parse
from module_programming_llm.helpers.utils import (
get_diff,
load_files_from_repo,
add_line_numbers, get_programming_language_file_extension
)
from ...helpers.models import ModelConfigType
from llm_core.models import ModelConfigType


class GenerateGradingCriterion(PipelineStep[GenerateGradingCriterionInput, Optional[GenerateGradingCriterionOutput]]):
@@ -8,17 +8,18 @@
from .generate_suggestions_by_file_output import GenerateSuggestionsByFileOutput, FeedbackModel
from .prompt import system_message as prompt_system_message, human_message as prompt_human_message
from pydantic import Field
from module_programming_llm.helpers.llm_utils import (
from llm_core.utils.llm_utils import (
get_chat_prompt_with_formatting_instructions,
predict_and_parse, num_tokens_from_string,
num_tokens_from_string,
check_prompt_length_and_omit_features_if_necessary,
)
from llm_core.utils.predict_and_parse import predict_and_parse
from module_programming_llm.helpers.utils import (
get_diff,
load_files_from_repo,
add_line_numbers, get_programming_language_file_extension, format_grading_instructions
)
from ...helpers.models import ModelConfigType
from llm_core.models import ModelConfigType


class GenerateSuggestionsByFile(PipelineStep[GenerateSuggestionsByFileInput, List[Optional[GenerateSuggestionsByFileOutput]]]):
@@ -2,7 +2,7 @@
from abc import abstractmethod
from pydantic import BaseModel, Field

from module_programming_llm.helpers.models import ModelConfigType
from llm_core.models import ModelConfigType

# Generic types for input and output
TInput = TypeVar('TInput')
@@ -7,16 +7,16 @@
from .rag_output import RAGOutput
from .prompt import system_message as prompt_system_message, human_message as prompt_human_message
from pydantic import Field
from module_programming_llm.helpers.llm_utils import (
from llm_core.utils.llm_utils import (
get_chat_prompt_with_formatting_instructions,
predict_and_parse,
num_tokens_from_prompt,
)
from llm_core.utils.predict_and_parse import predict_and_parse
from module_programming_llm.helpers.utils import (
get_diff,
load_files_from_repo,
)
from ...helpers.models import ModelConfigType
from llm_core.models import ModelConfigType


class RAG(PipelineStep[RAGInput, Optional[RAGOutput]]):
@@ -9,16 +9,17 @@
from .split_grading_instructions_by_file_input import SplitGradingInstructionsByFileInput
from .prompt import system_message as prompt_system_message, human_message as prompt_human_message
from pydantic import Field
from module_programming_llm.helpers.llm_utils import (
from llm_core.utils.llm_utils import (
get_chat_prompt_with_formatting_instructions,
num_tokens_from_string,
predict_and_parse, num_tokens_from_prompt,
num_tokens_from_prompt,
)
from llm_core.utils.predict_and_parse import predict_and_parse
from module_programming_llm.helpers.utils import (
get_diff,
)
from .split_grading_instructions_by_file_output import FileGradingInstruction, SplitGradingInstructionsByFileOutput
from ...helpers.models import ModelConfigType
from llm_core.models import ModelConfigType


class SplitGradingInstructionsByFile(
@@ -9,9 +9,10 @@
from .split_problem_statement_by_file_input import SplitProblemStatementByFileInput
from .split_problem_statement_by_file_output import FileProblemStatement, SplitProblemStatementByFileOutput
from ..pipeline_step import PipelineStep
from ...helpers.llm_utils import num_tokens_from_string, get_chat_prompt_with_formatting_instructions, \
num_tokens_from_prompt, predict_and_parse
from ...helpers.models import ModelConfigType
from llm_core.utils.llm_utils import num_tokens_from_string, get_chat_prompt_with_formatting_instructions, \
num_tokens_from_prompt
from llm_core.utils.predict_and_parse import predict_and_parse
from llm_core.models import ModelConfigType
from ...helpers.utils import get_diff


@@ -11,17 +11,18 @@
from .validate_suggestions_output import ValidateSuggestionsOutput
from .prompt import system_message as prompt_system_message, human_message as prompt_human_message
from pydantic import Field
from module_programming_llm.helpers.llm_utils import (
from llm_core.utils.llm_utils import (
get_chat_prompt_with_formatting_instructions,
predict_and_parse, num_tokens_from_string,
num_tokens_from_string,
check_prompt_length_and_omit_features_if_necessary,
)
from llm_core.utils.predict_and_parse import predict_and_parse
from module_programming_llm.helpers.utils import (
get_diff,
load_files_from_repo,
add_line_numbers, get_programming_language_file_extension
)
from ...helpers.models import ModelConfigType
from llm_core.models import ModelConfigType


class ValidateSuggestions(PipelineStep[ValidateSuggestionsInput, List[Optional[ValidateSuggestionsOutput]]]):