Added support for calling HuggingFace models #824

Open. Wants to merge 5 commits into base branch development.
17 changes: 13 additions & 4 deletions .github/workflows/config.json
@@ -68,10 +68,19 @@
"min_query_expansion_related_question_similarity_score": 90,
"expand_to_multiple_questions": true
},
"openai": {
"azure_oai_chat_deployment_name": "gpt-35-turbo",
"azure_oai_eval_deployment_name": "gpt-35-turbo",
"temperature": 0
"llm": {
"chat_llm": {
"llm_type": "openai",
"model_name": "gpt-35-turbo",
"temperature": 0,
"max_tokens": 4096
},
"eval_llm": {
"llm_type": "openai",
"model_name": "gpt-35-turbo",
"temperature": 0,
"max_tokens": 4096
}
},
"eval": {
"metric_types": [
17 changes: 13 additions & 4 deletions config.sample.json
@@ -69,10 +69,19 @@
"min_query_expansion_related_question_similarity_score": 90,
"expand_to_multiple_questions": false
},
"openai": {
"azure_oai_chat_deployment_name": "gpt-35-turbo",
"azure_oai_eval_deployment_name": "gpt-35-turbo",
"temperature": 0
"llm": {
"chat_llm": {
"llm_type": "openai",
"model_name": "gpt-35-turbo",
"temperature": 0,
"max_tokens": 4096
},
"eval_llm": {
"llm_type": "openai",
"model_name": "gpt-35-turbo",
"temperature": 0,
"max_tokens": 4096
}
},
"eval": {
"metric_types": [
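The sample configs above still point both chat_llm and eval_llm at OpenAI. As a sketch of where this PR is headed, a Hugging Face-backed chat model might be configured like the block below; the "huggingface" provider key and the checkpoint name are assumptions, not taken from this diff. It is written as a Python dict so it can be dumped straight to JSON.

```python
# Hypothetical "llm" block with a Hugging Face chat model; the "huggingface"
# provider key and the checkpoint name are assumptions, not shown in this diff.
import json

llm_block = {
    "llm": {
        "chat_llm": {
            "llm_type": "huggingface",   # assumed provider key
            "model_name": "gpt2",        # example Hugging Face checkpoint
            "temperature": 0,
            "max_tokens": 256,
        },
        "eval_llm": {
            "llm_type": "openai",
            "model_name": "gpt-35-turbo",
            "temperature": 0,
            "max_tokens": 4096,
        },
    }
}

print(json.dumps(llm_block, indent=2))
```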
76 changes: 55 additions & 21 deletions config.schema.json
@@ -520,31 +520,65 @@
"expand_to_multiple_questions"
]
},
"openai": {
"llm": {
"type": "object",
"properties": {
"azure_oai_chat_deployment_name": {
"type": "string",
"minLength": 1,
"description": "Azure OpenAI deployment name"
},
"azure_oai_eval_deployment_name": {
"type": "string",
"minLength": 1,
"description": "Azure OpenAI evaluation deployment name"
"chat_llm": {
"type": "object",
"properties": {
"llm_type": {
"type": "string",
"minLength": 1,
"description": "Type of the LLM provider, e.g., 'openai'"
},
"model_name": {
"type": "string",
"minLength": 1,
"description": "Model name, e.g., 'gpt-3.5-turbo'"
},
"temperature": {
"type": "number",
"minimum": 0,
"maximum": 1,
"description": "Temperature for generating responses"
},
"max_tokens": {
"type": "integer",
"minimum": 1,
"description": "Maximum number of tokens allowed for the response"
}
},
"required": ["llm_type", "model_name", "temperature", "max_tokens"]
},
"temperature": {
"type": "number",
"minimum": 0,
"maximum": 1,
"description": "Temperature for OpenAI API"
"eval_llm": {
"type": "object",
"properties": {
"llm_type": {
"type": "string",
"minLength": 1,
"description": "Type of the LLM provider, e.g., 'openai'"
},
"model_name": {
"type": "string",
"minLength": 1,
"description": "Model name, e.g., 'gpt-3.5-turbo'"
},
"temperature": {
"type": "number",
"minimum": 0,
"maximum": 1,
"description": "Temperature for generating responses"
},
"max_tokens": {
"type": "integer",
"minimum": 1,
"description": "Maximum number of tokens allowed for the response"
}
},
"required": ["llm_type", "model_name", "temperature", "max_tokens"]
}
},
"required": [
"azure_oai_chat_deployment_name",
"azure_oai_eval_deployment_name",
"temperature"
]
"required": ["chat_llm", "eval_llm"]
},
"eval": {
"type": "object",
@@ -599,7 +633,7 @@
"language",
"rerank",
"search",
"openai",
"llm",
"eval"
]
}
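Both chat_llm and eval_llm now require all four fields. A quick way to see the effect of the new required list, using the jsonschema package as a stand-in for the project's own validator (which is not shown in this excerpt):

```python
# Minimal sketch: validating a chat_llm block against the schema fragment above.
from jsonschema import ValidationError, validate

chat_llm_schema = {
    "type": "object",
    "properties": {
        "llm_type": {"type": "string", "minLength": 1},
        "model_name": {"type": "string", "minLength": 1},
        "temperature": {"type": "number", "minimum": 0, "maximum": 1},
        "max_tokens": {"type": "integer", "minimum": 1},
    },
    "required": ["llm_type", "model_name", "temperature", "max_tokens"],
}

try:
    validate(
        instance={"llm_type": "openai", "model_name": "gpt-35-turbo", "temperature": 0},
        schema=chat_llm_schema,
    )
except ValidationError as err:
    print(err.message)  # "'max_tokens' is a required property"
```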
4 changes: 2 additions & 2 deletions rag_experiment_accelerator/config/config.py
@@ -12,7 +12,7 @@
from rag_experiment_accelerator.config.rerank_config import RerankConfig
from rag_experiment_accelerator.config.search_config import SearchConfig
from rag_experiment_accelerator.config.query_expansion import QueryExpansionConfig
from rag_experiment_accelerator.config.openai_config import OpenAIConfig
from rag_experiment_accelerator.config.llm_config import LLMConfig
from rag_experiment_accelerator.config.eval_config import EvalConfig

from rag_experiment_accelerator.embedding.embedding_model import EmbeddingModel
@@ -47,7 +47,7 @@ class Config(BaseConfig):
rerank: RerankConfig = field(default_factory=RerankConfig)
search: SearchConfig = field(default_factory=SearchConfig)
query_expansion: QueryExpansionConfig = field(default_factory=QueryExpansionConfig)
openai: OpenAIConfig = field(default_factory=OpenAIConfig)
llm: LLMConfig = field(default_factory=LLMConfig)
eval: EvalConfig = field(default_factory=EvalConfig)

@classmethod
16 changes: 16 additions & 0 deletions rag_experiment_accelerator/config/llm_config.py
@@ -0,0 +1,16 @@
from dataclasses import dataclass, field
from rag_experiment_accelerator.config.base_config import BaseConfig


@dataclass
class BaseLLMConfig(BaseConfig):
llm_type: str = "openai"
model_name: str = "gpt-3.5-turbo"
temperature: float = 0.0
max_tokens: int = 100


@dataclass
class LLMConfig(BaseConfig):
chat_llm: BaseLLMConfig = field(default_factory=BaseLLMConfig)
eval_llm: BaseLLMConfig = field(default_factory=BaseLLMConfig)
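A small usage sketch of the new dataclasses. Note that the in-code defaults differ from the sample configs: max_tokens defaults to 100 here while the samples use 4096. The "huggingface" value for llm_type is an assumption about what the response-generator factory expects.

```python
# Hypothetical construction of an LLMConfig using the dataclasses above.
from rag_experiment_accelerator.config.llm_config import BaseLLMConfig, LLMConfig

llm = LLMConfig(
    chat_llm=BaseLLMConfig(
        llm_type="huggingface",   # assumed provider key
        model_name="gpt2",        # example Hugging Face checkpoint
        temperature=0.0,
        max_tokens=256,
    ),
    eval_llm=BaseLLMConfig(),     # keeps the openai / gpt-3.5-turbo / 100-token defaults
)
```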
9 changes: 0 additions & 9 deletions rag_experiment_accelerator/config/openai_config.py

This file was deleted.

17 changes: 13 additions & 4 deletions rag_experiment_accelerator/config/tests/data/config.json
@@ -82,10 +82,19 @@
"min_query_expansion_related_question_similarity_score": 90,
"expand_to_multiple_questions": false
},
"openai": {
"azure_oai_chat_deployment_name": "test_chat_deployment_name",
"azure_oai_eval_deployment_name": "test_eval_deployment_name",
"temperature": 10
"llm": {
"chat_llm": {
"llm_type": "openai",
"model_name": "gpt-3.5-turbo",
"temperature": 0,
"max_tokens": 4096
},
"eval_llm": {
"llm_type": "openai",
"model_name": "gpt-3.5-turbo",
"temperature": 0,
"max_tokens": 4096
}
},
"eval": {
"metric_types": [
34 changes: 21 additions & 13 deletions rag_experiment_accelerator/config/tests/test_config.py
@@ -45,7 +45,12 @@ def test_config_init(mock_validate_json_with_schema, mock_create_embedding_model
embedding_model_4.model_name.return_value = "text-embedding-3-small"
embedding_model_4.dimension.return_value = 256
embedding_model_4.shorten_dimensions.return_value = True
mock_create_embedding_model.side_effect = [embedding_model_1, embedding_model_2, embedding_model_3, embedding_model_4]
mock_create_embedding_model.side_effect = [
embedding_model_1,
embedding_model_2,
embedding_model_3,
embedding_model_4,
]
mock_validate_json_with_schema.return_value = (True, None)

config = Config.from_path(environment, config_path)
@@ -105,7 +110,10 @@ def test_config_init(mock_validate_json_with_schema, mock_create_embedding_model
assert index.embedding_model[3].type == mock_embedding[3]["type"]
assert index.embedding_model[3].model_name == mock_embedding[3]["model_name"]
assert index.embedding_model[3].dimension == mock_embedding[3]["dimension"]
assert index.embedding_model[3].shorten_dimensions == mock_embedding[3]["shorten_dimensions"]
assert (
index.embedding_model[3].shorten_dimensions
== mock_embedding[3]["shorten_dimensions"]
)

model1 = config.get_embedding_model(config.index.embedding_model[0].model_name)
assert model1.model_name.return_value == "all-MiniLM-L6-v2"
@@ -156,17 +164,17 @@ def test_config_init(mock_validate_json_with_schema, mock_create_embedding_model
== mock_query_expansion["expand_to_multiple_questions"]
)

openai = config.openai
mock_openai = mock_config["openai"]
assert (
openai.azure_oai_chat_deployment_name
== mock_openai["azure_oai_chat_deployment_name"]
)
assert (
openai.azure_oai_eval_deployment_name
== mock_openai["azure_oai_eval_deployment_name"]
)
assert openai.temperature == mock_openai["temperature"]
llm = config.llm
mock_llm = mock_config["llm"]
assert llm.chat_llm.model_name == mock_llm["chat_llm"]["model_name"]
assert llm.chat_llm.llm_type == mock_llm["chat_llm"]["llm_type"]
assert llm.chat_llm.temperature == mock_llm["chat_llm"]["temperature"]
assert llm.chat_llm.max_tokens == mock_llm["chat_llm"]["max_tokens"]

assert llm.eval_llm.model_name == mock_llm["eval_llm"]["model_name"]
assert llm.eval_llm.llm_type == mock_llm["eval_llm"]["llm_type"]
assert llm.eval_llm.temperature == mock_llm["eval_llm"]["temperature"]
assert llm.eval_llm.max_tokens == mock_llm["eval_llm"]["max_tokens"]

assert config.eval.metric_types == mock_config["eval"]["metric_types"]

11 changes: 7 additions & 4 deletions rag_experiment_accelerator/evaluation/eval.py
@@ -31,6 +31,9 @@
compute_transformer_based_score,
)

from rag_experiment_accelerator.llm.response_generator_factory import (
get_response_generator,
)
from rag_experiment_accelerator.llm.response_generator import ResponseGenerator
from rag_experiment_accelerator.utils.logging import get_logger
from rag_experiment_accelerator.config.environment import Environment
@@ -102,7 +105,9 @@ def compute_metrics(
"""

if metric_type.startswith("rouge"):
return plain_metrics.rouge_score(ground_truth=expected, prediction=actual, rouge_metric_name=metric_type)
return plain_metrics.rouge_score(
ground_truth=expected, prediction=actual, rouge_metric_name=metric_type
)
else:
plain_metric_func = getattr(plain_metrics, metric_type, None)
if plain_metric_func:
@@ -207,9 +212,7 @@

handler = QueryOutputHandler(config.path.query_data_dir)

response_generator = ResponseGenerator(
environment, config, config.openai.azure_oai_eval_deployment_name
)
response_generator = get_response_generator(config.llm.eval_llm, environment)

query_data_load = handler.load(
index_config.index_name(), config.experiment_name, config.job_name
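get_response_generator comes from response_generator_factory, which is part of the PR but not shown in this excerpt. A hedged sketch of the kind of dispatch it presumably performs; all names, the provider key, and the fallback constructor signature are assumptions:

```python
# Hypothetical sketch of response_generator_factory.get_response_generator.
from rag_experiment_accelerator.config.llm_config import BaseLLMConfig
from rag_experiment_accelerator.llm.huggingface_response_generator import (
    HuggingfaceResponseGenerator,
)
from rag_experiment_accelerator.llm.response_generator import ResponseGenerator


def get_response_generator(llm_config: BaseLLMConfig, environment) -> ResponseGenerator:
    if llm_config.llm_type == "huggingface":   # assumed provider key
        return HuggingfaceResponseGenerator(llm_config)
    # Fall back to the OpenAI-backed generator; its updated constructor is not
    # visible in this diff, so this call is a guess.
    return ResponseGenerator(environment, llm_config)
```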
7 changes: 4 additions & 3 deletions rag_experiment_accelerator/ingest_data/acs_ingest.py
@@ -7,6 +7,9 @@
from azure.search.documents import SearchClient
from rag_experiment_accelerator.checkpoint import cache_with_checkpoint
from rag_experiment_accelerator.config.config import Config
from rag_experiment_accelerator.llm.response_generator_factory import (
get_response_generator,
)
from rag_experiment_accelerator.llm.response_generator import ResponseGenerator
from rag_experiment_accelerator.llm.prompt import (
do_need_multiple_prompt_instruction,
@@ -103,9 +106,7 @@ def generate_qna(environment, config, docs, azure_oai_deployment_name):
column_names = ["user_prompt", "output_prompt", "context"]

new_df = pd.DataFrame(columns=column_names)
response_generator = ResponseGenerator(
environment, config, azure_oai_deployment_name
)
response_generator = get_response_generator(config.llm.chat_llm, environment)

for doc in docs:
chunk = list(doc.values())[0]
@@ -58,7 +58,7 @@ def test_my_hash_with_numbers():
assert result == expected_hash


@patch("rag_experiment_accelerator.run.index.ResponseGenerator")
@patch("rag_experiment_accelerator.run.index.get_response_generator")
def test_generate_title(mock_response_generator):
# Arrange
mock_response = "Test Title"
@@ -80,7 +80,7 @@ def test_generate_title(mock_response_generator):
assert result == mock_response


@patch("rag_experiment_accelerator.run.index.ResponseGenerator")
@patch("rag_experiment_accelerator.run.index.get_response_generator")
def test_generate_summary(mock_response_generator):
# Arrange
mock_summary = "Test Summary"
41 changes: 41 additions & 0 deletions rag_experiment_accelerator/llm/huggingface_response_generator.py
@@ -0,0 +1,41 @@
from rag_experiment_accelerator.llm.response_generator import ResponseGenerator
from transformers import AutoTokenizer, AutoModelForCausalLM

from rag_experiment_accelerator.config.llm_config import BaseLLMConfig
from rag_experiment_accelerator.utils.logging import get_logger
from rag_experiment_accelerator.llm.prompt.prompt import (
Prompt,
PromptTag,
)

logger = get_logger(__name__)


class HuggingfaceResponseGenerator(ResponseGenerator):
def __init__(self, config: BaseLLMConfig):
super().__init__(config)

self._tokenizer = AutoTokenizer.from_pretrained(self.config.model_name)
self._model = AutoModelForCausalLM.from_pretrained(self.config.model_name)

def _get_response(
self,
messages,
prompt: Prompt,
) -> any:
kwargs = {}

if self.json_object_supported and PromptTag.JSON in prompt.tags:
kwargs["response_format"] = {"type": "json_object"}

input_ids = self._tokenizer.encode(messages, return_tensors="pt")
output_ids = self._model.generate(
input_ids,
num_return_sequences=1,
no_repeat_ngram_size=2,
temperature=self.config.temperature,
max_length=self.config.max_tokens,
)
response_text = self._tokenizer.decode(output_ids[0], skip_special_tokens=True)

return self._interpret_response(response_text, prompt)
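For reference, a hypothetical direct instantiation (normally the factory would do this). It assumes the base ResponseGenerator constructor now takes only the LLM config, as the super().__init__(config) call above implies; "gpt2" is just a small example checkpoint.

```python
# Hypothetical usage sketch; downloads the checkpoint on first run.
from rag_experiment_accelerator.config.llm_config import BaseLLMConfig
from rag_experiment_accelerator.llm.huggingface_response_generator import (
    HuggingfaceResponseGenerator,
)

hf_config = BaseLLMConfig(
    llm_type="huggingface",   # assumed provider key
    model_name="gpt2",        # example causal LM from the Hugging Face Hub
    temperature=0.7,
    max_tokens=128,
)
generator = HuggingfaceResponseGenerator(hf_config)
```

One behavioral note: model.generate(max_length=...) counts prompt plus completion tokens, whereas the OpenAI max_tokens setting limits only the completion, so the same max_tokens value may yield shorter outputs from the Hugging Face path.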