diff --git a/poetry.lock b/poetry.lock
index a1d982ae..666ad5c6 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -2272,13 +2272,13 @@ files = [
 
 [[package]]
 name = "litellm"
-version = "1.34.34"
+version = "1.35.16"
 description = "Library to easily interface with LLM API providers"
 optional = false
 python-versions = "!=2.7.*,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,!=3.7.*,>=3.8"
 files = [
-    {file = "litellm-1.34.34-py3-none-any.whl", hash = "sha256:c9eefd4b5adec3c2e6d0ab765a4fcebd475a895c7e417f47f8e677410b607f51"},
-    {file = "litellm-1.34.34.tar.gz", hash = "sha256:d11c9d5296d052a9e5e1187ac7b33683f3a581740abc4de6a9c327d3f3c7187c"},
+    {file = "litellm-1.35.16-py3-none-any.whl", hash = "sha256:04a70b6fd51e721ccc89468f565486527dcfd9cb6261fbdd4b00587dbe3a2df1"},
+    {file = "litellm-1.35.16.tar.gz", hash = "sha256:17c5a8cbd52074c8a509c9874e7e4bf1366260e15d5f9b2af4fa200bc5d0896c"},
 ]
 
 [package.dependencies]
@@ -7449,4 +7449,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.10,<3.12"
-content-hash = "bfaf3219ce189ec08d5366b296036874ecf2084c5c20a9cd709b28d2f70c1deb"
+content-hash = "29e92d05fb1ce62f1864558f98c56c91162e6a792585a891bed0c553fce3ded3"
diff --git a/pyproject.toml b/pyproject.toml
index edc926be..ec889182 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "docq"
-version = "0.10.1"
+version = "0.10.2"
 description = "Docq.AI - Your private ChatGPT alternative. Securely unlock knowledge from confidential documents."
 authors = ["Docq.AI Team "]
 maintainers = ["Docq.AI Team "]
@@ -52,13 +52,13 @@ mkdocs-material = "^9.5.13"
 pyautogen = "^0.2.2"
 termcolor = "^2.4.0"
 google-cloud-aiplatform = "^1.38.0"
-litellm = "^1.26.6"
+litellm = "^1.35.16"
 semantic-kernel = "0.4.3.dev0"
 imap-tools = "^1.5.0"
 llama-index-llms-litellm = "^0.1.3"
 llama-index-embeddings-azure-openai = "^0.1.6"
 jwt = "^1.3.1"
-llama-index-embeddings-huggingface-optimum = "^0.1.4"
+llama-index-embeddings-huggingface-optimum = "^0.1.5"
 llama-index-core = "^0.10.21.post1"
 llama-index-readers-file = "^0.1.12"
 slack-bolt = "^1.18.1"
diff --git a/source/docq/model_selection/main.py b/source/docq/model_selection/main.py
index cbbd86f8..4032fc92 100644
--- a/source/docq/model_selection/main.py
+++ b/source/docq/model_selection/main.py
@@ -1,9 +1,9 @@
 """Model selection and usage settings for Docq.
 
-We potentially need to support multiple versions and configurations for models from a given vendor and also different combinations of models.
+We potentially need to support multiple versions and configurations for models from a given provider and also different combinations of models.
 The ModeUsageSettings class is the building block. We might have multiple structures to group multiple models together.
-Model collections grouped by vendor and model capability is just one way to structure.
+Model collections grouped by provider and model capability is just one way to structure.
 """
 
 import logging as log
@@ -18,17 +18,17 @@
 from ..manage_settings import get_organisation_settings
 
 
-class ModelVendor(str, Enum):
-    """Model vendor names.
+class ModelProvider(str, Enum):
+    """Model provider names.
 
-    Dedicated model providers {model vendor} e.g. OPENAI OR COHERE.
-    Cloud provider hosted models {cloud provider name}_[{service name}_]{model vendor} e.g. AZURE_OPENAI OR AWS_SAGEMAKER_LLAMA OR AWS_BEDROCK_COHERE or AWS_BEDROCK_TITAN.
+    Dedicated model providers {model provider} e.g. OPENAI or AWS or AZURE.
+    Cloud provider hosted models {cloud provider name}_[{service name}_] e.g. AZURE_OPENAI OR AWS_SAGEMAKER OR AWS_BEDROCK or GROQ or GOOGLE_VERTEXAI.
     """
 
     OPENAI = "OpenAI"
     AZURE_OPENAI = "Azure OpenAI"
-    AZURE_ML_LLAMA = "Azure ML Llama"
-    GROQ_META = "Groq Meta"
+    AZURE_ML = "Azure ML"
+    GROQ = "Groq"
     AWS_BEDROCK_AMAZON = "AWS Bedrock Amazon"
     AWS_BEDROCK_AI21LABs = "AWS Bedrock AI21labs"
     AWS_BEDROCK_COHERE = "AWS Bedrock Cohere"
@@ -59,7 +59,7 @@ class ModelCapability(str, Enum):
 class LlmServiceInstanceConfig:
     """Config related to a running instance of an LLM aka a deployed model."""
 
-    vendor: ModelVendor
+    provider: ModelProvider
     model_name: str
     """Each LLM hosting provider defines string name to identify different versions of models."""
     api_key: Optional[str] = None
@@ -86,10 +86,10 @@ class LlmUsageSettings:
     """Model usage settings to associate with a model service instance."""
 
     model_capability: ModelCapability
-    """Map a capability to a model intance."""
+    """Map a capability to a model instance."""
     service_instance_config: LlmServiceInstanceConfig
     """Config for a running instance of an LLM compatible with these settings."""
-    temperature: float = 0.0
+    temperature: float = 0.1
     additional_args: Optional[Mapping[str, Any]] = field(default_factory=dict)
     """Any additional model API specific arguments to be passed to function like chat and completion"""
 
@@ -107,19 +107,19 @@ class LlmUsageSettingsCollection:
 # The configuration of the deployed instances of models. Basically service discovery.
 LLM_SERVICE_INSTANCES = {
     "openai-gpt35turbo": LlmServiceInstanceConfig(
-        vendor=ModelVendor.OPENAI,
+        provider=ModelProvider.OPENAI,
         model_name="gpt-3.5-turbo",
         api_key=os.getenv("DOCQ_OPENAI_API_KEY"),
         license_="Commercial",
     ),
     "openai-ada-002": LlmServiceInstanceConfig(
-        vendor=ModelVendor.OPENAI,
+        provider=ModelProvider.OPENAI,
         model_name="text-embedding-ada-002",
         api_key=os.getenv("DOCQ_OPENAI_API_KEY"),
         license_="Commercial",
     ),
     "azure-openai-gpt35turbo": LlmServiceInstanceConfig(
-        vendor=ModelVendor.AZURE_OPENAI,
+        provider=ModelProvider.AZURE_OPENAI,
         model_name="gpt-35-turbo",
         model_deployment_name="gpt-35-turbo",
         api_base=os.getenv("DOCQ_AZURE_OPENAI_API_BASE") or "",
@@ -129,7 +129,7 @@ class LlmUsageSettingsCollection:
         license_="Commercial",
     ),
     "azure-openai-gpt4turbo": LlmServiceInstanceConfig(
-        vendor=ModelVendor.AZURE_OPENAI,
+        provider=ModelProvider.AZURE_OPENAI,
         model_name="gpt-4",
         model_deployment_name="gpt4-turbo-1106-preview",
         api_base=os.getenv("DOCQ_AZURE_OPENAI_API_BASE") or "",
@@ -138,7 +138,7 @@ class LlmUsageSettingsCollection:
         license_="Commercial",
     ),
     "azure-openai-ada-002": LlmServiceInstanceConfig(
-        vendor=ModelVendor.AZURE_OPENAI,
+        provider=ModelProvider.AZURE_OPENAI,
         model_name="text-embedding-ada-002",
         model_deployment_name="text-embedding-ada-002",
         api_base=os.getenv("DOCQ_AZURE_OPENAI_API_BASE") or "",
@@ -146,22 +146,22 @@ class LlmUsageSettingsCollection:
         license_="Commercial",
     ),
     "google-vertexai-palm2": LlmServiceInstanceConfig(
-        vendor=ModelVendor.GOOGLE_VERTEXAI_PALM2, model_name="chat-bison@002", context_window_size=8196
+        provider=ModelProvider.GOOGLE_VERTEXAI_PALM2, model_name="chat-bison@002", context_window_size=8196
     ),
     "google-vertexai-gemini-pro": LlmServiceInstanceConfig(
-        vendor=ModelVendor.GOOGLE_VERTEXTAI_GEMINI_PRO,
+        provider=ModelProvider.GOOGLE_VERTEXTAI_GEMINI_PRO,
         model_name="gemini-pro",
         additional_properties={"vertex_location": "us-central1"},
         context_window_size=32000,
     ),
     "google-vertexai-gemini-1.0-pro-001": LlmServiceInstanceConfig(
-        vendor=ModelVendor.GOOGLE_VERTEXTAI_GEMINI_PRO,
+        provider=ModelProvider.GOOGLE_VERTEXTAI_GEMINI_PRO,
         model_name="gemini-1.0-pro-001",
         additional_properties={"vertex_location": "us-central1"},
         context_window_size=32000,
     ),
     "optimum-bge-small-en-v1.5": LlmServiceInstanceConfig(
-        vendor=ModelVendor.HUGGINGFACE_OPTIMUM_BAAI,
+        provider=ModelProvider.HUGGINGFACE_OPTIMUM_BAAI,
         model_name="BAAI/bge-small-en-v1.5",
         license_="MIT",
         citation="""@misc{bge_embedding,
@@ -175,15 +175,31 @@ class LlmUsageSettingsCollection:
         context_window_size=1024,
     ),
     "groq-meta-llama2-70b-4096": LlmServiceInstanceConfig(
-        vendor=ModelVendor.GROQ_META,
+        provider=ModelProvider.GROQ,
         model_name="llama2-70b-4096",
         api_key=os.getenv(ENV_VAR_DOCQ_GROQ_API_KEY),
         api_base="https://api.groq.com/openai/v1",
         api_version="2023-05-15",  # not used by groq but checked by the downstream lib
         context_window_size=4096,
+        license_="META LLAMA 2 COMMUNITY LICENSE AGREEMENT",
     ),
-    "groq-meta-mixtral-8x7b-32768": LlmServiceInstanceConfig(
-        vendor=ModelVendor.GROQ_META,
+    "groq-meta-llama3-70b-8192": LlmServiceInstanceConfig(
+        provider=ModelProvider.GROQ,
+        model_name="llama3-70b-8192",
+        api_key=os.getenv(ENV_VAR_DOCQ_GROQ_API_KEY),
+        # api_base="https://api.groq.com/openai/v1",
+        # api_version="2023-05-15", # not used by groq but checked by the downstream lib
+        context_window_size=8192,
+        license_="META LLAMA 3 COMMUNITY LICENSE AGREEMENT",
+        citation="""@article{llama3modelcard,
+            title={Llama 3 Model Card},
+            author={AI@Meta},
+            year={2024},
+            url = {https://github.com/meta-llama/llama3/blob/main/MODEL_CARD.md}
+        }""",
+    ),
+    "groq-mistral-mixtral-8x7b-32768": LlmServiceInstanceConfig(
+        provider=ModelProvider.GROQ,
         model_name="mixtral-8x7b-32768",
         api_key=os.getenv(ENV_VAR_DOCQ_GROQ_API_KEY),
         api_base="https://api.groq.com/openai/v1",
@@ -271,6 +287,21 @@ class LlmUsageSettingsCollection:
             ),
         },
     ),
+    "groq_llama3_70b_with_local_embedding": LlmUsageSettingsCollection(
+        name="Groq Llama3 70B wth Local Embedding",
+        key="groq_llama3_70b_with_local_embedding",
+        model_usage_settings={
+            ModelCapability.CHAT: LlmUsageSettings(
+                model_capability=ModelCapability.CHAT,
+                temperature=0.3,
+                service_instance_config=LLM_SERVICE_INSTANCES["groq-meta-llama3-70b-8192"],
+            ),
+            ModelCapability.EMBEDDING: LlmUsageSettings(
+                model_capability=ModelCapability.EMBEDDING,
+                service_instance_config=LLM_SERVICE_INSTANCES["optimum-bge-small-en-v1.5"],
+            ),
+        },
+    ),
     "groq_mixtral_8x7b_with_local_embedding": LlmUsageSettingsCollection(
         name="Groq Mixtral 8x7b wth Local Embedding",
         key="groq_mixtral_8x7b_with_local_embedding",
@@ -278,7 +309,7 @@ class LlmUsageSettingsCollection:
             ModelCapability.CHAT: LlmUsageSettings(
                 model_capability=ModelCapability.CHAT,
                 temperature=0.7,
-                service_instance_config=LLM_SERVICE_INSTANCES["groq-meta-mixtral-8x7b-32768"],
+                service_instance_config=LLM_SERVICE_INSTANCES["groq-mistral-mixtral-8x7b-32768"],
             ),
             ModelCapability.EMBEDDING: LlmUsageSettings(
                 model_capability=ModelCapability.EMBEDDING,
diff --git a/source/docq/support/llm.py b/source/docq/support/llm.py
index 31dd39ea..cdb2ed9e 100644
--- a/source/docq/support/llm.py
+++ b/source/docq/support/llm.py
@@ -3,6 +3,7 @@
 import logging as log
 import os
 from typing import Any, Dict, List
+from uu import Error
 
 import docq
 from llama_index.core.base.llms.types import ChatMessage, MessageRole
@@ -37,7 +38,7 @@
     LLM_MODEL_COLLECTIONS,
     LlmUsageSettingsCollection,
     ModelCapability,
-    ModelVendor,
+    ModelProvider,
 )
 from .llamaindex_otel_callbackhandler import OtelCallbackHandler
@@ -104,7 +105,7 @@ def _init_local_models() -> None:
     """Initialize local models."""
     for model_collection in LLM_MODEL_COLLECTIONS.values():
         for model_usage_settings in model_collection.model_usage_settings.values():
-            if model_usage_settings.service_instance_config.vendor == ModelVendor.HUGGINGFACE_OPTIMUM_BAAI:
+            if model_usage_settings.service_instance_config.provider == ModelProvider.HUGGINGFACE_OPTIMUM_BAAI:
                 model_dir = get_models_dir(model_usage_settings.service_instance_config.model_name, makedir=False)
                 if not os.path.exists(model_dir):
                     model_dir = get_models_dir(model_usage_settings.service_instance_config.model_name, makedir=True)
@@ -124,7 +125,7 @@ def _get_generation_model(model_settings_collection: LlmUsageSettingsCollection)
     chat_model_settings = model_settings_collection.model_usage_settings[ModelCapability.CHAT]
     sc = chat_model_settings.service_instance_config
     _callback_manager = CallbackManager([OtelCallbackHandler(tracer_provider=trace.get_tracer_provider())])
-    if chat_model_settings.service_instance_config.vendor == ModelVendor.AZURE_OPENAI:
+    if sc.provider == ModelProvider.AZURE_OPENAI:
         _additional_kwargs: Dict[str, Any] = {}
         _additional_kwargs["api_version"] = chat_model_settings.service_instance_config.api_version
         model = LiteLLM(
@@ -140,7 +141,7 @@ def _get_generation_model(model_settings_collection: LlmUsageSettingsCollection)
         _env_missing = not bool(sc.api_base and sc.api_key and sc.api_version)
         if _env_missing:
             log.warning("Chat model: env var values missing.")
-    elif chat_model_settings.service_instance_config.vendor == ModelVendor.OPENAI:
+    elif sc.provider == ModelProvider.OPENAI:
         model = LiteLLM(
             temperature=chat_model_settings.temperature,
             model=sc.model_name,
@@ -151,14 +152,14 @@ def _get_generation_model(model_settings_collection: LlmUsageSettingsCollection)
         _env_missing = not bool(sc.api_key)
         if _env_missing:
             log.warning("Chat model: env var values missing")
-    elif chat_model_settings.service_instance_config.vendor == ModelVendor.GOOGLE_VERTEXAI_PALM2:
+    elif sc.provider == ModelProvider.GOOGLE_VERTEXAI_PALM2:
         # GCP project_id is coming from the credentials json.
         model = LiteLLM(
             temperature=chat_model_settings.temperature,
             model=sc.model_name,
             callback_manager=_callback_manager,
         )
-    elif chat_model_settings.service_instance_config.vendor == ModelVendor.GOOGLE_VERTEXTAI_GEMINI_PRO:
+    elif sc.provider == ModelProvider.GOOGLE_VERTEXTAI_GEMINI_PRO:
         # GCP project_id is coming from the credentials json.
         model = LiteLLM(
             temperature=chat_model_settings.temperature,
@@ -169,13 +170,13 @@ def _get_generation_model(model_settings_collection: LlmUsageSettingsCollection)
         )
         litellm.VertexAIConfig()
         litellm.vertex_location = sc.additional_properties["vertex_location"]
-    elif chat_model_settings.service_instance_config.vendor == ModelVendor.GROQ_META:
+    elif sc.provider == ModelProvider.GROQ:
         model = LiteLLM(
             temperature=chat_model_settings.temperature,
-            model=f"openai/{sc.model_name}",
+            model=f"groq/{sc.model_name}",
             api_key=sc.api_key,
-            api_base=sc.api_base,
-            max_tokens=4096,
+            # api_base=sc.api_base,
+            # max_tokens=4096,
             callback_manager=_callback_manager,
             kwargs={
                 "set_verbose": True,
@@ -185,7 +186,7 @@ def _get_generation_model(model_settings_collection: LlmUsageSettingsCollection)
         if _env_missing:
             log.warning("Chat model: env var values missing.")
     else:
-        raise ValueError("Chat model: model settings with a supported model vendor not found.")
+        raise ValueError("Chat model: model settings with a supported model provider not found.")
 
     model.max_retries = 3
 
@@ -201,28 +202,27 @@ def _get_embed_model(model_settings_collection: LlmUsageSettingsCollection) -> B
     if model_settings_collection and model_settings_collection.model_usage_settings[ModelCapability.EMBEDDING]:
         embedding_model_settings = model_settings_collection.model_usage_settings[ModelCapability.EMBEDDING]
         _callback_manager = CallbackManager([OtelCallbackHandler(tracer_provider=trace.get_tracer_provider())])
-        with tracer.start_as_current_span(
-            name=f"LangchainEmbedding.{embedding_model_settings.service_instance_config.vendor}"
-        ):
-            if embedding_model_settings.service_instance_config.vendor == ModelVendor.AZURE_OPENAI:
+        sc = embedding_model_settings.service_instance_config
+        with tracer.start_as_current_span(name=f"LangchainEmbedding.{sc.provider}"):
+            if sc.provider == ModelProvider.AZURE_OPENAI:
                 embedding_model = AzureOpenAIEmbedding(
-                    model=embedding_model_settings.service_instance_config.model_name,
-                    azure_deployment=embedding_model_settings.service_instance_config.model_deployment_name,  # `deployment_name` is an alias
+                    model=sc.model_name,
+                    azure_deployment=sc.model_deployment_name,  # `deployment_name` is an alias
                     azure_endpoint=os.getenv("DOCQ_AZURE_OPENAI_API_BASE"),
                     api_key=os.getenv("DOCQ_AZURE_OPENAI_API_KEY1"),
                     # openai_api_type="azure",
                     api_version=os.getenv("DOCQ_AZURE_OPENAI_API_VERSION"),
                     callback_manager=_callback_manager,
                 )
-            elif embedding_model_settings.service_instance_config.vendor == ModelVendor.OPENAI:
+            elif sc.provider == ModelProvider.OPENAI:
                 embedding_model = OpenAIEmbedding(
-                    model=embedding_model_settings.service_instance_config.model_name,
+                    model=sc.model_name,
                     api_key=os.getenv("DOCQ_OPENAI_API_KEY"),
                     callback_manager=_callback_manager,
                 )
-            elif embedding_model_settings.service_instance_config.vendor == ModelVendor.HUGGINGFACE_OPTIMUM_BAAI:
+            elif sc.provider == ModelProvider.HUGGINGFACE_OPTIMUM_BAAI:
                 embedding_model = OptimumEmbedding(
-                    folder_name=get_models_dir(embedding_model_settings.service_instance_config.model_name),
+                    folder_name=get_models_dir(sc.model_name),
                     callback_manager=_callback_manager,
                 )
             else:
@@ -320,12 +320,20 @@ def get_hybrid_fusion_retriever_query(
 ) -> BaseRetriever:
     """Hybrid fusion retriever query."""
     retrievers = []
-    for index in indices:  # replace with your actual indexes
+    for index in indices:
         vector_retriever = index.as_retriever(similarity_top_k=similarity_top_k)
         retrievers.append(vector_retriever)
         bm25_retriever = BM25Retriever.from_defaults(docstore=index.docstore, similarity_top_k=similarity_top_k)
         retrievers.append(bm25_retriever)
+    # the default prompt doesn't return JUST the list of queries when using some none OAI models like Llama3.
+    QUERY_GEN_PROMPT = (
+        "You are a helpful assistant that generates multiple search queries based on a "
+        "single input . You only generate the queries and NO other text. Generate {num_queries} search queries, one on each line, "
+        "related to the following input query:\n"
+        "{query}\n"
+        "Queries:\n"
+    )
     # Create a FusionRetriever to merge and rerank the results
     fusion_retriever = QueryFusionRetriever(
         retrievers,
@@ -335,7 +343,8 @@ def get_hybrid_fusion_retriever_query(
         use_async=False,
         verbose=True,
         llm=_get_service_context(model_settings_collection).llm,
-        # query_gen_prompt="...",  # we could override the query generation prompt here
+        callback_manager=_get_service_context(model_settings_collection).callback_manager,
+        query_gen_prompt=QUERY_GEN_PROMPT,  # we could override the query generation prompt here
     )
 
     return fusion_retriever
@@ -409,55 +418,31 @@ def run_ask(
             from llama_index.core.query_engine import RetrieverQueryEngine
 
-            retriever = get_hybrid_fusion_retriever_query(indices, model_settings_collection)
-            query_engine = RetrieverQueryEngine.from_args(
-                retriever,
-                service_context=_get_service_context(model_settings_collection),
-                text_qa_template=llama_index_chat_prompt_template_from_persona(persona),
-                chat_history=history,
-            )
-            output = query_engine.query(input_)
-
-            # with tracer.start_as_current_span(name="ComposableGraph.from_indices") as span:
-            #     try:
-            #         graph = ComposableGraph.from_indices(
-            #             SummaryIndex,
-            #             indices,
-            #             index_summaries=summaries,
-            #             service_context=_get_service_context(model_settings_collection),
-            #             kwargs=model_settings_collection.model_usage_settings[ModelCapability.CHAT].additional_args,
-            #         )
-
-            #         custom_query_engines = {
-            #             index.index_id: index.as_query_engine(child_branch_factor=2) for index in indices
-            #         }
-
-            #         query_engine = graph.as_query_engine(
-            #             custom_query_engines=custom_query_engines,
-            #             text_qa_template=llama_index_chat_prompt_template_from_persona(persona).partial_format(
-            #                 history_str=""
-            #             ),
-            #             chat_history=history,
-            #         )
-
-            #         # prompts_dict = query_engine.get_prompts()
-            #         # print("prompts:", list(prompts_dict.keys()))
-
-            #         output = query_engine.query(input_)
-            #         span.add_event(
-            #             name="ask_combined_spaces",
-            #             attributes={"question": input_, "answer": str(output), "spaces_count": len(spaces)},
-            #         )
-            #         log.debug("(Ask combined spaces %s) Q: %s, A: %s", spaces, input_, output)
-            #     except Exception as e:
-            #         span.set_status(status=Status(StatusCode.ERROR))
-            #         span.record_exception(e)
-            #         log.error(
-            #             "run_ask(): Failed to create ComposableGraph. Maybe there was an issue with one of the Space indexes. Error message: %s",
-            #             e,
-            #         )
-            #         span.set_status(status=Status(StatusCode.ERROR))
-            #         span.record_exception(e)
+            try:
+                text_qa_template = llama_index_chat_prompt_template_from_persona(persona)
+                span.add_event(name="prompt_created")
+            except Exception as e:
+                raise Error(f"Error: {e}") from e
+
+            try:
+                retriever = get_hybrid_fusion_retriever_query(indices, model_settings_collection)
+                span.add_event(name="retriever_object_created", attributes={"retriever": retriever.__class__.__name__})
+
+                query_engine = RetrieverQueryEngine.from_args(
+                    retriever,
+                    service_context=_get_service_context(model_settings_collection),
+                    text_qa_template=text_qa_template,
+                    chat_history=history,
+                )
+                span.add_event(name="query_engine__object_created")
+
+                output = query_engine.query(input_)
+                span.add_event(name="query_executed")
+            except Exception as e:
+                span.set_status(status=Status(StatusCode.ERROR))
+                span.record_exception(e)
+                raise Error(f"Error: {e}") from e
+
         else:
             span.set_attribute("spaces_count", 0)
             log.debug("runs_ask(): space None or zero.")
diff --git a/web/index.py b/web/index.py
index 27f9bddc..dd372819 100644
--- a/web/index.py
+++ b/web/index.py
@@ -24,7 +24,10 @@
 with tracer().start_as_current_span("home_page", attributes=baggage_as_attributes()):
     render_docq_logo()
-    render_page_title_and_favicon(page_display_title="Docq.AI, :violet[your second brain].", browser_title="Docq.AI - Private & Secure ChatGPT alternative.")
+    render_page_title_and_favicon(
+        page_display_title=":violet[Your private ChatGPT alternative].",
+        browser_title="Docq.AI - Private ChatGPT alternative.",
+    )
     init_with_pretty_error_ui()
     production_layout()
@@ -57,8 +60,11 @@
     login_container = st.container()
 
-    st.subheader("Private & Secure alternative to ChatGPT that unlocks knowledge from your confidential documents.")
+    st.subheader("Secure unlock knowledge from your confidential business documents.")
+
+    st.markdown("Upload a document. Ask questions. Get answers. It's that simple!")
+
+    st.subheader("Guide")
     st.markdown(
         """
         - **_General Chat_** to use Docq like ChatGPT.
diff --git a/web/utils/layout.py b/web/utils/layout.py
index d88bd304..2ea918bf 100644
--- a/web/utils/layout.py
+++ b/web/utils/layout.py
@@ -1227,7 +1227,7 @@ def organisation_settings_ui() -> None:
         with model_settings_container.expander("Model details"):
             for _, model_settings in selected_model_settings.model_usage_settings.items():
                 st.write(f"{model_settings.model_capability.value} model: ")
-                st.write(f"- Model Vendor: `{model_settings.service_instance_config.vendor.value}`")
+                st.write(f"- Model Provider: `{model_settings.service_instance_config.provider.value}`")
                 st.write(f"- Model Name: `{model_settings.service_instance_config.model_name}`")
                 st.write(f"- Temperature: `{model_settings.temperature}`")
                 st.write(