diff --git a/.pylintrc b/.pylintrc
index d1570f9..741399b 100644
--- a/.pylintrc
+++ b/.pylintrc
@@ -53,6 +53,7 @@ disable=
 ; duplicate-string-formatting-argument, # TMP: will be fixed in close future
 consider-using-f-string, # sorry, not gonna happen, still have to support py2
 ; use-dict-literal
+cyclic-import,
 
 [FORMAT]
 # Maximum number of characters on a single line.
diff --git a/knowledge_base_gpt/apps/__init__.py b/knowledge_base_gpt/apps/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/knowledge_base_gpt/apps/ingest/__init__.py b/knowledge_base_gpt/apps/ingest/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/knowledge_base_gpt/apps/ingest/__main__.py b/knowledge_base_gpt/apps/ingest/__main__.py
index 0cb2482..6484db9 100644
--- a/knowledge_base_gpt/apps/ingest/__main__.py
+++ b/knowledge_base_gpt/apps/ingest/__main__.py
@@ -1,3 +1,4 @@
+""" Content ingestion application """
 from knowledge_base_gpt.libs.injector.di import global_injector
 from knowledge_base_gpt.apps.ingest.ingest import Ingestor
 
diff --git a/knowledge_base_gpt/apps/ingest/ingest.py b/knowledge_base_gpt/apps/ingest/ingest.py
index 2b8c128..b6af2c7 100644
--- a/knowledge_base_gpt/apps/ingest/ingest.py
+++ b/knowledge_base_gpt/apps/ingest/ingest.py
@@ -1,4 +1,4 @@
-import os
+""" Content ingest """
 from typing import List
 
 from injector import inject, singleton
@@ -11,7 +11,8 @@
 
 
 @singleton
-class Ingestor():
+class Ingestor(): # pylint:disable=R0903
+    """ Content ingest """
     @inject
     def __init__(self, settings: Settings, loader: Loader, vector_store: VectorStore) -> None:
@@ -20,11 +21,11 @@ def __init__(self, settings: Settings, loader: Loader, vector_store: VectorStore
         self._chunk_overlap = settings.text_splitter.chunk_overlap
         self._vector_store = vector_store
 
-    def _process_documents(self, ignored_files: List[str] = []) -> List[Document]:
+    def _process_documents(self, ignored_files: List[str]) -> List[Document]:
         """
         Load documents and split in chunks
         """
-        print(f"Loading documents")
+        print("Loading documents")
         documents = self._loader.load_documents(ignored_files)
         if not documents:
             return []
@@ -36,12 +37,13 @@ def _process_documents(self, ignored_files: List[str] = []) -> List[Document]:
         return documents
 
     def run(self):
+        """ Ingest the documents into the vector store based on the settings """
         collection = self._vector_store.db.get()
         documents = self._process_documents(list(set(metadata['source'] for metadata in collection['metadatas'])))
         if len(documents) == 0:
             print("No new documents to load")
         else:
-            print(f"Creating embeddings. May take some minutes...")
+            print("Creating embeddings. May take some minutes...")
             self._vector_store.db.add_documents(documents)
         self._vector_store.db.persist()
-        print(f"Ingestion complete! You can now run privateGPT.py to query your documents")
+        print("Ingestion complete! You can now run privateGPT.py to query your documents")
diff --git a/knowledge_base_gpt/apps/slackbot/__init__.py b/knowledge_base_gpt/apps/slackbot/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/knowledge_base_gpt/apps/slackbot/__main__.py b/knowledge_base_gpt/apps/slackbot/__main__.py
index d1e9fbe..60efdcb 100644
--- a/knowledge_base_gpt/apps/slackbot/__main__.py
+++ b/knowledge_base_gpt/apps/slackbot/__main__.py
@@ -1,3 +1,4 @@
+""" Slackbot application """
 from knowledge_base_gpt.libs.injector.di import global_injector
 from knowledge_base_gpt.apps.slackbot.slack_bot import KnowledgeBaseSlackBot
 
diff --git a/knowledge_base_gpt/apps/slackbot/slack_bot.py b/knowledge_base_gpt/apps/slackbot/slack_bot.py
index ea9ba1b..907b42c 100644
--- a/knowledge_base_gpt/apps/slackbot/slack_bot.py
+++ b/knowledge_base_gpt/apps/slackbot/slack_bot.py
@@ -1,3 +1,4 @@
+""" Slackbot application backend """
 from injector import inject, singleton
 from slack_bolt import App
 from slack_bolt.adapter.socket_mode import SocketModeHandler
@@ -9,11 +10,12 @@
 
 
 class KnowledgeBaseSlackBotException(Exception):
-    pass
+    """ Wrapper for SlackBot specific exception """
 
 
 @singleton
-class KnowledgeBaseSlackBot():
+class KnowledgeBaseSlackBot(): # pylint:disable=R0903
+    """ Slackbot application backend """
     @inject
     def __init__(self, settings: Settings, private_chat: PrivateChat, history: HistoryRedis) -> None:
@@ -28,18 +30,19 @@ def __init__(self, settings: Settings, private_chat: PrivateChat, history: Histo
         self._handler.app.command('/conversation_forward')(self._forward_question)
 
     def run(self):
+        """ Start the Slackbot backend application """
         self._handler.start()
 
     def _get_forward_question_channel_id(self):
         if self._forward_question_channel_name is None:
-            raise KnowledgeBaseSlackBotException(f"Slackbot forward channel name was not set")
+            raise KnowledgeBaseSlackBotException("Slackbot forward channel name was not set")
         try:
             for result in self._handler.app.client.conversations_list():
                 for channel in result["channels"]:
                     if channel["name"] == self._forward_question_channel_name:
                         return channel["id"]
         except SlackApiError as e:
-            raise KnowledgeBaseSlackBotException(e)
+            raise KnowledgeBaseSlackBotException(e) from e
         raise KnowledgeBaseSlackBotException(f"The channel {self._forward_question_channel_name} does not exits")
 
     def _got_message(self, message, say):
@@ -48,7 +51,11 @@ def _got_message(self, message, say):
             user=message['user'],
             text="On it. Be back with your answer soon"
         )
-        answer = self._private_chat.answer_query(self._history.get_messages(message['user']), message['text'], chat_identifier=message['user'])
+        answer = self._private_chat.answer_query(
+            self._history.get_messages(message['user']),
+            message['text'],
+            chat_identifier=message['user']
+        )
         self._history.add_to_history(message['user'], answer)
         say(answer['answer'])
 
@@ -63,7 +70,7 @@ def _is_direct_message_channel(self, command):
 
         return False
 
-    def _reset_conversation(self, ack, say, command):
+    def _reset_conversation(self, ack, say, command): # pylint:disable=unused-argument
         ack()
         if not self._is_direct_message_channel(command):
             return
@@ -85,7 +92,7 @@ def _messages_to_text(messages):
             text += '\n'
         return text
 
-    def _forward_question(self, ack, say, command):
+    def _forward_question(self, ack, say, command): # pylint:disable=unused-argument
         ack()
         if not self._is_direct_message_channel(command):
             return
@@ -94,7 +101,10 @@ def _forward_question(self, ack, say, command):
         if len(messages) == 0:
             msg = 'There is no active conversation'
         else:
-            self._handler.app.client.chat_postMessage(channel=self._forward_question_channel_id, text=self._messages_to_text(messages))
+            self._handler.app.client.chat_postMessage(
+                channel=self._forward_question_channel_id,
+                text=self._messages_to_text(messages)
+            )
             msg = f'The conversation was forwarded to {self._forward_question_channel_name}'
 
         self._handler.app.client.chat_postEphemeral(
diff --git a/knowledge_base_gpt/libs/common/constants.py b/knowledge_base_gpt/libs/common/constants.py
index 329be27..f9412af 100644
--- a/knowledge_base_gpt/libs/common/constants.py
+++ b/knowledge_base_gpt/libs/common/constants.py
@@ -1,3 +1,4 @@
+""" Hold application constants """
 from pathlib import Path
 
 
diff --git a/knowledge_base_gpt/libs/embedding/embedding.py b/knowledge_base_gpt/libs/embedding/embedding.py
index da8bbba..3b33e6e 100644
--- a/knowledge_base_gpt/libs/embedding/embedding.py
+++ b/knowledge_base_gpt/libs/embedding/embedding.py
@@ -1,12 +1,14 @@
+""" Create and abstract the embedding """
 from injector import inject, singleton
-from langchain_community.embeddings import HuggingFaceEmbeddings
-from langchain_community.embeddings import OllamaEmbeddings
+from langchain_core.embeddings import Embeddings as LangChainEmbeddings
+from langchain_community.embeddings import HuggingFaceEmbeddings, OllamaEmbeddings
 
 from knowledge_base_gpt.libs.settings.settings import Settings
 
 
 @singleton
-class Embedding():
+class Embedding(): # pylint:disable=R0903
+    """ Create and abstract the embedding """
     @inject
     def __init__(self, settings: Settings) -> None:
         mode = settings.embedding.mode
@@ -31,5 +33,6 @@ def __init__(self, settings: Settings) -> None:
                 pass
 
     @property
-    def embeddings(self):
+    def embeddings(self) -> LangChainEmbeddings:
+        """ Return the embedding implementation """
         return self._embeddings
diff --git a/knowledge_base_gpt/libs/gpt/ollama_info.py b/knowledge_base_gpt/libs/gpt/ollama_info.py
index d71b9f1..2999c4f 100644
--- a/knowledge_base_gpt/libs/gpt/ollama_info.py
+++ b/knowledge_base_gpt/libs/gpt/ollama_info.py
@@ -1,14 +1,21 @@
+"""
+Keep track of metrics provided by the Ollama API
+"""
+
 from contextlib import contextmanager
 from contextvars import ContextVar
 import threading
 from typing import Any, Dict, List, Optional, Generator
+from uuid import UUID
 
 from langchain_core.callbacks.base import BaseCallbackHandler
+from langchain_core.messages import BaseMessage
 from langchain_core.outputs import LLMResult
 from langchain_core.tracers.context import register_configure_hook
 
 
 class OllamaMetrics():
+    """ Metrics of a single Ollama request """
     prompt_eval_count: int = 0
     eval_count: int = 0
     load_duration: int = 0
@@ -28,6 +35,7 @@ def __repr__(self) -> str:
         )
 
     def to_json(self) -> dict:
+        """ Return a JSON representation of the tracked info """
         return {
             "prompt_eval_count": self.prompt_eval_count,
             "eval_count": self.eval_count,
@@ -61,13 +69,11 @@ def on_llm_start(
         self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any
     ) -> None:
         """Print out the prompts."""
-        pass
 
     def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
         """Print out the token."""
-        pass
 
-    def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
+    def on_llm_end(self, response: LLMResult, **_kwargs: Any) -> None:
         """Collect token usage."""
         if len(response.generations) == 0 or len(response.generations[0]) == 0:
             return
@@ -88,6 +94,19 @@ def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
         with self._lock:
             self.metrics.append(metrics)
 
+    def on_chat_model_start(
+        self,
+        serialized: Dict[str, Any],
+        messages: List[List[BaseMessage]],
+        *,
+        run_id: UUID,
+        parent_run_id: Optional[UUID] = None,
+        tags: Optional[List[str]] = None,
+        metadata: Optional[Dict[str, Any]] = None,
+        **kwargs: Any,
+    ) -> Any:
+        """Run when a chat model starts running. """
+
     def __copy__(self) -> "OllamaCallbackHandler":
         """Return a copy of the callback handler."""
         return self
diff --git a/knowledge_base_gpt/libs/gpt/private_chat.py b/knowledge_base_gpt/libs/gpt/private_chat.py
index 8c32699..155aade 100644
--- a/knowledge_base_gpt/libs/gpt/private_chat.py
+++ b/knowledge_base_gpt/libs/gpt/private_chat.py
@@ -1,4 +1,7 @@
-from typing import Optional
+"""
+Module for handling the chat chain
+"""
+from typing import Optional, Dict, Any
 
 from injector import inject, singleton
 from langchain.chains import ConversationalRetrievalChain
@@ -12,8 +15,8 @@
 
 
 @singleton
-class PrivateChat():
-
+class PrivateChat(): # pylint:disable=R0903
+    """ Handle the Chat chain """
     @inject
     def __init__(self, settings: Settings, chat_log_exporter: ChatLogExporter, vector_store: VectorStore):
         llm_mode = settings.llm.mode
@@ -46,7 +49,11 @@ def __init__(self, settings: Settings, chat_log_exporter: ChatLogExporter, vecto
             return_generated_question=True
         )
 
-    def answer_query(self, history, query, chat_identifier: Optional[str]=None):
+    def answer_query(self, history, query, chat_identifier: Optional[str]=None) -> Dict[str, Any]:
+        """
+        Answer the query based on the history
+        Use the chat identifier for logging the chat
+        """
         with self._get_callback() as cb:
             answer = self._chain.invoke({"question": query, "chat_history": history})
             self._chat_log_exporter.save_chat_log(self._chat_fragment_cls(answer, cb, chat_identifier=chat_identifier))
diff --git a/knowledge_base_gpt/libs/history/base.py b/knowledge_base_gpt/libs/history/base.py
index 9d0ed96..e980492 100644
--- a/knowledge_base_gpt/libs/history/base.py
+++ b/knowledge_base_gpt/libs/history/base.py
@@ -1,18 +1,20 @@
+""" Base class for history keepers """
 from abc import ABC, abstractmethod
-from typing import List
+from typing import List, Dict, Any
 
 from langchain_core.messages import BaseMessage
 
 
 class HistoryBase(ABC):
+    """ Base class for history keepers """
     @abstractmethod
-    def get_messages(self, session_id) -> List[BaseMessage]:
-        pass
+    def get_messages(self, session_id: str) -> List[BaseMessage]:
+        """ Get all messages of the session """
 
     @abstractmethod
-    def add_to_history(self, session_id, answer):
-        pass
+    def add_to_history(self, session_id: str, answer: Dict[str, Any]):
+        """ Add the answer to the session """
 
     @abstractmethod
-    def reset(self, session_id):
-        pass
+    def reset(self, session_id: str):
+        """ Reset the session """
diff --git a/knowledge_base_gpt/libs/history/redis.py b/knowledge_base_gpt/libs/history/redis.py
index baa026b..081d2c8 100644
--- a/knowledge_base_gpt/libs/history/redis.py
+++ b/knowledge_base_gpt/libs/history/redis.py
@@ -1,3 +1,4 @@
+""" Manage history in Redis """
 from typing import List, Dict, Any
 
 from injector import inject, singleton
@@ -10,7 +11,7 @@
 
 @singleton
 class HistoryRedis(HistoryBase):
-
+    """ Manage history in Redis """
     @inject
     def __init__(self, settings: Settings):
         redis_settings = settings.redis
diff --git a/knowledge_base_gpt/libs/injector/di.py b/knowledge_base_gpt/libs/injector/di.py
index 30c5f81..5cfe382 100644
--- a/knowledge_base_gpt/libs/injector/di.py
+++ b/knowledge_base_gpt/libs/injector/di.py
@@ -1,19 +1,14 @@
+""" Global injector for the application """
 from injector import Injector
 
 from knowledge_base_gpt.libs.settings.settings import Settings, unsafe_typed_settings
 
 
-def create_application_injector() -> Injector:
+def _create_application_injector() -> Injector:
     _injector = Injector(auto_bind=True)
     _injector.binder.bind(Settings, to=unsafe_typed_settings)
     return _injector
 
 
-"""
-Global injector for the application.
-
-Avoid using this reference, it will make your code harder to test.
-
-Instead, use the `request.state.injector` reference, which is bound to every request
-"""
-global_injector: Injector = create_application_injector()
+# Global injector for the application.
+global_injector: Injector = _create_application_injector()
diff --git a/knowledge_base_gpt/libs/loaders/google_drive_loader.py b/knowledge_base_gpt/libs/loaders/google_drive_loader.py
index a6b4dea..8c9a8c0 100644
--- a/knowledge_base_gpt/libs/loaders/google_drive_loader.py
+++ b/knowledge_base_gpt/libs/loaders/google_drive_loader.py
@@ -1,3 +1,4 @@
+""" Content loader from Google Drive """
 from typing import List
 
 from injector import inject, singleton
@@ -8,14 +9,16 @@
 
 
 @singleton
-class GDriveLoader():
+class GDriveLoader(): # pylint:disable=R0903
+    """ Content loader from Google Drive """
     @inject
     def __init__(self, settings: Settings) -> None:
         self._service_key_file = settings.google_drive.service_key_file
         self._folder_id = settings.google_drive.folder_id
 
-    def load_documents(self, ignored_files: List[str] = []) -> List[Document]:
+    def load_documents(self, ignored_files: List[str]) -> List[Document]:
+        """ Load the documents based on the settings and the ignore list """
         if not self._folder_id:
             return []
diff --git a/knowledge_base_gpt/libs/loaders/loaders.py b/knowledge_base_gpt/libs/loaders/loaders.py
index d9a3af7..bdb81d8 100644
--- a/knowledge_base_gpt/libs/loaders/loaders.py
+++ b/knowledge_base_gpt/libs/loaders/loaders.py
@@ -1,3 +1,4 @@
+""" Abstract all content loaders """
 from typing import List
 
 from injector import inject, singleton
@@ -9,7 +10,8 @@
 
 
 @singleton
-class Loader():
+class Loader(): # pylint:disable=R0903
+    """ Abstract all content loaders """
     @inject
     def __init__(self, settings: Settings) -> None:
@@ -22,5 +24,6 @@ def __init__(self, settings: Settings) -> None:
             case _:
                 pass
 
-    def load_documents(self, ignored_files: List[str] = []) -> List[Document]:
+    def load_documents(self, ignored_files: List[str]) -> List[Document]:
+        """ Load all the documents based on the settings and the ignore list """
         return self._content_loader.load_documents(ignored_files=ignored_files)
diff --git a/knowledge_base_gpt/libs/logs/chat_fragment.py b/knowledge_base_gpt/libs/logs/chat_fragment.py
index a8eeef6..30a695a 100644
--- a/knowledge_base_gpt/libs/logs/chat_fragment.py
+++ b/knowledge_base_gpt/libs/logs/chat_fragment.py
@@ -1,3 +1,4 @@
+""" Manage a Chat log fragment """
 from abc import ABC, abstractmethod
 from typing import List, Dict, Any, Optional
 import uuid
@@ -6,9 +7,15 @@
 from langchain_core.documents.base import Document
 
 
-class ChatFragment(ABC):
+class ChatFragment(ABC): # pylint:disable=R0903
+    """ Manage a Chat log fragment """
 
-    def __init__(self, answer: Dict[str, Any], callback_handler: BaseCallbackHandler, chat_identifier: Optional[str] = None) -> None:
+    def __init__(
+        self,
+        answer: Dict[str, Any],
+        callback_handler: BaseCallbackHandler,
+        chat_identifier: Optional[str] = None
+    ) -> None:
         super().__init__()
         if chat_identifier is None:
             chat_identifier = str(uuid.uuid4())
@@ -45,6 +52,7 @@ def _calculate_metrics(self, callback_handler: BaseCallbackHandler):
         pass
 
     def to_json(self) -> dict:
+        """ Return a JSON representation of the chat log fragment """
         return {
             "chat_identifier": self.chat_identifier,
             "question": self.question,
diff --git a/knowledge_base_gpt/libs/logs/chat_log_exporter.py b/knowledge_base_gpt/libs/logs/chat_log_exporter.py
index 16f7c84..224a588 100644
--- a/knowledge_base_gpt/libs/logs/chat_log_exporter.py
+++ b/knowledge_base_gpt/libs/logs/chat_log_exporter.py
@@ -1,3 +1,4 @@
+""" Export Chat logs to File """
 import json
 
 from injector import inject, singleton
@@ -7,12 +8,14 @@
 
 
 @singleton
-class ChatLogExporter():
+class ChatLogExporter(): # pylint:disable=R0903
+    """ Export Chat logs to File """
     @inject
     def __init__(self, settings: Settings):
         self._path = settings.log.chat_log_path
 
     def save_chat_log(self, chat_fragment: ChatFragment):
-        with open(self._path, "a+") as f:
+        """ Save the Chat log fragment to the log file """
+        with open(self._path, "a+", encoding="utf-8") as f:
             f.write(f"{json.dumps(chat_fragment.to_json())}\n")
diff --git a/knowledge_base_gpt/libs/logs/ollama.py b/knowledge_base_gpt/libs/logs/ollama.py
index e1bbde1..c6207f7 100644
--- a/knowledge_base_gpt/libs/logs/ollama.py
+++ b/knowledge_base_gpt/libs/logs/ollama.py
@@ -1,8 +1,10 @@
+""" Ollama Chat log fragment """
 from knowledge_base_gpt.libs.logs.chat_fragment import ChatFragment
 from knowledge_base_gpt.libs.gpt.ollama_info import OllamaCallbackHandler
 
 
-class OllamaChatFragment(ChatFragment):
+class OllamaChatFragment(ChatFragment): # pylint:disable=R0903
+    """ Ollama Chat log fragment """
 
     def _calculate_metrics(self, callback_handler: OllamaCallbackHandler):
         metrics = callback_handler.metrics
diff --git a/knowledge_base_gpt/libs/settings/settings.py b/knowledge_base_gpt/libs/settings/settings.py
index 066c94e..2c54be6 100644
--- a/knowledge_base_gpt/libs/settings/settings.py
+++ b/knowledge_base_gpt/libs/settings/settings.py
@@ -1,3 +1,5 @@
+# pylint:disable=R0903
+""" Settings for the application """
 from typing import Literal
 
 from pydantic import BaseModel, Field
@@ -6,6 +8,7 @@
 
 
 class SlackBotSettings(BaseModel):
+    """ Slackbot Settings """
     app_token: str = Field(
         description="Slack App Token"
     )
@@ -18,6 +21,7 @@
 
 
 class LLMSettings(BaseModel):
+    """ LLM Settings """
     mode: Literal["ollama", "mock"] = Field(
         'ollama',
         description="LLM Backend to use for chat"
@@ -28,7 +32,9 @@
     )
     temperature: float = Field(
         0.1,
-        description="The temperature of the model. Increasing the temperature will make the model answer more creatively. A value of 0.1 would be more factual.",
+        description="The temperature of the model. "\
+            "Increasing the temperature will make the model answer more creatively. "\
+            "A value of 0.1 would be more factual.",
     )
     verbose: bool = Field(
         False,
@@ -41,17 +47,21 @@
 
 
 class EmbeddingSettings(BaseModel):
+    """ Embedding Settings """
     mode: Literal["hugging_face", "ollama", "mock"] = Field(
         'hugging_face',
         description="LLM Backend to use for embedding"
     )
     temperature: float = Field(
         0.1,
-        description="The temperature of the model. Increasing the temperature will make the model answer more creatively. A value of 0.1 would be more factual.",
+        description="The temperature of the model. "\
+            "Increasing the temperature will make the model answer more creatively. "\
+            "A value of 0.1 would be more factual.",
     )
 
 
 class GoogleDriveSettings(BaseModel):
+    """ Google Drive Settings """
     service_key_file: str = Field(
         None,
         description="Path a the Google Service Key file"
@@ -63,6 +73,7 @@
 
 
 class OllamaSettings(BaseModel):
+    """ Ollama Settings """
     api_base: str = Field(
         "http://localhost:11434",
         description="Base URL of Ollama API. Example: 'https://localhost:11434'.",
@@ -77,37 +88,48 @@
     )
     tfs_z: float = Field(
         1.0,
-        description="Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting.",
+        description="Tail free sampling is used to reduce the impact of less probable tokens from the output. "\
+            "A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting.",
     )
     num_predict: int = Field(
         None,
-        description="Maximum number of tokens to predict when generating text. (Default: 128, -1 = infinite generation, -2 = fill context)",
+        description="Maximum number of tokens to predict when generating text. "\
+            "(Default: 128, -1 = infinite generation, -2 = fill context)",
     )
     top_k: int = Field(
         40,
-        description="Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)",
+        description="Reduces the probability of generating nonsense. "\
+            "A higher value (e.g. 100) will give more diverse answers, "\
+            "while a lower value (e.g. 10) will be more conservative. (Default: 40)",
     )
     top_p: float = Field(
         0.9,
-        description="Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)",
+        description="Works together with top-k. "\
+            "A higher value (e.g., 0.95) will lead to more diverse text, "\
+            "while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)",
     )
     repeat_last_n: int = Field(
         64,
-        description="Sets how far back for the model to look back to prevent repetition. (Default: 64, 0 = disabled, -1 = num_ctx)",
+        description="Sets how far back for the model to look back to prevent repetition. "\
+            "(Default: 64, 0 = disabled, -1 = num_ctx)",
     )
     repeat_penalty: float = Field(
         1.1,
-        description="Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1)",
+        description="Sets how strongly to penalize repetitions. "\
+            "A higher value (e.g., 1.5) will penalize repetitions more strongly, "\
+            "while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1)",
     )
 
 
 class HuggingFaceSettings(BaseModel):
+    """ Hugging Face Settings """
     embedding_model: str = Field(
         None,
         description="Model to use. Example: 'nomic-embed-text'.",
     )
 
 
 class RedisSettings(BaseModel):
+    """ Redis Settings """
     host: str = Field(
         'localhost',
         description="FQDN for redis"
@@ -127,6 +149,7 @@
 
 
 class LogSettings(BaseModel):
+    """ Logging Settings """
     chat_log_path: str = Field(
         "./chatlog.log",
         description="Path to store the chat logs"
@@ -134,6 +157,7 @@
 
 
 class ContentLoaderSettings(BaseModel):
+    """ Content Loader Settings """
     mode: Literal['google_drive', 'mock'] = Field(
         'google_drive',
         description="Type of Content Loader to use"
@@ -141,6 +165,7 @@
 
 
 class TextSpliterSettings(BaseModel):
+    """ Text Splitter Settings """
    chunk_size: int = Field(
         500,
         description="Size of each chunk"
@@ -152,6 +177,7 @@
 
 
 class VectorStoreSettings(BaseModel):
+    """ Vector Store Settings """
     mode: Literal['chroma', 'mock'] = Field(
         'chroma',
         description="Type of vector store"
@@ -163,6 +189,7 @@
 
 
 class Settings(BaseModel):
+    """ Application Settings """
     slackbot: SlackBotSettings
     llm: LLMSettings
     ollama: OllamaSettings
@@ -176,18 +203,12 @@
     vectorstore: VectorStoreSettings
 
 
-"""
-This is visible just for DI or testing purposes.
-
-Use dependency injection or `settings()` method instead.
-"""
+# This is visible just for DI or testing purposes.
+# Use dependency injection or `settings()` method instead.
 unsafe_settings = load_active_settings()
 
-"""
-This is visible just for DI or testing purposes.
-
-Use dependency injection or `settings()` method instead.
-"""
+# This is visible just for DI or testing purposes.
+# Use dependency injection or `settings()` method instead.
 unsafe_typed_settings = Settings(**unsafe_settings)
 
 
@@ -199,6 +220,6 @@ def settings() -> Settings:
 
     For regular components use dependency injection instead.
     """
-    from knowledge_base_gpt.libs.injector.di import global_injector
+    from knowledge_base_gpt.libs.injector.di import global_injector # pylint:disable=C0415
     return global_injector.get(Settings)
diff --git a/knowledge_base_gpt/libs/settings/settings_loader.py b/knowledge_base_gpt/libs/settings/settings_loader.py
index 01ca841..0e1dbe0 100644
--- a/knowledge_base_gpt/libs/settings/settings_loader.py
+++ b/knowledge_base_gpt/libs/settings/settings_loader.py
@@ -1,3 +1,4 @@
+""" Module for loading settings. """
 import functools
 import logging
 import os
@@ -29,11 +30,11 @@
 )
 
 
-def merge_settings(settings: Iterable[dict[str, Any]]) -> dict[str, Any]:
+def _merge_settings(settings: Iterable[dict[str, Any]]) -> dict[str, Any]:
     return functools.reduce(deep_update, settings, {})
 
 
-def load_settings_from_profile(profile: str) -> dict[str, Any]:
+def _load_settings_from_profile(profile: str) -> dict[str, Any]:
     if profile == "default":
         profile_file_name = "settings.yaml"
     else:
@@ -50,7 +51,7 @@ def load_active_settings() -> dict[str, Any]:
     """Load active profiles and merge them."""
     logger.info("Starting application with profiles=%s", active_profiles)
     loaded_profiles = [
-        load_settings_from_profile(profile) for profile in active_profiles
+        _load_settings_from_profile(profile) for profile in active_profiles
     ]
-    merged: dict[str, Any] = merge_settings(loaded_profiles)
+    merged: dict[str, Any] = _merge_settings(loaded_profiles)
     return merged
diff --git a/knowledge_base_gpt/libs/vectorstore/vectorstore.py b/knowledge_base_gpt/libs/vectorstore/vectorstore.py
index aeebfef..8171034 100644
--- a/knowledge_base_gpt/libs/vectorstore/vectorstore.py
+++ b/knowledge_base_gpt/libs/vectorstore/vectorstore.py
@@ -1,14 +1,15 @@
-from chromadb.config import Settings as ChromaSettings
+""" Module to create and abstract the actual vector store based on the configuration """
 from injector import inject, singleton
+from chromadb.config import Settings as ChromaSettings
 from langchain_community.vectorstores.chroma import Chroma
-
 from knowledge_base_gpt.libs.embedding.embedding import Embedding
 from knowledge_base_gpt.libs.settings.settings import Settings
 
 
 @singleton
-class VectorStore():
+class VectorStore(): # pylint:disable=R0903
+    """ Abstract the actual vector store based on the configuration """
     @inject
     def __init__(self, settings: Settings, embedding: Embedding) -> None:
         mode = settings.vectorstore.mode
@@ -21,8 +22,8 @@ def __init__(self, settings: Settings, embedding: Embedding) -> None:
                     is_persistent=True,
                     persist_directory=settings.vectorstore.persist_directory,
                     anonymized_telemetry=False
-                    )
                 )
+                )
             case 'mock':
                 pass
             case _:
@@ -30,4 +31,5 @@ def __init__(self, settings: Settings, embedding: Embedding) -> None:
 
     @property
     def db(self):
+        """ Return the Vector Store implementation """
         return self._db