llama.cpp - review docstrings (#510)
* llama.cpp - review docstrings

* small fix

* Update integrations/llama_cpp/src/haystack_integrations/components/generators/llama_cpp/generator.py

Co-authored-by: Madeesh Kannan <[email protected]>

* Update integrations/llama_cpp/src/haystack_integrations/components/generators/llama_cpp/generator.py

Co-authored-by: Madeesh Kannan <[email protected]>

* Update integrations/llama_cpp/src/haystack_integrations/components/generators/llama_cpp/generator.py

Co-authored-by: Madeesh Kannan <[email protected]>

* Update integrations/llama_cpp/src/haystack_integrations/components/generators/llama_cpp/generator.py

Co-authored-by: Madeesh Kannan <[email protected]>

---------

Co-authored-by: Madeesh Kannan <[email protected]>
anakin87 and shadeMe authored Mar 1, 2024
1 parent c92454e commit 7a3b7bf
Showing 1 changed file with 19 additions and 20 deletions.
@@ -11,12 +11,14 @@
 @component
 class LlamaCppGenerator:
     """
-    Generator for using a model with Llama.cpp.
-    This component provides an interface to generate text using a quantized model (GGUF) using llama.cpp.
+    Provides an interface to generate text using an LLM via llama.cpp.
+    [llama.cpp](https://github.com/ggerganov/llama.cpp) is a project written in C/C++ for efficient inference of LLMs.
+    It employs the quantized GGUF format, suitable for running these models on standard machines (even without GPUs).
+    Usage example:
     ```python
-    from llama_cpp_haystack import LlamaCppGenerator
+    from haystack_integrations.components.generators.llama_cpp import LlamaCppGenerator
     generator = LlamaCppGenerator(model="zephyr-7b-beta.Q4_0.gguf", n_ctx=2048, n_batch=512)
     print(generator.run("Who is the best American actor?", generation_kwargs={"max_tokens": 128}))
@@ -33,28 +35,24 @@ def __init__(
         generation_kwargs: Optional[Dict[str, Any]] = None,
     ):
         """
-        :param model: The path of a quantized model for text generation,
-            for example, "zephyr-7b-beta.Q4_0.gguf".
+        :param model: The path of a quantized model for text generation, for example, "zephyr-7b-beta.Q4_0.gguf".
             If the model path is also specified in the `model_kwargs`, this parameter will be ignored.
         :param n_ctx: The number of tokens in the context. When set to 0, the context will be taken from the model.
             If the n_ctx is also specified in the `model_kwargs`, this parameter will be ignored.
-        :param n_batch: Prompt processing maximum batch size. Defaults to 512.
-            If the n_batch is also specified in the `model_kwargs`, this parameter will be ignored.
+        :param n_batch: Prompt processing maximum batch size.
         :param model_kwargs: Dictionary containing keyword arguments used to initialize the LLM for text generation.
             These keyword arguments provide fine-grained control over the model loading.
             In case of duplication, these kwargs override `model`, `n_ctx`, and `n_batch` init parameters.
-            See Llama.cpp's [documentation](https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.Llama.__init__)
-            for more information on the available kwargs.
+            For more information on the available kwargs, see
+            [llama.cpp documentation](https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.Llama.__init__).
         :param generation_kwargs: A dictionary containing keyword arguments to customize text generation.
             Some examples: `max_tokens`, `temperature`, `top_k`, `top_p`,...
-            See Llama.cpp's documentation for more information:
-            https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.Llama.create_completion
+            For more information on the available kwargs, see
+            [llama.cpp documentation](https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.Llama.create_completion).
         """

         model_kwargs = model_kwargs or {}
         generation_kwargs = generation_kwargs or {}

-        # check if the huggingface_pipeline_kwargs contain the essential parameters
+        # check if the model_kwargs contain the essential parameters
         # otherwise, populate them with values from init parameters
         model_kwargs.setdefault("model_path", model)
         model_kwargs.setdefault("n_ctx", n_ctx)
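The corrected comment describes what the `setdefault` calls below it do: `setdefault` only fills in keys that are absent, so any entry already present in `model_kwargs` overrides the `model`, `n_ctx`, and `n_batch` init parameters. A minimal sketch of that precedence (values are illustrative):

```python
# User-supplied kwargs already contain n_ctx, so it wins over the init parameter.
model_kwargs = {"n_ctx": 4096}
model_kwargs.setdefault("model_path", "zephyr-7b-beta.Q4_0.gguf")  # filled in: key was absent
model_kwargs.setdefault("n_ctx", 2048)                             # no-op: key already present
print(model_kwargs)  # {'n_ctx': 4096, 'model_path': 'zephyr-7b-beta.Q4_0.gguf'}
```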
@@ -76,12 +74,13 @@ def run(self, prompt: str, generation_kwargs: Optional[Dict[str, Any]] = None):
         """
         Run the text generation model on the given prompt.
-        :param prompt: A string representing the prompt.
-        :param generation_kwargs: A dictionary containing keyword arguments to customize text generation.
-            Some examples: `max_tokens`, `temperature`, `top_k`, `top_p`,...
-            See Llama.cpp's documentation for more information:
-            https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.Llama.create_completion
-        :return: A dictionary of the returned responses and metadata.
+        :param prompt: the prompt to be sent to the generative model.
+        :param generation_kwargs: A dictionary containing keyword arguments to customize text generation.
+            For more information on the available kwargs, see
+            [llama.cpp documentation](https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.Llama.create_completion).
+        :returns: A dictionary with the following keys:
+            - "replies": the list of replies generated by the model.
+            - "meta": metadata about the request.
         """
         if self.model is None:
             error_msg = "The model has not been loaded. Please call warm_up() before running."
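For reference, a minimal usage sketch consistent with the revised docstrings (the model path and generation settings are illustrative; it assumes a GGUF file such as "zephyr-7b-beta.Q4_0.gguf" is available locally):

```python
from haystack_integrations.components.generators.llama_cpp import LlamaCppGenerator

generator = LlamaCppGenerator(
    model="zephyr-7b-beta.Q4_0.gguf",  # path to a quantized GGUF model
    n_ctx=2048,  # tokens in the context (0 takes the context from the model)
    n_batch=512,  # prompt processing maximum batch size
)
generator.warm_up()  # loads the model; run() raises an error without this

result = generator.run(
    "Who is the best American actor?",
    generation_kwargs={"max_tokens": 128, "temperature": 0.7},
)
print(result["replies"])  # list of replies generated by the model
print(result["meta"])     # metadata about the request
```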
