Merge pull request #2 from narenaryan/feat/add-streaming-responses
feat: add streaming responses
narenaryan authored Jun 17, 2024
2 parents 04aa73d + 74fe7ee commit 226236a
Showing 4 changed files with 138 additions and 44 deletions.
41 changes: 37 additions & 4 deletions README.md
@@ -5,26 +5,34 @@ A CLI application to run PromptML scripts against LLMs.
```bash
pip install --upgrade promptml-cli
```
## Demo
[![asciicast](https://asciinema.org/a/664270.svg)](https://asciinema.org/a/664270)

## Usage
```bash
promptml-cli --help

usage: promptml-cli [-h] -f FILE [-m MODEL] [-s {xml,json,yaml}] [-p {openai,google}]
usage: promptml-cli [-h] -f FILE [-m MODEL] [-s {xml,json,yaml}] [-p {openai,google}] [--stream] [--raw]

A Command Line Interface tool to run PromptML files with popular Generative AI models
A Command Line Interface (CLI) tool to run Prompt Markup Language (PromptML) files with popular Generative AI models

optional arguments:
-h, --help show this help message and exit
-f FILE, --file FILE Path to the PromptML file
-f FILE, --file FILE Path to the PromptML(.pml) file
-m MODEL, --model MODEL
Model to use for the completion
-s {xml,json,yaml}, --serializer {xml,json,yaml}
Serializer to use for the completion. Default is `xml`
-p {openai,google}, --provider {openai,google}
GenAI provider to use for the completion. Default is `openai`
--stream Stream chunks for the GenAI response. Default is non-streaming response.
--raw Return raw output from LLM (best for saving into files or piping)

For more details on composing PromptML files, visit: https://promptml.org/
```

## Example

1. Create a PromptML file `character.pml` with the following content:

```promptml
@@ -48,14 +56,20 @@ optional arguments:
@end
@end
```
See PromptML [documentation](https://www.promptml.org/) for more details.
See PromptML [documentation](https://www.promptml.org/) for more details about the syntax.

2. Set your OpenAI API key as an environment variable:

```bash
export OPENAI_API_KEY=your-openai-api-key
```

Or, if you are using Google Generative AI:

```bash
export GOOGLE_API_KEY=your-google-api-key
```

3. Run the PromptML file with the following command in terminal:

```bash
@@ -84,6 +98,25 @@ Finally, I retire for the night, eager to wake up and do it all over again. Real
Time taken: 6.006012916564941 seconds
```
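
The same file can also be parsed programmatically with the `promptml` package the CLI builds on. A small sketch, assuming `character.pml` from step 1 is in the working directory:

```python
from promptml.parser import PromptParserFromFile

# Parse the same PromptML file the CLI consumes; parse() is the same
# call main.py makes before serializing the prompt for the provider.
parser = PromptParserFromFile("character.pml")
prompt = parser.parse()
print(prompt)
```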

## Streaming Response

You can stream the response from the GenAI model with the `--stream` flag; chunks are rendered as the model generates them.

```bash
promptml-cli -f character.pml -p google --stream
```
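
When `--stream` is combined with `--raw`, the chunks are printed as plain text as they arrive, so another process can consume them incrementally. A minimal, hypothetical sketch (not part of this repo) that reads the stream from Python, assuming `promptml-cli` is on your `PATH`:

```python
import subprocess

# Hypothetical consumer: launch the CLI with --stream --raw and read
# its stdout incrementally instead of waiting for the full response.
proc = subprocess.Popen(
    ["promptml-cli", "-f", "character.pml", "-p", "google", "--stream", "--raw"],
    stdout=subprocess.PIPE,
    text=True,
)
for line in proc.stdout:  # lines arrive as the model emits newline-terminated text
    print(line, end="")
proc.wait()
```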

## Raw Output

You can get the raw output from the GenAI model with the `--raw` flag; the response is printed without any rich-text formatting.

```bash
promptml-cli -f character.pml -p google --raw
```

Note: Raw output is useful when you want to save the output to a file or pipe it to another command.
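
A minimal sketch of capturing that raw output from a script, assuming `promptml-cli` is on your `PATH`; the output file name is just an example:

```python
import subprocess

# Run the CLI with --raw and save the unformatted response to a file.
result = subprocess.run(
    ["promptml-cli", "-f", "character.pml", "-p", "google", "--raw"],
    capture_output=True,
    text=True,
    check=True,  # raise if the CLI exits with a non-zero status
)
with open("response.md", "w", encoding="utf-8") as f:
    f.write(result.stdout)
```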

## TODO
- Add support for Claude, Cohere & AI21 Labs GenAI models
- Add tests
2 changes: 1 addition & 1 deletion src/promptml_cli/__about__.py
@@ -1,2 +1,2 @@
# pylint: disable=invalid-name
version = "0.1.0"
version = "0.2.0"
14 changes: 11 additions & 3 deletions src/promptml_cli/client.py
@@ -1,3 +1,4 @@
"""GenAI client module."""

import os
import enum
@@ -6,23 +7,30 @@
from openai import OpenAI
import google.generativeai as genai

class Model(enum.Enum):
"""GenAI model enum class."""
GPT_4O = "gpt-4o"
GEMINI_1_5_FLASH_LATEST = "gemini-1.5-flash-latest"

class Provider(enum.Enum):
"""GenAI provider enum class."""
OPENAI = "openai"
GOOGLE = "google"

class ClientFactory:
"""GenAI client factory class."""
def __init__(self, provider: str, model: str=""):
self.provider = provider
self.model = model

def get_client(self) -> Union[OpenAI, genai.GenerativeModel, None]:
"""Get the client based on the provider."""
if self.provider == Provider.OPENAI.value:
return OpenAI(
api_key=os.environ.get("OPENAI_API_KEY"),
)
elif self.provider == Provider.GOOGLE.value:
if self.provider == Provider.GOOGLE.value:
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))
return genai.GenerativeModel(self.model)
else:
return None

return None
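
A minimal usage sketch of the refactored factory, mirroring the call sites in `main.py` below (assumes `OPENAI_API_KEY` is exported; the prompt text is illustrative):

```python
from promptml_cli.client import ClientFactory, Model, Provider

# Build an OpenAI client via the factory and run one completion,
# the same way get_sync_response() does in main.py.
client = ClientFactory(Provider.OPENAI.value, model=Model.GPT_4O.value).get_client()
completion = client.chat.completions.create(
    messages=[{"role": "user", "content": "Say hello."}],
    model=Model.GPT_4O.value,
)
print(completion.choices[0].message.content)
```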
125 changes: 89 additions & 36 deletions src/promptml_cli/main.py
@@ -1,40 +1,94 @@
""" Command Line Interface tool to run PromptML files with popular Generative AI models """

import argparse
import enum
import os
import time

from openai import APIConnectionError
from promptml.parser import PromptParserFromFile
from rich.markdown import Markdown
from rich.console import Console
from typing import Union
from rich.panel import Panel
from rich.live import Live
from rich.style import Style
from rich import box

from promptml_cli.client import Provider, ClientFactory, Model


def get_sync_response(args, serialized_data) -> str:
response = ""
if args.provider == Provider.GOOGLE.value:
if args.model == Model.GPT_4O.value:
args.model = Model.GEMINI_1_5_FLASH_LATEST.value

g_client = ClientFactory(Provider.GOOGLE.value, model=args.model).get_client()
response = g_client.generate_content(serialized_data).text
elif args.provider == Provider.OPENAI.value:
openai_client = ClientFactory(Provider.OPENAI.value, model=args.model).get_client()
chat_completion = openai_client.chat.completions.create(
messages=[
{
"role": "user",
"content": serialized_data,
},
],
model=args.model,
)
response = chat_completion.choices[0].message.content

return response

from promptml_cli.client import Provider, ClientFactory
def get_stream_response(args, serialized_data):
if args.provider == Provider.GOOGLE.value:
if args.model == Model.GPT_4O.value:
args.model = Model.GEMINI_1_5_FLASH_LATEST.value

g_client = ClientFactory(Provider.GOOGLE.value, model=args.model).get_client()
response = g_client.generate_content(serialized_data, stream=True)

for chunk in response:
yield chunk.text
elif args.provider == Provider.OPENAI.value:
openai_client = ClientFactory(Provider.OPENAI.value, model=args.model).get_client()
response = openai_client.chat.completions.create(
messages=[
{
"role": "user",
"content": serialized_data,
},
],
model=args.model,
stream=True,
)

chunk_message = ""
for chunk in response:
chunk_message = chunk.choices[0].delta.content # extract the message
yield chunk_message

def run():
# Take user input of following arguments
# 1. --file, -f : Path to the PromptML file
# 2. --model, -m : Model to use for the completion
# 3. --serializer, -s : Serializer to use for the completion
console = Console(
color_system="truecolor"
color_system="truecolor",
record=True,
)
neon_blue = Style(color="cyan", bold=True)

arg_parser = argparse.ArgumentParser(
prog='promptml-cli',
description='A Command Line Interface tool to run PromptML files with popular Generative AI models',
epilog='-----------------------------'
description='A Command Line Interface (CLI) tool to run Prompt Markup Language (PromptML) files with popular Generative AI models',
epilog='For more details on composing PromptML files, visit: https://promptml.org/'
)

arg_parser.add_argument('-f', '--file', type=str, help='Path to the PromptML(.pml) file', required=True)
arg_parser.add_argument('-m', '--model', type=str, help='Model to use for the completion', default='gpt-4o')
arg_parser.add_argument('-s', '--serializer', type=str, help='Serializer to use for the completion. Default is `xml`', default='xml', choices=['xml', 'json', 'yaml'])
arg_parser.add_argument('-p', '--provider', type=str, help='GenAI provider to use for the completion. Default is `openai`', default=Provider.OPENAI.value, choices=[Provider.OPENAI.value, Provider.GOOGLE.value])

arg_parser.add_argument('--stream', help='Stream chunks for the GenAI response. Default is non-streaming response.', action='store_true')
arg_parser.add_argument('--raw', help='Return raw output from LLM (best for saving into files or piping)', action='store_true')

args = arg_parser.parse_args()

# Parse the PromptML file

parser = PromptParserFromFile(args.file)
parser.parse()

@@ -51,36 +105,35 @@ def run():


now = time.time()
response = ""
if args.provider == Provider.GOOGLE.value:
if args.model == "gpt-4o":
args.model = "gemini-1.5-flash-latest"

g_client = ClientFactory(Provider.GOOGLE.value, model=args.model).get_client()
response = g_client.generate_content(serialized_data).text
elif args.provider == Provider.OPENAI.value:
openai_client = ClientFactory(Provider.OPENAI.value, model=args.model).get_client()
if not args.stream:
try:
chat_completion = openai_client.chat.completions.create(
messages=[
{
"role": "user",
"content": serialized_data,
},
],
model=args.model,
)
response = chat_completion.choices[0].message.content
response = get_sync_response(args, serialized_data)
except APIConnectionError:
console.print(
"Error connecting to OpenAI API. Try again!",
"Error connecting to provider API. Try again! Please turn-off the VPN if needed.",
style = "bold red"
)
return
# Print the completion with rich console
if args.raw:
print(response)
else:
console.print(Panel(Markdown(response, "\n")), soft_wrap=True, new_line_start=True)
else:
with Live(refresh_per_second=4) as live:
message = ""
for chunk in get_stream_response(args, serialized_data):
if chunk:
if args.raw:
print(chunk, end="")
continue
message += chunk
markdown_content = Markdown(message, "\n")
panel = Panel(markdown_content, border_style=neon_blue, safe_box=True)
live.update(panel)

# Print the completion with rich console
console.print(Markdown(response, "\n"), soft_wrap=True, new_line_start=True)
console.print(f"Time taken: {time.time() - now} seconds", style="bold green")
if not args.raw:
console.print(f"\nTime taken: {time.time() - now} seconds", style="bold green")

if __name__ == '__main__':
run()
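
The streaming path pairs a generator of text chunks with Rich's `Live` display, re-rendering the accumulated Markdown on each update. A self-contained sketch of that pattern with a fake chunk source (the generator here is illustrative, not from this repo):

```python
import time

from rich.live import Live
from rich.markdown import Markdown
from rich.panel import Panel

def fake_chunks():
    # Stand-in for get_stream_response(): any iterable of text chunks works.
    for word in "Streaming *Markdown* into a live panel.".split():
        time.sleep(0.2)
        yield word + " "

message = ""
with Live(refresh_per_second=4) as live:
    for chunk in fake_chunks():
        message += chunk
        live.update(Panel(Markdown(message)))
```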
