diff --git a/.gitignore b/.gitignore
index 68b9c94..2bc5089 100644
--- a/.gitignore
+++ b/.gitignore
@@ -159,4 +159,5 @@ cython_debug/
 # option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
 
+*.jsonl
 API_KEYS
\ No newline at end of file
diff --git a/README.md b/README.md
index d5225db..bb40a86 100644
--- a/README.md
+++ b/README.md
@@ -10,7 +10,7 @@
 
 ## Installation
 
-To install ChainLite, use the following steps:
+ChainLite has been tested with Python 3.10. To install, do the following:
 
 1. Install ChainLite via pip:
 
diff --git a/chainlite/llm_generate.py b/chainlite/llm_generate.py
index aa87a93..0d09b37 100644
--- a/chainlite/llm_generate.py
+++ b/chainlite/llm_generate.py
@@ -5,26 +5,26 @@
 import json
 import logging
 import os
-from pprint import pprint
 import random
 import re
-from typing import AsyncIterator, Optional, Any
+from pprint import pprint
+from typing import Any, AsyncIterator, Optional
 from uuid import UUID
 
+from langchain_community.chat_models import ChatLiteLLM
+from langchain_core.callbacks import AsyncCallbackHandler
+from langchain_core.messages import BaseMessage
 from langchain_core.output_parsers import StrOutputParser
+from langchain_core.outputs import LLMResult
+from langchain_core.runnables import Runnable, chain
+
+from tqdm.auto import tqdm
 
 from chainlite.llm_config import GlobalVars
 
 from .load_prompt import load_fewshot_prompt_template
-from langchain_community.chat_models import ChatLiteLLM
-from langchain_core.callbacks import AsyncCallbackHandler
-from langchain_core.outputs import LLMResult
-from langchain_core.messages import BaseMessage
-from langchain_core.runnables import chain, Runnable
-
 from .utils import get_logger
 
-
 logging.getLogger("LiteLLM").setLevel(logging.WARNING)
 logging.getLogger("LiteLLM Router").setLevel(logging.WARNING)
 logging.getLogger("LiteLLM Proxy").setLevel(logging.WARNING)
@@ -123,6 +123,25 @@ async def on_llm_end(
         GlobalVars.prompt_logs[run_id]["output"] = llm_output
 
 
+class ProgbarCallback(AsyncCallbackHandler):
+    def __init__(self, desc: str, total: int = None):
+        super().__init__()
+        self.count = 0
+        self.progress_bar = tqdm(total=total, desc=desc)  # define a progress bar
+
+    # Override on_llm_end method. This is called after every response from LLM
+    def on_llm_end(
+        self,
+        response: LLMResult,
+        *,
+        run_id: UUID,
+        parent_run_id: UUID | None = None,
+        **kwargs: Any,
+    ) -> Any:
+        self.count += 1
+        self.progress_bar.update(1)
+
+
 prompt_log_handler = PromptLogHandler()
 
 
@@ -218,6 +237,7 @@ def llm_generation_chain(
     output_json: bool = False,
     keep_indentation: bool = False,
     postprocess: bool = False,
+    progress_bar_desc: Optional[str] = None,
     bind_prompt_values: dict = {},
 ) -> Runnable:
     """
@@ -233,7 +253,8 @@
         output_json (bool, optional): If True, asks the LLM API to output a JSON. This depends on the underlying model to support. For example, GPT-4, GPT-4o and newer GPT-3.5-Turbo models support it, but require the word "json" to be present in the input. Defaults to False.
         keep_indentation (bool, optional): If True, will keep indentations at the beginning of each line in the template_file. Defaults to False.
-        postprocess (bool, optional): If true, postprocessing deletes incomplete sentences from the end of the generation. Defaults to False.
+        postprocess (bool, optional): If True, postprocessing deletes incomplete sentences from the end of the generation. Defaults to False.
+        progress_bar_desc (str, optional): If provided, displays a `tqdm` progress bar with this description. Defaults to None.
         bind_prompt_values (dict, optional): A dictionary containing {Variable: str : Value}. Binds values to the prompt. Additional variables can be provided when the chain is called. Defaults to {}.
 
     Returns:
@@ -290,6 +311,10 @@
     if output_json:
         model_kwargs["response_format"] = {"type": "json_object"}
 
+    callbacks = [prompt_log_handler]
+    if progress_bar_desc:
+        cb = ProgbarCallback(progress_bar_desc)
+        callbacks.append(cb)
     llm = ChatLiteLLM(
         model_kwargs=model_kwargs,
         api_base=llm_resource["api_base"] if "api_base" in llm_resource else None,
@@ -302,7 +327,7 @@
             "distillation_instruction": distillation_instruction,
             "template_name": os.path.basename(template_file),
         },  # for logging to file
-        callbacks=[prompt_log_handler],
+        callbacks=callbacks,
     )
 
     # for variable, value in bind_prompt_values.keys():
diff --git a/setup.py b/setup.py
index 1f51129..781c0cc 100644
--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,7 @@
 
 setup(
     name="chainlite",
-    version="0.1.11",
+    version="0.1.12",
     author="Sina Semnani",
     author_email="sinaj@cs.stanford.edu",
     description="A Python package that uses LangChain and LiteLLM to call large language model APIs easily",
@@ -24,7 +24,7 @@
         "redis[hiredis]",
     ],
     extras_require={
-        "dev": ["invoke", "pytest", "pytest-asyncio", "setuptools", "wheel", "twine"],
+        "dev": ["invoke", "pytest", "pytest-asyncio", "setuptools", "wheel", "twine", "isort"],
     },
     classifiers=[
         "Programming Language :: Python :: 3",
diff --git a/tests/test_llm_generate.py b/tests/test_llm_generate.py
index 4f96fc0..dae808d 100644
--- a/tests/test_llm_generate.py
+++ b/tests/test_llm_generate.py
@@ -2,7 +2,7 @@
 
 from chainlite import llm_generation_chain, load_config_from_file
 from chainlite.llm_config import GlobalVars
-from chainlite.llm_generate import write_prompt_logs_to_file
+from chainlite.llm_generate import ProgbarCallback, write_prompt_logs_to_file
 from chainlite.utils import get_logger
 
 logger = get_logger(__name__)
@@ -10,6 +10,7 @@
 
 # load_config_from_file("./llm_config.yaml")
 
+
 @pytest.mark.asyncio(scope="session")
 async def test_llm_generate():
     # Check that the config file has been loaded properly
@@ -20,7 +21,7 @@
     assert GlobalVars.local_engine_set
 
     response = await llm_generation_chain(
-        template_file="test.prompt", # prompt path relative to one of the paths specified in `prompt_dirs`
+        template_file="test.prompt",  # prompt path relative to one of the paths specified in `prompt_dirs`
         engine="gpt-4o",
         max_tokens=100,
     ).ainvoke({})
@@ -37,7 +38,22 @@
         template_file="tests/joke.prompt",
         engine="gpt-35-turbo",
         max_tokens=100,
+        temperature=0.1,
+        progress_bar_desc="test1",
     ).ainvoke({"topic": "Life as a PhD student"})
     logger.info(response)
 
-    write_prompt_logs_to_file("llm_input_outputs.jsonl")
\ No newline at end of file
+    write_prompt_logs_to_file("tests/llm_input_outputs.jsonl")
+
+
+@pytest.mark.asyncio(scope="session")
+async def test_batching():
+    response = await llm_generation_chain(
+        template_file="tests/joke.prompt",
+        engine="gpt-35-turbo",
+        max_tokens=100,
+        temperature=0.1,
+        progress_bar_desc="test2",
+    ).abatch([{"topic": "Life as a PhD student"}] * 10)
+    assert len(response) == 10
+    logger.info(response)
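
Reviewer note: below is a minimal usage sketch of the new `progress_bar_desc` option, mirroring the `test_batching` test added above. The engine name, prompt path, and `llm_config.yaml` path are borrowed from the existing tests and are assumptions about the local setup, not part of this change.

import asyncio

from chainlite import llm_generation_chain, load_config_from_file

# Engine/prompt/config names below come from the repo's tests; substitute your own.
load_config_from_file("./llm_config.yaml")


async def main() -> None:
    chain = llm_generation_chain(
        template_file="tests/joke.prompt",
        engine="gpt-35-turbo",
        max_tokens=100,
        temperature=0.1,
        progress_bar_desc="jokes",  # new in 0.1.12: shows a tqdm bar, advanced once per LLM response
    )
    # The bar is most useful with .abatch(), which issues many LLM calls concurrently.
    jokes = await chain.abatch([{"topic": "Life as a PhD student"}] * 10)
    print(len(jokes))


asyncio.run(main())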