From f7ee9e3d6d494d749fea1a95db2703f854cc302b Mon Sep 17 00:00:00 2001 From: Tom Usher Date: Fri, 15 Dec 2023 17:57:56 +0000 Subject: [PATCH 1/4] Correct references to repo location --- .github/scripts/report_nightly_build_failure.py | 2 +- README.md | 10 +++++----- pyproject.toml | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/scripts/report_nightly_build_failure.py b/.github/scripts/report_nightly_build_failure.py index bf50513..5390898 100644 --- a/.github/scripts/report_nightly_build_failure.py +++ b/.github/scripts/report_nightly_build_failure.py @@ -12,7 +12,7 @@ response = requests.post( os.environ["SLACK_WEBHOOK_URL"], json={ - "text": "A Nightly build failed. See https://github.com/tomusher/wagtail-ai/actions/runs/" + "text": "A Nightly build failed. See https://github.com/wagtail/wagtail-ai/actions/runs/" + os.environ["GITHUB_RUN_ID"], }, ) diff --git a/README.md b/README.md index ecd4852..050140b 100644 --- a/README.md +++ b/README.md @@ -42,11 +42,11 @@ If you're interested in working on these things, please do! 
## Links -- [Documentation](https://github.com/tomusher/wagtail-ai/blob/main/README.md) -- [Changelog](https://github.com/tomusher/wagtail-ai/blob/main/CHANGELOG.md) -- [Contributing](https://github.com/tomusher/wagtail-ai/blob/main/CHANGELOG.md) -- [Discussions](https://github.com/tomusher/wagtail-ai/discussions) -- [Security](https://github.com/tomusher/wagtail-ai/security) +- [Documentation](https://github.com/wagtail/wagtail-ai/blob/main/docs/index.md) +- [Changelog](https://github.com/wagtail/wagtail-ai/blob/main/CHANGELOG.md) +- [Contributing](https://github.com/wagtail/wagtail-ai/blob/main/docs/contributing.md) +- [Discussions](https://github.com/wagtail/wagtail-ai/discussions) +- [Security](https://github.com/wagtail/wagtail-ai/security) ## Supported Versions diff --git a/pyproject.toml b/pyproject.toml index 1f57796..1bf4ffa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,7 +51,7 @@ docs = [ ] [project.urls] -Home = "https://github.com/tomusher/wagtail-ai" +Home = "https://github.com/wagtail/wagtail-ai" [tool.ruff] select = ["F", "E", "C90", "I", "B", "DJ", "RUF", "TRY", "C4"] From 11b391983565a0a99e13c8142198dcd3c9261c9e Mon Sep 17 00:00:00 2001 From: Tom Usher Date: Fri, 15 Dec 2023 17:58:10 +0000 Subject: [PATCH 2/4] Clean up readme --- README.md | 81 ++----------------------------------------------------- 1 file changed, 2 insertions(+), 79 deletions(-) diff --git a/README.md b/README.md index 050140b..2a68d4b 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ Get help with your content using AI superpowers. 
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) [![PyPI version](https://badge.fury.io/py/wagtail-ai.svg)](https://badge.fury.io/py/wagtail-ai) -[![ai CI](https://github.com/tomusher/wagtail-ai/actions/workflows/test.yml/badge.svg)](https://github.com/tomusher/wagtail-ai/actions/workflows/test.yml) +[![ai CI](https://github.com/wagtail/wagtail-ai/actions/workflows/test.yml/badge.svg)](https://github.com/wagtail/wagtail-ai/actions/workflows/test.yml) Wagtail AI integrates Wagtail with AI's APIs (think ChatGPT) to help you write and correct your content. @@ -26,20 +26,6 @@ You'll need a paid OpenAI or Anthropic account and an API key. There'll also be * \+ (1,000 * 1.3) tokens received from the API * = 2,645 tokens = $0.0053 -## The Future - -Wagtail AI is very new. Here's some things we'd like to do: - -* [ ] Streaming support - the API supports server-sent events, we could do the same -* [ ] A nice UI - it's a bit rough right now -* [ ] Reduce bundle size -* [ ] Internationalisation on text and support for different language prompts -* [ ] Find a better way to hook in to Draftail to do things like show progress bars/spinners. -* [ ] Add more AI behaviours and features - content recommendations, content based Q&A tools, better ways to direct the prompt. -* [ ] Tests! - -If you're interested in working on these things, please do! - ## Links - [Documentation](https://github.com/wagtail/wagtail-ai/blob/main/docs/index.md) @@ -50,67 +36,4 @@ If you're interested in working on these things, please do! 
## Supported Versions -* Wagtail 4.0, 4.1, 4.2, 5.0, 5.2 - -## Contributing - -### Install - -To make changes to this project, first clone this repository: - -```sh -git clone https://github.com/tomusher/wagtail-ai.git -cd wagtail-ai -``` - -With your preferred virtualenv activated, install testing dependencies: - -#### Compile front-end assets - -```sh -nvm use -npm install -npm run build -``` - -#### Using pip - -```sh -python -m pip install --upgrade pip>=21.3 -python -m pip install -e .[testing] -U -``` - -#### Using flit - -```sh -python -m pip install flit -flit install -``` - -### pre-commit - -Note that this project uses [pre-commit](https://github.com/pre-commit/pre-commit). -It is included in the project testing requirements. To set up locally: - -```shell -# go to the project directory -$ cd wagtail-ai -# initialize pre-commit -$ pre-commit install - -# Optional, run all checks once for this, then the checks will run only on the changed files -$ git ls-files --others --cached --exclude-standard | xargs pre-commit run --files -``` - -### How to run tests - -Now you can run tests as shown below: - -```sh -tox -``` - -or, you can run them for a specific environment `tox -e python3.11-django4.2-wagtail5.2` or specific test -`tox -e python3.11-django4.2-wagtail5.2-sqlite wagtail-ai.tests.test_file.TestClass.test_method` - -To run the test app interactively, use `tox -e interactive`, visit `http://127.0.0.1:8020/admin/` and log in with `admin`/`changeme`. 
+* Wagtail 5.2 From ad56b8631b95f8c94b9071725e837ec78e696997 Mon Sep 17 00:00:00 2001 From: Tom Usher Date: Fri, 15 Dec 2023 17:58:25 +0000 Subject: [PATCH 3/4] Add readthedocs config --- .readthedocs.yaml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 .readthedocs.yaml diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 0000000..ddf8e9e --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,16 @@ +version: 2 +build: + os: ubuntu-22.04 + tools: + python: '3.11' + +mkdocs: + configuration: mkdocs.yml + +# Dependencies required to build your docs +python: + install: + - method: pip + path: . + extra_requirements: + - docs From d9703d50e497e0b420252252c8e9c885aa704d00 Mon Sep 17 00:00:00 2001 From: Tom Usher Date: Fri, 15 Dec 2023 17:58:43 +0000 Subject: [PATCH 4/4] Some rewording/restructuring of docs --- docs/.pages | 3 +- docs/ai-backends.md | 109 +++++++++++++++++++++++++++++++++++++ docs/editor-integration.md | 13 +++-- docs/installation.md | 59 ++++---------------- docs/llm-backend.md | 72 ------------------------ docs/text-splitting.md | 53 ++++++------------ 6 files changed, 148 insertions(+), 161 deletions(-) create mode 100644 docs/ai-backends.md delete mode 100644 docs/llm-backend.md diff --git a/docs/.pages b/docs/.pages index 14d89e8..defa6a8 100644 --- a/docs/.pages +++ b/docs/.pages @@ -1,6 +1,5 @@ nav: - installation.md - editor-integration.md - - "Backends": - - llm-backend.md + - ai-backends.md - text-splitting.md diff --git a/docs/ai-backends.md b/docs/ai-backends.md new file mode 100644 index 0000000..8b052e0 --- /dev/null +++ b/docs/ai-backends.md @@ -0,0 +1,109 @@ +# AI Backends + +Wagtail AI can be configured to use different backends to support different AI services. 
+
+Currently the only (and default) backend available in Wagtail AI is the [LLM Backend](#llm-backend)
+
+## LLM Backend
+
+This backend uses the [llm library](https://llm.datasette.io/en/stable/) which offers support for many AI services through plugins.
+
+By default, it is configured to use OpenAI's `gpt-3.5-turbo` model.
+
+### Using other models
+
+You can use the command line interface to see the llm models installed in your environment:
+
+```sh
+llm models
+```
+
+Then you can swap `MODEL_ID` in the configuration to use a different model. For example, to use GPT-4:
+
+```python
+WAGTAIL_AI = {
+    "BACKENDS": {
+        "default": {
+            "CLASS": "wagtail_ai.ai.llm.LLMBackend",
+            "CONFIG": {
+                "MODEL_ID": "gpt-4",
+            },
+        }
+    }
+}
+```
+
+!!! info
+
+    The `llm` package comes with OpenAI models installed by default.
+
+    You can install other models using [`llm`'s plugin functionality](https://llm.datasette.io/en/stable/plugins/index.html).
+
+### Customisations
+
+There are two settings that you can use with the LLM backend:
+
+- `INIT_KWARGS`
+- `PROMPT_KWARGS`
+
+#### `INIT_KWARGS`
+
+These are passed to `llm` as ["Model Options"](https://llm.datasette.io/en/stable/python-api.html#model-options). You can use them to customize the model's initialization.
+
+For example, for OpenAI models you can set a custom API key. By default the `openai` library will use the value of the `OPENAI_API_KEY` environment variable.
+
+```python
+WAGTAIL_AI = {
+    "BACKENDS": {
+        "default": {
+            "CLASS": "wagtail_ai.ai.llm.LLMBackend",
+            "CONFIG": {
+                "MODEL_ID": "gpt-3.5-turbo", # Model ID recognizable by the llm package.
+                "INIT_KWARGS": {"key": "your-custom-api-key"},
+            },
+        }
+    }
+}
+```
+
+#### `PROMPT_KWARGS`
+
+Using `PROMPT_KWARGS` you can pass arguments to [`llm`'s `prompt` method](https://llm.datasette.io/en/stable/python-api.html#system-prompts), e.g. a system prompt which is passed with every request.
+
+```python
+WAGTAIL_AI = {
+    "BACKENDS": {
+        "default": {
+            "CLASS": "wagtail_ai.ai.llm.LLMBackend",
+            "CONFIG": {
+                "MODEL_ID": "gpt-3.5-turbo", # Model ID recognizable by the llm package.
+                "PROMPT_KWARGS": {"system": "A custom, global system prompt."},
+            },
+        }
+    }
+}
+```
+
+#### Specify the token limit for a model
+
+!!! info
+
+    Token limit is referred to as "context window" which is the maximum amount of tokens in a single context that a specific chat model supports.
+
+While Wagtail AI knows the token limit of some models (see [`tokens.py`](https://github.com/wagtail/wagtail-ai/blob/main/src/wagtail_ai/tokens.py)), you might choose to use a model that isn't in this mapping, or you might want to set a lower token limit for an existing model.
+
+You can do this by setting `TOKEN_LIMIT`.
+
+```python
+WAGTAIL_AI = {
+    "BACKENDS": {
+        "default": {
+            "CLASS": "wagtail_ai.ai.llm.LLMBackend",
+            "CONFIG": {
+                "MODEL_ID": "gpt-3.5-turbo",
+                "TOKEN_LIMIT": 4096,
+            },
+        }
+    }
+}
+```
diff --git a/docs/editor-integration.md b/docs/editor-integration.md
index 92fdde5..6c8c5dc 100644
--- a/docs/editor-integration.md
+++ b/docs/editor-integration.md
@@ -1,14 +1,17 @@
 # Editor Integration
 
-Wagtail AI integrates with Wagtail's Draftail rich text editor to provide tools to help write content.
+Wagtail AI integrates with Wagtail's Draftail rich text editor to provide tools to help write content. To use it, highlight some text and click the 'magic wand' icon in the toolbar.
 
-By default, it includes tools to:
+By default, it includes prompts that:
 
 * Run AI assisted spelling/grammar checks on your content
 * Generate additional content based on what you're writing
 
-You can also define your own prompts:
-
 ### Adding Your Own Prompts
 
-Explore the `AI Prompts` settings, accessible via the Wagtail settings menu. Here you'll be able to view, edit and add new prompts.
+You can add your own prompts and customise existing prompts from the Wagtail admin under Settings -> Prompts.
+ +When creating prompts you can provide a label and description to help describe the prompt to your editors, specify the full prompt that will be passed with your text to the AI, and a 'method', which can be one of: + +- 'Append after existing content' - keep your existing content intact and add the response from the AI to the end (useful for completions/suggestions). +- 'Replace content' - replace the content in the editor with the response from the AI (useful for corrections, rewrites and translations.) diff --git a/docs/installation.md b/docs/installation.md index c7c5749..b2e9b16 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -1,13 +1,17 @@ # Installation -At this moment in time the only backend that ships by default with wagtail-ai is [llm](https://llm.datasette.io/en/stable/) -that lets you use a number of different chat models, including OpenAI's. - -1. Install the package along with the relevant client libraries for the AI Backend you want to use: - - For [llm](https://llm.datasette.io/en/stable/) which includes OpenAI chat models, - `python -m pip install wagtail-ai[llm]` +1. Install the package along with the relevant client libraries for the default [AI Backend](ai-backends.md): + ```bash + python -m pip install wagtail-ai[llm] + ``` 2. Add `wagtail_ai` to your `INSTALLED_APPS` -3. Add an AI chat model and backend configuration (any model supported by [llm](https://llm.datasette.io/en/stable/)). + ``` + INSTALLED_APPS = [ + "wagtail_ai", + # ... + ] + ``` +3. Add an AI chat model and backend configuration (by default, `MODEL_ID` can be any model supported by [llm](https://llm.datasette.io/en/stable/)). ```python WAGTAIL_AI = { "BACKENDS": { @@ -20,43 +24,4 @@ that lets you use a number of different chat models, including OpenAI's. } } ``` - -The openai package can be provided with the API key via the `OPENAI_API_KEY` -environment variable. 
If you want to provide a custom API key for -each backend please read the llm backend's documentation page. - -Read more about the [llm backend here](llm-backend.md). - - -## Specify the token limit for a backend - -!!! info - - Token limit is referred to as "context window" which is the maximum amount - of tokens in a single context that a specific chat model supports. - -If you want to use a chat model that does not have a default token limit configured -or want to change the default token limit, you can do so by adding the `TOKEN_LIMIT` -setting. - -```python -WAGTAIL_AI = { - "BACKENDS": { - "default": { - "CLASS": "wagtail_ai.ai.llm.LLMBackend", - "CONFIG": { - "MODEL_ID": "gpt-3.5-turbo", - "TOKEN_LIMIT": 4096, - }, - } - } -} -``` - -This `TOKEN_LIMIT` value depend on the chat model you select as each of them support -a different token limit, e.g. `gpt-3.5-turbo` supports up to 4096 tokens, -`gpt-3.5-turbo-16k` supports up to 16384 tokens. - -!!! info "Text splitting" - - [Read more about text splitting and Wagtail AI customization options here](text-splitting.md). +4. If you're using an OpenAI model, specify an API key using the `OPENAI_API_KEY` environment variable, or by setting it as a key in [`INIT_KWARGS`](ai-backends.md#init-kwargs). diff --git a/docs/llm-backend.md b/docs/llm-backend.md deleted file mode 100644 index c3b5563..0000000 --- a/docs/llm-backend.md +++ /dev/null @@ -1,72 +0,0 @@ -# llm backend - -wagtail-ai comes with a backend for the [llm library](https://llm.datasette.io/en/stable/) -out-of-the box. - -## Using other "llm" models - -You can use the command line interface to see the llm models installed in your environment: - -```sh -llm models -``` - -Then you can swap `MODEL_ID` in the configuration to use a different model. - -!!! info - - At this moment in time, the llm package comes with OpenAI models installed by default. 
- - You can install other models via the [llm's plugins functionality](https://llm.datasette.io/en/stable/plugins/index.html). - -## "llm" backend custom settings - -There are two custom settings that you can use with the llm backend: - -- `INIT_KWARGS` -- `PROMPT_KWARGS` - -### `INIT_KWARGS` - -Those are set on the model instance. You can use them to customize the model's initialization. - -See more details about the available options on https://llm.datasette.io/en/stable/python-api.html#model-options. - -#### Custom OpenAI key - -E.g. for OpenAI models you can set a custom API key. Otherwise the `openai` library will use whatever -you've set up in your environment with `OPENAI_API_KEY`. - -```python -WAGTAIL_AI = { - "BACKENDS": { - "default": { - "CLASS": "wagtail_ai.ai.llm.LLMBackend", - "CONFIG": { - "MODEL_ID": "gpt-3.5-turbo", # Model ID recognizable by the llm package. - "INIT_KWARGS": {"key": "your-custom-api-key"}, - }, - } - } -} -``` - -### `PROMPT_KWARGS` - -You can pass arguments to the [llm's `prompt` method](https://llm.datasette.io/en/stable/python-api.html#system-prompts). - -E.g. a system prompt. - -```python -WAGTAIL_AI = { - "BACKENDS": { - "default": { - "CLASS": "wagtail_ai.ai.llm.LLMBackend", - "CONFIG": { - "MODEL_ID": "gpt-3.5-turbo", # Model ID recognizable by the llm package. - "PROMPT_KWARGS": {"system": "A custom, global system prompt."}, - }, - } - } -} -``` diff --git a/docs/text-splitting.md b/docs/text-splitting.md index 70af1ce..9180951 100644 --- a/docs/text-splitting.md +++ b/docs/text-splitting.md @@ -1,35 +1,22 @@ # Text splitting -Using chat models requires splitting the text into smaller chunks that the model can process. +Sometimes when we send text to an AI model, we need to send more text than the model can process in one go. To do this, we need to split the text you provide in to smaller chunks. 
-There are two components to this:
+Wagtail AI provides two components that help with this:
 
-- Splitter length calculator
-- Splitter
-
-The splitter needs the length calculator to know when to split for each different chat model.
-
-This can be controlled with the `TOKEN_LIMIT` in the backend configuration.
+- Splitter length calculator - which decides how many characters will fit inside a model's context window based on the `TOKEN_LIMIT` specified in your backend configuration.
+- Splitter - which splits your text into sensible chunks.
 
 ## Defaults
 
 By default, Wagtail AI comes with:
 
- - Langchain `RecursiveCharacterTextSplitter` class that is vendored in Wagtail AI.
- - A naive splitter length calculator that does not actually do a proper text splitting,
-   only estimates how many tokens there are in the supplied text.
-
-By default Wagtail AI does not require you to use any third-party dependencies to
-achieve the text splitting required for most chat models. That's why we've vendored
-the Langchain splitter so it avoids relying on big external packages for a single task.
-
-In the future development of Wagtail AI we might add support for more precise
-optional backends in addition to the default ones.
+ - A naive splitter length calculator that tries to conservatively estimate how many characters will fit without any additional dependencies.
+ - A recursive text splitter vendored from Langchain that tries to split on paragraphs, then new lines, then spaces.
 
 ## Customization
 
-Wagtail AI allows you to customize the splitter and the splitter length calculator logic
-for each backend so that then you can tailor them to the specific chat model you want to use.
+You may wish to create your own splitters or length calculators. 
To do this, you can override the default classes with your own as follows: ```python WAGTAIL_AI = { @@ -50,30 +37,28 @@ WAGTAIL_AI = { ### Custom text splitter -The spliter class must implement the `TextSplitterProtocol` -([source](https://github.com/wagtail/wagtail-ai/blob/main/src/wagtail_ai/types.py)). - +The spliter class must implement the [`TextSplitterProtocol`](https://github.com/wagtail/wagtail-ai/blob/main/src/wagtail_ai/types.py). -E.g. if you wanted to use the actual Langchain dependency, you could specify -a custom class like this: +For example, if you wanted to use a different splitter from Langchain: ```python from collections.abc import Callable, Iterator from typing import Any -from langchain.text_splitter import RecursiveCharacterTextSplitter +from langchain.text_splitter import ( + HTMLHeaderTextSplitter as LangchainHTMLHeaderTextSplitter, +) from wagtail_ai.types import TextSplitterProtocol -class RecursiveCharacterTextSplitter(TextSplitterProtocol): +class HTMLHeaderTextSplitter(TextSplitterProtocol): def __init__( self, *, chunk_size: int, length_function: Callable[[str], int], **kwargs: Any ) -> None: - self.splitter = RecursiveCharacterTextSplitter( + self.splitter = LangchainHTMLHeaderTextSplitter( chunk_size=chunk_size, length_function=length_function, - keep_separator=kwargs.get("keep_separator", True), ) def split_text(self, text: str) -> list[str]: @@ -82,13 +67,11 @@ class RecursiveCharacterTextSplitter(TextSplitterProtocol): ### Custom splitter length calculator class -Each chat model comes with their own tokenizing logic. You would have to implement -a custom splitter for each model that you want to use if you want to use a more -precise length calculator, e.g. [tiktoken](https://github.com/openai/tiktoken) -for OpenAI models. +You may want to implement a custom length calculator to get a more accurate length estimate for your chosen model. 
+
+The splitter length class must implement the [`TextSplitterLengthCalculatorProtocol`](https://github.com/wagtail/wagtail-ai/blob/main/src/wagtail_ai/types.py).
 
-E.g. a custom calculator for the ChatGPT 3.5 Turbo chat model that uses
-the proper tokenizer.
+For example, using [tiktoken](https://github.com/openai/tiktoken) for OpenAI models:
 
 ```python
 import tiktoken