Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: ChatGPTGenerator #5692

Closed
wants to merge 34 commits into from
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
0fc2bac
add generators module
ZanSara Aug 30, 2023
7f6325c
add tests for module helper
ZanSara Aug 30, 2023
47b6799
add chatgpt generator
ZanSara Aug 30, 2023
4e8fcb3
add init and serialization tests
ZanSara Aug 30, 2023
cbf7701
test component
ZanSara Aug 30, 2023
419f615
reno
ZanSara Aug 30, 2023
49ff654
Merge branch 'main' into generators-module
ZanSara Aug 30, 2023
4edeb8e
Merge branch 'generators-module' into chatgpt-generator
ZanSara Aug 30, 2023
08e9c62
reno
ZanSara Aug 30, 2023
a984e67
more tests
ZanSara Aug 30, 2023
612876a
add another test
ZanSara Aug 31, 2023
ec8e14a
Merge branch 'generators-module' of github.com:deepset-ai/haystack in…
ZanSara Aug 31, 2023
366b0ff
Merge branch 'generators-module' into chatgpt-generator
ZanSara Aug 31, 2023
e9c3de7
chat token limit
ZanSara Aug 31, 2023
725fabe
move into openai
ZanSara Aug 31, 2023
4d4f9d4
Merge branch 'generators-module' into chatgpt-generator
ZanSara Aug 31, 2023
c3bef8f
fix test
ZanSara Aug 31, 2023
c1a7696
improve tests
ZanSara Aug 31, 2023
246ca63
Merge branch 'generators-module' into chatgpt-generator
ZanSara Aug 31, 2023
ec809e4
add e2e test and small fixes
ZanSara Aug 31, 2023
5d946f8
linting
ZanSara Aug 31, 2023
aa9ce33
Add ChatGPTGenerator example
vblagoje Aug 31, 2023
9310057
review feedback
ZanSara Aug 31, 2023
7c36db1
Merge branch 'chatgpt-generator' of github.com:deepset-ai/haystack in…
ZanSara Aug 31, 2023
b2e421d
support for metadata
ZanSara Aug 31, 2023
6d81d79
Merge branch 'main' into chatgpt-generator
ZanSara Aug 31, 2023
2895697
mypy
ZanSara Aug 31, 2023
1538d61
mypy
ZanSara Sep 1, 2023
02cd61f
extract backend from generator and make it accept chats
ZanSara Sep 1, 2023
84332c6
fix tests
ZanSara Sep 1, 2023
329b54d
mypy
ZanSara Sep 4, 2023
5ee2aac
query->complete
ZanSara Sep 4, 2023
429a3ae
mypy
ZanSara Sep 4, 2023
c0b237d
Merge branch 'main' into chatgpt-generator
ZanSara Sep 4, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 16 additions & 31 deletions test/preview/components/generators/openai/test_openai_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,59 +177,44 @@ def test_query_chat_model_stream_fail():


@pytest.mark.unit
def test_enforce_token_limit_above_limit(caplog, mock_tokenizer):
    """An over-long prompt is cut down to the token limit and a truncation warning is logged."""
    truncated = enforce_token_limit("This is a test prompt.", tokenizer=mock_tokenizer, max_tokens_limit=3)
    assert truncated == "This is a"
    expected_warning = (
        "The prompt has been truncated from 5 tokens to 3 tokens to fit within the max token "
        "limit. Reduce the length of the prompt to prevent it from being cut off."
    )
    assert caplog.records[0].message == expected_warning


@pytest.mark.unit
def test_enforce_token_limit_below_limit(caplog, mock_tokenizer):
    """A prompt already within the token limit comes back unchanged and nothing is logged."""
    result = enforce_token_limit("This is a test prompt.", tokenizer=mock_tokenizer, max_tokens_limit=100)
    assert result == "This is a test prompt."
    assert len(caplog.records) == 0


@pytest.mark.unit
def test_enforce_token_limit_chat_above_limit(caplog, mock_tokenizer):
    """Chat prompts exceeding the limit are truncated (last message shortened) and a warning is logged."""
    truncated = enforce_token_limit_chat(
        ["System Prompt", "This is a test prompt."],
        tokenizer=mock_tokenizer,
        max_tokens_limit=7,
        tokens_per_message_overhead=2,
    )
    assert truncated == ["System Prompt", "This is a"]
    expected_warning = (
        "The prompts have been truncated from 11 tokens to 7 tokens to fit within the max token limit. "
        "Reduce the length of the prompt to prevent it from being cut off."
    )
    assert caplog.records[0].message == expected_warning


@pytest.mark.unit
def test_enforce_token_limit_chat_below_limit(caplog, mock_tokenizer):
    """Chat prompts within the limit are returned unchanged and no warning is logged."""
    result = enforce_token_limit_chat(
        ["System Prompt", "This is a test prompt."],
        tokenizer=mock_tokenizer,
        max_tokens_limit=100,
        tokens_per_message_overhead=2,
    )
    assert result == ["System Prompt", "This is a test prompt."]
    assert len(caplog.records) == 0
13 changes: 13 additions & 0 deletions test/preview/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from unittest.mock import Mock
import pytest


@pytest.fixture()
def mock_tokenizer():
    """A stand-in tokenizer whose "tokens" are whitespace-separated words.

    encode() splits the text on spaces; decode() joins tokens back with
    single spaces, so round-tripping a simple sentence is lossless.
    """
    fake = Mock()
    fake.encode = lambda text: text.split()
    fake.decode = lambda tokens: " ".join(tokens)
    return fake