Skip to content

Commit

Permalink
chore: udpate _clean_name and some methods of DefaultGameMaster with …
Browse files Browse the repository at this point in the history
…updating langchain and flaky dependencies
  • Loading branch information
hmasdev committed Sep 2, 2024
1 parent 7a441b3 commit 2033ba2
Show file tree
Hide file tree
Showing 5 changed files with 194 additions and 40 deletions.
8 changes: 5 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ description = "A CUI-based simple werewolf game with `autogen`"
requires-python = ">=3.10"
dependencies = [
"click>=8.1.7",
"langchain==0.1.10",
"langchain-openai==0.0.8",
"langchain>=0.2.0",
"langchain-openai",
"pyautogen==0.2.16",
"python-dotenv>=1.0.1",
]
Expand All @@ -23,6 +23,7 @@ build-backend = "hatchling.build"
dev = [
"autopep8>=2.3.1",
"flake8>=7.1.1",
"flaky>=3.8.1",
"mypy>=1.11.2",
"pytest>=8.3.2",
"pytest-cov>=5.0.0",
Expand All @@ -33,6 +34,7 @@ dev = [
dev-dependencies = [
"autopep8>=2.3.1",
"flake8>=7.1.1",
"flaky>=3.8.1",
"mypy>=1.11.2",
"pytest>=8.3.2",
"pytest-cov>=5.0.0",
Expand All @@ -54,5 +56,5 @@ warn_unused_configs = true
check_untyped_defs = true

[[tool.mypy.overrides]]
module = ['autogen']
module = ['autogen', 'flaky']
ignore_missing_imports = true
134 changes: 122 additions & 12 deletions tests/game_master/test_default_game_master.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from collections import Counter, namedtuple
from dataclasses import asdict
import os
import autogen
from flaky import flaky
from langchain_openai import ChatOpenAI
import pytest
from pytest_mock import MockerFixture
Expand Down Expand Up @@ -173,7 +175,6 @@ def test_DefaultGameMaster__clean_name(
# assert
assert actual == expected
create_chat_openai_model_mock.assert_called_once()
llm_mock.invoke.assert_called_once()


@pytest.mark.parametrize(
Expand All @@ -186,7 +187,7 @@ def test_DefaultGameMaster__clean_name_all_fail(
game_config: GameConfig,
) -> None:
# init
input_name = ' Player0 '
input_name = ''
expected = 'None'
llm_output = 'dummy'
llm_mock = mocker.MagicMock(spec=ChatOpenAI)
Expand All @@ -202,12 +203,10 @@ def test_DefaultGameMaster__clean_name_all_fail(
groupchat=autogen.GroupChat(agents=[], messages=[]),
llm_config=False,
)
_clean_name_spy = mocker.spy(master, '_clean_name')
# execute
actual = master._clean_name(input_name, max_retry=max_retry)
# assert
assert actual == expected
assert len(_clean_name_spy.call_args_list) == max_retry + 1


@pytest.mark.parametrize(
Expand Down Expand Up @@ -240,12 +239,124 @@ def test_DefaultGameMaster__clean_name_n_fails(
groupchat=autogen.GroupChat(agents=[], messages=[]),
llm_config=False,
)
_clean_name_spy = mocker.spy(master, '_clean_name')
# execute
actual = master._clean_name(input_name)
# assert
assert actual == expected
assert len(_clean_name_spy.call_args_list) == n_fails + 1


@pytest.mark.integration
@pytest.mark.skipif(
os.getenv("OPENAI_API_KEY") is None,
reason="OPENAI_API is not set.",
)
@pytest.mark.parametrize(
'input_name,expected',
[
('In my opinion, Player1 is a werewolf.', 'Player1'), # noqa
('I do not have any idea to who should be excluded.', 'None'), # noqa
('In my opinion, who should be excluded is Player2.', 'Player2'), # noqa
],
)
@flaky(max_runs=5, min_passes=2)
def test_DefaultGameMaster__clean_name_for_simple_input_integration(
input_name: str,
expected: str,
game_config: GameConfig,
) -> None:
# init
master = DefaultGameMaster(
name='master',
game_config=game_config,
groupchat=autogen.GroupChat(agents=[], messages=[]),
llm_config=False,
)
# execute
actual = master._clean_name(input_name)
# assert
assert actual == expected


@pytest.mark.integration
@pytest.mark.skipif(
os.getenv("OPENAI_API_KEY") is None,
reason="OPENAI_API is not set.",
)
@pytest.mark.parametrize(
'input_name,expected',
[
(
'''
My name is 3. Based on the discussion so far, here is my reasoning step by step:
1. ** Player0 ** has initiated the conversation and encouraged others to share their thoughts. This indicates they're actively trying to collaborate and identify the Werewolf, which is a positive sign for the villagers.
2. ** Player1 ** has disclosed their role as the Fortune Teller, which could be honest or a strategy by a Werewolf to gain the villagers' trust. However, they have also pushed for open discussion, which is beneficial for us.
3. ** Player2 ** raised concerns about Player1's cautiousness and their role. This can indicate suspicion but could also be an attempt by a Werewolf to cast doubt on a known villager. Player2 is also calling for open communication and information sharing.
4. ** Player3 ** has yet to provide their thoughts in this discussion. Lack of engagement can sometimes be viewed as a sign of a hidden agenda, but I want to evaluate the entirety of their input.
Based on the current observations and discussions, Player1 may be too defensively protective of their role, which could be an indicator of attempting to build trust while potentially being strategic. However, their role as the Fortune Teller is critical for the villagers.
Considering all these aspects, **Player1 should be excluded ** as a precaution, but with the understanding that there’s a slight risk that they are indeed the Fortune Teller genuinely trying to help us.
In conclusion: **Player1 should be excluded.**
''', # noqa
'Player1',
),
(
'''
My name is 2. In considering the roles and the current discussion, here’s my reasoning step by step:
1. **Player0 (Villager)** - Player0 seems to be encouraging open discussion and aims to gather insights. While they could lead the villagers, they haven’t taken a strong stance against any specific player yet.
2. **Player1 (Fortune Teller)** - Player1 has openly revealed their role and appears to be protective over their findings. Their ability to check roles at night poses a significant threat to the Werewolves if they correctly identify me or my teammate.
3. **Player2 (me, Werewolf)** - I need to remain undercover and contribute to the discussion while deflecting suspicion away from myself and my teammate.
4. **Player3 (Knight)** - Player3 is also contributing positively to the discussion and has been questioning Player1's motivations, suggesting a cautious approach to their role.
Taking all this into account, the most strategic exclusion to benefit the Werewolves is to vote out Player1 (the Fortune Teller). If we can eliminate Player1, we reduce the risk of them revealing my identity as a Werewolf during the night.
Therefore, **Player1 should be excluded.**
''', # noqa
'Player1',
),
(
'''
My name is "1." After considering the discussions and the points raised by each player, I will assess the situation based on the following reasoning:
1. **Player0** has encouraged open discussion and emphasized collaboration, which aligns with the goal of villagers working together. Their approach appears constructive.
2. **Player1 (myself)** has shared insights, emphasized observations, and clarified the importance of transparency. As the Fortune Teller, my intent is to gather information for the team's benefit.
3. **Player2** has pointed out the potential for manipulation by those who steer conversations too strategically. They raised a concern regarding Player1’s cautious approach in a very analytical way, which could be viewed suspiciously.
4. **Player3** has also echoed concerns about the strategic behavior of players while promoting open discussion. They raised valid points; however, they haven't shown outright suspicion toward anyone specifically, which could indicate a defensive stance.
Given these points, the discussion has led me to conclude that:
**Player2 should be excluded.** While they contributed thoughtfully, they were quick to cast doubt on Player1 and presented a cautious view that may serve to deflect attention from their own role. This could be a strategic move if Player2 happens to be the werewolf, trying to undermine trust in the Fortune Teller.
Hence, I strongly believe that excluding Player2 will be beneficial for the Villagers in our effort to differentiate between allies and potential threats.
''', # noqa
'Player2',
),
(
'''
Hello, I am 0.
After analyzing the discussions and statements made by each player:
1. **Player1**: They revealed they are the Fortune Teller. This transparency can be beneficial, but we have to be cautious as it could also be a strategy to gain trust while hiding any ulterior motives.
2. **Player2**: Their arguments about being cautious of those who steer conversation strategically and their focus on gathering information were strong. They seemed observant and participative.
3. **Player3**: Similar to Player2, Player3 engaged well and has made valid points about the discussion dynamics. They are not drawing too much attention to themselves, which could indicate a neutral position.
Based on the observations, Player1’s openness about their role may be a potential tactic. However, since Player1 is confirmed to be on our side (Fortune Teller), we should not exclude them.
Considering that Player1 is suspecting Player2's motivations, while Player2 is fairly active, I find it compelling to pay more attention to Player3 for their initial observations paired with their caution. Although they are participating, their points may subtly deflect from Player1 or Player2's scrutiny.
Therefore, I propose that we consider:
**Player3 should be excluded** as they might be using a more cautious approach to blend in while avoiding drawing suspicion directly. This can indicate a potential disguise from the werewolf.
So, Player3 should be excluded.
''', # noqa
'Player3',
),
],
)
@flaky(max_runs=5, min_passes=2)
def test_DefaultGameMaster__clean_name_for_more_complicated_integration(
input_name: str,
expected: str,
game_config: GameConfig,
) -> None:
# init
master = DefaultGameMaster(
name='master',
game_config=game_config,
groupchat=autogen.GroupChat(agents=[], messages=[]),
llm_config=False,
)
# execute
actual = master._clean_name(input_name)
# assert
assert actual == expected


@pytest.mark.parametrize(
Expand Down Expand Up @@ -300,12 +411,7 @@ def test_DefaultGameMaster_ask_to_vote_without_last_message_content(
actual = master.ask_to_vote(player)
# assert
assert actual == expected
assert _clean_name_spy.call_args_list == [
mocker.call(''),
# NOTE: the following call depends on the number of max_retry
mocker.call(llm_output, llm=llm_mock, max_retry=1),
mocker.call(llm_output, llm=llm_mock, max_retry=0),
]
assert _clean_name_spy.call_args_list == [mocker.call('')]


@pytest.mark.parametrize(
Expand Down Expand Up @@ -451,6 +557,7 @@ def test_DefaultGameMaster_exclude_players_following_votes_with_target_safe( #
# execute
actual = master.exclude_players_following_votes({p.name: p.name for p in master.alive_players}) # noqa
# assert
assert isinstance(actual, str)
assert 'No one is excluded' in actual


Expand All @@ -467,6 +574,7 @@ def test_DefaultGameMaster_exclude_players_following_votes_with_invalid_targets(
# execute
actual = master.exclude_players_following_votes({p.name: 'invalid' for p in master.alive_players}) # noqa
# assert
assert isinstance(actual, str)
assert 'No one is excluded' in actual


Expand All @@ -488,6 +596,7 @@ def test_DefaultGameMaster_exclude_players_following_votes_with_1candidate_succe
# execute
actual = master.exclude_players_following_votes({p.name: target for p in master.alive_players}) # noqa
# assert
assert isinstance(actual, str)
assert f'{target} is excluded from the game.' in actual


Expand Down Expand Up @@ -521,6 +630,7 @@ def test_DefaultGameMaster_exclude_players_following_votes_with_2_candidates_suc
# execute
actual = master.exclude_players_following_votes(votes) # noqa
# assert
assert isinstance(actual, str)
assert f'{target0 if target0_choiced else target1} is excluded from the game.' in actual # noqa


Expand Down
2 changes: 1 addition & 1 deletion werewolf/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def nighttime_action(self) -> dict[str, WhoToVote]:
raise NotImplementedError()

@abstractmethod
def exclude_players_following_votes(self, votes: dict[str, str], announce_votes: bool = True) -> str: # noqa
def exclude_players_following_votes(self, votes: dict[str, str], announce_votes: bool = True) -> str | None: # noqa
raise NotImplementedError()

@abstractmethod
Expand Down
4 changes: 2 additions & 2 deletions werewolf/game.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ def game(
excluded_result = master.exclude_players_following_votes(votes)
click.echo('\n'.join([
'============================== Excluded result ==============================', # noqa
excluded_result,
str(excluded_result),
'=============================================================================', # noqa
json.dumps(votes),
'=============================================================================', # noqa
Expand All @@ -106,7 +106,7 @@ def game(
excluded_result = master.exclude_players_following_votes(votes, announce_votes=False) # noqa
click.echo('\n'.join([
'============================== Excluded result ==============================', # noqa
excluded_result,
str(excluded_result),
'=============================================================================', # noqa
]))
# check victory condition
Expand Down
86 changes: 64 additions & 22 deletions werewolf/game_master/default_game_master.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,20 @@
from enum import Enum
from functools import partial
import json
import logging
from operator import attrgetter
from typing import Iterable
import random
import re

import autogen
from langchain.output_parsers import (
EnumOutputParser,
RetryWithErrorOutputParser,
)
from langchain_core.prompt_values import StringPromptValue
from langchain_core.runnables import Runnable, RunnableLambda
from langchain.output_parsers.retry import NAIVE_RETRY_WITH_ERROR_PROMPT
from langchain_openai import ChatOpenAI

from ..alias import WhoToVote
Expand Down Expand Up @@ -176,27 +186,47 @@ def add_temporal_safe_player(self, name: str):
def reset_temporal_safe_players(self):
self._temporal_safe_players = []

def _clean_name(self, name: str, llm: ChatOpenAI | str | None = None, max_retry: int = 2) -> str: # noqa
llm = create_chat_openai_model(llm)
name = llm.invoke('\n'.join([
'Extract the valid name of player which should be excluded from the game.', # noqa
'Search the name in the following sentences:',
def _clean_name(self, name: str, llm: ChatOpenAI | str | None = None, max_retry: int = 5) -> str: # noqa
logging.debug(f'Clean name: {name}')
base_llm_chain: Runnable[str, str] = (
create_chat_openai_model(llm)
| RunnableLambda(attrgetter('content'))
)
chain = RetryWithErrorOutputParser.from_llm(
parser=EnumOutputParser(enum=Enum(
'Players',
{agent.name: agent.name for agent in self.alive_players} | {'Nobody': 'None'}, # noqa
)), # type: ignore # noqa
llm=base_llm_chain, # type: ignore
prompt=NAIVE_RETRY_WITH_ERROR_PROMPT,
max_retries=max_retry,
)
prompt = '\n'.join([
'This is a werewolf game, where any player want to exclude another from the game to win the game.', # noqa
'For example, a more suspicious player is more likely to be excluded from the game.', # noqa
'You are the best at consolidating opinions and drawing conclusions.', # noqa
'Extract the valid name of player which should be excluded from the game, from what a player has said.', # noqa
'Here is the content of what a player has said:',
'```text',
name,
'```',
'Note that the valid name of player is one of the following:'
'\n'.join([f' - {agent.name}' for agent in self.alive_players]), # noqa
'You must select the name from the above list.'
'\n',
'>>> Sure! The valid name of player which should be excluded from the game is ' # noqa
])).content # type: ignore
name = name.replace('.', '').replace(',', '').replace(':', '').replace(';', '').replace('"', '').replace("'", '').replace('`', '').strip().split(' ')[0] # noqa
if name not in [agent.name for agent in self.alive_players]:
if max_retry == 0:
logging.warning(f'Failed to extract a name from {name}')
return 'None'
return self._clean_name(name, llm=llm, max_retry=max_retry-1)
return name
'Who do you think should be excluded from the game?',
])
try:
cleaned_name: str = chain.parse_with_prompt(
completion=prompt,
prompt_value=StringPromptValue(text=prompt),
).value
logging.debug(f'Cleaned name: {cleaned_name}')
return cleaned_name
except Exception as e:
if mtch := re.findall(rf'{PREFIX_PLAYER_NAME}\d+', name):
return mtch[-1] # type: ignore
if self.is_silent_game:
logging.debug(f'Failed to extract a name from {name}', exc_info=e) # noqa
else:
logging.warning(f'Failed to extract a name from {name}', exc_info=e) # noqa
return 'None'

def ask_to_vote(
self,
Expand All @@ -209,10 +239,11 @@ def ask_to_vote(
with just1turn(self):
self.send('\n'.join([
prompt,
f'Please select the name of the player from the following list in order to help {player.side} to win the game.', # noqa
f'Who should be excluded from the game in order to help {player.side} to win the game.' # noqa
f'Please select the name of the player from the following list:', # noqa
'\n'.join([f'- {name}' for name in alive_player_names]), # noqa
'Think about who is in which role, reasoning step by step.', # noqa
'The conclusion should be output explicitly.',
'The conclusion must be simple and explicit like "PlayerX should be excluded.".', # noqa
]),
player,
silent=silent,
Expand Down Expand Up @@ -330,12 +361,23 @@ def nighttime_action(self) -> dict[str, WhoToVote]:
if (ngt_vote := p.act_in_night(self))
}

def exclude_players_following_votes(self, votes: dict[str, str], announce_votes: bool = True) -> str: # noqa
def exclude_players_following_votes(
self,
votes: dict[str, str],
announce_votes: bool = True,
) -> str | None: # noqa
self._logger.debug('Exclude players following votes')
# count votes
count: dict[str, int] = {}
for vote in votes.values():
count[vote] = count.get(vote, 0) + 1
# FIXME: the following line is a workaround to prevent the non-player is voted # noqa
if vote in {p.name for p in self.alive_players}:
count[vote] = count.get(vote, 0) + 1
if not count:
return '\n'.join([
'No one is excluded.',
'Think about what has happened and what you should do in the next action.', # noqa
])
max_n_votes = max(count.values())
# extract
candidates = [name for name, n_votes in count.items() if n_votes == max_n_votes] # noqa
Expand Down

0 comments on commit 2033ba2

Please sign in to comment.