chore: udpate _clean_name and some methods of DefaultGameMaster with …

…updating langchain and flaky dependencies
hmasdev · Sep 2, 2024 · 2033ba2 · 2033ba2
1 parent 7a441b3
commit 2033ba2
Show file tree

Hide file tree

Showing 5 changed files with 194 additions and 40 deletions.
diff --git a/pyproject.toml b/pyproject.toml
@@ -5,8 +5,8 @@ description = "A CUI-based simple werewolf game with `autogen`"
 requires-python = ">=3.10"
 dependencies = [
     "click>=8.1.7",
-    "langchain==0.1.10",
-    "langchain-openai==0.0.8",
+    "langchain>=0.2.0",
+    "langchain-openai",
     "pyautogen==0.2.16",
     "python-dotenv>=1.0.1",
 ]
@@ -23,6 +23,7 @@ build-backend = "hatchling.build"
 dev = [
     "autopep8>=2.3.1",
     "flake8>=7.1.1",
+    "flaky>=3.8.1",
     "mypy>=1.11.2",
     "pytest>=8.3.2",
     "pytest-cov>=5.0.0",
@@ -33,6 +34,7 @@ dev = [
 dev-dependencies = [
     "autopep8>=2.3.1",
     "flake8>=7.1.1",
+    "flaky>=3.8.1",
     "mypy>=1.11.2",
     "pytest>=8.3.2",
     "pytest-cov>=5.0.0",
@@ -54,5 +56,5 @@ warn_unused_configs = true
 check_untyped_defs = true
 
 [[tool.mypy.overrides]]
-module = ['autogen']
+module = ['autogen', 'flaky']
 ignore_missing_imports = true
diff --git a/tests/game_master/test_default_game_master.py b/tests/game_master/test_default_game_master.py
@@ -1,6 +1,8 @@
 from collections import Counter, namedtuple
 from dataclasses import asdict
+import os
 import autogen
+from flaky import flaky
 from langchain_openai import ChatOpenAI
 import pytest
 from pytest_mock import MockerFixture
@@ -173,7 +175,6 @@ def test_DefaultGameMaster__clean_name(
     # assert
     assert actual == expected
     create_chat_openai_model_mock.assert_called_once()
-    llm_mock.invoke.assert_called_once()
 
 
 @pytest.mark.parametrize(
@@ -186,7 +187,7 @@ def test_DefaultGameMaster__clean_name_all_fail(
     game_config: GameConfig,
 ) -> None:
     # init
-    input_name = ' Player0 '
+    input_name = ''
     expected = 'None'
     llm_output = 'dummy'
     llm_mock = mocker.MagicMock(spec=ChatOpenAI)
@@ -202,12 +203,10 @@ def test_DefaultGameMaster__clean_name_all_fail(
         groupchat=autogen.GroupChat(agents=[], messages=[]),
         llm_config=False,
     )
-    _clean_name_spy = mocker.spy(master, '_clean_name')
     # execute
     actual = master._clean_name(input_name, max_retry=max_retry)
     # assert
     assert actual == expected
-    assert len(_clean_name_spy.call_args_list) == max_retry + 1
 
 
 @pytest.mark.parametrize(
@@ -240,12 +239,124 @@ def test_DefaultGameMaster__clean_name_n_fails(
         groupchat=autogen.GroupChat(agents=[], messages=[]),
         llm_config=False,
     )
-    _clean_name_spy = mocker.spy(master, '_clean_name')
     # execute
     actual = master._clean_name(input_name)
     # assert
     assert actual == expected
-    assert len(_clean_name_spy.call_args_list) == n_fails + 1
+
+
+@pytest.mark.integration
+@pytest.mark.skipif(
+    os.getenv("OPENAI_API_KEY") is None,
+    reason="OPENAI_API is not set.",
+)
+@pytest.mark.parametrize(
+    'input_name,expected',
+    [
+        ('In my opinion, Player1 is a werewolf.', 'Player1'),  # noqa
+        ('I do not have any idea to who should be excluded.', 'None'),  # noqa
+        ('In my opinion, who should be excluded is Player2.', 'Player2'),  # noqa
+    ],
+)
+@flaky(max_runs=5, min_passes=2)
+def test_DefaultGameMaster__clean_name_for_simple_input_integration(
+    input_name: str,
+    expected: str,
+    game_config: GameConfig,
+) -> None:
+    # init
+    master = DefaultGameMaster(
+        name='master',
+        game_config=game_config,
+        groupchat=autogen.GroupChat(agents=[], messages=[]),
+        llm_config=False,
+    )
+    # execute
+    actual = master._clean_name(input_name)
+    # assert
+    assert actual == expected
+
+
+@pytest.mark.integration
+@pytest.mark.skipif(
+    os.getenv("OPENAI_API_KEY") is None,
+    reason="OPENAI_API is not set.",
+)
+@pytest.mark.parametrize(
+    'input_name,expected',
+    [
+        (
+            '''
+My name is 3. Based on the discussion so far, here is my reasoning step by step:
+1. ** Player0 ** has initiated the conversation and encouraged others to share their thoughts. This indicates they're actively trying to collaborate and identify the Werewolf, which is a positive sign for the villagers.
+2. ** Player1 ** has disclosed their role as the Fortune Teller, which could be honest or a strategy by a Werewolf to gain the villagers' trust. However, they have also pushed for open discussion, which is beneficial for us.
+3. ** Player2 ** raised concerns about Player1's cautiousness and their role. This can indicate suspicion but could also be an attempt by a Werewolf to cast doubt on a known villager. Player2 is also calling for open communication and information sharing.
+4. ** Player3 ** has yet to provide their thoughts in this discussion. Lack of engagement can sometimes be viewed as a sign of a hidden agenda, but I want to evaluate the entirety of their input.
+Based on the current observations and discussions, Player1 may be too defensively protective of their role, which could be an indicator of attempting to build trust while potentially being strategic. However, their role as the Fortune Teller is critical for the villagers.
+Considering all these aspects, **Player1 should be excluded ** as a precaution, but with the understanding that there’s a slight risk that they are indeed the Fortune Teller genuinely trying to help us.
+In conclusion: **Player1 should be excluded.**
+''',  # noqa
+            'Player1',
+        ),
+        (
+            '''
+My name is 2. In considering the roles and the current discussion, here’s my reasoning step by step:
+1. **Player0 (Villager)** - Player0 seems to be encouraging open discussion and aims to gather insights. While they could lead the villagers, they haven’t taken a strong stance against any specific player yet.
+2. **Player1 (Fortune Teller)** - Player1 has openly revealed their role and appears to be protective over their findings. Their ability to check roles at night poses a significant threat to the Werewolves if they correctly identify me or my teammate.
+3. **Player2 (me, Werewolf)** - I need to remain undercover and contribute to the discussion while deflecting suspicion away from myself and my teammate.
+4. **Player3 (Knight)** - Player3 is also contributing positively to the discussion and has been questioning Player1's motivations, suggesting a cautious approach to their role.
+Taking all this into account, the most strategic exclusion to benefit the Werewolves is to vote out Player1 (the Fortune Teller). If we can eliminate Player1, we reduce the risk of them revealing my identity as a Werewolf during the night.
+Therefore, **Player1 should be excluded.**
+''',  # noqa
+            'Player1',
+        ),
+        (
+            '''
+My name is "1." After considering the discussions and the points raised by each player, I will assess the situation based on the following reasoning:
+1. **Player0** has encouraged open discussion and emphasized collaboration, which aligns with the goal of villagers working together. Their approach appears constructive.
+2. **Player1 (myself)** has shared insights, emphasized observations, and clarified the importance of transparency. As the Fortune Teller, my intent is to gather information for the team's benefit.
+3. **Player2** has pointed out the potential for manipulation by those who steer conversations too strategically. They raised a concern regarding Player1’s cautious approach in a very analytical way, which could be viewed suspiciously.
+4. **Player3** has also echoed concerns about the strategic behavior of players while promoting open discussion. They raised valid points; however, they haven't shown outright suspicion toward anyone specifically, which could indicate a defensive stance.
+Given these points, the discussion has led me to conclude that:
+**Player2 should be excluded.** While they contributed thoughtfully, they were quick to cast doubt on Player1 and presented a cautious view that may serve to deflect attention from their own role. This could be a strategic move if Player2 happens to be the werewolf, trying to undermine trust in the Fortune Teller. 
+Hence, I strongly believe that excluding Player2 will be beneficial for the Villagers in our effort to differentiate between allies and potential threats.
+''',  # noqa
+            'Player2',
+        ),
+        (
+            '''
+Hello, I am 0.
+After analyzing the discussions and statements made by each player:
+1. **Player1**: They revealed they are the Fortune Teller. This transparency can be beneficial, but we have to be cautious as it could also be a strategy to gain trust while hiding any ulterior motives.
+2. **Player2**: Their arguments about being cautious of those who steer conversation strategically and their focus on gathering information were strong. They seemed observant and participative.
+3. **Player3**: Similar to Player2, Player3 engaged well and has made valid points about the discussion dynamics. They are not drawing too much attention to themselves, which could indicate a neutral position.
+Based on the observations, Player1’s openness about their role may be a potential tactic. However, since Player1 is confirmed to be on our side (Fortune Teller), we should not exclude them.
+Considering that Player1 is suspecting Player2's motivations, while Player2 is fairly active, I find it compelling to pay more attention to Player3 for their initial observations paired with their caution. Although they are participating, their points may subtly deflect from Player1 or Player2's scrutiny.
+Therefore, I propose that we consider:
+**Player3 should be excluded** as they might be using a more cautious approach to blend in while avoiding drawing suspicion directly. This can indicate a potential disguise from the werewolf. 
+So, Player3 should be excluded.
+''',  # noqa
+            'Player3',
+        ),
+    ],
+)
+@flaky(max_runs=5, min_passes=2)
+def test_DefaultGameMaster__clean_name_for_more_complicated_integration(
+    input_name: str,
+    expected: str,
+    game_config: GameConfig,
+) -> None:
+    # init
+    master = DefaultGameMaster(
+        name='master',
+        game_config=game_config,
+        groupchat=autogen.GroupChat(agents=[], messages=[]),
+        llm_config=False,
+    )
+    # execute
+    actual = master._clean_name(input_name)
+    # assert
+    assert actual == expected
 
 
 @pytest.mark.parametrize(
@@ -300,12 +411,7 @@ def test_DefaultGameMaster_ask_to_vote_without_last_message_content(
     actual = master.ask_to_vote(player)
     # assert
     assert actual == expected
-    assert _clean_name_spy.call_args_list == [
-        mocker.call(''),
-        # NOTE: the following call depends on the number of max_retry
-        mocker.call(llm_output, llm=llm_mock, max_retry=1),
-        mocker.call(llm_output, llm=llm_mock, max_retry=0),
-    ]
+    assert _clean_name_spy.call_args_list == [mocker.call('')]
 
 
 @pytest.mark.parametrize(
@@ -451,6 +557,7 @@ def test_DefaultGameMaster_exclude_players_following_votes_with_target_safe(  #
     # execute
     actual = master.exclude_players_following_votes({p.name: p.name for p in master.alive_players})  # noqa
     # assert
+    assert isinstance(actual, str)
     assert 'No one is excluded' in actual
 
 
@@ -467,6 +574,7 @@ def test_DefaultGameMaster_exclude_players_following_votes_with_invalid_targets(
     # execute
     actual = master.exclude_players_following_votes({p.name: 'invalid' for p in master.alive_players})  # noqa
     # assert
+    assert isinstance(actual, str)
     assert 'No one is excluded' in actual
 
 
@@ -488,6 +596,7 @@ def test_DefaultGameMaster_exclude_players_following_votes_with_1candidate_succe
     # execute
     actual = master.exclude_players_following_votes({p.name: target for p in master.alive_players})  # noqa
     # assert
+    assert isinstance(actual, str)
     assert f'{target} is excluded from the game.' in actual
 
 
@@ -521,6 +630,7 @@ def test_DefaultGameMaster_exclude_players_following_votes_with_2_candidates_suc
     # execute
     actual = master.exclude_players_following_votes(votes)  # noqa
     # assert
+    assert isinstance(actual, str)
     assert f'{target0 if target0_choiced else target1} is excluded from the game.' in actual  # noqa
 
 

diff --git a/werewolf/base.py b/werewolf/base.py
@@ -62,7 +62,7 @@ def nighttime_action(self) -> dict[str, WhoToVote]:
         raise NotImplementedError()
 
     @abstractmethod
-    def exclude_players_following_votes(self, votes: dict[str, str], announce_votes: bool = True) -> str:   # noqa
+    def exclude_players_following_votes(self, votes: dict[str, str], announce_votes: bool = True) -> str | None:   # noqa
         raise NotImplementedError()
 
     @abstractmethod

diff --git a/werewolf/game.py b/werewolf/game.py
@@ -79,7 +79,7 @@ def game(
         excluded_result = master.exclude_players_following_votes(votes)
         click.echo('\n'.join([
             '============================== Excluded result ==============================',  # noqa
-            excluded_result,
+            str(excluded_result),
             '=============================================================================',  # noqa
             json.dumps(votes),
             '=============================================================================',  # noqa
@@ -106,7 +106,7 @@ def game(
         excluded_result = master.exclude_players_following_votes(votes, announce_votes=False)  # noqa
         click.echo('\n'.join([
             '============================== Excluded result ==============================',  # noqa
-            excluded_result,
+            str(excluded_result),
             '=============================================================================',  # noqa
         ]))
         # check victory condition

diff --git a/werewolf/game_master/default_game_master.py b/werewolf/game_master/default_game_master.py
@@ -1,10 +1,20 @@
+from enum import Enum
 from functools import partial
 import json
 import logging
+from operator import attrgetter
 from typing import Iterable
 import random
+import re
 
 import autogen
+from langchain.output_parsers import (
+    EnumOutputParser,
+    RetryWithErrorOutputParser,
+)
+from langchain_core.prompt_values import StringPromptValue
+from langchain_core.runnables import Runnable, RunnableLambda
+from langchain.output_parsers.retry import NAIVE_RETRY_WITH_ERROR_PROMPT
 from langchain_openai import ChatOpenAI
 
 from ..alias import WhoToVote
@@ -176,27 +186,47 @@ def add_temporal_safe_player(self, name: str):
     def reset_temporal_safe_players(self):
         self._temporal_safe_players = []
 
-    def _clean_name(self, name: str, llm: ChatOpenAI | str | None = None, max_retry: int = 2) -> str:  # noqa
-        llm = create_chat_openai_model(llm)
-        name = llm.invoke('\n'.join([
-            'Extract the valid name of player which should be excluded from the game.',  # noqa
-            'Search the name in the following sentences:',
+    def _clean_name(self, name: str, llm: ChatOpenAI | str | None = None, max_retry: int = 5) -> str:  # noqa
+        logging.debug(f'Clean name: {name}')
+        base_llm_chain: Runnable[str, str] = (
+            create_chat_openai_model(llm)
+            | RunnableLambda(attrgetter('content'))
+        )
+        chain = RetryWithErrorOutputParser.from_llm(
+            parser=EnumOutputParser(enum=Enum(
+                'Players',
+                {agent.name: agent.name for agent in self.alive_players} | {'Nobody': 'None'},  # noqa
+            )),  # type: ignore # noqa
+            llm=base_llm_chain,  # type: ignore
+            prompt=NAIVE_RETRY_WITH_ERROR_PROMPT,
+            max_retries=max_retry,
+        )
+        prompt = '\n'.join([
+            'This is a werewolf game, where any player want to exclude another from the game to win the game.',  # noqa
+            'For example, a more suspicious player is more likely to be excluded from the game.',  # noqa
+            'You are the best at consolidating opinions and drawing conclusions.',  # noqa
+            'Extract the valid name of player which should be excluded from the game, from what a player has said.',  # noqa
+            'Here is the content of what a player has said:',
             '```text',
             name,
             '```',
-            'Note that the valid name of player is one of the following:'
-            '\n'.join([f'  - {agent.name}' for agent in self.alive_players]),  # noqa
-            'You must select the name from the above list.'
-            '\n',
-            '>>> Sure! The valid name of player which should be excluded from the game is '  # noqa
-        ])).content  # type: ignore
-        name = name.replace('.', '').replace(',', '').replace(':', '').replace(';', '').replace('"', '').replace("'", '').replace('`', '').strip().split(' ')[0]  # noqa
-        if name not in [agent.name for agent in self.alive_players]:
-            if max_retry == 0:
-                logging.warning(f'Failed to extract a name from {name}')
-                return 'None'
-            return self._clean_name(name, llm=llm, max_retry=max_retry-1)
-        return name
+            'Who do you think should be excluded from the game?',
+        ])
+        try:
+            cleaned_name: str = chain.parse_with_prompt(
+                completion=prompt,
+                prompt_value=StringPromptValue(text=prompt),
+            ).value
+            logging.debug(f'Cleaned name: {cleaned_name}')
+            return cleaned_name
+        except Exception as e:
+            if mtch := re.findall(rf'{PREFIX_PLAYER_NAME}\d+', name):
+                return mtch[-1]  # type: ignore
+            if self.is_silent_game:
+                logging.debug(f'Failed to extract a name from {name}', exc_info=e)  # noqa
+            else:
+                logging.warning(f'Failed to extract a name from {name}', exc_info=e)  # noqa
+            return 'None'
 
     def ask_to_vote(
         self,
@@ -209,10 +239,11 @@ def ask_to_vote(
         with just1turn(self):
             self.send('\n'.join([
                     prompt,
-                    f'Please select the name of the player from the following list in order to help {player.side} to win the game.',  # noqa
+                    f'Who should be excluded from the game in order to help {player.side} to win the game.'  # noqa
+                    f'Please select the name of the player from the following list:',  # noqa
                     '\n'.join([f'- {name}' for name in alive_player_names]),  # noqa
                     'Think about who is in which role, reasoning step by step.',  # noqa
-                    'The conclusion should be output explicitly.',
+                    'The conclusion must be simple and explicit like "PlayerX should be excluded.".',  # noqa
                 ]),
                 player,
                 silent=silent,
@@ -330,12 +361,23 @@ def nighttime_action(self) -> dict[str, WhoToVote]:
             if (ngt_vote := p.act_in_night(self))
         }
 
-    def exclude_players_following_votes(self, votes: dict[str, str], announce_votes: bool = True) -> str:   # noqa
+    def exclude_players_following_votes(
+        self,
+        votes: dict[str, str],
+        announce_votes: bool = True,
+    ) -> str | None:   # noqa
         self._logger.debug('Exclude players following votes')
         # count votes
         count: dict[str, int] = {}
         for vote in votes.values():
-            count[vote] = count.get(vote, 0) + 1
+            # FIXME: the following line is a workaround to prevent the non-player is voted  # noqa
+            if vote in {p.name for p in self.alive_players}:
+                count[vote] = count.get(vote, 0) + 1
+        if not count:
+            return '\n'.join([
+                'No one is excluded.',
+                'Think about what has happened and what you should do in the next action.',  # noqa
+            ])
         max_n_votes = max(count.values())
         # extract
         candidates = [name for name, n_votes in count.items() if n_votes == max_n_votes]  # noqa