an example of a cheating rdeep bot

miselico · miselico · commit bb6a6449eecf · 2024-01-25T14:01:28.000+01:00
diff --git a/executables/cli.py b/executables/cli.py
@@ -9,6 +9,7 @@
 from schnapsen.bots import MLDataBot, train_ML_model, MLPlayingBot, RandBot
 
 from schnapsen.bots.example_bot import ExampleBot
+from schnapsen.bots.rdeep_cheater import RdeepCheaterBot
 
 from schnapsen.game import (Bot, GamePlayEngine, Move, PlayerPerspective,
                             SchnapsenGamePlayEngine, TrumpExchange)
@@ -33,10 +34,10 @@ def play_games_and_return_stats(engine: GamePlayEngine, bot1: Bot, bot2: Bot, nu
         if i % 2 == 0:
             # swap bots so both start the same number of times
             lead, follower = follower, lead
-        winner, _, _ = engine.play_game(lead, follower, random.Random(i))
+        winner, _, _ = engine.play_game(lead, follower, random.Random(i + 100))
         if winner == bot1:
             bot1_wins += 1
-        if i % 500 == 0:
+        if i % 10 == 0:
             print(f"Progress: {i}/{number_of_games}")
     return bot1_wins
 
@@ -109,6 +110,18 @@ def rdeep_game() -> None:
             print(f"won {wins} out of {game_number}")
 
 
+@main.command()
+def rdeep_cheater() -> None:
+    """Play the cheating rdeep bot against a random bot and print how often it wins"""
+    engine = SchnapsenGamePlayEngine()
+    # For a fair-play baseline, replace bot1 with: RdeepBot(10, 5, random.Random(4535), "fairplay")
+    # The two numbers are num_samples=10 and depth=5.
+    bot1: Bot = RdeepCheaterBot(10, 5, random.Random(4556), "cheater")
+    bot2 = RandBot(random.Random(678473), "rand")
+    number_of_games: int = 1000
+
+    bot1_wins = play_games_and_return_stats(engine=engine, bot1=bot1, bot2=bot2, number_of_games=number_of_games)
+    print(f"{bot1} wins {bot1_wins} times out of {number_of_games} games played.")
+
+
 @main.group()
 def ml() -> None:
     """Commands for the ML bot"""
diff --git a/src/schnapsen/bots/__init__.py b/src/schnapsen/bots/__init__.py
@@ -7,5 +7,6 @@
 from .ml_bot import MLDataBot, MLPlayingBot, train_ML_model
 from .gui.guibot import SchnapsenServer
 from .minimax import MiniMaxBot
+from .rdeep_cheater import RdeepCheaterBot
 
-__all__ = ["RandBot", "AlphaBetaBot", "RdeepBot", "MLDataBot", "MLPlayingBot", "train_ML_model", "SchnapsenServer", "MiniMaxBot"]
+__all__ = ["RandBot", "AlphaBetaBot", "RdeepBot", "MLDataBot", "MLPlayingBot", "train_ML_model", "SchnapsenServer", "MiniMaxBot", "RdeepCheaterBot"]
diff --git a/src/schnapsen/bots/rdeep_cheater.py b/src/schnapsen/bots/rdeep_cheater.py
@@ -0,0 +1,96 @@
+from typing import Optional
+from schnapsen.game import Bot, PlayerPerspective, Move, GameState, GamePlayEngine
+import random
+
+from .rand import RandBot
+
+
+class RdeepCheaterBot(Bot):
+    """
+    A cheating variant of the rdeep bot.
+
+    Like rdeep, this bot performs many random rollouts of the game to decide which move to play.
+    Each rollout starts from the state returned by PlayerPerspective.make_cheating_assumption.
+    """
+
+    def __init__(self, num_samples: int, depth: int, rand: random.Random, name: Optional[str] = None) -> None:
+        """
+        Create a new cheating rdeep bot.
+
+        :param num_samples: how many samples to take per move, at least 1
+        :param depth: how deep to sample, i.e., the maximum number of tricks played per rollout, at least 1
+        :param rand: the source of randomness for this Bot
+        :param name: the name of this Bot
+        """
+        super().__init__(name)
+        assert num_samples >= 1, f"we cannot work with less than one sample, got {num_samples}"
+        assert depth >= 1, f"it does not make sense to use a dept <1. got {depth}"
+        self.__num_samples = num_samples
+        self.__depth = depth
+        self.__rand = rand
+
+    def get_move(self, perspective: PlayerPerspective, leader_move: Optional[Move]) -> Move:
+        """
+        Pick the valid move with the highest average rollout score.
+
+        :param perspective: the perspective from which the valid moves and the rollout start states are obtained
+        :param leader_move: the move the leader already played in this trick, or None if this bot is the leader
+        :returns: the move with the highest average score over num_samples rollouts
+        """
+        # get the list of valid moves, and shuffle it such
+        # that we get a random move of the highest scoring
+        # ones if there are multiple highest scoring moves.
+        moves = perspective.valid_moves()
+        self.__rand.shuffle(moves)
+
+        best_score = float('-inf')
+        best_move = None
+        for move in moves:
+            sum_of_scores = 0.0
+            for _ in range(self.__num_samples):
+                # every sample starts from a freshly generated state
+                gamestate = perspective.make_cheating_assumption(leader_move=leader_move, rand=self.__rand)
+                score = self.__evaluate(gamestate, perspective.get_engine(), leader_move, move)
+                sum_of_scores += score
+            average_score = sum_of_scores / self.__num_samples
+            # strictly greater: among equally good moves the first one in the shuffled order wins
+            if average_score > best_score:
+                best_score = average_score
+                best_move = move
+        assert best_move is not None
+        return best_move
+
+    def __evaluate(self, gamestate: GameState, engine: GamePlayEngine, leader_move: Optional[Move], my_move: Move) -> float:
+        """
+        Score a candidate move by playing one random rollout starting from the given state.
+
+        In the rollout this bot is forced to start with `my_move`. If `leader_move` is given, the leader
+        is forced to start with it. All other moves are chosen by RandBot. At most `depth` tricks are played.
+
+        :param gamestate: the state from which the rollout starts
+        :param engine: the engine used to play the rollout
+        :param leader_move: the move already played by the leader, or None if this bot is the leader
+        :param my_move: the candidate move to evaluate
+        :returns: this bot's direct points divided by the direct points of both players after the rollout.
+                The higher the value, the better the move is for this bot. 0.5 if neither player has points.
+        """
+        me: Bot
+        leader_bot: Bot
+        follower_bot: Bot
+
+        # Compare against None explicitly, so the branch does not depend on the truthiness of a Move.
+        if leader_move is not None:
+            # we know what the other bot played
+            leader_bot = FirstFixedMoveThenBaseBot(RandBot(rand=self.__rand), leader_move)
+            # I am the follower
+            me = follower_bot = FirstFixedMoveThenBaseBot(RandBot(rand=self.__rand), my_move)
+        else:
+            # I am the leader bot
+            me = leader_bot = FirstFixedMoveThenBaseBot(RandBot(rand=self.__rand), my_move)
+            # We assume the other bot just random
+            follower_bot = RandBot(self.__rand)
+
+        new_game_state, _ = engine.play_at_most_n_tricks(game_state=gamestate, new_leader=leader_bot, new_follower=follower_bot, n=self.__depth)
+
+        # The roles may have changed during the rollout, so find out on which side this bot ended up.
+        if new_game_state.leader.implementation is me:
+            my_score = new_game_state.leader.score.direct_points
+            opponent_score = new_game_state.follower.score.direct_points
+        else:
+            my_score = new_game_state.follower.score.direct_points
+            opponent_score = new_game_state.leader.score.direct_points
+
+        total_score = my_score + opponent_score
+        if total_score == 0:
+            # Nobody has direct points, so the ratio is undefined.
+            # Rate the move as neutral instead of raising a ZeroDivisionError.
+            return 0.5
+        return my_score / total_score
+
+
+class FirstFixedMoveThenBaseBot(Bot):
+    """
+    A bot which plays one predetermined move first and delegates all later moves to another bot.
+    """
+
+    def __init__(self, base_bot: Bot, first_move: Move) -> None:
+        """
+        Create a new bot with a fixed first move.
+
+        :param base_bot: the bot which chooses every move after the first one
+        :param first_move: the move returned by the first call to get_move
+        """
+        # Initialise the Bot base class as well. This bot has no name of its own, so None is passed,
+        # which is what RdeepCheaterBot passes on when it is created without a name.
+        super().__init__(None)
+        self.first_move = first_move
+        self.first_move_played = False
+        self.base_bot = base_bot
+
+    def get_move(self, perspective: PlayerPerspective, leader_move: Optional[Move]) -> Move:
+        """
+        Return the fixed first move on the first call, and the move chosen by the base bot on every later call.
+
+        :param perspective: passed on to the base bot; not used for the first move
+        :param leader_move: passed on to the base bot; not used for the first move
+        :returns: the move to play
+        """
+        if not self.first_move_played:
+            # The fixed move is returned as is; it is not checked against the valid moves of the perspective.
+            self.first_move_played = True
+            return self.first_move
+        return self.base_bot.get_move(perspective=perspective, leader_move=leader_move)
diff --git a/src/schnapsen/game.py b/src/schnapsen/game.py
@@ -1003,6 +1003,22 @@ def make_assumption(self, leader_move: Optional[Move], rand: Random) -> GameStat
         return full_state
 
 
+    def make_cheating_assumption(self, leader_move: Optional[Move], rand: Random) -> GameState:
+        """
+        Create a game state for rollouts directly from the real game state.
+
+        The real state is copied with both bots replaced by dummy bots. In phase two this copy is
+        returned unchanged. Otherwise only the order of the talon is randomized, except for the
+        last card of the talon (the trump card), which stays in its position.
+
+        :param leader_move: not used by this method
+        :param rand: the source of randomness used to shuffle the talon
+        :returns: the copied state, with a reshuffled talon unless the game is in phase two
+        """
+        state = self.__game_state.copy_with_other_bots(_DummyBot(), _DummyBot())
+        if self.get_phase() == GamePhase.TWO:
+            return state
+
+        cards: list[Card] = state.talon.get_cards()
+        # The trump card is the last card of the talon and has to stay there:
+        # take it out before shuffling and put it back at the end afterwards.
+        trump_card = cards.pop()
+        rand.shuffle(cards)
+        cards.append(trump_card)
+        state.talon = Talon(cards)
+
+        return state
+
+
+
 class _DummyBot(Bot):
     """A bot used by PlayerPerspective.make_assumption to replace the real bots. This bot cannot play and will throw an Exception for everything"""