Improved protocol to correctly transfer inf, obs and game ID for las…

…er hockey
martius-lab · Jan 10, 2024 · 148c327 · 148c327
1 parent e96319a
commit 148c327
Show file tree

Hide file tree

Showing 10 changed files with 47 additions and 41 deletions.
diff --git a/pyproject.toml b/pyproject.toml
@@ -15,7 +15,7 @@ requires-python = ">=3.8"
 dependencies = [
     "twisted==23.10.0",
     "numpy==1.26.2",
-    "gymnasium==0.29.1",
+    #"gymnasium==0.29.1",
     "laserhockey @ git+https://[email protected]/martius-lab/laser-hockey-env.git"
 ]
 

diff --git a/teamprojekt_competition_server/client/client_protocol.py b/teamprojekt_competition_server/client/client_protocol.py
@@ -40,7 +40,7 @@ def start_game(self, game_id: int):
             {"ready": boolean}: true if the client is ready to start the game
         """
         print(f"------ Started Game  [Game ID: {game_id}] ------")
-        return {"ready": True}  # dummy ready
+        return {"ready": True}  # dummy, ready to return to queue
 
     StartGame.responder(start_game)
 
@@ -54,7 +54,7 @@ def end_game(self, result, stats):
         Returns:
             {"ready": boolean}: true if the client is ready to start a new game
         """
-        print(f"------ Ended Game [Game ID: {result} | Stats: {stats}] ------")
+        print(f"------ Ended Game [Result: {result} | Stats: {stats}] ------")
         return {"ready": True}  # dummy ready
 
     EndGame.responder(end_game)
@@ -69,7 +69,7 @@ def step(self, obv):
             {"action": int}: action that should be executed
         """
         action = self.agent.step(obv=obv)
-        print(f"Send action: {action}")
+        # print(f"Send action: {action}")
         return {"action": action}
 
     Step.responder(step)

diff --git a/teamprojekt_competition_server/server/game_manager.py b/teamprojekt_competition_server/server/game_manager.py
@@ -45,7 +45,7 @@ def add_player_to_queue(self, player_id: int):
             player1 = self.players[self.queue.pop(0)]
             player2 = self.players[player_id]
             log.debug(f"matched two players: player {player1.id}, player {player2.id}")
-            new_game = self.GameClass(players=[player1, player2])
+            new_game = self.GameClass(players=[player1, player2], game_id=123)
             self.games.append(new_game)
             new_game.start()
         else:

diff --git a/teamprojekt_competition_server/server/gymgame.py b/teamprojekt_competition_server/server/gymgame.py
@@ -12,7 +12,9 @@
 class GymGame(IGame):
     """game class with the game logic being a gym env"""
 
-    def __init__(self, players: list[IPlayer], env_name: str = "Pendulum-v1") -> None:
+    def __init__(
+        self, players: list[IPlayer], game_id: int, env_name: str = "Pendulum-v1"
+    ) -> None:
         """create a game
 
         Args:
@@ -36,7 +38,7 @@ def __init__(self, players: list[IPlayer], env_name: str = "Pendulum-v1") -> Non
 
         log.debug("created a new gym env")
 
-        super().__init__(players)
+        super().__init__(players, game_id)
 
     def start(self):
         """

diff --git a/teamprojekt_competition_server/server/interfaces.py b/teamprojekt_competition_server/server/interfaces.py
@@ -27,7 +27,7 @@ def authenticate(self, result_callback):
         ...
 
     @abc.abstractmethod
-    def notify_start(self):
+    def notify_start(self, game_id):
         """notifies player that the game has started"""
         ...
 
@@ -53,18 +53,19 @@ def notify_end(self, result, stats):
 class IGame(abc.ABC):
     """game interface"""
 
-    def __init__(self, players: list[IPlayer]) -> None:
+    def __init__(self, players: list[IPlayer], game_id: int = 123) -> None:
         self.players: list[IPlayer] = players
         self.current_actions: list = [None for _ in players]
         self.result_received: int = 0
+        self.game_id = game_id
 
     def start(self):
         """
         notifies all players that the game has started
         and starts the game cycle
         """
         for p in self.players:
-            p.notify_start()
+            p.notify_start(game_id=self.game_id)
         self._game_cycle()
 
     def end(self, reason="unknown"):
@@ -82,7 +83,7 @@ def _update_environment(self):
         ...
 
     def _game_cycle(self):
-        """collectes all actions and puts them in current_actions list"""
+        """collects all actions and puts them in current_actions list"""
         self.result_received = 0
 
         for i, p in enumerate(self.players):
@@ -99,7 +100,7 @@ def __res(v: IAction, index=i):
                     else:
                         self._game_cycle()
 
-            p.get_action(obv=self._observation(), result_callback=__res)
+            p.get_action(obv=self._observation(index=i), result_callback=__res)
 
     @abc.abstractmethod
     def _validate_action(self, action) -> bool:
@@ -108,16 +109,16 @@ def _validate_action(self, action) -> bool:
 
     @abc.abstractmethod
     def _is_finished(self) -> bool:
-        """detirmens if the game has ended
+        """determines if the game has ended
 
         Returns:
             bool: returns true if game has ended
         """
         ...
 
     @abc.abstractmethod
-    def _observation(self):
-        """retutns the observation for the players"""
+    def _observation(self, index: int = 0):
+        """returns the observation for the player"""
         ...
 
     @abc.abstractmethod

diff --git a/teamprojekt_competition_server/server/laserhockeygame.py b/teamprojekt_competition_server/server/laserhockeygame.py
@@ -3,7 +3,8 @@
 import logging as log
 import numpy as np
 import laserhockey.laser_hockey_env as lh
-import gymnasium as gym
+
+# import gymnasium as gym
 from importlib import reload
 
 
@@ -13,7 +14,7 @@
 class LaserHockeyGame(IGame):
     """game class with the game logic being the laser-hockey env"""
 
-    def __init__(self, players: list[IPlayer]) -> None:
+    def __init__(self, players: list[IPlayer], game_id: int) -> None:
         """create a game
 
         Args:
@@ -29,17 +30,17 @@ def __init__(self, players: list[IPlayer]) -> None:
         self.truncated = False
         # TODO use the build in function from gym to limit the amount of steps
 
-        self.observation, self.info = self.env.reset()
-
         log.debug("created a new gym env")
 
-        super().__init__(players)
+        super().__init__(players, game_id)
 
     def start(self):
         """
         notifies all players that the game has started
         and starts the game cycle
         """
+
+        self.observation, self.info = self.env.reset()
         return super().start()
 
     def end(self, reason="unknown"):
@@ -63,9 +64,6 @@ def _update_environment(self):
             self.info,
         ) = self.env.step(np.hstack(self.current_actions))
 
-    def _game_cycle(self):
-        return super()._game_cycle()
-
     def _validate_action(self, action) -> bool:
         return self.env.action_space.contains(
             action
@@ -74,11 +72,21 @@ def _validate_action(self, action) -> bool:
     def _is_finished(self) -> bool:
         return self.terminated or self.truncated
 
-    def _observation(self):
-        return self.observation.tolist()  # obs is an np array, we need list
+    def _observation(self, index):
+        if index == 1:
+            return self.env.obs_agent_two().tolist()  # obs is an np array, we need list
+        else:
+            return self.observation.tolist()  # obs is an np array, we need list
 
     def _player_won(self, index) -> bool:
-        return False  # TODO find the winner
+        self.winner = self.info["winner"]
+        if self.winner == 0:  # draw
+            return False
+        if index == 0:
+            return self.winner == 1  # check if left player won
+        if index == 1:
+            return self.winner == -1  # check if right player won
+        return False
 
     def _player_stats(self, index) -> int:
-        return 0  # TODO
+        return 0  # TODO where tf is th score stored?
diff --git a/teamprojekt_competition_server/server/player.py b/teamprojekt_competition_server/server/player.py
@@ -33,9 +33,9 @@ def authenticate(self, result_callback):
         Returns: token (string)"""
         self.connection.get_token(result_callback)
 
-    def notify_start(self):
+    def notify_start(self, game_id: int):
         """notifies start of game"""
-        self.connection.notify_start()
+        self.connection.notify_start(game_id=game_id)
 
     def get_action(self, obv, result_callback):
         """receive action from server

diff --git a/teamprojekt_competition_server/server/protocol.py b/teamprojekt_competition_server/server/protocol.py
@@ -73,18 +73,17 @@ def callback(res):
 
         self.callRemote(Auth).addCallback(callback=callback)
 
-    def notify_start(self) -> None:
+    def notify_start(self, game_id: int) -> None:
         """starts the game
 
         Args:
             game (Game): game that starts
         """
-        return self.callRemote(StartGame, game_id=222)
+        return self.callRemote(StartGame, game_id=game_id)
 
     def get_step(self, obv, return_callback: Callable[[list], None]) -> None:
         """performs step requested by player"""
 
-        # TODO the obv is currently cast to int, as only ints are allowed.
         return self.callRemote(Step, obv=obv).addCallback(
             callback=lambda res: return_callback(res["action"])
         )

diff --git a/teamprojekt_competition_server/server/rock_paper_scissors.py b/teamprojekt_competition_server/server/rock_paper_scissors.py
@@ -15,8 +15,8 @@ class rock_paper_scissors(IGame):
     This class represents a rock-paper-scissors game.
     """
 
-    def __init__(self, players: list[IPlayer]) -> None:
-        super().__init__(players=players)
+    def __init__(self, players: list[IPlayer], game_id: int) -> None:
+        super().__init__(players=players, game_id=game_id)
         self.env = list([0.0, 0.0])
 
     def _update_environment(self):

diff --git a/teamprojekt_competition_server/shared/commands.py b/teamprojekt_competition_server/shared/commands.py
@@ -32,16 +32,12 @@ class EndGame(Command):
     arguments = [
         (b"result", Boolean()),
         (b"stats", Integer()),
-    ]  # Integer acts as a dummy type, we might want to create a custom data-type here!
+    ]
     response = [(b"ready", Boolean())]
 
 
 class Step(Command):
     """Command for requesting the next step from the agent"""
 
-    arguments = [
-        (b"obv", ListOf(Float()))
-    ]  # Integer acts as a dummy type, we might want to create a custom data-type here!
-    response = [
-        (b"action", ListOf(Float()))
-    ]  # Integer acts as a dummy type, we might want to create a custom data-type here!
+    arguments = [(b"obv", ListOf(Float()))]
+    response = [(b"action", ListOf(Float()))]