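Add base agent (BaseAgent, RandomAgent); move the random-play evaluation loop from gym_2048.py to base_agent.py; reset() now returns (state, reward, done, info)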

YangRui2015 · YangRui2015 · commit dd728f418918 · 2019-09-29T15:49:55.000+08:00
diff --git a/base_agent.py b/base_agent.py
@@ -0,0 +1,60 @@
+# -*- coding: utf-8 -*-
+# base_agent.py
+# author: yangrui
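+# description: agent interface (BaseAgent), a random-action baseline (RandomAgent), and a script that evaluates it on Game2048Env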
+# created: 2019-09-29T15:01:38.383Z+08:00
+# last-modified: 2019-09-29T15:01:38.383Z+08:00
+# email: yangrui19@mails.tsinghua.edu.cn
+
+from gym_2048 import Game2048Env
+import random
+
+
+class BaseAgent():
+    def act(self, state):
+        raise NotImplementedError
+
+
+class RandomAgent(BaseAgent):
+    def act(self, state):
+        return random.randint(0, 3)
+
+
+if __name__ == "__main__":
+    import time
+    import numpy as np 
+
+    def run(ifrender=False):
+        agent = RandomAgent()
+        env = Game2048Env()
+        state, reward, done, info = env.reset()
+        if ifrender:
+            env.render()
+ 
+        start = time.time()
+        while True:
+            action = agent.act(state)
+            # print('action: {}'.format(action))
+            state, reward, done, info = env.step(action)
+            if ifrender:
+                env.render()
+            if done:
+                print('\nfinished, info:{}'.format(info))
+                break
+        
+        end = time.time()
+        print('episode time:{} s\n'.format(end - start))
+        return end - start, info['highest'], info['score'], info['steps']
+
+    time_lis, highest_lis, score_lis, steps_lis = [], [], [], []
+    for i in range(1000):
+        t, highest, score, steps = run()
+        time_lis.append(t)
+        highest_lis.append(highest)
+        score_lis.append(score)
+        steps_lis.append(steps)
+    
+    print('eval result:\naverage episode time:{} s, average highest score:{}, average total score:{}, average steps:{}'.format(np.mean(time_lis), np.mean(highest_lis), np.mean(score_lis), np.mean(steps_lis)))
+    
+
+
diff --git a/gym_2048.py b/gym_2048.py
@@ -59,8 +59,19 @@ def __init__(self):
         # Initialise seed
         self.seed()
 
-        # Reset ready for a game
-        self.reset()
+        # # Reset ready for a game
+        # self.reset()
+    
+    def _get_info(self, info=None):
+        if not info:
+            info = {}
+        else:
+            assert type(info) == dict, 'info should be of type dict!'
+
+        info['highest'] = self.highest()
+        info['score'] = self.score
+        info['steps'] = self.steps
+        return info
 
     def seed(self, seed=None):
         self.np_random, seed = seeding.np_random(seed)
@@ -103,10 +114,7 @@ def step(self, action):
             done = False
             reward = self.illegal_move_reward
 
-        #print("Am I done? {}".format(done))
-        info['highest'] = self.highest()
-        info['score'] = self.score
-        info['steps'] = self.steps
+        info = self._get_info(info)
 
         # Return observation (board state), reward, done and info dict
         return self.Matrix, reward, done, info
@@ -120,7 +128,7 @@ def reset(self):
         self.add_tile()
         self.add_tile()
 
-        return self.Matrix
+        return self.Matrix, 0, False, self._get_info()
 
     def render(self, mode='human'):
         outfile = StringIO() if mode == 'ansi' else sys.stdout
@@ -282,40 +290,6 @@ def set_board(self, new_board):
         """Retrieve the whole board, useful for testing."""
         self.Matrix = new_board
     
-if __name__ == "__main__":
-    import random
-    import time
-    import numpy as np 
-
-
-    def run():
-        env = Game2048Env()
-        env.render()
-        start = time.time()
-        while True:
-            action = random.randint(0, 3)
-            print('action: {}'.format(action))
-            state, reward, done, info = env.step(action)
-            env.render()
-            if done:
-                print('\nfinished, info:{}'.format(info))
-                break
-        
-        end = time.time()
-        print('episode time:{} s\n'.format(end - start))
-        return end - start, info['highest'], info['score'], info['steps']
-
-    time_lis, highest_lis, score_lis, steps_lis = [], [], [], []
-    for i in range(100):
-        t, highest, score, steps = run()
-        time_lis.append(t)
-        highest_lis.append(highest)
-        score_lis.append(score)
-        steps_lis.append(steps)
-    
-    print('eval result:\naverage episode time:{} s, average highest score:{}, average total score:{}, average steps:{}'.format(np.mean(time_lis), np.mean(highest_lis), np.mean(score_lis), np.mean(steps_lis)))
-
-