A random seed used to initialize an environment will now also be used to decide sticky actions.

Previous behaviour was unintuitive: the passed random seed was used for the environment's other random events, while sticky actions were drawn from NumPy's global random state (`np.random.rand()`), so they were not reproducible from the seed.
kenjyoung · Jun 11, 2021 · 8b39a18 · 8b39a18
1 parent 310c7c3
commit 8b39a18
Show file tree

Hide file tree

Showing 7 changed files with 29 additions and 13 deletions.
diff --git a/minatar/environment.py b/minatar/environment.py
@@ -17,8 +17,9 @@
 class Environment:
     def __init__(self, env_name, sticky_action_prob = 0.1, difficulty_ramping = True, random_seed = None):
         env_module = import_module('minatar.environments.'+env_name)
+        self.random = np.random.RandomState(random_seed)
         self.env_name = env_name
-        self.env = env_module.Env(ramping = difficulty_ramping, seed = random_seed)
+        self.env = env_module.Env(ramping = difficulty_ramping, random_state = self.random)
         self.n_channels = self.env.state_shape()[2]
         self.sticky_action_prob = sticky_action_prob
         self.last_action = 0
@@ -27,7 +28,7 @@ def __init__(self, env_name, sticky_action_prob = 0.1, difficulty_ramping = True
 
     # Wrapper for env.act
     def act(self, a):
-        if(np.random.rand()<self.sticky_action_prob):
+        if(self.random.rand()<self.sticky_action_prob):
             a = self.last_action
         self.last_action = a
         return self.env.act(a)

diff --git a/minatar/environments/asterix.py b/minatar/environments/asterix.py
@@ -26,7 +26,7 @@
 #
 #####################################################################################################################
 class Env:
-    def __init__(self, ramping = True, seed = None):
+    def __init__(self, ramping = True, random_state = None):
         self.channels ={
             'player':0,
             'enemy':1,
@@ -35,7 +35,10 @@ def __init__(self, ramping = True, seed = None):
         }
         self.action_map = ['n','l','u','r','d','f']
         self.ramping = ramping
-        self.random = np.random.RandomState(seed)
+        if random_state is None:
+            self.random = np.random.RandomState()
+        else:
+            self.random = random_state
         self.reset()
 
     # Update environment according to agent action

diff --git a/minatar/environments/breakout.py b/minatar/environments/breakout.py
@@ -17,15 +17,18 @@
 #
 #####################################################################################################################
 class Env:
-    def __init__(self, ramping = None, seed = None):
+    def __init__(self, ramping = None, random_state = None):
         self.channels ={
             'paddle':0,
             'ball':1,
             'trail':2,
             'brick':3,
         }
         self.action_map = ['n','l','u','r','d','f']
-        self.random = np.random.RandomState(seed)
+        if random_state is None:
+            self.random = np.random.RandomState()
+        else:
+            self.random = random_state
         self.reset()
 
     # Update environment according to agent action

diff --git a/minatar/environments/freeway.py b/minatar/environments/freeway.py
@@ -28,7 +28,7 @@
 #
 #####################################################################################################################
 class Env:
-    def __init__(self, ramping = None, seed = None):
+    def __init__(self, ramping = None, random_state = None):
         self.channels ={
             'chicken':0,
             'car':1,
@@ -39,7 +39,10 @@ def __init__(self, ramping = None, seed = None):
             'speed5':6,
         }
         self.action_map = ['n','l','u','r','d','f']
-        self.random = np.random.RandomState(seed)
+        if random_state is None:
+            self.random = np.random.RandomState()
+        else:
+            self.random = random_state
         self.reset()
 
     # Update environment according to agent action

diff --git a/minatar/environments/seaquest.py b/minatar/environments/seaquest.py
@@ -40,7 +40,7 @@
 #
 #####################################################################################################################
 class Env:
-    def __init__(self, ramping = True, seed = None):
+    def __init__(self, ramping = True, random_state = None):
         self.channels ={
             'sub_front':0,
             'sub_back':1,
@@ -55,7 +55,10 @@ def __init__(self, ramping = True, seed = None):
         }
         self.action_map = ['n','l','u','r','d','f']
         self.ramping = ramping
-        self.random = np.random.RandomState(seed)
+        if random_state is None:
+            self.random = np.random.RandomState()
+        else:
+            self.random = random_state
         self.reset()
 
     # Update environment according to agent action

diff --git a/minatar/environments/space_invaders.py b/minatar/environments/space_invaders.py
@@ -29,7 +29,7 @@
 #
 #####################################################################################################################
 class Env:
-    def __init__(self, ramping = True, seed = None):
+    def __init__(self, ramping = True, random_state=None):
         self.channels ={
             'cannon':0,
             'alien':1,
@@ -40,7 +40,10 @@ def __init__(self, ramping = True, seed = None):
         }
         self.action_map = ['n','l','u','r','d','f']
         self.ramping = ramping
-        self.random = np.random.RandomState(seed)
+        if random_state is None:
+            self.random = np.random.RandomState()
+        else:
+            self.random = random_state
         self.reset()
 
     # Update environment according to agent action

diff --git a/setup.py b/setup.py
@@ -2,7 +2,7 @@
 
 setup(
     name='MinAtar',
-    version='1.0.7',
+    version='1.0.8',
     description='A miniaturized version of the arcade learning environment.',
     url='https://github.com/kenjyoung/MinAtar',
     author='Kenny Young',