Cleaner code and more complete testing

foreverska · foreverska · commit 8d6154b8bd62 · 2025-02-23T19:41:29.000-06:00
diff --git a/gymnasium/envs/toy_text/taxi.py b/gymnasium/envs/toy_text/taxi.py
@@ -152,7 +152,7 @@ class TaxiEnv(Env):
 
     ## Version History
     * v3: Map Correction + Cleaner Domain Description, v0.25.0 action masking added to the reset and step information
-        - In Gymnasium `1.0.0a3` the `is_rainy` and `fickle_passenger` arguments were added to align with Dietterich paper
+        - In Gymnasium `1.1.0` the `is_rainy` and `fickle_passenger` arguments were added to align with the Dietterich paper
     * v2: Disallow Taxi start location = goal location, Update Taxi observations in the rollout, Update Taxi reward threshold.
     * v1: Remove (3,2) from locs, add passidx<4 check
     * v0: Initial version release
@@ -290,7 +290,7 @@ def __init__(
 
         self.render_mode = render_mode
         self.fickle_passenger = fickle_passenger
-        self.fickle_step = True
+        self.fickle_step = self.fickle_passenger and self.np_random.random() < 0.3
 
         # pygame utils
         self.window = None
@@ -363,17 +363,17 @@ def step(self, a):
         # If we are in the fickle step, the passenger has been in the vehicle for at least a step and this step the
         # position changed
         if (
-            self.fickle_step
+            self.fickle_passenger
+            and self.fickle_step
             and shadow_pass_loc == 4
             and (taxi_row != shadow_row or taxi_col != shadow_col)
         ):
             self.fickle_step = False
-            if self.fickle_passenger and self.np_random.random() < 0.3:
-                possible_destinations = [
-                    i for i in range(len(self.locs)) if i != shadow_dest_idx
-                ]
-                dest_idx = self.np_random.choice(possible_destinations)
-                s = self.encode(taxi_row, taxi_col, pass_loc, dest_idx)
+            possible_destinations = [
+                i for i in range(len(self.locs)) if i != shadow_dest_idx
+            ]
+            dest_idx = self.np_random.choice(possible_destinations)
+            s = self.encode(taxi_row, taxi_col, pass_loc, dest_idx)
 
         self.s = s
 
@@ -391,7 +391,7 @@ def reset(
         super().reset(seed=seed)
         self.s = categorical_sample(self.initial_state_distrib, self.np_random)
         self.lastaction = None
-        self.fickle_step = True
+        self.fickle_step = self.fickle_passenger and self.np_random.random() < 0.3
         self.taxi_orientation = 0
 
         if self.render_mode == "human":
diff --git a/tests/envs/test_env_implementation.py b/tests/envs/test_env_implementation.py
@@ -263,6 +263,30 @@ def test_taxi_disallowed_transitions():
                     ) not in disallowed_transitions
 
 
+def test_taxi_fickle_passenger():
+    env = TaxiEnv(fickle_passenger=True)
+    _, _ = env.reset()
+    # Force the fickle flag on; reset() only sets it with probability 0.3
+    env.fickle_step = True
+    state, reward, done, _, _ = env.step(0)
+    taxi_row, taxi_col, pass_idx, orig_dest_idx = env.decode(state)
+    # Overwrite the state so the taxi sits on the passenger's pickup location
+    env.s = env.encode(
+        env.locs[pass_idx][0], env.locs[pass_idx][1], pass_idx, orig_dest_idx
+    )
+    # Action 4: pick up the passenger
+    _, _, _, _, _ = env.step(4)
+    if env.locs[pass_idx][0] == 0:
+        # Pickup was on the top row (row 0), so move down with action 0
+        state, _, _, _, _ = env.step(0)
+    else:
+        # Otherwise move up with action 1
+        state, _, _, _, _ = env.step(1)
+    taxi_row, taxi_col, pass_idx, dest_idx = env.decode(state)
+    # After moving with the passenger aboard, the destination must have changed
+    assert orig_dest_idx != dest_idx
+
+
 @pytest.mark.parametrize(
     "env_name",
     ["Acrobot-v1", "CartPole-v1", "MountainCar-v0", "MountainCarContinuous-v0"],