@@ -152,7 +152,7 @@ class TaxiEnv(Env):
152
152
153
153
## Version History
154
154
* v3: Map Correction + Cleaner Domain Description, v0.25.0 action masking added to the reset and step information
155
- - In Gymnasium `1.0.0a3 ` the `is_rainy` and `fickle_passenger` arguments were added to align with Dietterich paper
155
+ - In Gymnasium `1.1.0` the `is_rainy` and `fickle_passenger` arguments were added to align with the Dietterich paper
156
156
* v2: Disallow Taxi start location = goal location, Update Taxi observations in the rollout, Update Taxi reward threshold.
157
157
* v1: Remove (3,2) from locs, add passidx<4 check
158
158
* v0: Initial version release
@@ -290,7 +290,7 @@ def __init__(
290
290
291
291
self .render_mode = render_mode
292
292
self .fickle_passenger = fickle_passenger
293
- self .fickle_step = True
293
+ self .fickle_step = self . fickle_passenger and self . np_random . random () < 0.3
294
294
295
295
# pygame utils
296
296
self .window = None
@@ -363,17 +363,17 @@ def step(self, a):
363
363
# If we are in the fickle step, the passenger has been in the vehicle for at least a step and the taxi's
364
364
# position changed during this step
365
365
if (
366
- self .fickle_step
366
+ self .fickle_passenger
367
+ and self .fickle_step
367
368
and shadow_pass_loc == 4
368
369
and (taxi_row != shadow_row or taxi_col != shadow_col )
369
370
):
370
371
self .fickle_step = False
371
- if self .fickle_passenger and self .np_random .random () < 0.3 :
372
- possible_destinations = [
373
- i for i in range (len (self .locs )) if i != shadow_dest_idx
374
- ]
375
- dest_idx = self .np_random .choice (possible_destinations )
376
- s = self .encode (taxi_row , taxi_col , pass_loc , dest_idx )
372
+ possible_destinations = [
373
+ i for i in range (len (self .locs )) if i != shadow_dest_idx
374
+ ]
375
+ dest_idx = self .np_random .choice (possible_destinations )
376
+ s = self .encode (taxi_row , taxi_col , pass_loc , dest_idx )
377
377
378
378
self .s = s
379
379
@@ -391,7 +391,7 @@ def reset(
391
391
super ().reset (seed = seed )
392
392
self .s = categorical_sample (self .initial_state_distrib , self .np_random )
393
393
self .lastaction = None
394
- self .fickle_step = True
394
+ self .fickle_step = self . fickle_passenger and self . np_random . random () < 0.3
395
395
self .taxi_orientation = 0
396
396
397
397
if self .render_mode == "human" :
0 commit comments