Commit f89aaf9

[RLlib] Add comments to example files for templates. (#48988)
ArturNiederfahrenhorst authored Dec 2, 2024
1 parent 8032985 commit f89aaf9
Showing 2 changed files with 21 additions and 7 deletions.
8 changes: 8 additions & 0 deletions rllib/examples/envs/custom_gym_env.py
@@ -45,13 +45,17 @@
| 18.3034 | 28000 | 0.908918 | 12.9676 |
+------------------+-------+----------+--------------------+
"""
+# These tags allow extracting portions of this script on Anyscale.
+# ws-template-imports-start
import gymnasium as gym
from gymnasium.spaces import Discrete, Box
import numpy as np
import random

from typing import Optional

+# ws-template-imports-end

from ray.rllib.utils.test_utils import (
add_rllib_example_script_args,
run_rllib_example_script_experiment,
@@ -71,6 +75,8 @@
)


+# These tags allow extracting portions of this script on Anyscale.
+# ws-template-code-start
class SimpleCorridor(gym.Env):
"""Example of a custom env in which the agent has to walk down a corridor.
@@ -126,6 +132,8 @@ def step(self, action):
)


+# ws-template-code-end

if __name__ == "__main__":
args = parser.parse_args()

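The diff view collapses most of the SimpleCorridor implementation. For orientation only, here is a condensed sketch of what a corridor env of this kind typically implements under the Gymnasium API; the class name, reward values, and the corridor_length default below are illustrative, not necessarily the exact values in the file:

import random
from typing import Optional

import gymnasium as gym
import numpy as np
from gymnasium.spaces import Box, Discrete


class CorridorSketch(gym.Env):
    """Illustrative corridor env: step right (action 1) until the goal is reached."""

    def __init__(self, config: Optional[dict] = None):
        config = config or {}
        self.end_pos = config.get("corridor_length", 10)
        self.cur_pos = 0
        self.action_space = Discrete(2)  # 0 = left, 1 = right
        self.observation_space = Box(0.0, self.end_pos, shape=(1,), dtype=np.float32)

    def reset(self, *, seed=None, options=None):
        random.seed(seed)
        self.cur_pos = 0
        return np.array([self.cur_pos], np.float32), {}

    def step(self, action):
        # Walk left (bounded at 0) or right; the episode ends at the corridor's end.
        if action == 0 and self.cur_pos > 0:
            self.cur_pos -= 1
        elif action == 1:
            self.cur_pos += 1
        terminated = self.cur_pos >= self.end_pos
        reward = 1.0 if terminated else -0.1  # small per-step cost, bonus at the goal
        return np.array([self.cur_pos], np.float32), reward, terminated, False, {}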
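The ws-template-*-start/-end comment pairs added here (and in atari_ppo.py below) are plain string markers, so Anyscale's workspace templates can lift just the tagged regions out of the script. The actual template tooling is not part of this commit; the helper below is only a hypothetical sketch of how such markers could be consumed:

from pathlib import Path


def extract_tagged_block(path: str, tag: str) -> str:
    """Return the lines between '# <tag>-start' and '# <tag>-end' (exclusive)."""
    lines = Path(path).read_text().splitlines()
    start = lines.index(f"# {tag}-start") + 1
    end = lines.index(f"# {tag}-end")
    return "\n".join(lines[start:end])


# e.g. pull only the import block out of the example script:
# extract_tagged_block("rllib/examples/envs/custom_gym_env.py", "ws-template-imports")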
20 changes: 13 additions & 7 deletions rllib/tuned_examples/ppo/atari_ppo.py
@@ -1,3 +1,5 @@
+# These tags allow extracting portions of this script on Anyscale.
+# ws-template-imports-start
import gymnasium as gym

from ray import tune
@@ -8,6 +10,7 @@
from ray.rllib.env.wrappers.atari_wrappers import wrap_atari_for_new_api_stack
from ray.rllib.utils.test_utils import add_rllib_example_script_args

+# ws-template-imports-end

parser = add_rllib_example_script_args(
default_reward=float("inf"),
@@ -22,7 +25,12 @@
# and (if needed) use their values to set up `config` below.
args = parser.parse_args()

+NUM_LEARNERS = args.num_learners or 1
+ENV = args.env


+# These tags allow extracting portions of this script on Anyscale.
+# ws-template-code-start
def _make_env_to_module_connector(env):
return FrameStackingEnvToModule(num_frames=4)

@@ -35,15 +43,14 @@ def _make_learner_connector(input_observation_space, input_action_space):
# We would like our frame stacking connector to do this job.
def _env_creator(cfg):
return wrap_atari_for_new_api_stack(
-        gym.make(args.env, **cfg, render_mode="rgb_array"),
+        gym.make(ENV, **cfg, render_mode="rgb_array"),
# Perform frame-stacking through ConnectorV2 API.
framestack=None,
)


tune.register_env("env", _env_creator)


config = (
PPOConfig()
.environment(
@@ -57,20 +64,19 @@ def _env_creator(cfg):
clip_rewards=True,
)
.env_runners(
-        # num_envs_per_env_runner=5, # 5 on old yaml example
env_to_module_connector=_make_env_to_module_connector,
)
.training(
learner_connector=_make_learner_connector,
-        train_batch_size_per_learner=4000, # 5000 on old yaml example
-        minibatch_size=128, # 500 on old yaml example
+        train_batch_size_per_learner=4000,
+        minibatch_size=128,
lambda_=0.95,
kl_coeff=0.5,
clip_param=0.1,
vf_clip_param=10.0,
entropy_coeff=0.01,
num_epochs=10,
-        lr=0.00015 * (args.num_learners or 1),
+        lr=0.00015 * NUM_LEARNERS,
grad_clip=100.0,
grad_clip_by="global_norm",
)
@@ -83,7 +89,7 @@ def _env_creator(cfg):
),
)
)

+# ws-template-code-end

if __name__ == "__main__":
from ray.rllib.utils.test_utils import run_rllib_example_script_experiment
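Since frame stacking is deferred to the ConnectorV2 pipeline (framestack=None in the wrapper, FrameStackingEnvToModule(num_frames=4) on the env-runner side), each step of the registered env returns a single, unstacked frame. A quick standalone check of the creator logic could look like the following; the ALE environment ID is an assumption and requires the ale_py ROMs to be installed:

import gymnasium as gym

from ray.rllib.env.wrappers.atari_wrappers import wrap_atari_for_new_api_stack

# Hypothetical env ID; any installed ALE game works the same way.
env = wrap_atari_for_new_api_stack(
    gym.make("ale_py:ALE/Pong-v5", render_mode="rgb_array"),
    framestack=None,  # leave stacking to the FrameStackingEnvToModule connector
)
obs, info = env.reset()
print(obs.shape)  # one preprocessed frame, not a stack of 4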
