Commit

small fixes to ppo, sac, td3
svsawant committed Sep 17, 2024
1 parent bdf26c4 commit 29f3cf6
Showing 16 changed files with 1,229 additions and 313 deletions.
@@ -14,10 +14,10 @@ algo_config:
   critic_lr: 0.001
 
   # runner args
-  max_env_steps: 540000
+  max_env_steps: 480000
   rollout_batch_size: 4
   rollout_steps: 540
-  eval_batch_size: 50
+  eval_batch_size: 10
 
   # misc
   log_interval: 10800
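Note: for orientation, the runner arithmetic implied by the new PPO values, as a small Python sketch (it assumes max_env_steps counts steps summed across the parallel rollout envs; the exact semantics live in safe_control_gym's runner):

# PPO runner bookkeeping implied by the values above (a sketch).
max_env_steps = 480_000
rollout_batch_size = 4      # parallel rollout envs
rollout_steps = 540         # steps per env per PPO iteration
log_interval = 10_800       # env steps between log points

steps_per_iter = rollout_batch_size * rollout_steps  # 2160
print(max_env_steps // steps_per_iter)               # 222 PPO iterations
print(max_env_steps // log_interval)                 # 44 log points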
@@ -2,8 +2,8 @@ task_config:
   seed: 1337
   info_in_reset: True
   ctrl_freq: 60
-  pyb_freq: 1200
-  physics: pyb
+  pyb_freq: 60
+  physics: dyn_si
   quad_type: 4
   normalized_rl_action_space: False
 
@@ -20,42 +20,42 @@ task_config:
   init_state_randomization_info:
     init_x:
       distrib: 'uniform'
-      low: -0.02
-      high: 0.02
+      low: -0.05
+      high: 0.05
     init_x_dot:
      distrib: 'uniform'
-      low: -0.02
-      high: 0.02
+      low: -0.05
+      high: 0.05
     init_z:
       distrib: 'uniform'
-      low: -0.02
-      high: 0.02
+      low: -0.05
+      high: 0.05
     init_z_dot:
       distrib: 'uniform'
-      low: -0.02
-      high: 0.02
+      low: -0.05
+      high: 0.05
     init_theta:
       distrib: 'uniform'
-      low: -0.02
-      high: 0.02
+      low: -0.05
+      high: 0.05
     init_theta_dot:
       distrib: 'uniform'
-      low: -0.02
-      high: 0.02
+      low: -0.05
+      high: 0.05
 
   task: traj_tracking
   task_info:
     trajectory_type: figure8
-    num_cycles: 1
+    num_cycles: 2
     trajectory_plane: 'xz'
-    trajectory_position_offset: [0, 1.2]
+    trajectory_position_offset: [0, 1.]
     trajectory_scale: 0.5
 
   inertial_prop:
     M: 0.027
     Iyy: 1.4e-05
 
-  episode_len_sec: 10
+  episode_len_sec: 9
   cost: rl_reward
   obs_goal_horizon: 1
 
@@ -67,17 +67,17 @@ task_config:
   disturbances:
     observation:
       - disturbance_func: white_noise
-        std: [0.02, 0.02, 0.04, 0.04, 0.04, 0.1, 0., 0., 0., 0., 0., 0.]
+        std: [5.6e-05, 1.5e-02, 2.9e-05, 8.0e-03, 1.3e-03, 3.6e-01, 0., 0., 0., 0., 0., 0.]
 
-  # constraints:
-  #   - constraint_form: default_constraint
-  #     constrained_variable: state
+  constraints:
+    - constraint_form: default_constraint
+      constrained_variable: state
   #     upper_bounds: [2, 1, 2, 1, 0.2, 2.5]
   #     lower_bounds: [-2, -1, 0, -1, -0.2, -2.5]
-  #   - constraint_form: default_constraint
-  #     constrained_variable: input
-  #     upper_bounds: [0.58, 0.8]
-  #     lower_bounds: [0.06, -0.8]
+    - constraint_form: default_constraint
+      constrained_variable: input
+      upper_bounds: [0.47628, 0.4]
+      lower_bounds: [0.079, -0.4]
 
   done_on_out_of_bound: True
   done_on_violation: False
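Note: the updated task_info traces a figure-8 in the x-z plane, two cycles over the 9 s episode (540 control steps at 60 Hz, matching rollout_steps: 540 in the PPO config above). A minimal Python sketch of such a reference, assuming a Lissajous parameterization for illustration (the actual generator lives in safe_control_gym's trajectory utilities):

import numpy as np

ctrl_freq, episode_len_sec, num_cycles = 60, 9, 2   # from the config above
scale, x_off, z_off = 0.5, 0.0, 1.0                 # trajectory_scale, offset [0, 1.]

t = np.linspace(0.0, episode_len_sec, ctrl_freq * episode_len_sec)
w = 2.0 * np.pi * num_cycles / episode_len_sec
x_ref = x_off + scale * np.sin(w * t)                  # one lobe per cycle
z_ref = z_off + scale * np.sin(w * t) * np.cos(w * t)  # the crossing makes the 8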
@@ -7,23 +7,21 @@ algo_config:
   # optim args
   train_interval: 100
   train_batch_size: 256
-  actor_lr: 0.001
-  critic_lr: 0.001
-  entropy_lr: 0.001
+  actor_lr: 0.003
+  critic_lr: 0.003
+  entropy_lr: 0.003
 
   # runner args
-  max_env_steps: 540000
+  max_env_steps: 216000
   warm_up_steps: 1000
   rollout_batch_size: 4
-  num_workers: 1
   max_buffer_size: 54000
-  deque_size: 50
-  eval_batch_size: 50
+  eval_batch_size: 10
 
   # misc
-  log_interval: 10800
+  log_interval: 5400
   save_interval: 540000
   num_checkpoints: 0
-  eval_interval: 10800
+  eval_interval: 5400
   eval_save_best: True
   tensorboard: False
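Note: these SAC intervals line up with the 540-step episodes defined in the task config (episode_len_sec 9 * ctrl_freq 60). A quick Python cross-check, assuming every interval below is counted in environment steps:

steps_per_episode = 9 * 60             # 540
print(216_000 // steps_per_episode)    # max_env_steps   -> 400 episodes
print(54_000 // steps_per_episode)     # max_buffer_size -> 100 episodes
print(216_000 // 5_400)                # log/eval        -> every 10 episodes, 40 times
print(216_000 // 100)                  # train_interval  -> 2160 update rounds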
@@ -1,4 +1,3 @@
-algo: sac
 algo_config:
   # model args
   hidden_dim: 128
@@ -8,22 +7,20 @@ algo_config:
   # optim args
   train_interval: 100
   train_batch_size: 256
-  actor_lr: 0.001
-  critic_lr: 0.001
+  actor_lr: 0.003
+  critic_lr: 0.003
 
   # runner args
-  max_env_steps: 200000
+  max_env_steps: 216000
   warm_up_steps: 1000
   rollout_batch_size: 4
-  num_workers: 1
-  max_buffer_size: 50000
-  deque_size: 10
-  eval_batch_size: 50
+  max_buffer_size: 54000
+  eval_batch_size: 10
 
   # misc
-  log_interval: 2000
-  save_interval: 0
+  log_interval: 5400
+  save_interval: 480000
   num_checkpoints: 0
-  eval_interval: 2000
+  eval_interval: 5400
   eval_save_best: True
   tensorboard: False
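Note: in both this config and the sac one above, save_interval (480000 and 540000) now exceeds max_env_steps (216000), so periodic checkpointing should never fire within a run; only the latest and best-eval models get written (assuming save_interval is counted in environment steps like the other intervals).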
1,013 changes: 952 additions & 61 deletions examples/rl/data_analysis.ipynb

Large diffs are not rendered by default.

13 changes: 11 additions & 2 deletions examples/rl/rl_experiment.py
@@ -12,7 +12,7 @@
 from safe_control_gym.utils.registration import make
 
 
-def run(gui=False, plot=True, n_episodes=1, n_steps=None, curr_path='.'):
+def run(gui=False, plot=True, n_episodes=10, n_steps=None, curr_path='.'):
     """Main function to run RL experiments.
 
     Args:
@@ -38,6 +38,8 @@ def run(gui=False, plot=True, n_episodes=1, n_steps=None, curr_path='.'):
     else:
         system = config.task
 
+    # config.task_config.disturbances.observation[0].std = [config.task_config.noise_scale*i
+    #                                                       for i in config.task_config.disturbances.observation[0].std]
     env_func = partial(make,
                        config.task,
                        **config.task_config)
@@ -51,7 +53,11 @@ def run(gui=False, plot=True, n_episodes=1, n_steps=None, curr_path='.'):
 
     # Load state_dict from trained.
     # ctrl.load(f'{curr_path}/models/{config.algo}/{config.algo}_model_{system}_{task}.pt')
-    ctrl.load(f'{curr_path}/models/{config.algo}/model_best.pt')
+    # ctrl.load(f'{curr_path}/models/{config.algo}/model_best.pt')
+    if 'pretrain_path' in config.keys():
+        ctrl.load(config.pretrain_path+"/model_latest.pt")
+    else:
+        ctrl.load(f'{curr_path}/models/{config.algo}/model_latest.pt')
 
     # Remove temporary files and directories
     shutil.rmtree(f'{curr_path}/temp', ignore_errors=True)
@@ -60,6 +66,9 @@ def run(gui=False, plot=True, n_episodes=1, n_steps=None, curr_path='.'):
     experiment = BaseExperiment(env, ctrl)
     results, metrics = experiment.run_evaluation(n_episodes=n_episodes, n_steps=n_steps)
     ctrl.close()
+    # metrics['noise_scale'] = config.task_config.noise_scale
+    # temp = config.pretrain_path+"/metric_"+str(config.task_config.noise_scale)+".npy"
+    # np.save(temp, metrics, allow_pickle=True)
 
     if plot is True:
         if system == Environment.CARTPOLE:
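Note: the commented-out lines stub in a noise-scale sweep. If enabled, they would rescale the observation white-noise std from the task config and save the resulting metrics per scale; a minimal Python sketch of the intent, assuming a task_config.noise_scale key is supplied (e.g. via --kv_overrides):

base_std = [5.6e-05, 1.5e-02, 2.9e-05, 8.0e-03, 1.3e-03, 3.6e-01,
            0., 0., 0., 0., 0., 0.]   # std vector from the task config above
noise_scale = 10                      # hypothetical value; the shell script's commented loop tries 1..200
scaled_std = [noise_scale * s for s in base_std]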
26 changes: 17 additions & 9 deletions examples/rl/rl_experiment.sh
@@ -13,6 +13,7 @@ ALGO='ppo'
 #ALGO='sac'
 #ALGO='td3'
 #ALGO='safe_explorer_ppo'
+#ALGO='dppo'
 
 if [ "$SYS" == 'cartpole' ]; then
     SYS_NAME=$SYS
@@ -21,12 +22,19 @@ else
 fi
 
 # RL Experiment
-python3 ./rl_experiment.py \
-    --task ${SYS_NAME} \
-    --algo ${ALGO} \
-    --overrides \
-        ./config_overrides/${SYS}/${SYS}_${TASK}.yaml \
-        ./config_overrides/${SYS}/${ALGO}_${SYS}.yaml \
-    --kv_overrides \
-        algo_config.training=False \
-        task_config.randomized_init=True
+#for NS in {1,10,20,30,40,50,60,70,80,90,100,110,120,130,140,150,160,170,180,190,200}
+#do
+for SEED in {0..0}
+do
+    python3 ./rl_experiment.py \
+        --task ${SYS_NAME} \
+        --algo ${ALGO} \
+        --overrides \
+            ./config_overrides/${SYS}/${SYS}_${TASK}.yaml \
+            ./config_overrides/${SYS}/${ALGO}_${SYS}.yaml \
+        --kv_overrides \
+            algo_config.training=False \
+            task_config.randomized_init=True
+            # --pretrain_path ./Results/LSY_pc/${SYS}_${ALGO}_data/Long_run/${SEED}
+done
+#done
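Note: as written, the loop evaluates a single seed (for SEED in {0..0}); widening the brace range (e.g. {0..9}) sweeps seeds, and uncommenting the outer NS loop together with the --pretrain_path line turns this into the noise-scale sweep stubbed out in rl_experiment.py above.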
8 changes: 5 additions & 3 deletions examples/rl/train_rl_model.sh
@@ -9,10 +9,11 @@ SYS='quadrotor_2D_attitude'
 #TASK='stab'
 TASK='track'
 
-ALGO='ppo'
-#ALGO='sac'
+#ALGO='ppo'
+ALGO='sac'
 #ALGO='td3'
 #ALGO='ddpg'
+#ALGO='dppo'
 #ALGO='safe_explorer_ppo'
 
 if [ "$SYS" == 'cartpole' ]; then
@@ -55,8 +56,9 @@ do
             ./config_overrides/${SYS}/${SYS}_${TASK}.yaml \
         --output_dir ./Results/${SYS}_${ALGO}_data/${SEED}/ \
         --seed ${SEED} \
         --use_gpu\
         --kv_overrides \
-            task_config.randomized_init=True
+            task_config.randomized_init=True
+            # --pretrain_path ./models/${ALGO}/model_latest.pt
 done
 
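Note: the commented --pretrain_path flags in both scripts pair with the new loading branch in rl_experiment.py: when pretrain_path is present in the config, the experiment loads model_latest.pt from that directory instead of the default models/${ALGO}/ path.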