Update configs to simplify default usage commands.

AdamJelley · Jun 17, 2024 · 1ac6f1f · 1ac6f1f
1 parent a5ba2c0
commit 1ac6f1f
Show file tree

Hide file tree

Showing 57 changed files with 122 additions and 122 deletions.
diff --git a/README.md b/README.md
@@ -42,13 +42,13 @@ docker run --gpus=all -it --rm --name <container_name> <image_name>
 To reproduce our results for TD3+BC on Hopper-medium:
 
 ```bash
-python algorithms/td3_bc.py --config=configs/td3_bc/hopper/medium_v2.yaml --device cuda:0 --alpha 10 --pretrain AC --pretrain_steps 10000 --td_component 0 --eval_freq 1000 --seed 0
+python algorithms/td3_bc.py --config=configs/td3_bc/hopper/medium_v2.yaml --pretrain AC --pretrain_steps 10000 --td_component 0
 ```
 
 Or for EDAC on Hopper-medium:
 
 ```bash
-python algorithms/edac.py --config=configs/edac/hopper/medium_v2.yaml --eval_every 1 --num_epochs 300 --device cuda:0 --pretrain softAC --pretrain_epochs 20 --train_seed 0 --eval_seed 1
+python algorithms/edac.py --config=configs/edac/hopper/medium_v2.yaml --pretrain softAC --pretrain_epochs 20 --td_component 0
 ```
 
 Note these assume access to a CUDA device to run (otherwise set `--device cpu`).
@@ -62,14 +62,14 @@ The pre-training argument can also be set to `--pretrain BC` to only pretrain th
 To run our hybrid algorithms TD3+BC+CQL and EDAC+BC introduced for improved stability on the Adroit environments:
 
 ```bash
-python algorithms/td3_bc.py --config=configs/td3_bc/pen/human_v1.yaml --eval_freq 1000 --max_timesteps 300000 --pretrain AC --pretrain_steps 200000 --pretrain_cql_regulariser 1 --cql_regulariser 1 --device cuda:0 --seed 0
+python algorithms/td3_bc.py --config=configs/td3_bc/pen/human_v1.yaml --pretrain AC --pretrain_steps 200000 --pretrain_cql_regulariser 1 --cql_regulariser 1
 ```
 
 ```bash
-python algorithms/edac.py --config=configs/edac/pen/human_v1.yaml --eval_every 1 --num_epochs 300 --bc_regulariser 1 --pretrain softAC --pretrain_epochs 200 --device cuda:0 --train_seed 0 --eval_seed 1
+python algorithms/edac.py --config=configs/edac/pen/human_v1.yaml --pretrain softAC --pretrain_epochs 200 --bc_regulariser 1
 ```
 
-For full details and hyperparameters please see Appendix G of the paper.
+The additional regularisation components can be adjusted either via the corresponding command-line arguments or in the config files. For full details and hyperparameters please see Appendix G of the paper.
 
 Note that performance on these environments is very high variance due to the limited data and nature of the environment (particularly the shaping of the reward). Also note that we updated the evaluation procedure for these environments *for all algorithms* for fairer comparison, since we noticed that the timeout for these environments was set to be significantly shorter than the length of the provided demonstrations...! This is discussed in the paper in Appendix H.
 

diff --git a/configs/bc/door/cloned_v1.yaml b/configs/bc/door/cloned_v1.yaml
@@ -4,11 +4,11 @@ checkpoints_path: null
 device: cuda
 discount: 0.99
 env: door-cloned-v1
-eval_freq: 5000
+eval_freq: 1000
 frac: 1.0
 group: bc-door-cloned-v1-multiseed-v0
 load_model: ''
-max_timesteps: 1000000
+max_timesteps: 300000
 max_traj_len: 1000
 n_episodes: 10
 name: BC

diff --git a/configs/bc/door/human_v1.yaml b/configs/bc/door/human_v1.yaml
@@ -4,11 +4,11 @@ checkpoints_path: null
 device: cuda
 discount: 0.99
 env: door-human-v1
-eval_freq: 5000
+eval_freq: 1000
 frac: 1.0
 group: bc-door-human-v1-multiseed-v0
 load_model: ''
-max_timesteps: 1000000
+max_timesteps: 300000
 max_traj_len: 1000
 n_episodes: 10
 name: BC

diff --git a/configs/bc/hammer/cloned_v1.yaml b/configs/bc/hammer/cloned_v1.yaml
@@ -4,11 +4,11 @@ checkpoints_path: null
 device: cuda
 discount: 0.99
 env: hammer-cloned-v1
-eval_freq: 5000
+eval_freq: 1000
 frac: 1.0
 group: bc-hammer-cloned-v1-multiseed-v0
 load_model: ''
-max_timesteps: 1000000
+max_timesteps: 300000
 max_traj_len: 1000
 n_episodes: 10
 name: BC

diff --git a/configs/bc/hammer/human_v1.yaml b/configs/bc/hammer/human_v1.yaml
@@ -4,11 +4,11 @@ checkpoints_path: null
 device: cuda
 discount: 0.99
 env: hammer-human-v1
-eval_freq: 5000
+eval_freq: 1000
 frac: 1.0
 group: bc-hammer-human-v1-multiseed-v0
 load_model: ''
-max_timesteps: 1000000
+max_timesteps: 300000
 max_traj_len: 1000
 n_episodes: 10
 name: BC

diff --git a/configs/bc/pen/cloned_v1.yaml b/configs/bc/pen/cloned_v1.yaml
@@ -4,11 +4,11 @@ checkpoints_path: null
 device: cuda
 discount: 0.99
 env: pen-cloned-v1
-eval_freq: 5000
+eval_freq: 1000
 frac: 1.0
 group: bc-pen-cloned-v1-multiseed-v0
 load_model: ''
-max_timesteps: 1000000
+max_timesteps: 300000
 max_traj_len: 1000
 n_episodes: 10
 name: BC

diff --git a/configs/bc/pen/expert_v1.yaml b/configs/bc/pen/expert_v1.yaml
@@ -4,11 +4,11 @@ checkpoints_path: null
 device: cuda
 discount: 0.99
 env: pen-expert-v1
-eval_freq: 5000
+eval_freq: 1000
 frac: 1.0
 group: bc-pen-expert-v1-multiseed-v0
 load_model: ''
-max_timesteps: 1000000
+max_timesteps: 300000
 max_traj_len: 1000
 n_episodes: 10
 name: BC

diff --git a/configs/bc/pen/human_v1.yaml b/configs/bc/pen/human_v1.yaml
@@ -4,11 +4,11 @@ checkpoints_path: null
 device: cuda
 discount: 0.99
 env: pen-human-v1
-eval_freq: 5000
+eval_freq: 1000
 frac: 1.0
 group: bc-pen-human-v1-multiseed-v0
 load_model: ''
-max_timesteps: 1000000
+max_timesteps: 300000
 max_traj_len: 1000
 n_episodes: 10
 name: BC

diff --git a/configs/bc/relocate/cloned_v1.yaml b/configs/bc/relocate/cloned_v1.yaml
@@ -4,11 +4,11 @@ checkpoints_path: null
 device: cuda
 discount: 0.99
 env: relocate-cloned-v1
-eval_freq: 5000
+eval_freq: 1000
 frac: 1.0
 group: bc-relocate-cloned-v1-multiseed-v0
 load_model: ''
-max_timesteps: 1000000
+max_timesteps: 300000
 max_traj_len: 1000
 n_episodes: 10
 name: BC

diff --git a/configs/bc/relocate/human_v1.yaml b/configs/bc/relocate/human_v1.yaml
@@ -4,11 +4,11 @@ checkpoints_path: null
 device: cuda
 discount: 0.99
 env: relocate-human-v1
-eval_freq: 5000
+eval_freq: 1000
 frac: 1.0
 group: bc-relocate-human-v1-multiseed-v0
 load_model: ''
-max_timesteps: 1000000
+max_timesteps: 300000
 max_traj_len: 1000
 n_episodes: 10
 name: BC

diff --git a/configs/cql/door/cloned_v1.yaml b/configs/cql/door/cloned_v1.yaml
@@ -16,10 +16,10 @@ cql_temp: 1.0
 device: cuda
 discount: 0.99
 env: door-cloned-v1
-eval_freq: 5000
+eval_freq: 1000
 group: cql-door-cloned-v1-multiseed-v0
 load_model: ''
-max_timesteps: 1000000
+max_timesteps: 300000
 n_episodes: 10
 name: CQL
 normalize: true

diff --git a/configs/cql/door/human_v1.yaml b/configs/cql/door/human_v1.yaml
@@ -16,10 +16,10 @@ cql_temp: 1.0
 device: cuda
 discount: 0.99
 env: door-human-v1
-eval_freq: 5000
+eval_freq: 1000
 group: cql-door-human-v1-multiseed-v0
 load_model: ''
-max_timesteps: 1000000
+max_timesteps: 300000
 n_episodes: 10
 name: CQL
 normalize: true

diff --git a/configs/cql/hammer/cloned_v1.yaml b/configs/cql/hammer/cloned_v1.yaml
@@ -16,10 +16,10 @@ cql_temp: 1.0
 device: cuda
 discount: 0.99
 env: hammer-cloned-v1
-eval_freq: 5000
+eval_freq: 1000
 group: cql-hammer-cloned-v1-multiseed-v0
 load_model: ''
-max_timesteps: 1000000
+max_timesteps: 300000
 n_episodes: 10
 name: CQL
 normalize: true

diff --git a/configs/cql/hammer/human_v1.yaml b/configs/cql/hammer/human_v1.yaml
@@ -16,10 +16,10 @@ cql_temp: 1.0
 device: cuda
 discount: 0.99
 env: hammer-human-v1
-eval_freq: 5000
+eval_freq: 1000
 group: cql-hammer-human-v1-multiseed-v0
 load_model: ''
-max_timesteps: 1000000
+max_timesteps: 300000
 n_episodes: 10
 name: CQL
 normalize: true

diff --git a/configs/cql/pen/cloned_v1.yaml b/configs/cql/pen/cloned_v1.yaml
@@ -16,10 +16,10 @@ cql_temp: 1.0
 device: cuda
 discount: 0.99
 env: pen-cloned-v1
-eval_freq: 5000
+eval_freq: 1000
 group: cql-pen-cloned-v1-multiseed-v0
 load_model: ''
-max_timesteps: 1000000
+max_timesteps: 300000
 n_episodes: 10
 name: CQL
 normalize: true

diff --git a/configs/cql/pen/expert_v1.yaml b/configs/cql/pen/expert_v1.yaml
@@ -16,10 +16,10 @@ cql_temp: 1.0
 device: cuda
 discount: 0.99
 env: pen-expert-v1
-eval_freq: 5000
+eval_freq: 1000
 group: cql-pen-expert-v1-multiseed-v0
 load_model: ''
-max_timesteps: 1000000
+max_timesteps: 300000
 n_episodes: 10
 name: CQL
 normalize: true

diff --git a/configs/cql/pen/human_v1.yaml b/configs/cql/pen/human_v1.yaml
@@ -16,10 +16,10 @@ cql_temp: 1.0
 device: cuda
 discount: 0.99
 env: pen-human-v1
-eval_freq: 5000
+eval_freq: 1000
 group: cql-pen-human-v1-multiseed-v0
 load_model: ''
-max_timesteps: 1000000
+max_timesteps: 300000
 n_episodes: 10
 name: CQL
 normalize: true

diff --git a/configs/cql/relocate/cloned_v1.yaml b/configs/cql/relocate/cloned_v1.yaml
@@ -16,10 +16,10 @@ cql_temp: 1.0
 device: cuda
 discount: 0.99
 env: relocate-cloned-v1
-eval_freq: 5000
+eval_freq: 1000
 group: cql-relocate-cloned-v1-multiseed-v0
 load_model: ''
-max_timesteps: 1000000
+max_timesteps: 300000
 n_episodes: 10
 name: CQL
 normalize: true

diff --git a/configs/cql/relocate/human_v1.yaml b/configs/cql/relocate/human_v1.yaml
@@ -16,10 +16,10 @@ cql_temp: 1.0
 device: cuda
 discount: 0.99
 env: relocate-human-v1
-eval_freq: 5000
+eval_freq: 1000
 group: cql-relocate-human-v1-multiseed-v0
 load_model: ''
-max_timesteps: 1000000
+max_timesteps: 300000
 n_episodes: 10
 name: CQL
 normalize: true

diff --git a/configs/edac/door/cloned_v1.yaml b/configs/edac/door/cloned_v1.yaml
@@ -9,7 +9,7 @@ device: cuda
 env_name: "door-cloned-v1"
 eta: 200.0
 eval_episodes: 10
-eval_every: 5
+eval_every: 1
 eval_seed: 42
 gamma: 0.99
 group: "edac-door-cloned-v1-multiseed-v2"
@@ -19,7 +19,7 @@ max_action: 1.0
 name: "EDAC"
 normalize_reward: false
 num_critics: 50
-num_epochs: 3000
+num_epochs: 300
 num_updates_on_epoch: 1000
 project: offline-RL-init
 tau: 0.005

diff --git a/configs/edac/door/human_v1.yaml b/configs/edac/door/human_v1.yaml
@@ -9,7 +9,7 @@ device: cuda
 env_name: "door-human-v1"
 eta: 200.0
 eval_episodes: 10
-eval_every: 5
+eval_every: 1
 eval_seed: 42
 gamma: 0.99
 group: "edac-door-human-v1-multiseed-v2"
@@ -19,7 +19,7 @@ max_action: 1.0
 name: "EDAC"
 normalize_reward: false
 num_critics: 50
-num_epochs: 3000
+num_epochs: 300
 num_updates_on_epoch: 1000
 project: offline-RL-init
 tau: 0.005

diff --git a/configs/edac/halfcheetah/full_replay_v2.yaml b/configs/edac/halfcheetah/full_replay_v2.yaml
@@ -9,7 +9,7 @@ device: cuda
 env_name: "halfcheetah-full-replay-v2"
 eta: 1.0
 eval_episodes: 10
-eval_every: 5
+eval_every: 1
 eval_seed: 42
 gamma: 0.99
 group: "edac-halfcheetah-full-replay-v2-multiseed-v2"
@@ -19,7 +19,7 @@ max_action: 1.0
 name: "EDAC"
 normalize_reward: false
 num_critics: 10
-num_epochs: 3000
+num_epochs: 1000
 num_updates_on_epoch: 1000
 project: offline-RL-init
 tau: 0.005

diff --git a/configs/edac/halfcheetah/medium_replay_v2.yaml b/configs/edac/halfcheetah/medium_replay_v2.yaml
@@ -9,7 +9,7 @@ device: cuda
 env_name: "halfcheetah-medium-replay-v2"
 eta: 1.0
 eval_episodes: 10
-eval_every: 5
+eval_every: 1
 eval_seed: 42
 gamma: 0.99
 group: "edac-halfcheetah-medium-replay-v2-multiseed-v2"
@@ -19,7 +19,7 @@ max_action: 1.0
 name: "EDAC"
 normalize_reward: false
 num_critics: 10
-num_epochs: 3000
+num_epochs: 1000
 num_updates_on_epoch: 1000
 project: offline-RL-init
 tau: 0.005

diff --git a/configs/edac/halfcheetah/medium_v2.yaml b/configs/edac/halfcheetah/medium_v2.yaml
@@ -9,7 +9,7 @@ device: cuda
 env_name: "halfcheetah-medium-v2"
 eta: 1.0
 eval_episodes: 10
-eval_every: 5
+eval_every: 1
 eval_seed: 42
 gamma: 0.99
 group: "edac-halfcheetah-medium-v2-multiseed-v2"
@@ -19,7 +19,7 @@ max_action: 1.0
 name: "EDAC"
 normalize_reward: false
 num_critics: 10
-num_epochs: 3000
+num_epochs: 1000
 num_updates_on_epoch: 1000
 project: offline-RL-init
 tau: 0.005

diff --git a/configs/edac/hammer/cloned_v1.yaml b/configs/edac/hammer/cloned_v1.yaml
@@ -9,7 +9,7 @@ device: cuda
 env_name: "hammer-cloned-v1"
 eta: 200.0
 eval_episodes: 10
-eval_every: 5
+eval_every: 1
 eval_seed: 42
 gamma: 0.99
 group: "edac-hammer-cloned-v1-multiseed-v2"
@@ -19,7 +19,7 @@ max_action: 1.0
 name: "EDAC"
 normalize_reward: false
 num_critics: 50
-num_epochs: 3000
+num_epochs: 300
 num_updates_on_epoch: 1000
 project: offline-RL-init
 tau: 0.005