forked from utiasDSL/safe-control-gym
-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
pizarrob
committed
Aug 29, 2023
1 parent
5f4c5ce
commit c4d851a
Showing
1,919 changed files
with
385,242 additions
and
22 deletions.
There are no files selected for viewing
156 changes: 156 additions & 0 deletions
156
experiments/mpsc/models/rl_models/cartpole/stab/ppo/none_es_pen/config.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,156 @@ | ||
algo: ppo | ||
algo_config: | ||
actor_lr: 0.0003 | ||
clip_obs: 10.0 | ||
clip_param: 0.2 | ||
clip_reward: 10.0 | ||
critic_lr: 0.001 | ||
deque_size: 10 | ||
entropy_coef: 0.01 | ||
eval_batch_size: 10 | ||
eval_interval: 1000 | ||
eval_save_best: true | ||
filter_train_actions: false | ||
gae_lambda: 0.95 | ||
gamma: 0.99 | ||
hidden_dim: 64 | ||
log_interval: 1000 | ||
max_env_steps: 200000 | ||
max_grad_norm: 0.5 | ||
mini_batch_size: 64 | ||
norm_obs: false | ||
norm_reward: false | ||
num_checkpoints: 0 | ||
num_workers: 1 | ||
opt_epochs: 10 | ||
penalize_sf_diff: true | ||
pretrained: ./models/rl_models/cartpole/stab/ppo_pretrain/ | ||
rollout_batch_size: 1 | ||
rollout_steps: 100 | ||
save_interval: 10000 | ||
sf_penalty: 300 | ||
target_kl: 0.01 | ||
tensorboard: false | ||
training: true | ||
use_clipped_value: false | ||
use_gae: true | ||
use_safe_reset: false | ||
device: cpu | ||
kv_overrides: | ||
- task_config.init_state=None | ||
- sf_config.cost_function=one_step_cost | ||
- sf_config.mpsc_cost_horizon=2 | ||
- sf_config.decay_factor=0.85 | ||
- sf_config.soften_constraints=True | ||
- algo_config.filter_train_actions=False | ||
- algo_config.use_safe_reset=False | ||
- task_config.done_on_violation=True | ||
- algo_config.penalize_sf_diff=True | ||
- algo_config.pretrained=./models/rl_models/cartpole/stab/ppo_pretrain/ | ||
output_dir: ./models/rl_models/cartpole/stab/ppo/none_es_pen/ | ||
overrides: | ||
- ./config_overrides/cartpole/ppo_cartpole.yaml | ||
- ./config_overrides/cartpole/cartpole_stab.yaml | ||
- ./config_overrides/cartpole/nl_mpsc_cartpole_linear.yaml | ||
restore: null | ||
safety_filter: nl_mpsc | ||
seed: 2 | ||
sf_config: | ||
cost_function: one_step_cost | ||
decay_factor: 0.85 | ||
horizon: 5 | ||
integration_algo: LTI | ||
mpsc_cost_horizon: 2 | ||
n_samples: 6000 | ||
prior_info: | ||
prior_prop: null | ||
prior_prop_rand_info: null | ||
randomize_prior_prop: false | ||
q_lin: | ||
- 0.02 | ||
- 0.001 | ||
- 10 | ||
- 0.5 | ||
r_lin: | ||
- 0.1 | ||
slack_cost: 200 | ||
soften_constraints: true | ||
use_terminal_set: false | ||
warmstart: true | ||
tag: temp | ||
task: cartpole | ||
task_config: | ||
adversary_disturbance: null | ||
adversary_disturbance_offset: 0.0 | ||
adversary_disturbance_scale: 0.01 | ||
constraint_penalty: -1 | ||
constraints: | ||
- constrained_variable: state | ||
constraint_form: default_constraint | ||
lower_bounds: | ||
- -2 | ||
- -2 | ||
- -0.16 | ||
- -1 | ||
upper_bounds: | ||
- 2 | ||
- 2 | ||
- 0.16 | ||
- 1 | ||
- constrained_variable: input | ||
constraint_form: default_constraint | ||
cost: rl_reward | ||
ctrl_freq: 15 | ||
disturbances: null | ||
done_on_out_of_bound: true | ||
done_on_violation: true | ||
episode_len_sec: 10 | ||
gui: false | ||
inertial_prop: | ||
cart_mass: 1 | ||
pole_length: 0.5 | ||
pole_mass: 0.1 | ||
inertial_prop_randomization_info: null | ||
info_in_reset: true | ||
init_state: null | ||
init_state_randomization_info: | ||
init_theta: | ||
distrib: uniform | ||
high: 0.16 | ||
low: -0.16 | ||
init_theta_dot: | ||
distrib: uniform | ||
high: 1 | ||
low: -1 | ||
init_x: | ||
distrib: uniform | ||
high: 2 | ||
low: -2 | ||
init_x_dot: | ||
distrib: uniform | ||
high: 2 | ||
low: -2 | ||
normalized_rl_action_space: true | ||
obs_goal_horizon: 0 | ||
obs_wrap_angle: false | ||
physics: pyb | ||
pyb_freq: 750 | ||
randomized_inertial_prop: false | ||
randomized_init: true | ||
rew_act_weight: 0.1 | ||
rew_exponential: true | ||
rew_state_weight: | ||
- 1 | ||
- 1 | ||
- 1 | ||
- 1 | ||
seed: 4077 | ||
task: stabilization | ||
task_info: | ||
stabilization_goal: | ||
- 0.7 | ||
- 0 | ||
stabilization_goal_tolerance: 0.0 | ||
use_constraint_penalty: false | ||
verbose: false | ||
use_gpu: false |
201 changes: 201 additions & 0 deletions
201
experiments/mpsc/models/rl_models/cartpole/stab/ppo/none_es_pen/logs/loss/approx_kl.log
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,201 @@ | ||
step,loss/approx_kl | ||
1000,0.008729896880686283 | ||
2000,0.000734184356406331 | ||
3000,0.00291068609803915 | ||
4000,-1.3645272701978684e-06 | ||
5000,0.006614508526399732 | ||
6000,0.01544560412876308 | ||
7000,0.0006366211455315351 | ||
8000,0.004890881758183241 | ||
9000,0.0023412187583744526 | ||
10000,0.005613256618380547 | ||
11000,0.007228300301358103 | ||
12000,-0.00400420562364161 | ||
13000,0.006578619172796607 | ||
14000,-0.0005445972084999085 | ||
15000,0.008278616517782212 | ||
16000,0.011816525785252451 | ||
17000,-0.0023400879930704833 | ||
18000,0.008586719632148743 | ||
19000,-0.0006781108677387238 | ||
20000,0.005032122880220413 | ||
21000,-0.005462524993345141 | ||
22000,0.002917123306542635 | ||
23000,-0.00032871197909116744 | ||
24000,0.0036752334795892238 | ||
25000,0.01376691684126854 | ||
26000,0.01612484287470579 | ||
27000,0.006670318730175495 | ||
28000,-0.0013380751013755798 | ||
29000,0.005172924604266882 | ||
30000,0.0046507432125508785 | ||
31000,0.010902715567499398 | ||
32000,0.016281079314649106 | ||
33000,0.013475347775965929 | ||
34000,0.002621723897755146 | ||
35000,0.01190365944057703 | ||
36000,0.0016663327813148498 | ||
37000,0.0003160743974149227 | ||
38000,0.001576053723692894 | ||
39000,0.004924441315233707 | ||
40000,0.0024348248727619646 | ||
41000,0.0017095220275223256 | ||
42000,-5.503920838236809e-05 | ||
43000,0.003813410922884941 | ||
44000,0.013407559320330619 | ||
45000,0.0015245947055518628 | ||
46000,0.02596539622172713 | ||
47000,-0.00210944339632988 | ||
48000,0.0023835944943130017 | ||
49000,0.02264212230220437 | ||
50000,0.02199324043467641 | ||
51000,0.004448459204286337 | ||
52000,0.009436118137091398 | ||
53000,-0.0010799305513501168 | ||
54000,0.00649948064237833 | ||
55000,-0.001802628394216299 | ||
56000,-0.02099919691681862 | ||
57000,0.0039390803314745424 | ||
58000,0.0007740736007690429 | ||
59000,0.00951544800773263 | ||
60000,0.004025054536759854 | ||
61000,0.01524742990732193 | ||
62000,0.009491441771388054 | ||
63000,0.012673575524240732 | ||
64000,0.0027870127931237222 | ||
65000,0.007525280769914389 | ||
66000,0.004612566251307726 | ||
67000,0.026369661558419466 | ||
68000,-0.0025882338173687457 | ||
69000,-0.01061791181564331 | ||
70000,0.015068107564002276 | ||
71000,0.00829745875671506 | ||
72000,0.012917507626116275 | ||
73000,-0.013731067907065153 | ||
74000,0.01958571085706353 | ||
75000,-0.0001483442261815071 | ||
76000,-0.0011200965382158756 | ||
77000,0.0008953817188739777 | ||
78000,0.0002586793154478073 | ||
79000,-0.008776371274143458 | ||
80000,0.014530325587838889 | ||
81000,0.01274836016818881 | ||
82000,0.007373989932239056 | ||
83000,0.012372850067913532 | ||
84000,0.002016447763890028 | ||
85000,0.020060299336910246 | ||
86000,0.020874370634555817 | ||
87000,0.008414185792207717 | ||
88000,0.01311216652393341 | ||
89000,0.009297892451286316 | ||
90000,0.005775431916117668 | ||
91000,0.020424895361065863 | ||
92000,0.005889152362942696 | ||
93000,0.0018017305061221122 | ||
94000,0.003054761327803135 | ||
95000,0.0019522182643413543 | ||
96000,0.0033927926793694495 | ||
97000,0.021219123527407645 | ||
98000,0.0018061263486742972 | ||
99000,0.01697399131953716 | ||
100000,0.013516435213387013 | ||
101000,0.00785445123910904 | ||
102000,-0.006084371916949749 | ||
103000,0.011851174943149089 | ||
104000,0.001215551793575287 | ||
105000,0.00013522040098905562 | ||
106000,-0.00922881830483675 | ||
107000,0.00014359671622514724 | ||
108000,0.001694943942129612 | ||
109000,0.0011710179969668388 | ||
110000,0.007342372089624405 | ||
111000,0.010471208393573761 | ||
112000,0.0034814819693565367 | ||
113000,0.026840898394584655 | ||
114000,0.020728708431124687 | ||
115000,0.005181179195642471 | ||
116000,0.03090306296944618 | ||
117000,0.028314807452261448 | ||
118000,0.011181743815541267 | ||
119000,0.00705907866358757 | ||
120000,0.014344541914761066 | ||
121000,0.029730680398643017 | ||
122000,0.004351761005818844 | ||
123000,-0.0003058729693293571 | ||
124000,0.03268703408539295 | ||
125000,0.01545734889805317 | ||
126000,-0.006995952501893043 | ||
127000,0.0071522252634167675 | ||
128000,0.004483654722571373 | ||
129000,0.011046481132507325 | ||
130000,0.004308021441102028 | ||
131000,-0.00540410652756691 | ||
132000,0.023018458113074302 | ||
133000,0.007347137667238712 | ||
134000,0.01149166990071535 | ||
135000,0.005793794244527817 | ||
136000,0.011545461602509022 | ||
137000,0.016494561359286307 | ||
138000,0.006517043337225914 | ||
139000,0.0030543511733412743 | ||
140000,0.011217675916850568 | ||
141000,0.010120180808007717 | ||
142000,0.0026907706633210182 | ||
143000,0.02641407437622547 | ||
144000,0.019319997914135457 | ||
145000,0.014785639569163322 | ||
146000,0.02574730645865202 | ||
147000,0.011972497962415219 | ||
148000,0.013819411769509316 | ||
149000,0.012093486823141575 | ||
150000,0.004326407425105571 | ||
151000,0.0291291618719697 | ||
152000,-0.004336457327008247 | ||
153000,0.013431697338819503 | ||
154000,0.001855066791176796 | ||
155000,0.014500624127686023 | ||
156000,0.007786365784704685 | ||
157000,0.007402325235307217 | ||
158000,0.030843445658683778 | ||
159000,0.00877070315182209 | ||
160000,-0.007646743021905422 | ||
161000,0.013534646108746528 | ||
162000,0.023433731868863106 | ||
163000,-0.007944797910749913 | ||
164000,0.024833100289106368 | ||
165000,0.011420492641627788 | ||
166000,0.010067783296108246 | ||
167000,0.017492294125258923 | ||
168000,0.012120826728641986 | ||
169000,-0.0002240002155303955 | ||
170000,0.021745909191668033 | ||
171000,0.008001739159226418 | ||
172000,0.016407574340701104 | ||
173000,0.03414605166763067 | ||
174000,-0.001893002912402153 | ||
175000,0.028219463862478734 | ||
176000,0.008424596674740315 | ||
177000,0.02602416221052408 | ||
178000,-0.0038695666939020158 | ||
179000,-0.007138722948729992 | ||
180000,0.00912117026746273 | ||
181000,-0.0017202425748109817 | ||
182000,0.05795675590634346 | ||
183000,0.01762190368026495 | ||
184000,0.0016452165320515633 | ||
185000,0.026887317188084126 | ||
186000,0.03456468842923641 | ||
187000,0.01526106745004654 | ||
188000,0.01378321908414364 | ||
189000,0.015503882616758346 | ||
190000,0.0011705541983246803 | ||
191000,0.05174665879458189 | ||
192000,0.02685304842889309 | ||
193000,0.010335128381848335 | ||
194000,0.008676587790250778 | ||
195000,0.0018693547695875167 | ||
196000,0.029819188080728053 | ||
197000,0.011678414605557918 | ||
198000,0.006377792358398438 | ||
199000,0.054620143957436085 | ||
200000,1.837722957134247e-05 |
Oops, something went wrong.