Skip to content

Commit

Permalink
Adding results when none + pen
Browse files Browse the repository at this point in the history
  • Loading branch information
pizarrob committed Aug 29, 2023
1 parent 5f4c5ce commit c4d851a
Show file tree
Hide file tree
Showing 1,919 changed files with 385,242 additions and 22 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
algo: ppo
algo_config:
actor_lr: 0.0003
clip_obs: 10.0
clip_param: 0.2
clip_reward: 10.0
critic_lr: 0.001
deque_size: 10
entropy_coef: 0.01
eval_batch_size: 10
eval_interval: 1000
eval_save_best: true
filter_train_actions: false
gae_lambda: 0.95
gamma: 0.99
hidden_dim: 64
log_interval: 1000
max_env_steps: 200000
max_grad_norm: 0.5
mini_batch_size: 64
norm_obs: false
norm_reward: false
num_checkpoints: 0
num_workers: 1
opt_epochs: 10
penalize_sf_diff: true
pretrained: ./models/rl_models/cartpole/stab/ppo_pretrain/
rollout_batch_size: 1
rollout_steps: 100
save_interval: 10000
sf_penalty: 300
target_kl: 0.01
tensorboard: false
training: true
use_clipped_value: false
use_gae: true
use_safe_reset: false
device: cpu
kv_overrides:
- task_config.init_state=None
- sf_config.cost_function=one_step_cost
- sf_config.mpsc_cost_horizon=2
- sf_config.decay_factor=0.85
- sf_config.soften_constraints=True
- algo_config.filter_train_actions=False
- algo_config.use_safe_reset=False
- task_config.done_on_violation=True
- algo_config.penalize_sf_diff=True
- algo_config.pretrained=./models/rl_models/cartpole/stab/ppo_pretrain/
output_dir: ./models/rl_models/cartpole/stab/ppo/none_es_pen/
overrides:
- ./config_overrides/cartpole/ppo_cartpole.yaml
- ./config_overrides/cartpole/cartpole_stab.yaml
- ./config_overrides/cartpole/nl_mpsc_cartpole_linear.yaml
restore: null
safety_filter: nl_mpsc
seed: 2
sf_config:
cost_function: one_step_cost
decay_factor: 0.85
horizon: 5
integration_algo: LTI
mpsc_cost_horizon: 2
n_samples: 6000
prior_info:
prior_prop: null
prior_prop_rand_info: null
randomize_prior_prop: false
q_lin:
- 0.02
- 0.001
- 10
- 0.5
r_lin:
- 0.1
slack_cost: 200
soften_constraints: true
use_terminal_set: false
warmstart: true
tag: temp
task: cartpole
task_config:
adversary_disturbance: null
adversary_disturbance_offset: 0.0
adversary_disturbance_scale: 0.01
constraint_penalty: -1
constraints:
- constrained_variable: state
constraint_form: default_constraint
lower_bounds:
- -2
- -2
- -0.16
- -1
upper_bounds:
- 2
- 2
- 0.16
- 1
- constrained_variable: input
constraint_form: default_constraint
cost: rl_reward
ctrl_freq: 15
disturbances: null
done_on_out_of_bound: true
done_on_violation: true
episode_len_sec: 10
gui: false
inertial_prop:
cart_mass: 1
pole_length: 0.5
pole_mass: 0.1
inertial_prop_randomization_info: null
info_in_reset: true
init_state: null
init_state_randomization_info:
init_theta:
distrib: uniform
high: 0.16
low: -0.16
init_theta_dot:
distrib: uniform
high: 1
low: -1
init_x:
distrib: uniform
high: 2
low: -2
init_x_dot:
distrib: uniform
high: 2
low: -2
normalized_rl_action_space: true
obs_goal_horizon: 0
obs_wrap_angle: false
physics: pyb
pyb_freq: 750
randomized_inertial_prop: false
randomized_init: true
rew_act_weight: 0.1
rew_exponential: true
rew_state_weight:
- 1
- 1
- 1
- 1
seed: 4077
task: stabilization
task_info:
stabilization_goal:
- 0.7
- 0
stabilization_goal_tolerance: 0.0
use_constraint_penalty: false
verbose: false
use_gpu: false
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
step,loss/approx_kl
1000,0.008729896880686283
2000,0.000734184356406331
3000,0.00291068609803915
4000,-1.3645272701978684e-06
5000,0.006614508526399732
6000,0.01544560412876308
7000,0.0006366211455315351
8000,0.004890881758183241
9000,0.0023412187583744526
10000,0.005613256618380547
11000,0.007228300301358103
12000,-0.00400420562364161
13000,0.006578619172796607
14000,-0.0005445972084999085
15000,0.008278616517782212
16000,0.011816525785252451
17000,-0.0023400879930704833
18000,0.008586719632148743
19000,-0.0006781108677387238
20000,0.005032122880220413
21000,-0.005462524993345141
22000,0.002917123306542635
23000,-0.00032871197909116744
24000,0.0036752334795892238
25000,0.01376691684126854
26000,0.01612484287470579
27000,0.006670318730175495
28000,-0.0013380751013755798
29000,0.005172924604266882
30000,0.0046507432125508785
31000,0.010902715567499398
32000,0.016281079314649106
33000,0.013475347775965929
34000,0.002621723897755146
35000,0.01190365944057703
36000,0.0016663327813148498
37000,0.0003160743974149227
38000,0.001576053723692894
39000,0.004924441315233707
40000,0.0024348248727619646
41000,0.0017095220275223256
42000,-5.503920838236809e-05
43000,0.003813410922884941
44000,0.013407559320330619
45000,0.0015245947055518628
46000,0.02596539622172713
47000,-0.00210944339632988
48000,0.0023835944943130017
49000,0.02264212230220437
50000,0.02199324043467641
51000,0.004448459204286337
52000,0.009436118137091398
53000,-0.0010799305513501168
54000,0.00649948064237833
55000,-0.001802628394216299
56000,-0.02099919691681862
57000,0.0039390803314745424
58000,0.0007740736007690429
59000,0.00951544800773263
60000,0.004025054536759854
61000,0.01524742990732193
62000,0.009491441771388054
63000,0.012673575524240732
64000,0.0027870127931237222
65000,0.007525280769914389
66000,0.004612566251307726
67000,0.026369661558419466
68000,-0.0025882338173687457
69000,-0.01061791181564331
70000,0.015068107564002276
71000,0.00829745875671506
72000,0.012917507626116275
73000,-0.013731067907065153
74000,0.01958571085706353
75000,-0.0001483442261815071
76000,-0.0011200965382158756
77000,0.0008953817188739777
78000,0.0002586793154478073
79000,-0.008776371274143458
80000,0.014530325587838889
81000,0.01274836016818881
82000,0.007373989932239056
83000,0.012372850067913532
84000,0.002016447763890028
85000,0.020060299336910246
86000,0.020874370634555817
87000,0.008414185792207717
88000,0.01311216652393341
89000,0.009297892451286316
90000,0.005775431916117668
91000,0.020424895361065863
92000,0.005889152362942696
93000,0.0018017305061221122
94000,0.003054761327803135
95000,0.0019522182643413543
96000,0.0033927926793694495
97000,0.021219123527407645
98000,0.0018061263486742972
99000,0.01697399131953716
100000,0.013516435213387013
101000,0.00785445123910904
102000,-0.006084371916949749
103000,0.011851174943149089
104000,0.001215551793575287
105000,0.00013522040098905562
106000,-0.00922881830483675
107000,0.00014359671622514724
108000,0.001694943942129612
109000,0.0011710179969668388
110000,0.007342372089624405
111000,0.010471208393573761
112000,0.0034814819693565367
113000,0.026840898394584655
114000,0.020728708431124687
115000,0.005181179195642471
116000,0.03090306296944618
117000,0.028314807452261448
118000,0.011181743815541267
119000,0.00705907866358757
120000,0.014344541914761066
121000,0.029730680398643017
122000,0.004351761005818844
123000,-0.0003058729693293571
124000,0.03268703408539295
125000,0.01545734889805317
126000,-0.006995952501893043
127000,0.0071522252634167675
128000,0.004483654722571373
129000,0.011046481132507325
130000,0.004308021441102028
131000,-0.00540410652756691
132000,0.023018458113074302
133000,0.007347137667238712
134000,0.01149166990071535
135000,0.005793794244527817
136000,0.011545461602509022
137000,0.016494561359286307
138000,0.006517043337225914
139000,0.0030543511733412743
140000,0.011217675916850568
141000,0.010120180808007717
142000,0.0026907706633210182
143000,0.02641407437622547
144000,0.019319997914135457
145000,0.014785639569163322
146000,0.02574730645865202
147000,0.011972497962415219
148000,0.013819411769509316
149000,0.012093486823141575
150000,0.004326407425105571
151000,0.0291291618719697
152000,-0.004336457327008247
153000,0.013431697338819503
154000,0.001855066791176796
155000,0.014500624127686023
156000,0.007786365784704685
157000,0.007402325235307217
158000,0.030843445658683778
159000,0.00877070315182209
160000,-0.007646743021905422
161000,0.013534646108746528
162000,0.023433731868863106
163000,-0.007944797910749913
164000,0.024833100289106368
165000,0.011420492641627788
166000,0.010067783296108246
167000,0.017492294125258923
168000,0.012120826728641986
169000,-0.0002240002155303955
170000,0.021745909191668033
171000,0.008001739159226418
172000,0.016407574340701104
173000,0.03414605166763067
174000,-0.001893002912402153
175000,0.028219463862478734
176000,0.008424596674740315
177000,0.02602416221052408
178000,-0.0038695666939020158
179000,-0.007138722948729992
180000,0.00912117026746273
181000,-0.0017202425748109817
182000,0.05795675590634346
183000,0.01762190368026495
184000,0.0016452165320515633
185000,0.026887317188084126
186000,0.03456468842923641
187000,0.01526106745004654
188000,0.01378321908414364
189000,0.015503882616758346
190000,0.0011705541983246803
191000,0.05174665879458189
192000,0.02685304842889309
193000,0.010335128381848335
194000,0.008676587790250778
195000,0.0018693547695875167
196000,0.029819188080728053
197000,0.011678414605557918
198000,0.006377792358398438
199000,0.054620143957436085
200000,1.837722957134247e-05
Loading

0 comments on commit c4d851a

Please sign in to comment.