fix ilqr max step and add step counter (#173)
* fix ilqr max step and add step counter

* remove redundancy in ilqr init
MingxuanChe authored Nov 5, 2024
1 parent 0d91ee2 commit 441b65b
Showing 1 changed file with 22 additions and 10 deletions.
safe_control_gym/controllers/lqr/ilqr.py (22 additions, 10 deletions)
@@ -1,4 +1,4 @@
- '''Linear Quadratic Regulator (LQR)
+ '''iterative Linear Quadratic Regulator (iLQR)
[1] https://studywolf.wordpress.com/2016/02/03/the-iterative-linear-quadratic-regulator-method/
[2] https://arxiv.org/pdf/1708.09342.pdf
@@ -39,7 +39,10 @@ def __init__(
max_iterations (int): The number of iterations to train iLQR.
lamb_factor (float): The amount for which to increase lambda when training fails.
lamb_max (float): The maximum lambda allowed.
- epsilon (float): The convergence tolerance.
+ epsilon (float): The convergence tolerance of the cost function.
+ Note: This implementation has a Hessian regularization term lambda
+ to make sure the Hessian H is well-conditioned for inversion. See [1] for more details.
'''

super().__init__(env_func, **kwargs)
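The note above refers to the regularization trick from [1]: before the control Hessian is inverted in the backward pass, negative eigenvalues are discarded and the remaining spectrum is shifted by lambda, and lambda is increased by lamb_factor (up to lamb_max) whenever an iteration fails to reduce the cost. A minimal numpy sketch of that inversion, using illustrative names (H, lamb) rather than the identifiers in ilqr.py:

import numpy as np

def regularized_inverse(H, lamb):
    '''Sketch of the Levenberg-Marquardt-style regularization described in [1],
    not the code in this commit: drop negative curvature, shift the spectrum by
    lamb, and rebuild the inverse of the (symmetric) control Hessian.'''
    eigvals, eigvecs = np.linalg.eigh(H)     # assumes H is symmetric
    eigvals = np.clip(eigvals, 0.0, None)    # discard negative curvature
    eigvals += lamb                          # keep the eigenvalues away from zero
    return eigvecs @ np.diag(1.0 / eigvals) @ eigvecs.T

In [1], lambda is also decreased again after a successful iteration, so the regularization fades once the backward pass is well behaved.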
@@ -98,9 +101,13 @@ def learn(self, env=None, **kwargs):
# Initialize previous cost
self.previous_total_cost = -float('inf')

+ # determine the maximum number of steps
+ self.max_steps = int(self.env.CTRL_FREQ * self.env.EPISODE_LEN_SEC)

# Loop through iLQR iterations
while self.ite_counter < self.max_iterations:
- self.run(env=env, training=True)
+ self.traj_step = 0
+ self.run(env=env, max_steps=self.max_steps, training=True)

# Save data and update policy if iteration is finished.
self.state_stack = np.vstack((self.state_stack, self.final_obs))
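Before this change, learn() called run() without a step budget, so each training rollout fell back on run()'s default of max_steps=500 (visible in the last hunk below) regardless of the configured episode length. The new lines cap the rollout at the episode length expressed in control steps and restart the trajectory counter at the start of every iLQR iteration. A small illustration with made-up numbers (50 Hz and 5 s are assumptions, not values taken from the repository's configs):

CTRL_FREQ = 50                                # control frequency in Hz (hypothetical)
EPISODE_LEN_SEC = 5                           # episode length in seconds (hypothetical)
max_steps = int(CTRL_FREQ * EPISODE_LEN_SEC)  # 250 control steps per rollout

for iteration in range(3):                    # stands in for the while-loop above
    traj_step = 0                             # trajectory index restarts every iteration
    # run(env, max_steps=max_steps, ...) would then step the env at most 250 times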
@@ -174,6 +181,8 @@ def learn(self, env=None, **kwargs):

self.ite_counter += 1

+ self.reset()

def update_policy(self, env):
'''Updates policy.
@@ -275,29 +284,31 @@ def select_action(self, obs, info=None, training=False):
Args:
obs (ndarray): The observation at this timestep.
info (dict): The info at this timestep.
training (bool): Whether the algorithm is training or evaluating.
Returns:
action (ndarray): The action chosen by the controller.
'''

step = self.extract_step(info)

if training:
if self.ite_counter == 0:
- action, gains_fb, input_ff = self.calculate_lqr_action(obs, step)
+ action, gains_fb, input_ff = self.calculate_lqr_action(obs, self.traj_step)
# Save gains and feedforward term
- if step == 0:
+ if self.traj_step == 0:
self.gains_fb = gains_fb.reshape((1, self.model.nu, self.model.nx))
self.input_ff = input_ff.reshape(self.model.nu, 1)
else:
self.gains_fb = np.append(self.gains_fb, gains_fb.reshape((1, self.model.nu, self.model.nx)), axis=0)
self.input_ff = np.append(self.input_ff, input_ff.reshape(self.model.nu, 1), axis=1)
else:
- action = self.gains_fb[step].dot(obs) + self.input_ff[:, step]
+ action = self.gains_fb[self.traj_step].dot(obs) + self.input_ff[:, self.traj_step]
elif self.gains_fb_best is not None:
- action = self.gains_fb_best[step].dot(obs) + self.input_ff_best[:, step]
+ action = self.gains_fb_best[self.traj_step].dot(obs) + self.input_ff_best[:, self.traj_step]
else:
- action, _, _ = self.calculate_lqr_action(obs, step)
+ action, _, _ = self.calculate_lqr_action(obs, self.traj_step)

+ if self.traj_step < self.max_steps - 1:
+     self.traj_step += 1

return action
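
In this hunk, once the first training iteration has produced gains (and during evaluation, when gains_fb_best is available), the action is a time-varying LQR law indexed by the controller's own traj_step instead of the step extracted from info, and the counter saturates at max_steps - 1 so the last stored gain is reused rather than indexing past the end of the arrays. A self-contained sketch of that indexing pattern; the function and argument names are placeholders, not the class attributes:

import numpy as np

def time_varying_feedback(obs, gains_fb, input_ff, traj_step, max_steps):
    '''Illustrative version of the indexing above: apply the stored feedback gain
    and feedforward term for the current step, then advance the counter,
    saturating at max_steps - 1. Names are placeholders, not ilqr.py attributes.'''
    action = gains_fb[traj_step] @ obs + input_ff[:, traj_step]
    if traj_step < max_steps - 1:
        traj_step += 1
    return action, traj_step

# Shapes mirror the reshapes in the hunk: gains_fb is (T, nu, nx), input_ff is (nu, T).
nx, nu, T = 4, 2, 250
action, step = time_varying_feedback(np.zeros(nx), np.zeros((T, nu, nx)), np.zeros((nu, T)), 0, T)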

@@ -330,6 +341,7 @@ def reset(self):
'''Prepares for evaluation.'''
self.env.reset()
self.ite_counter = 0
+ self.traj_step = 0

def run(self, env=None, max_steps=500, training=True):
'''Runs evaluation with current policy.
