Skip to content

Commit

Permalink
Implement standardized advantages; converges like a boss
Browse files Browse the repository at this point in the history
  • Loading branch information
frank-lsf committed Sep 26, 2020
1 parent 856adab commit ab39493
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 4 deletions.
6 changes: 3 additions & 3 deletions hw2/cs285/agents/pg_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from .base_agent import BaseAgent
from cs285.policies.MLP_policy import MLPPolicyPG
from cs285.infrastructure.replay_buffer import ReplayBuffer
from cs285.infrastructure.utils import normalize


class PGAgent(BaseAgent):
Expand Down Expand Up @@ -97,10 +98,9 @@ def estimate_advantage(self, obs, q_values):

# Normalize the resulting advantages
if self.standardize_advantages:
## TODO: standardize the advantages to have a mean of zero
## standardize the advantages to have a mean of zero
## and a standard deviation of one
## HINT: there is a `normalize` function in `infrastructure.utils`
advantages = TODO
advantages = normalize(advantages)

return advantages

Expand Down
6 changes: 5 additions & 1 deletion hw2/cs285/infrastructure/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,11 @@ def get_pathlength(path):
return len(path["reward"])


def normalize(data, mean, std, eps=1e-8):
def normalize(data, mean=None, std=None, eps=1e-8):
    """Standardize `data` as ``(data - mean) / (std + eps)``.

    Args:
        data: Array of values to standardize.
        mean: Value subtracted from `data`. When None, ``np.mean(data)``
            is used (no axis is passed, so it is taken over all elements).
        std: Value `data` is scaled by. When None, ``np.std(data)`` is
            used (again over all elements).
        eps: Small constant added to `std` so that constant-valued data
            (std == 0) does not cause a division by zero.

    Returns:
        The shifted and scaled data. With the default statistics the
        result has mean ~0 and standard deviation ~1.
    """
    # Fall back to the data's own statistics only for the pieces the
    # caller did not supply, so explicit mean/std keep working as before.
    if mean is None:
        mean = np.mean(data)
    if std is None:
        std = np.std(data)
    return (data - mean) / (std + eps)


Expand Down

0 comments on commit ab39493

Please sign in to comment.