Truncated is always False
chrisyeh96 committed Sep 20, 2023
1 parent 884b33f commit edb8d78
Showing 3 changed files with 11 additions and 23 deletions.
2 changes: 1 addition & 1 deletion docs/electricitymarketenv.md
@@ -12,7 +12,7 @@
$$
s(t) = (t, e, a(t-1), x_{t-1}, p_{t-1}, l_{t-1}, \hat{l}_{t:t+k-1}, m_{t-1}, \hat{m}_{t:t+k-1 \mid t}).
$$

- $t \in \{0, 1, \dotsc, 288\}$ is the current time step. $e \in \R_+$ is the agent's battery level (in MWh). $a(t-1) \in \R_+^{2 \times k}$ is the previous action. $x_{t-1} \in \R$ is the previous dispatch (in MWh) asked of the agent, and $p_{t-1} \in \R$ is the market clearing price from the previous step (in \$/MWh). $l_{t-1} \in \R$ is the previous demand experienced by the agent (in MWh), while $\hat{l}_{t:t+k-1} \in \R^k$ is the forecasted demand for the next $k$ steps. Likewise, $m_{t-1} \in \R$ is the previous MOER experienced by the agent (in kg CO<sub>2</sub> / MWh), while $\hat{m}_{t:t+k-1 \mid t} \in \R^k$ are the forecasted MOER values for the next $$k$$ steps.
+ $t \in \{0, 1, \dotsc, 288\}$ is the current time step. $e \in \R_+$ is the agent's battery level (in MWh). $a(t-1) \in \R_+^{2 \times k}$ is the previous action. $x_{t-1} \in \R$ is the previous dispatch (in MWh) asked of the agent, and $p_{t-1} \in \R$ is the market clearing price from the previous step (in \$/MWh). $l_{t-1} \in \R$ is the previous demand experienced by the agent (in MWh), while $\hat{l}_{t:t+k-1} \in \R^k$ is the forecasted demand for the next $k$ steps. Likewise, $m_{t-1} \in \R$ is the previous MOER experienced by the agent (in kg CO<sub>2</sub> / MWh), while $\hat{m}_{t:t+k-1 \mid t} \in \R^k$ are the forecasted MOER values for the next $k$ steps.

## Action Space
Each agent action is a bid $a(t) = (a^c, a^d) \in \R_+^k \times \R_+^k$, representing prices (in \$/MWh) that the agent is willing to pay (or receive) for charging (or discharging) per MWh of energy, for the next $k+1$ time steps starting at time step $t$. The generators are assumed to always bid their fixed true cost of generation. The environment solves the optimal dispatch problem to determine the electricity price $p_t$ (in \$/MWh) and the agent's dispatch $x_t \in \R$, which is the amount of energy (in MWh) that the agent is obligated to sell into or buy from the grid within the next time step. The dispatch in turn determines the storage system's next energy level. We also provide a wrapper that discretizes the action space into just three actions: charge, do nothing, or discharge.
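As a quick illustration of the state and bid described above, the sketch below declares analogous Gymnasium spaces. It is illustrative only: the key names, the horizon `k`, and the bounds are assumptions, not taken from SustainGym's actual implementation.

```python
import numpy as np
from gymnasium import spaces

k = 12  # assumed forecast horizon; the real value comes from the environment's settings

# Observation: one entry per component of the state s(t) described above.
observation_space = spaces.Dict({
    'time':            spaces.Box(0.0, 288.0, shape=(1,), dtype=np.float32),       # t
    'energy':          spaces.Box(0.0, np.inf, shape=(1,), dtype=np.float32),       # e (MWh)
    'prev_action':     spaces.Box(0.0, np.inf, shape=(2, k), dtype=np.float32),     # a(t-1)
    'prev_dispatch':   spaces.Box(-np.inf, np.inf, shape=(1,), dtype=np.float32),   # x_{t-1} (MWh)
    'prev_price':      spaces.Box(-np.inf, np.inf, shape=(1,), dtype=np.float32),   # p_{t-1} ($/MWh)
    'prev_demand':     spaces.Box(-np.inf, np.inf, shape=(1,), dtype=np.float32),   # l_{t-1} (MWh)
    'demand_forecast': spaces.Box(-np.inf, np.inf, shape=(k,), dtype=np.float32),   # demand forecast
    'prev_moer':       spaces.Box(0.0, np.inf, shape=(1,), dtype=np.float32),       # m_{t-1}
    'moer_forecast':   spaces.Box(0.0, np.inf, shape=(k,), dtype=np.float32),       # MOER forecast
})

# Action: nonnegative charge/discharge bid prices ($/MWh) over the horizon.
action_space = spaces.Box(0.0, np.inf, shape=(2, k), dtype=np.float32)
```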
25 changes: 8 additions & 17 deletions sustaingym/envs/cogen/env.py
Expand Up @@ -164,13 +164,13 @@ def _get_obs(self) -> dict[str, Any]:
"""Get the current observation.
The following values must be updated before calling self._get_obs():
- - self.current_timestep
+ - self.t
- self.current_day
- self.current_action
"""
- forecast_df = self._forecast_from_time(self.current_day, self.current_timestep)
+ forecast_df = self._forecast_from_time(self.current_day, self.t)
obs = {
- 'Time': np.array([self.current_timestep / self.timesteps_per_day], dtype=np.float32),
+ 'Time': np.array([self.t / self.timesteps_per_day], dtype=np.float32),
'Prev_Action': self.current_action,
'TAMB': forecast_df['Ambient Temperature'].values,
'PAMB': forecast_df['Ambient Pressure'].values,
@@ -213,8 +213,7 @@ def reset(self, seed: int | None = None, options: dict | None = None
else:
self.current_day = seed % self.n_days

- self.current_timestep = 0  # keeps track of which timestep we are on
- self.current_terminated = False
+ self.t = 0  # keeps track of which timestep we are on

# initial action is drawn randomly from the action space
# not sure if this is reasonable, TODO: check this
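The seeding branch above maps a reset seed onto a day of historical data. A minimal usage sketch, assuming the environment is exposed as `CogenEnv` under `sustaingym.envs.cogen` (the class name, import path, and constructor call are assumptions):

```python
from sustaingym.envs.cogen import CogenEnv  # assumed import path and class name

env = CogenEnv()                           # constructor arguments, if any, omitted
obs_a, _ = env.reset(seed=3)               # selects day 3 % env.n_days
obs_b, _ = env.reset(seed=3 + env.n_days)  # wraps around to the same day
```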
@@ -351,14 +350,6 @@ def _compute_reward(self, obs: dict[str, Any], action: dict[str, Any]
}
return total_reward, reward_breakdown

- def _terminated(self) -> bool:
-     """Determines if the episode is terminated or not.
-     Returns:
-         terminated: True if the episode is terminated, False otherwise
-     """
-     return self.current_timestep > self.timesteps_per_day - 1

def step(self, action: dict[str, Any]) -> tuple[dict[str, Any], float, bool, bool, dict[str, Any]]:
"""Run one timestep of the Cogen environment's dynamics.
@@ -369,7 +360,7 @@ def step(self, action: dict[str, Any]) -> tuple[dict[str, Any], float, bool, boo
obs: new state
reward: reward
terminated: termination flag
- truncated: always ``False``
+ truncated: always ``False``, since there is no intermediate stopping condition
info: info dict
"""
# compute the loss of taking the action
@@ -379,18 +370,18 @@ def step(self, action: dict[str, Any]) -> tuple[dict[str, Any], float, bool, boo
self.current_action = action

# update the current timestep
- self.current_timestep += 1
+ self.t += 1

# update the current observation
self.obs = self._get_obs()

# update the current done
- self.current_terminated = self._terminated()
+ terminated = (self.t >= self.timesteps_per_day)

# always False due to no intermediate stopping conditions
truncated = False

- return self.obs, self.current_reward, self.current_terminated, truncated, self.current_info
+ return self.obs, self.current_reward, terminated, truncated, self.current_info

def close(self):
return
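Since the five-element return follows the Gymnasium step API, a standard rollout runs until `terminated` becomes true; `truncated` never does on its own. A minimal sketch under the same assumed class name and import path as above:

```python
from sustaingym.envs.cogen import CogenEnv  # assumed import path and class name

env = CogenEnv()                          # constructor arguments, if any, omitted
obs, info = env.reset(seed=0)
terminated = truncated = False
while not (terminated or truncated):      # truncated stays False for this env
    action = env.action_space.sample()    # random policy, purely for illustration
    obs, reward, terminated, truncated, info = env.step(action)
env.close()
```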
7 changes: 2 additions & 5 deletions sustaingym/envs/evcharging/env.py
@@ -262,9 +262,7 @@ def step(self, action: np.ndarray
reward: scheduler's performance metric per timestep
terminated: whether episode is terminated
- truncated: whether episode has reached a time limit. Here, truncated
- is always the same as terminated because the episode is always
- across the entire day.
+ truncated: always ``False``, since there is no intermediate stopping condition
info: auxiliary useful information
- 'num_evs': int, number of charging sessions in episode.
@@ -298,8 +296,7 @@ def step(self, action: np.ndarray
reward = self._get_reward(schedule)
info = self._get_info()

- # terminated, truncated at end of day
- return observation, reward, done, done, info
+ return observation, reward, done, False, info

def reset(self, *, seed: int | None = None, options: dict | None = None
) -> tuple[dict[str, Any], dict[str, Any]]:
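Both environments now report `truncated = False` unconditionally. If a shorter episode budget is ever needed, Gymnasium's `TimeLimit` wrapper can supply the truncation signal externally. The sketch below is an assumption-laden illustration: the `EVChargingEnv` name, import path, and omitted constructor arguments are not taken from this diff.

```python
from gymnasium.wrappers import TimeLimit
from sustaingym.envs.evcharging import EVChargingEnv  # assumed import path and class name

# Wrap the env so truncated flips to True after 100 steps, even though the
# underlying env never truncates on its own.
env = TimeLimit(EVChargingEnv(), max_episode_steps=100)  # constructor arguments omitted
obs, info = env.reset(seed=0)
done = False
while not done:
    obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
    done = terminated or truncated
```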
